# Experiment 1

In [6]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Imports

In [7]:
import gc
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from torch import nn, optim

from plant_village_dataset import PlantVillageDataset
from runner import Runner
from resnet50 import ResNet50

## Prepare Data

In [8]:
# Mini-batch size handed to split() and therefore shared by every DataLoader it builds.
BATCH_SIZE = 128

In [9]:
def split(dataset, batch_size, labeled_ratio, test_ratio, val_ratio=0.2, random_state=None):
    """Partition ``dataset`` into unlabeled, train, validation and test loaders.

    The dataset is split in three stratified stages:

    1. ``labeled_ratio`` of all samples form the labeled pool; the rest are
       returned as the unlabeled set.
    2. ``test_ratio`` (expressed as a fraction of the *whole* dataset) is
       carved out of the labeled pool as the test set.
    3. ``val_ratio`` of what remains becomes the validation set; the rest is
       the training set.

    Args:
        dataset: Indexable dataset whose iteration yields ``(sample, label)``
            pairs.
        batch_size: Batch size used for all four loaders.
        labeled_ratio: Fraction of the dataset treated as labeled, in (0, 1).
        test_ratio: Fraction of the whole dataset used for testing; must be
            strictly between 0 and ``labeled_ratio``.
        val_ratio: Fraction of the non-test labeled samples used for
            validation, in (0, 1). Defaults to 0.2.
        random_state: Forwarded to every ``train_test_split`` call. Pass an
            int to make the index partition reproducible; ``None`` (default)
            keeps the previous non-deterministic behaviour. The per-epoch
            order produced by ``SubsetRandomSampler`` is not affected.

    Returns:
        Tuple ``(unlabeled_loader, train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: If any ratio is outside its valid range.
    """
    # Validate before the (expensive) label pass so bad arguments fail fast
    # with a message that names the offending parameter.
    if not 0.0 < labeled_ratio < 1.0:
        raise ValueError(f"labeled_ratio must be in (0, 1), got {labeled_ratio}")
    if not 0.0 < test_ratio < labeled_ratio:
        raise ValueError(
            f"test_ratio must be in (0, labeled_ratio={labeled_ratio}), got {test_ratio}"
        )
    if not 0.0 < val_ratio < 1.0:
        raise ValueError(f"val_ratio must be in (0, 1), got {val_ratio}")

    # NOTE: this iterates the entire dataset, materializing every sample just
    # to read its label, which is needed for stratification.
    labels = np.array([label for _, label in dataset])

    unlabeled_indices, labeled_indices = train_test_split(
        np.arange(len(dataset)),
        test_size=labeled_ratio,
        stratify=labels,
        random_state=random_state,
    )

    # test_ratio is relative to the full dataset; rescale it to the labeled pool.
    relative_test_ratio = test_ratio / labeled_ratio

    train_val_indices, test_indices = train_test_split(
        labeled_indices,
        test_size=relative_test_ratio,
        stratify=labels[labeled_indices],
        random_state=random_state,
    )

    train_indices, val_indices = train_test_split(
        train_val_indices,
        test_size=val_ratio,
        stratify=labels[train_val_indices],
        random_state=random_state,
    )

    def _make_loader(indices):
        # All loaders share the same dataset and differ only in the index subset.
        return DataLoader(dataset, batch_size=batch_size, sampler=SubsetRandomSampler(indices))

    unlabeled_loader = _make_loader(unlabeled_indices)
    train_loader = _make_loader(train_indices)
    val_loader = _make_loader(val_indices)
    test_loader = _make_loader(test_indices)

    return unlabeled_loader, train_loader, val_loader, test_loader

In [10]:
# Build the Plant Village dataset from the 'images' path. Per the captured output
# below, construction also computes the per-channel mean and standard deviation
# used for normalization, which takes about a minute.
dataset = PlantVillageDataset('images')

Loading Plant Village
 - Normalizing dataset


 - Calculating mean and standard deviation: 100%|██████████| 434/434 [01:06<00:00,  6.54batch/s]

 - Normalized dataset:
  - Mean: [0.4671, 0.4895, 0.4123]
  - Standard deviation: [0.1709, 0.1443, 0.1880]





## Run 1

In [11]:
# Run 1: 20% of the dataset is treated as labeled; test_ratio=0.1 is relative to
# the whole dataset, so half of the labeled pool is held out for testing.
unlabeled_loader, train_loader, val_loader, test_loader = split(dataset, batch_size=BATCH_SIZE, labeled_ratio=0.2, test_ratio=0.1)

##### CNN

In [12]:
# Supervised baseline for Run 1: a ResNet50 classifier with one output per dataset class,
# optimized with Adam (lr=1e-3) on cross-entropy loss.
cnn = ResNet50(num_classes=len(dataset.classes))
cnn_optim = optim.Adam(cnn.parameters(), lr=1e-3)
cnn_criterion = nn.CrossEntropyLoss()
# NOTE(review): device is hard-coded to 'mps' (PyTorch's Apple-Silicon backend);
# presumably this cell fails on machines without it — confirm against Runner.
cnn_runner = Runner('cnn_1', cnn, cnn_optim, cnn_criterion, device='mps')
cnn_runner.train(train_loader, val_loader, num_epochs=3)
cnn_runner.test(test_loader)
# Ending the cell on a statement keeps the notebook from echoing the value of the
# last expression (presumably the return value of test()).
pass

Training:   0%|          | 0/3 [00:00<?, ? epoch/s]

Training:   0%|          | 0/35 [00:00<?, ?batch/s]

Validating:   0%|          | 0/9 [00:00<?, ?batch/s]

Epoch 1/3 - Train accuracy: 0.7710, Train Loss: 0.9095, Validation Loss: 0.8575


Training:   0%|          | 0/35 [00:00<?, ?batch/s]

Validating:   0%|          | 0/9 [00:00<?, ?batch/s]

Epoch 2/3 - Train accuracy: 0.9450, Train Loss: 0.1795, Validation Loss: 0.3286


Training:   0%|          | 0/35 [00:00<?, ?batch/s]

Validating:   0%|          | 0/9 [00:00<?, ?batch/s]

Epoch 3/3 - Train accuracy: 0.9716, Train Loss: 0.0856, Validation Loss: 0.3307


Testing:   0%|          | 0/44 [00:00<?, ?batch/s]

Test accuracy: 0.8925, Test Loss: 0.3854


##### Autoencoder

In [13]:
# Declare UNetAutoEncoder
# Train UNetAutoEncoder
# Extract Encoder from UNetAutoEncoder

##### Frozen Encoder + MLP

In [14]:
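# TODO: Train the first encoder + MLP model ("Frankenstein") with the encoder frozen.
# Note: eval() only switches layers such as dropout/batch norm to inference mode;
# freezing the weights also requires requires_grad_(False) on the encoder's parameters.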

##### Live Encoder + MLP

In [15]:
# TODO: Train the second encoder + MLP model ("Frankenstein") end to end, leaving the encoder's weights trainable.

##### Cleanup

In [21]:
# Drop Run 1's loaders and force a garbage-collection pass before Run 2 rebuilds them.
# NOTE(review): this cell's execution count (21) is later than the Run 2 cells below,
# so it was last executed out of document order — re-check with Restart & Run All.
# NOTE(review): cnn, cnn_optim and cnn_runner are not deleted here and stay alive
# until the Run 2 cell rebinds them.
del unlabeled_loader, train_loader, val_loader, test_loader
gc.collect()

## Run 2

In [16]:
# Run 2: 50% of the dataset is treated as labeled; test_ratio=0.15 is relative to
# the whole dataset, i.e. 30% of the labeled pool is held out for testing.
unlabeled_loader, train_loader, val_loader, test_loader = split(dataset, batch_size=BATCH_SIZE, labeled_ratio=0.5, test_ratio=0.15)

##### CNN

In [17]:
# Supervised baseline for Run 2: a freshly initialized ResNet50 with the same optimizer,
# loss and epoch count as Run 1, trained on the larger labeled split.
cnn = ResNet50(num_classes=len(dataset.classes))
cnn_optim = optim.Adam(cnn.parameters(), lr=1e-3)
cnn_criterion = nn.CrossEntropyLoss()
# NOTE(review): device is hard-coded to 'mps' (PyTorch's Apple-Silicon backend);
# presumably this cell fails on machines without it — confirm against Runner.
cnn_runner = Runner('cnn_2', cnn, cnn_optim, cnn_criterion, device='mps')
cnn_runner.train(train_loader, val_loader, num_epochs=3)
cnn_runner.test(test_loader)
# Ending the cell on a statement keeps the notebook from echoing the value of the
# last expression (presumably the return value of test()).
pass

Training:   0%|          | 0/3 [00:00<?, ? epoch/s]

Training:   0%|          | 0/122 [00:00<?, ?batch/s]

Validating:   0%|          | 0/31 [00:00<?, ?batch/s]

Epoch 1/3 - Train accuracy: 0.8884, Train Loss: 0.4073, Validation Loss: 0.3107


Training:   0%|          | 0/122 [00:00<?, ?batch/s]

Validating:   0%|          | 0/31 [00:00<?, ?batch/s]

Epoch 2/3 - Train accuracy: 0.9762, Train Loss: 0.0761, Validation Loss: 0.1272


Training:   0%|          | 0/122 [00:00<?, ?batch/s]

Validating:   0%|          | 0/31 [00:00<?, ?batch/s]

Epoch 3/3 - Train accuracy: 0.9886, Train Loss: 0.0368, Validation Loss: 0.0646


Testing:   0%|          | 0/65 [00:00<?, ?batch/s]

Test accuracy: 0.9827, Test Loss: 0.0590


##### Autoencoder

In [18]:
# Declare UNetAutoEncoder
# Train UNetAutoEncoder
# Extract Encoder from UNetAutoEncoder

##### Frozen Encoder + MLP

In [19]:
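# TODO: Train the first encoder + MLP model ("Frankenstein") with the encoder frozen.
# Note: eval() only switches layers such as dropout/batch norm to inference mode;
# freezing the weights also requires requires_grad_(False) on the encoder's parameters.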

##### Live Encoder + MLP

In [20]:
# TODO: Train the second encoder + MLP model ("Frankenstein") end to end, leaving the encoder's weights trainable.