In [70]:
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader, Subset
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm
import time
import os
import PIL.Image as Image
from IPython.display import display
import random
import pandas as pd

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.get_device_name(device))

cuda:0
NVIDIA GeForce RTX 2080 Ti


In [71]:
dataset_dir = "/Data/federated_learning/large_vlm_distillation_ood/Resnet18_classification/s_cars_ood_adding_ood/"

train_tfms = transforms.Compose([transforms.Resize((400, 400)),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.RandomRotation(15),
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
test_tfms = transforms.Compose([transforms.Resize((400, 400)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

dataset = torchvision.datasets.ImageFolder(root=dataset_dir+"train", transform = train_tfms)
trainloader = torch.utils.data.DataLoader(dataset, batch_size = 32, shuffle=True, num_workers = 2)

dataset2 = torchvision.datasets.ImageFolder(root=dataset_dir+"test", transform = test_tfms)
testloader = torch.utils.data.DataLoader(dataset2, batch_size = 32, shuffle=False, num_workers = 2)

dataset3 = torchvision.datasets.ImageFolder(root=dataset_dir+"test_2add", transform = test_tfms)
test_2add_loader = torch.utils.data.DataLoader(dataset3, batch_size = 32, shuffle=False, num_workers = 2)

In [72]:
def add_samples_to_train_dataset(dataset, dataset3, samples_to_add):
    new_indices = random.sample(range(len(dataset3)), samples_to_add)
    new_samples = [dataset3.samples[i] for i in new_indices]  # Note the use of `samples` attribute
    dataset.samples.extend(new_samples)
    dataset.targets.extend([dataset3.targets[i] for i in new_indices])
    return dataset  # Return the updated dataset

In [73]:
def train_model(dataset, dataset3, trainloader, testloader, test_2add_loader, model, criterion, optimizer, scheduler,
                n_epochs=5):
    losses = []
    train_accuracies = []
    test_accuracies = []
    model = model.to(device)
    for epoch in range(n_epochs):
        since = time.time()
        running_loss = 0.0
        running_correct = 0.0
        model.train()
        for i, data in tqdm(enumerate(trainloader, 0)):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_correct += (labels == predicted).sum().item()

        epoch_duration = time.time() - since
        epoch_loss = running_loss / len(trainloader)
        epoch_acc = 100 * running_correct / len(trainloader.dataset)
        print(f"Epoch {epoch + 1}, duration: {epoch_duration:.2f} s, loss: {epoch_loss:.4f}, Train acc: {epoch_acc:.2f}")

        losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)
        
        model.eval()
        test_acc = eval_model(model)
        test_accuracies.append(test_acc)
        
        scheduler.step(test_acc)
        since = time.time()
        
        # Add samples to train_dataset after each epoch
        dataset = add_samples_to_train_dataset(dataset, dataset3, 5)  # Update the dataset
        trainloader = DataLoader(dataset, batch_size=32, shuffle=True)  # Reinitialize the DataLoader with the updated dataset

    print('Finished Training')
    return model, losses, train_accuracies, test_accuracies

In [74]:
def eval_model(model):
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for i, data in enumerate(testloader, 0):
            images, labels = data
            
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        test_acc = 100.0 * correct / total
    print(f'Test acc: {test_acc:.2f}')
    return test_acc

In [75]:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 196)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_ft.parameters(), lr=0.01,momentum=0.9)
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, threshold = 0.9)



In [None]:
model_ft, training_losses, training_accs, test_accs = train_model(dataset, dataset3, trainloader, testloader, test_2add_loader, model_ft, criterion, optimizer, lrscheduler, n_epochs=90)

481it [01:13,  6.58it/s]

Epoch 1, duration: 73.23 s, loss: 3.1771, Train acc: 31.46





Test acc: 1.21


481it [02:51,  2.80it/s]

Epoch 2, duration: 171.70 s, loss: 0.9935, Train acc: 74.95





Test acc: 0.20


481it [02:52,  2.79it/s]

Epoch 3, duration: 172.41 s, loss: 0.5417, Train acc: 85.72





Test acc: 0.00


481it [02:52,  2.79it/s]

Epoch 4, duration: 172.46 s, loss: 0.3432, Train acc: 91.26





Test acc: 0.20


481it [02:49,  2.83it/s]

Epoch 5, duration: 170.00 s, loss: 0.2505, Train acc: 93.47





Test acc: 0.00


481it [02:50,  2.83it/s]

Epoch 6, duration: 170.15 s, loss: 0.1237, Train acc: 97.48





Test acc: 0.80


482it [02:52,  2.79it/s]

Epoch 7, duration: 172.92 s, loss: 0.0909, Train acc: 98.17





Test acc: 1.41


482it [02:54,  2.76it/s]

Epoch 8, duration: 174.72 s, loss: 0.0830, Train acc: 98.49





Test acc: 1.41


482it [03:10,  2.53it/s]

Epoch 9, duration: 190.82 s, loss: 0.0803, Train acc: 98.59





Test acc: 3.62


482it [03:13,  2.49it/s]

Epoch 10, duration: 193.70 s, loss: 0.0782, Train acc: 98.48





Test acc: 4.43


482it [03:00,  2.67it/s]

Epoch 11, duration: 180.31 s, loss: 0.0716, Train acc: 98.74





Test acc: 4.83


482it [03:13,  2.49it/s]

Epoch 12, duration: 193.64 s, loss: 0.0699, Train acc: 98.83





Test acc: 6.84


483it [03:14,  2.48it/s]

Epoch 13, duration: 194.41 s, loss: 0.0690, Train acc: 98.73





Test acc: 8.65


483it [03:13,  2.49it/s]

Epoch 14, duration: 193.89 s, loss: 0.0658, Train acc: 98.76





Test acc: 10.26


483it [02:59,  2.68it/s]

Epoch 15, duration: 179.91 s, loss: 0.0604, Train acc: 98.92





Test acc: 10.06


483it [02:58,  2.70it/s]

Epoch 16, duration: 178.99 s, loss: 0.0591, Train acc: 99.08





Test acc: 12.07


483it [03:07,  2.58it/s]

Epoch 17, duration: 187.48 s, loss: 0.0555, Train acc: 99.07





Test acc: 13.88


483it [03:03,  2.63it/s]

Epoch 18, duration: 183.95 s, loss: 0.0507, Train acc: 99.23





Test acc: 13.08


483it [03:01,  2.65it/s]

Epoch 19, duration: 181.93 s, loss: 0.0519, Train acc: 99.07





Test acc: 14.08


484it [03:10,  2.54it/s]

Epoch 20, duration: 190.56 s, loss: 0.0535, Train acc: 99.17





Test acc: 14.29


484it [03:33,  2.27it/s]

Epoch 21, duration: 213.49 s, loss: 0.0542, Train acc: 99.24





Test acc: 14.89


484it [02:58,  2.71it/s]

Epoch 22, duration: 178.35 s, loss: 0.0547, Train acc: 99.15





Test acc: 14.29


484it [03:10,  2.54it/s]

Epoch 23, duration: 190.51 s, loss: 0.0539, Train acc: 99.11





Test acc: 15.29


484it [03:01,  2.67it/s]

Epoch 24, duration: 181.17 s, loss: 0.0566, Train acc: 99.10





Test acc: 15.09


484it [03:13,  2.50it/s]

Epoch 25, duration: 193.25 s, loss: 0.0586, Train acc: 99.06





Test acc: 15.09


485it [03:07,  2.59it/s]

Epoch 26, duration: 187.23 s, loss: 0.0597, Train acc: 99.03





Test acc: 14.29


485it [03:03,  2.65it/s]

Epoch 27, duration: 183.33 s, loss: 0.0611, Train acc: 98.99





Test acc: 16.10


485it [02:59,  2.70it/s]

Epoch 28, duration: 179.66 s, loss: 0.0627, Train acc: 99.03





Test acc: 14.69


485it [03:19,  2.43it/s]

Epoch 29, duration: 199.35 s, loss: 0.0622, Train acc: 98.96





Test acc: 15.49


485it [03:11,  2.53it/s]

Epoch 30, duration: 191.34 s, loss: 0.0637, Train acc: 98.99





Test acc: 15.29


485it [03:21,  2.40it/s]

Epoch 31, duration: 201.78 s, loss: 0.0648, Train acc: 98.83





Test acc: 15.09


486it [02:57,  2.73it/s]

Epoch 32, duration: 177.82 s, loss: 0.0793, Train acc: 98.92





Test acc: 14.69


486it [03:02,  2.66it/s]

Epoch 33, duration: 182.98 s, loss: 0.0688, Train acc: 98.80





Test acc: 16.50


486it [03:18,  2.44it/s]

Epoch 34, duration: 199.00 s, loss: 0.0705, Train acc: 98.91





Test acc: 15.69


486it [03:04,  2.64it/s]

Epoch 35, duration: 184.41 s, loss: 0.0693, Train acc: 98.93





Test acc: 15.29


486it [03:00,  2.69it/s]

Epoch 36, duration: 180.40 s, loss: 0.0712, Train acc: 98.76





Test acc: 14.89


486it [03:14,  2.49it/s]

Epoch 37, duration: 194.82 s, loss: 0.0714, Train acc: 98.85





Test acc: 14.08


486it [03:20,  2.42it/s]

Epoch 38, duration: 200.44 s, loss: 0.0736, Train acc: 98.78





Test acc: 15.90


487it [03:12,  2.53it/s]

Epoch 39, duration: 192.59 s, loss: 0.0780, Train acc: 98.77





Test acc: 16.50


487it [03:13,  2.52it/s]

Epoch 40, duration: 193.20 s, loss: 0.0760, Train acc: 98.81





Test acc: 15.09


487it [03:21,  2.41it/s]

Epoch 41, duration: 201.89 s, loss: 0.0772, Train acc: 98.77





Test acc: 14.69


487it [03:10,  2.56it/s]

Epoch 42, duration: 190.50 s, loss: 0.0769, Train acc: 98.68





Test acc: 14.89


487it [03:25,  2.37it/s]

Epoch 43, duration: 205.70 s, loss: 0.0803, Train acc: 98.57





Test acc: 15.09


487it [03:26,  2.35it/s]

Epoch 44, duration: 206.82 s, loss: 0.0811, Train acc: 98.61





Test acc: 15.90


488it [03:17,  2.47it/s]

Epoch 45, duration: 197.69 s, loss: 0.0837, Train acc: 98.58





Test acc: 14.69


488it [03:06,  2.61it/s]

Epoch 46, duration: 186.97 s, loss: 0.0845, Train acc: 98.64





Test acc: 14.49


488it [03:08,  2.59it/s]

Epoch 47, duration: 188.41 s, loss: 0.0845, Train acc: 98.55





Test acc: 15.29


488it [03:02,  2.68it/s]

Epoch 48, duration: 182.22 s, loss: 0.0850, Train acc: 98.59





Test acc: 14.89


488it [03:01,  2.69it/s]

Epoch 49, duration: 181.57 s, loss: 0.0896, Train acc: 98.50





Test acc: 14.89


488it [03:00,  2.70it/s]

Epoch 50, duration: 180.61 s, loss: 0.0909, Train acc: 98.37





Test acc: 15.29


488it [03:15,  2.50it/s]

Epoch 51, duration: 195.01 s, loss: 0.0920, Train acc: 98.44





Test acc: 15.09


489it [02:56,  2.78it/s]

Epoch 52, duration: 176.02 s, loss: 0.0928, Train acc: 98.46





Test acc: 15.69


489it [03:03,  2.66it/s]

Epoch 53, duration: 183.97 s, loss: 0.0947, Train acc: 98.43





Test acc: 14.49


489it [03:04,  2.65it/s]

Epoch 54, duration: 184.82 s, loss: 0.0958, Train acc: 98.37





Test acc: 15.09


489it [02:55,  2.79it/s]

Epoch 55, duration: 175.39 s, loss: 0.0969, Train acc: 98.42





Test acc: 15.09


489it [02:57,  2.75it/s]

Epoch 56, duration: 177.84 s, loss: 0.0989, Train acc: 98.38





Test acc: 15.09


489it [03:00,  2.70it/s]

Epoch 57, duration: 180.89 s, loss: 0.1006, Train acc: 98.31





Test acc: 15.09


490it [03:06,  2.63it/s]

Epoch 58, duration: 186.43 s, loss: 0.1061, Train acc: 98.15





Test acc: 16.50


490it [03:25,  2.38it/s]

Epoch 59, duration: 205.62 s, loss: 0.1024, Train acc: 98.15





Test acc: 14.49


490it [03:10,  2.58it/s]

Epoch 60, duration: 190.13 s, loss: 0.1048, Train acc: 98.27





Test acc: 14.89


490it [03:25,  2.38it/s]

Epoch 61, duration: 205.76 s, loss: 0.1076, Train acc: 98.12





Test acc: 14.49


490it [03:09,  2.58it/s]

Epoch 62, duration: 189.89 s, loss: 0.1031, Train acc: 98.23





Test acc: 14.49


490it [03:17,  2.49it/s]

Epoch 63, duration: 197.11 s, loss: 0.1068, Train acc: 98.14





Test acc: 15.49


135it [00:53,  2.52it/s]

In [None]:
df_train_acc, df_test_acc = pd.DataFrame(training_accs, columns='Train_accuracies'), pd.DataFrame(training_accs, columns='Test_accuracies')

In [None]:
# f, axarr = plt.subplots(2,2, figsize = (12, 8))
# axarr[0, 0].plot(training_losses)
# axarr[0, 0].set_title("Training loss")
# axarr[0, 1].plot(training_accs)
# axarr[0, 1].set_title("Training acc")
# axarr[1, 0].plot(test_accs)

# axarr[1, 0].set_title("Test acc")

## Setting for new experiment

In [None]:
""" goal: see how many test samples needed to get accuracy convergance in test set
1. Create test to copy set by taking ~half of test set, half of each class and copy to test to copy
2. create empty folders in test names at train, hopefully dataloader wouold not recognize it as error
3.  Create empty lists for Train acc, vall acc and create index of copied data and setting it to zero
4. After each epoch, copy randomly 5 samples from test to copy to relevant folders on train and copy the new 5 instances to anno_train retrain and recalculate 
    train and val accuracies
5. save logs of train val and indices of adding data