In [3]:
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader, Subset
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm
import time
import os
import PIL.Image as Image
from IPython.display import display
import random
import pandas as pd
import wandb  # Import wandb

# Prefer the first CUDA GPU; fall back to the CPU when CUDA is unavailable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# torch.cuda.get_device_name() only accepts CUDA devices, so it is skipped on
# the CPU fallback instead of crashing a CPU-only session.
if device.type == "cuda":
    print(torch.cuda.get_device_name(device))
# Start the Weights & Biases run that the wandb.log() calls in train_model() report to.
wandb.init(project="classify_r50_on_s_cars_ood_ind_a")  # Replace with your project name

cuda:0
NVIDIA GeForce RTX 3090


[34m[1mwandb[0m: Currently logged in as: [33mariel_solomon[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:

# Root directory holding the "train", "val" and "val_on_train" image folders.
# NOTE(review): absolute, machine-specific path; consider making it configurable.
dataset_dir = "/home/user1/ariel/fed_learn/large_vlm_distillation_ood/s_cars_ood_ind/"

# Preprocessing / loading constants, named once so the pipelines cannot drift apart.
IMAGE_SIZE = (400, 400)
# NOTE(review): torchvision documents its pretrained ResNet weights as expecting
# ImageNet mean/std normalisation; the 0.5 values are kept unchanged here so
# results stay comparable with existing checkpoints. Confirm this is intended.
NORM_MEAN = (0.5, 0.5, 0.5)
NORM_STD = (0.5, 0.5, 0.5)
ROTATION_DEGREES = 15
BATCH_SIZE = 32
NUM_WORKERS = 2


def _build_transform(augment):
    """Return the resize -> [flip, rotate] -> tensor -> normalise pipeline.

    Args:
        augment: When true, the random horizontal flip and random rotation
            steps are inserted between the resize and the tensor conversion.
    """
    steps = [transforms.Resize(IMAGE_SIZE)]
    if augment:
        steps += [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(ROTATION_DEGREES),
        ]
    steps += [
        transforms.ToTensor(),
        transforms.Normalize(NORM_MEAN, NORM_STD),
    ]
    return transforms.Compose(steps)


train_tfms = _build_transform(augment=True)
test_tfms = _build_transform(augment=False)

# NOTE(review): this augmented pipeline is not used by any loader in the
# visible cells (the IND loader below uses test_tfms). Kept in case a later
# cell refers to it.
test_ind_tfms = _build_transform(augment=True)

# os.path.join keeps the paths valid whether or not dataset_dir ends with "/".
dataset = torchvision.datasets.ImageFolder(root=os.path.join(dataset_dir, "train"), transform=train_tfms)
trainloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

# "val" is reported as the OOD test set by train_model().
dataset2 = torchvision.datasets.ImageFolder(root=os.path.join(dataset_dir, "val"), transform=test_tfms)
testloader = DataLoader(dataset2, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# "val_on_train" is reported as the IND test set. Accuracy does not depend on
# batch order, so it is not shuffled, matching the OOD loader.
dataset3 = torchvision.datasets.ImageFolder(root=os.path.join(dataset_dir, "val_on_train"), transform=test_tfms)
testloader_ind = DataLoader(dataset3, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [8]:
def train_model(dataset, trainloader, testloader, testloader_ind, model, criterion, optimizer, scheduler, n_epochs=5):
    """Train ``model`` and evaluate it on both test loaders after every epoch.

    Each epoch runs one pass over ``trainloader``, saves the whole model to
    ``model_epoch_<epoch>.pt``, evaluates on ``testloader`` and
    ``testloader_ind`` via ``eval_model``, steps ``scheduler`` and logs the
    epoch's metrics to wandb.

    Args:
        dataset: Unused; kept so existing call sites keep working.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        testloader: Loader whose accuracy is reported as ``test_acc_ood`` and
            passed to ``scheduler.step``.
        testloader_ind: Loader whose accuracy is reported as ``test_acc_ind``.
        model: Network to train; moved to the notebook-level ``device``.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Scheduler stepped once per epoch with the OOD accuracy.
        n_epochs: Number of passes over ``trainloader``.

    Returns:
        ``(model, losses, train_accuracies, test_accuracies,
        test_ind_accuracies)``; every list holds one entry per epoch.
    """
    losses = []
    train_accuracies = []
    test_accuracies = []
    test_ind_accuracies = []
    model = model.to(device)
    for epoch in range(n_epochs):
        since = time.time()
        running_loss = 0.0
        running_correct = 0
        samples_seen = 0
        model.train()
        # Iterating the loader directly lets tqdm read its length and show a total.
        for inputs, labels in tqdm(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # detach() replaces the deprecated .data for reading predictions.
            _, predicted = torch.max(outputs.detach(), 1)
            running_correct += (labels == predicted).sum().item()
            samples_seen += labels.size(0)

        epoch_duration = time.time() - since
        epoch_loss = running_loss / len(trainloader)
        # Divide by the samples actually processed, so the figure stays correct
        # for loaders that skip samples (drop_last, custom samplers).
        epoch_acc = 100 * running_correct / samples_seen
        print(f"Epoch {epoch + 1}, duration: {epoch_duration:.2f} s, loss: {epoch_loss:.4f}, Train acc: {epoch_acc:.2f}")
        losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)
        # The file name uses the zero-based epoch index, while the printed and
        # logged epoch numbers are one-based.
        torch.save(model, "model_epoch_{}.pt".format(epoch))

        model.eval()
        test_acc = eval_model(model, testloader, 'test_acc_ood')
        test_accuracies.append(test_acc)

        test_acc_ind = eval_model(model, testloader_ind, 'test_acc_ind')
        test_ind_accuracies.append(test_acc_ind)

        # NOTE(review): the scheduler is driven by the OOD accuracy; confirm
        # that this, rather than the IND accuracy, is the intended plateau metric.
        scheduler.step(test_acc)

        # One wandb.log call per epoch: each call advances wandb's step by
        # default, so separate calls would put train and test metrics of the
        # same epoch on different steps.
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": epoch_loss,
            "train_acc": epoch_acc,
            "test_ood_acc": test_acc,
            "test_ind_acc": test_acc_ind,
        })

    print('Finished Training')
    return model, losses, train_accuracies, test_accuracies, test_ind_accuracies

In [9]:
def eval_model(model, testloader, name, device=None):
    """Compute the top-1 accuracy (in percent) of ``model`` over ``testloader``.

    The model is put in eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards.

    Args:
        model: ``torch.nn.Module`` returning one row of class scores per sample.
        testloader: Iterable yielding ``(images, labels)`` batches.
        name: Label printed in front of the accuracy.
        device: Optional device the batches are moved to. Defaults to the
            device of the model's first parameter (CPU if it has none).

    Returns:
        Accuracy as a float percentage.

    Raises:
        ValueError: If ``testloader`` yields no samples.
    """
    if device is None:
        # Inputs must live where the model lives; derive it from the model
        # rather than relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0
    was_training = model.training
    model.eval()  # disable dropout / use running batch-norm statistics
    try:
        with torch.no_grad():
            for images, labels in testloader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                _, predicted = torch.max(outputs, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        raise ValueError(f"{name}: cannot compute accuracy, the loader yielded no samples")

    test_acc = 100.0 * correct / total
    print(f'{name}: {test_acc:.2f}')
    return test_acc

In [10]:
# Start from torchvision's pretrained ResNet-50 and replace its final fully
# connected layer with a fresh 196-output classification head.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; verify against the installed version before changing it.
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=196)
model_ft = model_ft.to(device)

# Loss, optimizer and learning-rate schedule handed to train_model().
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model_ft.parameters(),
    lr=0.01,
    momentum=0.9,
)
# NOTE(review): with ReduceLROnPlateau's default relative threshold mode,
# threshold=0.9 in 'max' mode means the metric must exceed 1.9x the best value
# to count as an improvement. Confirm this is intended.
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    patience=3,
    threshold=0.9,
)

In [11]:
# Run the training schedule and unpack the trained model plus the per-epoch
# loss / accuracy histories returned by train_model().
(
    model_ft,
    training_losses,
    training_accs,
    test_ood_accs,
    test_ind_accuracies,
) = train_model(
    dataset,
    trainloader,
    testloader,
    testloader_ind,
    model_ft,
    criterion,
    optimizer,
    lrscheduler,
    n_epochs=74,
)

413it [01:33,  4.43it/s]


Epoch 1, duration: 93.20 s, loss: 3.1971, Train acc: 28.15
test_acc_ood: 0.00
test_acc_ind: 59.02


413it [01:33,  4.41it/s]


Epoch 2, duration: 93.77 s, loss: 1.0028, Train acc: 73.15
test_acc_ood: 0.85
test_acc_ind: 71.44


413it [01:33,  4.40it/s]


Epoch 3, duration: 93.81 s, loss: 0.5409, Train acc: 85.18
test_acc_ood: 0.00
test_acc_ind: 84.82


413it [01:33,  4.41it/s]


Epoch 4, duration: 93.76 s, loss: 0.3478, Train acc: 90.30
test_acc_ood: 0.00
test_acc_ind: 83.99


413it [01:33,  4.40it/s]


Epoch 5, duration: 93.81 s, loss: 0.2560, Train acc: 92.93
test_acc_ood: 0.12
test_acc_ind: 86.11


413it [01:33,  4.40it/s]


Epoch 6, duration: 93.85 s, loss: 0.1882, Train acc: 94.83
test_acc_ood: 0.00
test_acc_ind: 87.45


413it [01:33,  4.41it/s]


Epoch 7, duration: 93.77 s, loss: 0.0824, Train acc: 98.06
test_acc_ood: 0.00
test_acc_ind: 93.82


413it [01:33,  4.40it/s]


Epoch 8, duration: 93.81 s, loss: 0.0526, Train acc: 98.98
test_acc_ood: 0.00
test_acc_ind: 94.00


413it [01:33,  4.40it/s]


Epoch 9, duration: 93.93 s, loss: 0.0466, Train acc: 99.02
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:33,  4.40it/s]


Epoch 10, duration: 93.95 s, loss: 0.0410, Train acc: 99.23
test_acc_ood: 0.00
test_acc_ind: 94.00


413it [01:33,  4.40it/s]


Epoch 11, duration: 93.91 s, loss: 0.0380, Train acc: 99.28
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:33,  4.40it/s]


Epoch 12, duration: 93.99 s, loss: 0.0375, Train acc: 99.32
test_acc_ood: 0.00
test_acc_ind: 94.05


413it [01:34,  4.39it/s]


Epoch 13, duration: 94.15 s, loss: 0.0373, Train acc: 99.30
test_acc_ood: 0.00
test_acc_ind: 94.14


413it [01:34,  4.39it/s]


Epoch 14, duration: 94.10 s, loss: 0.0340, Train acc: 99.45
test_acc_ood: 0.00
test_acc_ind: 94.00


413it [01:34,  4.39it/s]


Epoch 15, duration: 94.12 s, loss: 0.0365, Train acc: 99.34
test_acc_ood: 0.00
test_acc_ind: 94.46


413it [01:34,  4.39it/s]


Epoch 16, duration: 94.20 s, loss: 0.0339, Train acc: 99.48
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.39it/s]


Epoch 17, duration: 94.06 s, loss: 0.0350, Train acc: 99.39
test_acc_ood: 0.00
test_acc_ind: 94.14


413it [01:34,  4.39it/s]


Epoch 18, duration: 94.16 s, loss: 0.0348, Train acc: 99.42
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:34,  4.39it/s]


Epoch 19, duration: 94.11 s, loss: 0.0353, Train acc: 99.29
test_acc_ood: 0.00
test_acc_ind: 94.23


413it [01:34,  4.39it/s]


Epoch 20, duration: 94.22 s, loss: 0.0363, Train acc: 99.36
test_acc_ood: 0.00
test_acc_ind: 94.46


413it [01:34,  4.39it/s]


Epoch 21, duration: 94.10 s, loss: 0.0348, Train acc: 99.40
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 22, duration: 94.24 s, loss: 0.0354, Train acc: 99.31
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.39it/s]


Epoch 23, duration: 94.18 s, loss: 0.0349, Train acc: 99.38
test_acc_ood: 0.00
test_acc_ind: 94.14


413it [01:34,  4.38it/s]


Epoch 24, duration: 94.30 s, loss: 0.0357, Train acc: 99.30
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 25, duration: 94.26 s, loss: 0.0354, Train acc: 99.39
test_acc_ood: 0.00
test_acc_ind: 94.42


413it [01:34,  4.38it/s]


Epoch 26, duration: 94.28 s, loss: 0.0348, Train acc: 99.29
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 27, duration: 94.28 s, loss: 0.0347, Train acc: 99.43
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:33,  4.39it/s]


Epoch 28, duration: 94.04 s, loss: 0.0356, Train acc: 99.31
test_acc_ood: 0.00
test_acc_ind: 94.09


413it [01:34,  4.39it/s]


Epoch 29, duration: 94.16 s, loss: 0.0354, Train acc: 99.29
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:34,  4.38it/s]


Epoch 30, duration: 94.32 s, loss: 0.0371, Train acc: 99.24
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.39it/s]


Epoch 31, duration: 94.20 s, loss: 0.0348, Train acc: 99.32
test_acc_ood: 0.00
test_acc_ind: 94.46


413it [01:34,  4.38it/s]


Epoch 32, duration: 94.28 s, loss: 0.0365, Train acc: 99.33
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 33, duration: 94.32 s, loss: 0.0346, Train acc: 99.39
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.39it/s]


Epoch 34, duration: 94.19 s, loss: 0.0349, Train acc: 99.33
test_acc_ood: 0.00
test_acc_ind: 94.37


413it [01:34,  4.38it/s]


Epoch 35, duration: 94.35 s, loss: 0.0349, Train acc: 99.41
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 36, duration: 94.27 s, loss: 0.0348, Train acc: 99.42
test_acc_ood: 0.00
test_acc_ind: 94.23


413it [01:34,  4.38it/s]


Epoch 37, duration: 94.22 s, loss: 0.0349, Train acc: 99.29
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 38, duration: 94.39 s, loss: 0.0358, Train acc: 99.26
test_acc_ood: 0.00
test_acc_ind: 94.42


413it [01:34,  4.37it/s]


Epoch 39, duration: 94.45 s, loss: 0.0356, Train acc: 99.36
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 40, duration: 94.27 s, loss: 0.0355, Train acc: 99.27
test_acc_ood: 0.00
test_acc_ind: 94.23


413it [01:34,  4.38it/s]


Epoch 41, duration: 94.40 s, loss: 0.0359, Train acc: 99.34
test_acc_ood: 0.00
test_acc_ind: 94.37


413it [01:34,  4.38it/s]


Epoch 42, duration: 94.34 s, loss: 0.0351, Train acc: 99.37
test_acc_ood: 0.00
test_acc_ind: 94.05


413it [01:34,  4.38it/s]


Epoch 43, duration: 94.38 s, loss: 0.0344, Train acc: 99.43
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 44, duration: 94.34 s, loss: 0.0361, Train acc: 99.27
test_acc_ood: 0.00
test_acc_ind: 94.23


413it [01:34,  4.38it/s]


Epoch 45, duration: 94.31 s, loss: 0.0350, Train acc: 99.44
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 46, duration: 94.38 s, loss: 0.0347, Train acc: 99.32
test_acc_ood: 0.00
test_acc_ind: 94.55


413it [01:34,  4.37it/s]


Epoch 47, duration: 94.45 s, loss: 0.0350, Train acc: 99.38
test_acc_ood: 0.00
test_acc_ind: 94.42


413it [01:34,  4.38it/s]


Epoch 48, duration: 94.34 s, loss: 0.0342, Train acc: 99.39
test_acc_ood: 0.00
test_acc_ind: 94.37


413it [01:34,  4.37it/s]


Epoch 49, duration: 94.54 s, loss: 0.0348, Train acc: 99.35
test_acc_ood: 0.00
test_acc_ind: 94.42


413it [01:34,  4.38it/s]


Epoch 50, duration: 94.40 s, loss: 0.0358, Train acc: 99.32
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 51, duration: 94.25 s, loss: 0.0349, Train acc: 99.45
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:34,  4.39it/s]


Epoch 52, duration: 94.10 s, loss: 0.0341, Train acc: 99.36
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 53, duration: 94.23 s, loss: 0.0351, Train acc: 99.38
test_acc_ood: 0.00
test_acc_ind: 94.05


413it [01:34,  4.37it/s]


Epoch 54, duration: 94.45 s, loss: 0.0346, Train acc: 99.39
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:34,  4.38it/s]


Epoch 55, duration: 94.31 s, loss: 0.0355, Train acc: 99.40
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:34,  4.38it/s]


Epoch 56, duration: 94.37 s, loss: 0.0341, Train acc: 99.41
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 57, duration: 94.42 s, loss: 0.0346, Train acc: 99.44
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 58, duration: 94.31 s, loss: 0.0350, Train acc: 99.34
test_acc_ood: 0.00
test_acc_ind: 94.37


413it [01:34,  4.38it/s]


Epoch 59, duration: 94.42 s, loss: 0.0352, Train acc: 99.32
test_acc_ood: 0.00
test_acc_ind: 94.51


413it [01:34,  4.38it/s]


Epoch 60, duration: 94.41 s, loss: 0.0360, Train acc: 99.27
test_acc_ood: 0.00
test_acc_ind: 94.55


413it [01:34,  4.38it/s]


Epoch 61, duration: 94.33 s, loss: 0.0354, Train acc: 99.35
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:34,  4.38it/s]


Epoch 62, duration: 94.32 s, loss: 0.0348, Train acc: 99.38
test_acc_ood: 0.00
test_acc_ind: 94.42


413it [01:34,  4.38it/s]


Epoch 63, duration: 94.39 s, loss: 0.0358, Train acc: 99.28
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.38it/s]


Epoch 64, duration: 94.41 s, loss: 0.0349, Train acc: 99.36
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 65, duration: 94.40 s, loss: 0.0351, Train acc: 99.33
test_acc_ood: 0.00
test_acc_ind: 94.46


413it [01:34,  4.39it/s]


Epoch 66, duration: 94.19 s, loss: 0.0351, Train acc: 99.38
test_acc_ood: 0.00
test_acc_ind: 94.46


413it [01:34,  4.38it/s]


Epoch 67, duration: 94.37 s, loss: 0.0349, Train acc: 99.45
test_acc_ood: 0.00
test_acc_ind: 94.19


413it [01:34,  4.38it/s]


Epoch 68, duration: 94.26 s, loss: 0.0350, Train acc: 99.32
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 69, duration: 94.34 s, loss: 0.0359, Train acc: 99.39
test_acc_ood: 0.00
test_acc_ind: 94.28


413it [01:34,  4.39it/s]


Epoch 70, duration: 94.24 s, loss: 0.0344, Train acc: 99.40
test_acc_ood: 0.00
test_acc_ind: 94.32


413it [01:34,  4.38it/s]


Epoch 71, duration: 94.35 s, loss: 0.0338, Train acc: 99.40
test_acc_ood: 0.00
test_acc_ind: 94.05


413it [01:34,  4.38it/s]


Epoch 72, duration: 94.31 s, loss: 0.0357, Train acc: 99.36
test_acc_ood: 0.00
test_acc_ind: 94.23


413it [01:34,  4.38it/s]


Epoch 73, duration: 94.33 s, loss: 0.0348, Train acc: 99.37
test_acc_ood: 0.00
test_acc_ind: 94.23


413it [01:34,  4.38it/s]


Epoch 74, duration: 94.26 s, loss: 0.0362, Train acc: 99.36
test_acc_ood: 0.00
test_acc_ind: 94.42
Finished Training


In [1]:
#model_ft

In [2]:
# One single-column frame per accuracy history (one row per epoch).
# `columns` has to be list-like (a bare string is rejected by pandas), and the
# test frame is built from the OOD test history, not the training accuracies.
df_train_acc = pd.DataFrame(training_accs, columns=['Train_accuracies'])
df_test_acc = pd.DataFrame(test_ood_accs, columns=['Test_accuracies'])

In [17]:
# Restore a whole pickled model object from disk.
# NOTE(security): torch.load unpickles arbitrary Python objects; only load
# checkpoint files you created yourself.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects full-model pickles; verify against the installed version.
# map_location remaps the stored tensors onto this session's `device`, so a
# checkpoint saved on a GPU can also be restored on a CPU-only machine.
model = torch.load('epoch_8_best_model.pt', map_location=device)

In [19]:
# Resume training from the loaded checkpoint.
model_ft = model.to(device)
# The optimizer and scheduler built in the earlier setup cell hold references
# to the previous network's parameters, so stepping them would leave the loaded
# model untouched. Rebuild both for the loaded model with the same
# hyper-parameters as the setup cell.
optimizer = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, threshold=0.9)
model_ft, training_losses, training_accs, test_ood_accs, test_ind_accuracies = train_model(dataset, trainloader, testloader,testloader_ind, model_ft, criterion, optimizer, lrscheduler, n_epochs=74)


123it [00:27,  4.44it/s]wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
124it [00:28,  4.47it/s]wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
256it [00:57,  4.44it/s]wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
257it [00:58,  4.40it/s]wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
389it [01:27,  4.39it/s]wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
413it [01:33,  4.43it/s]


Epoch 1, duration: 93.37 s, loss: 0.0415, Train acc: 99.20


Thread SenderThread:
Traceback (most recent call last):
  File "/home/user1/.local/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 49, in run
    self._run()
  File "/home/user1/.local/lib/python3.10/site-packages/wandb/sdk/internal/internal_util.py", line 100, in _run
    self._process(record)
  File "/home/user1/.local/lib/python3.10/site-packages/wandb/sdk/internal/internal.py", line 328, in _process
    self._sm.send(record)
  File "/home/user1/.local/lib/python3.10/site-packages/wandb/sdk/internal/sender.py", line 385, in send
    send_handler(record)
  File "/home/user1/.local/lib/python3.10/site-packages/wandb/sdk/internal/sender.py", line 407, in send_request
    send_handler(record)
  File "/home/user1/.local/lib/python3.10/site-packages/wandb/sdk/internal/sender.py", line 1123, in send_request_summary_record
    self._update_summary_record(record.request.summary_record.summary)
  File "/home/user1/.local/lib/python3.10/site-packages/wandb/sdk/internal/

test_acc_ood: 0.00
test_acc_ind: 94.28


BrokenPipeError: [Errno 32] Broken pipe

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7962c4108400>> (for post_run_cell), with arguments args (<ExecutionResult object at 796392d67d30, execution_count=19 error_before_exec=None error_in_exec=[Errno 32] Broken pipe info=<ExecutionInfo object at 7962a1c4b100, raw_cell="model_ft = model
model_ft, training_losses, traini.." store_history=True silent=False shell_futures=True cell_id=124090c9-18bc-43df-b35d-82c5e04ded9d> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe