In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler 
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm
from torchvision.models import resnet18, ResNet18_Weights
import time
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score
import wandb
torch.manual_seed(42)
torch.cuda.manual_seed(42)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

  warn(


cuda


In [2]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mirfanmasoudi[0m ([33mdillema[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
train_dir = 'imagenet-combine-train/train/'
valid_dir = 'imagenet-combine-train/val/'
save_path = 'model_ckpt/model.pt'

In [4]:
class DILLEMADataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data = ImageFolder(data_dir, transform=transform)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]
   
    @property
    def classes(self):
        return self.data.classes
    
    @property
    def imgs(self):
        return self.data.imgs
    
    @property
    def class_to_idx(self):
        return self.data.class_to_idx

In [5]:
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [6]:
data_train = DILLEMADataset(
    data_dir=train_dir,
    transform=preprocess
)
data_valid = DILLEMADataset(
    data_dir=valid_dir,
    transform=preprocess
)

In [7]:
print(len(data_train)+len(data_valid))

150000


In [8]:
image, label = data_train[140]
print(label)
image

1


tensor([[[ 0.2624,  0.3138,  0.3652,  ..., -0.4054, -0.4054, -0.4226],
         [ 0.1768,  0.3138,  0.4166,  ..., -0.4054, -0.4226, -0.4397],
         [ 0.2624,  0.3994,  0.4337,  ..., -0.3883, -0.4054, -0.4054],
         ...,
         [ 0.2282,  0.2453,  0.2282,  ...,  1.4269,  1.5810,  1.6838],
         [ 0.2282,  0.2453,  0.2453,  ...,  1.4440,  1.4954,  1.6153],
         [ 0.2282,  0.2453,  0.2453,  ...,  1.5468,  1.4954,  1.5297]],

        [[-0.1275, -0.0924, -0.0574,  ..., -0.7577, -0.7577, -0.7752],
         [-0.2500, -0.0924, -0.0049,  ..., -0.7752, -0.7927, -0.8102],
         [-0.1625,  0.0301,  0.0476,  ..., -0.7577, -0.7752, -0.7752],
         ...,
         [-0.7052, -0.6702, -0.6527,  ...,  1.0630,  1.0980,  1.1331],
         [-0.6877, -0.6527, -0.6352,  ...,  1.2731,  1.2206,  1.2731],
         [-0.6877, -0.6527, -0.6352,  ...,  1.5357,  1.4132,  1.3782]],

        [[-0.4101, -0.3753, -0.3230,  ..., -0.9504, -0.9504, -0.9853],
         [-0.5321, -0.3753, -0.2881,  ..., -0

In [9]:
idx_to_class = {v: k for k, v in data_train.class_to_idx.items()}

In [10]:
train_dataloader = DataLoader(data_train, batch_size=100, num_workers=8, shuffle=True)
valid_dataloader = DataLoader(data_valid, batch_size=100, num_workers=8, shuffle=False)

In [11]:
# Initialize model
weights = ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

In [12]:
# Unfreeze all layers
for param in model.parameters():
    param.requires_grad = True


In [None]:
# # Unfreeze last layer
# for param in model.fc.parameters():
#     param.requires_grad = True

In [None]:
# class EarlyStopping:
#     def __init__(self, tolerance=2, min_delta=0):

#         self.tolerance = tolerance
#         self.min_delta = min_delta
#         self.counter = 0
#         self.early_stop = False

#     def __call__(self, train_loss, validation_loss):
#         if (validation_loss - train_loss) > self.min_delta:
#             self.counter +=1
#             if self.counter >= self.tolerance:  
#                 self.early_stop = True

In [13]:
def save_checkpoint(model, optimizer, save_path, epoch):
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch
    }, save_path)
    
    
def load_checkpoint(model, optimizer, load_path):
    checkpoint = torch.load(load_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    
    return model, optimizer, epoch

In [14]:
# Define loss and optimizer

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
criterion = nn.CrossEntropyLoss()

In [None]:
#### Train model
train_loss=[]
train_accuracy=[]
valid_loss=[]
valid_accuracy=[]

wandb.init(
    project="DILLEMA", 
    config={"architecture": "ResNet18",
            "dataset": "Imagenet1K",
            "epochs": 90,
      })

num_epochs = 90   #(set no of epochs)
start_time = time.time() #(for showing time)
model.to(device)
# Start loop
for epoch in range(num_epochs): #(loop for every epoch)
    print("Epoch {} running".format(epoch)) #(printing message)
    """ Training Phase """
    model.train()    #(training model)
    running_loss = 0.   #(set loss 0)
    running_corrects = 0 
    # load a batch data of images
    for inputs, labels in tqdm(train_dataloader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)
        # forward inputs and get output
        optimizer.zero_grad()
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        # get loss value and update the network weights
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        running_corrects += torch.sum(preds == labels.data).item()
    train_epoch_loss = running_loss / len(data_train)
    train_epoch_acc = running_corrects / len(data_train) * 100.
    # Append result
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_acc)
    # Print progress
    print('[Train #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, train_epoch_loss, train_epoch_acc, time.time() - start_time))
    """ Validation Phase """
    model.eval()
    with torch.no_grad():
        running_loss = 0.
        running_corrects = 0
        for inputs, labels in tqdm(valid_dataloader, desc="Validation"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            running_corrects += torch.sum(preds == labels.data).item()
        valid_epoch_loss = running_loss / len(data_valid)
        valid_epoch_acc = running_corrects / len(data_valid) * 100.
        # Append result
        valid_loss.append(valid_epoch_loss)
        valid_accuracy.append(valid_epoch_acc)
        # Print progress
        print('[Valid #{}] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, valid_epoch_loss, valid_epoch_acc, time.time() - start_time))
    wandb.log({"Train loss": train_epoch_loss, "Valid loss": valid_epoch_loss,
              "Train acc": train_epoch_acc, "Valid acc": valid_epoch_acc, "epoch": epoch})
    save_checkpoint(model, optimizer, save_path, epoch)
    scheduler.step()

cat: /sys/module/amdgpu/initstate: No such file or directory
ERROR:root:Driver not initialized (amdgpu not found in modules)


Epoch 0 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #0] Loss: 0.0447 Acc: 15.9142% Time: 143.7249s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #0] Loss: 0.0345 Acc: 26.7200% Time: 176.2316s
Epoch 1 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #1] Loss: 0.0237 Acc: 44.8925% Time: 324.3369s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #1] Loss: 0.0243 Acc: 43.7700% Time: 356.1232s
Epoch 2 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #2] Loss: 0.0130 Acc: 66.8683% Time: 492.6542s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #2] Loss: 0.0165 Acc: 58.9367% Time: 524.0300s
Epoch 3 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #3] Loss: 0.0065 Acc: 82.7725% Time: 671.0013s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #3] Loss: 0.0132 Acc: 66.2467% Time: 702.5880s
Epoch 4 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #4] Loss: 0.0032 Acc: 91.8558% Time: 838.2601s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #4] Loss: 0.0096 Acc: 75.0067% Time: 870.1852s
Epoch 5 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #5] Loss: 0.0016 Acc: 96.3500% Time: 1005.8755s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #5] Loss: 0.0068 Acc: 82.4567% Time: 1037.5688s
Epoch 6 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #6] Loss: 0.0007 Acc: 98.9058% Time: 1186.2472s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #6] Loss: 0.0034 Acc: 91.8867% Time: 1217.9366s
Epoch 7 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #7] Loss: 0.0002 Acc: 99.8933% Time: 1367.8048s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #7] Loss: 0.0016 Acc: 97.1233% Time: 1399.8439s
Epoch 8 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #8] Loss: 0.0001 Acc: 99.9850% Time: 1548.0314s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #8] Loss: 0.0016 Acc: 97.7067% Time: 1580.0668s
Epoch 9 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #9] Loss: 0.0001 Acc: 99.9925% Time: 1729.8351s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #9] Loss: 0.0018 Acc: 97.8233% Time: 1761.4512s
Epoch 10 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #10] Loss: 0.0001 Acc: 99.9933% Time: 1910.0851s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #10] Loss: 0.0019 Acc: 97.8267% Time: 1942.5050s
Epoch 11 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #11] Loss: 0.0003 Acc: 99.9117% Time: 2090.1973s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #11] Loss: 0.0194 Acc: 54.1267% Time: 2122.1813s
Epoch 12 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #12] Loss: 0.0209 Acc: 52.3033% Time: 2261.3466s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #12] Loss: 0.0165 Acc: 59.8733% Time: 2292.5287s
Epoch 13 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #13] Loss: 0.0052 Acc: 86.5958% Time: 2440.3897s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #13] Loss: 0.0112 Acc: 71.4700% Time: 2471.8030s
Epoch 14 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #14] Loss: 0.0017 Acc: 95.9325% Time: 2609.8796s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #14] Loss: 0.0053 Acc: 86.4133% Time: 2642.3340s
Epoch 15 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #15] Loss: 0.0004 Acc: 99.4725% Time: 2791.8138s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #15] Loss: 0.0017 Acc: 96.4500% Time: 2823.4891s
Epoch 16 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #16] Loss: 0.0001 Acc: 99.9783% Time: 2962.7630s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #16] Loss: 0.0014 Acc: 97.9233% Time: 2994.4506s
Epoch 17 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #17] Loss: 0.0001 Acc: 99.9917% Time: 3133.9185s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #17] Loss: 0.0016 Acc: 98.1133% Time: 3165.6885s
Epoch 18 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #18] Loss: 0.0001 Acc: 99.9875% Time: 3305.5278s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #18] Loss: 0.0019 Acc: 98.0333% Time: 3336.8630s
Epoch 19 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #19] Loss: 0.0002 Acc: 99.9792% Time: 3485.8184s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

[Valid #19] Loss: 0.0021 Acc: 97.6000% Time: 3517.5316s
Epoch 20 running


Training:   0%|          | 0/1200 [00:00<?, ?it/s]

[Train #20] Loss: 0.0001 Acc: 99.9850% Time: 3657.6650s


Validation:   0%|          | 0/300 [00:00<?, ?it/s]

In [None]:
# save_path = 'model/ResNet18.pth'
# torch.save(model.state_dict(), save_path)

In [None]:
# Plot
plt.figure(figsize=(6,6))
plt.plot(np.arange(1,num_epochs), train_accuracy,'-o')
plt.plot(np.arange(1,num_epochs), valid_accuracy,'-o')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train','Test'])
plt.title('Cross Validation')
plt.show()

In [None]:
y_pred = []
y_true = []
total_correct = 0
total_error = 0
total_instances = 0
model.to(device)
model.eval()
with torch.inference_mode():
    for inputs, labels in tqdm(test_dataloader, desc="Testing"):
            inputs, labels = inputs.to(device), labels.to(device)
            output = model(inputs)

            output = (torch.max(torch.exp(output), 1)[1]).data.cpu().numpy()
            y_pred.extend(output)

            labels = labels.data.cpu().numpy()
            y_true.extend(labels)
            
            correct_predictions = sum(output == labels).item()
            error_predictions = sum(output != labels).item()
            total_correct += correct_predictions
            total_error += error_predictions
            total_datapoints += len(inputs)       

In [None]:
print('accuracy: ', accuracy_score(y_true, y_pred))
print('total correct: ', total_correct)
print('total error: ', total_error)
print('total datapoints: ', total_datapoints)

In [None]:
cf_matrix = confusion_matrix(y_true, y_pred)

label_name = []
for label in ImageFolder(data_dir).class_to_idx.items():
    label_n = mapping['class'][label[0]]
    label_name.append(label_n)

In [None]:
df_cm = pd.DataFrame(cf_matrix / np.sum(cf_matrix, axis=1)[:, None], index = [i for i in label_name],
                     columns = [i for i in label_name])

df_cm.to_excel("cm_original.xlsx",
             sheet_name='Sheet_name_1') 