In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm
from torchvision.models import resnet18, ResNet18_Weights

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [2]:
# Space-separated lookup table with three columns; 'nwid' becomes the index so
# that mapping['class'][<nwid>] yields the value of the 'class' column.
df = pd.read_csv(
    'map_clsloc.txt',
    sep=" ",
    names=["nwid", "id", "class"],
)
df = df.set_index('nwid')
mapping = df.to_dict()

In [3]:
# Root directory handed to ImageFolder (via DILLEMADataset) for evaluation.
data_dir = 'imagenet-gen/'

In [4]:
class DILLEMADataset(Dataset):
    """Dataset that forwards every operation to a wrapped ``ImageFolder``."""

    def __init__(self, data_dir, transform=None):
        """Build the underlying ``ImageFolder`` for ``data_dir``.

        Args:
            data_dir: directory passed straight to ``ImageFolder``.
            transform: optional transform passed straight to ``ImageFolder``.
        """
        folder = ImageFolder(data_dir, transform=transform)
        self.data = folder

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return whatever the wrapped dataset yields at position ``idx``."""
        return self.data[idx]

    @property
    def classes(self):
        """The ``classes`` attribute of the wrapped dataset."""
        return self.data.classes

    @property
    def imgs(self):
        """The ``imgs`` attribute of the wrapped dataset."""
        return self.data.imgs

    @property
    def class_to_idx(self):
        """The ``class_to_idx`` attribute of the wrapped dataset."""
        return self.data.class_to_idx

In [5]:
# Per-channel normalisation statistics.
# NOTE(review): presumably the standard ImageNet mean/std — confirm they match
# the statistics used when the checkpoint was trained.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Resize to 256, centre-crop to 224, convert to tensor, then normalise.
_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
preprocess = transforms.Compose(_preprocess_steps)

In [6]:
# Evaluation set: every image under data_dir, run through the preprocessing pipeline.
dataset = DILLEMADataset(data_dir, transform=preprocess)

In [7]:
# Sanity check: number of samples in the evaluation set.
len(dataset)

125000

In [8]:
# Inspect one sample to confirm the transform produces a normalised tensor.
sample = dataset[50]
image, label = sample
print(label, image.shape)
image

0 torch.Size([3, 224, 224])


tensor([[[-0.5938,  0.0912, -0.0801,  ..., -0.8678, -0.5767, -0.5596],
         [-0.4397, -0.0972, -0.2684,  ...,  0.0569, -0.3198, -0.2856],
         [-0.2342, -0.3369, -0.0972,  ..., -0.3541, -0.4911, -0.5253],
         ...,
         [-0.9877, -1.3644, -1.6213,  ..., -0.4226, -0.7308, -0.7308],
         [-0.6281, -0.5767, -1.1932,  ...,  0.5022,  0.0398, -0.0972],
         [-1.1760, -0.6281, -0.9363,  ..., -0.2513, -0.0801,  0.0569]],

        [[-0.2150,  0.4678,  0.2927,  ..., -0.4601, -0.1450, -0.1099],
         [-0.0749,  0.2577,  0.0826,  ...,  0.5203,  0.1352,  0.2052],
         [ 0.1527,  0.0126,  0.2752,  ...,  0.1001, -0.0049, -0.0224],
         ...,
         [-0.8277, -1.1954, -1.4055,  ..., -0.1099, -0.4776, -0.5126],
         [-0.4601, -0.3901, -0.9853,  ...,  0.7654,  0.2577,  0.0651],
         [-1.0378, -0.4601, -0.7052,  ..., -0.0224,  0.1001,  0.2052]],

        [[-0.4973,  0.1651, -0.0092,  ..., -0.6541, -0.4275, -0.4450],
         [-0.3578, -0.0615, -0.2184,  ...,  0

In [9]:
# Inverse of dataset.class_to_idx: index -> class key.
idx_to_class = dict(
    (index, class_key) for class_key, index in dataset.class_to_idx.items()
)

In [10]:
# Fixed order (no shuffling) so the i-th prediction lines up with dataset.imgs[i].
dataloader = DataLoader(
    dataset,
    batch_size=100,
    num_workers=8,
    shuffle=False,
)

In [11]:
# Pull exactly one batch to sanity-check what the loader yields.
# next(iter(...)) states the intent directly instead of a for-loop that breaks
# on its first iteration.
images, labels = next(iter(dataloader))
print(labels)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])


In [12]:
# Initialize model
# ResNet-18 built with torchvision's default pretrained weights.
# NOTE(review): a later cell calls load_model(), which runs load_state_dict on
# this model, so these pretrained values are presumably all replaced — confirm
# whether the pretrained download is needed here at all.
weights = ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

In [13]:
def load_model(model, load_path, map_location='cpu'):
    """Restore model weights from a training checkpoint.

    Only the model weights are restored; any optimizer state or epoch counter
    stored in the checkpoint is ignored.

    Args:
        model: module whose ``load_state_dict`` receives the stored weights.
        load_path: path of a checkpoint written with ``torch.save``; it must be
            a mapping containing a ``'model_state_dict'`` entry.
        map_location: forwarded to ``torch.load``. Defaults to ``'cpu'`` so a
            checkpoint saved from a GPU run can still be opened on a machine
            without CUDA. ``load_state_dict`` copies the values into the
            model's existing parameters, so the model's own device is unchanged.

    Returns:
        The same ``model`` object, updated in place.

    Raises:
        KeyError: if the checkpoint has no ``'model_state_dict'`` entry.
    """
    # NOTE(review): torch.load unpickles the file — only open trusted checkpoints.
    checkpoint = torch.load(load_path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    return model

In [14]:
# Replace the model's weights with those stored in the training checkpoint.
checkpoint_path = 'model_ckpt_4/model_89.pt'
model = load_model(model, checkpoint_path)

In [15]:
# Run the model over the whole loader, collecting per-sample predictions and
# running totals of correct / incorrect classifications.
y_pred = []
y_true = []
total_correct = 0
total_error = 0
total_datapoints = 0
model.to(device)
model.eval()
with torch.inference_mode():
    for inputs, labels in tqdm(dataloader, desc="Testing"):
        logits = model(inputs.to(device))

        # argmax straight on the logits: exp() is monotonic, so it never changes
        # the winning class, but it can overflow to inf and create false ties.
        preds = logits.argmax(dim=1).cpu().numpy()
        # Labels are only compared on the host, so they never need to visit the GPU.
        targets = labels.cpu().numpy()

        y_pred.extend(preds)
        y_true.extend(targets)

        # Vectorised count instead of Python's builtin sum over a numpy array.
        batch_correct = int((preds == targets).sum())
        batch_size = len(targets)
        total_correct += batch_correct
        total_error += batch_size - batch_correct
        total_datapoints += batch_size

Testing:   0%|          | 0/1250 [00:00<?, ?it/s]

In [16]:
# Headline numbers from the evaluation pass.
summary_rows = (
    ('accuracy: ', accuracy_score(y_true, y_pred)),
    ('total correct: ', total_correct),
    ('total error: ', total_error),
    ('total datapoints: ', total_datapoints),
)
for caption, value in summary_rows:
    print(caption, value)

accuracy:  0.998016
total correct:  124752
total error:  248
total datapoints:  125000


In [17]:
# Persist the (true, predicted) label pairs so the metrics can be recomputed
# later without re-running inference.
# Built directly from the two lists: no per-row Python loop, and no hidden
# dependency on len(dataset) matching the number of collected predictions.
df_true_pred = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
df_true_pred.to_excel("retrain_test_true_pred_aug_1.xlsx",
             sheet_name='Sheet_name_1')
df_true_pred.head()

Unnamed: 0,y_true,y_pred
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0


In [18]:
# Reload the saved label pairs; the first sheet column is the row index that
# to_excel wrote, so it is consumed as the index rather than as data.
df_true_pred = pd.read_excel(
    "retrain_test_true_pred_aug_1.xlsx",
    sheet_name='Sheet_name_1',
    index_col=0,
)
y_true, y_pred = (df_true_pred.iloc[:, col].tolist() for col in (0, 1))

In [19]:
# Accuracy plus macro-averaged precision / recall / F1 over all classes.
print('accuracy: ', accuracy_score(y_true, y_pred))
macro_scorers = (
    ('precision: ', precision_score),
    ('recall: ', recall_score),
    ('f1: ', f1_score),
)
for caption, scorer in macro_scorers:
    print(caption, scorer(y_true, y_pred, average="macro"))

accuracy:  0.998016
precision:  0.9980369352313541
recall:  0.9980160000000001
f1:  0.9980159888846712


In [21]:
cf_matrix = confusion_matrix(y_true, y_pred)

# Human-readable name for every class, in class-index order.
# Reuses the already-built dataset instead of constructing a second ImageFolder,
# which would rescan the whole image directory just to read the class keys.
label_name = [mapping['class'][class_key] for class_key in dataset.class_to_idx]

# The names are used as row/column labels of the matrix, so the sizes must agree.
assert cf_matrix.shape[0] == len(label_name), (
    "confusion matrix size does not match the number of dataset classes"
)

In [22]:
# Label the raw-count confusion matrix with class names and save it.
axis_labels = list(label_name)
df_cm = pd.DataFrame(cf_matrix, index=axis_labels, columns=list(label_name))

df_cm.to_excel(
    "cm_original_1.xlsx",
    sheet_name='Sheet_name_1',
)

In [23]:
import pandas as pd

# Read the saved confusion matrix back. No index_col is given, so the row labels
# arrive as an ordinary column rather than as the index.
excel_data_df = pd.read_excel(
    "cm_original_1.xlsx",
    sheet_name='Sheet_name_1',
)

In [24]:
# Row-normalised confusion matrix: each row is divided by its own total and
# scaled to percent.
row_totals = cf_matrix.sum(axis=1, keepdims=True)
cm_percent = cf_matrix / row_totals * 100
df_cm_percentage = pd.DataFrame(
    cm_percent,
    index=list(label_name),
    columns=list(label_name),
)

df_cm_percentage.to_excel(
    "cm_original_percentage_1.xlsx",
    sheet_name='Sheet_name_1',
)

In [25]:
# Preview the confusion matrix as read back from disk.
excel_data_df.head()

Unnamed: 0.1,Unnamed: 0,tench,goldfish,great_white_shark,tiger_shark,hammerhead,electric_ray,stingray,cock,hen,...,buckeye,coral_fungus,agaric,gyromitra,stinkhorn,earthstar,hen-of-the-woods,bolete,ear,toilet_tissue
0,tench,22,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,goldfish,0,22,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,great_white_shark,0,0,22,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,tiger_shark,0,0,2,19,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,hammerhead,0,0,1,2,21,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# One row per image: ground-truth index, file path, predicted index, and whether
# the prediction matched (stored as the strings 'True' / 'False').
columns = ['label', 'name', 'pred', 'T/F']
rows = []

for position, (path, target) in enumerate(dataset.imgs):
    predicted = y_pred[position]
    verdict = 'True' if target == predicted else 'False'
    rows.append([target, path, predicted, verdict])

df_list = pd.DataFrame(rows, columns=columns)
df_list.to_excel(
    "list_test_ori_1.xlsx",
    sheet_name='Sheet_name_1',
)
df_list.head()

Unnamed: 0,label,name,pred,T/F
0,0,lim25-imagenet-image/n01440764/n01440764_10026...,0,True
1,0,lim25-imagenet-image/n01440764/n01440764_10027...,0,True
2,0,lim25-imagenet-image/n01440764/n01440764_10029...,0,True
3,0,lim25-imagenet-image/n01440764/n01440764_10040...,0,True
4,0,lim25-imagenet-image/n01440764/n01440764_10042...,389,False


### Testing Scratch

In [79]:
# Scratch: look at one confusion-matrix row, restricted to the class columns.
scratch_row = 7
select_df = excel_data_df.loc[scratch_row:scratch_row, label_name]
select_df.head()

Unnamed: 0,tench,goldfish,great_white_shark,tiger_shark,hammerhead,electric_ray,stingray,cock,hen,ostrich,...,buckeye,coral_fungus,agaric,gyromitra,stinkhorn,earthstar,hen-of-the-woods,bolete,ear,toilet_tissue
7,0,0,0,0,0,0,0,23,1,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
# Human-readable name for every class, in class-index order.
# Reuses the already-built dataset instead of constructing a second ImageFolder,
# which would rescan the whole image directory just to read the class keys.
label_name = [mapping['class'][class_key] for class_key in dataset.class_to_idx]

In [80]:
# Keep only the columns of row 7 that hold a non-zero entry, i.e. the classes
# this row's images were actually assigned to.
# The mask is named nonzero_cols rather than `filter` so the builtin is not shadowed.
row_slice = excel_data_df.loc[7:7, :]
nonzero_cols = (row_slice != 0).any()
df_sel = row_slice.loc[:, nonzero_cols].rename(columns={'Unnamed: 0': ' '})
df_sel

Unnamed: 0,Unnamed: 1,cock,hen,Irish_terrier
7,cock,23,1,1


In [83]:
# Make the shell echo every expression statement in a cell, not only the last one.
# NOTE(review): this is a global setting that affects every cell run afterwards.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [84]:
from IPython.display import display

# For each of the first five confusion-matrix rows, show only the columns with a
# non-zero entry.
for row_idx in range(5):
    row_slice = excel_data_df.loc[row_idx:row_idx, :]
    # Named nonzero_cols rather than `filter` so the builtin is not shadowed.
    nonzero_cols = (row_slice != 0).any()
    df_sel = row_slice.loc[:, nonzero_cols].rename(columns={'Unnamed: 0': ' '})
    # Explicit display(): a bare expression inside a loop only renders when the
    # global ast_node_interactivity setting happens to be "all".
    display(df_sel)

Unnamed: 0,Unnamed: 1,tench,barracouta,coho
0,tench,22,2,1


Unnamed: 0,Unnamed: 1,goldfish,cicada,trifle,volcano
1,goldfish,22,1,1,1


Unnamed: 0,Unnamed: 1,great_white_shark,tiger_shark,grey_whale
2,great_white_shark,22,2,1


Unnamed: 0,Unnamed: 1,great_white_shark,tiger_shark,hammerhead,scuba_diver
3,tiger_shark,2,19,2,2


Unnamed: 0,Unnamed: 1,great_white_shark,tiger_shark,hammerhead,sturgeon
4,hammerhead,1,2,21,1
