In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm
from torchvision.models import resnet18, ResNet18_Weights

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from IPython.display import display

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


In [2]:
# Read the space-separated class table and index it by its 'nwid' column.
df = (
    pd.read_csv('map_clsloc.txt', sep=" ", names=["nwid", "id", "class"])
    .set_index('nwid')
)
# Nested dict: column name -> {nwid -> value}, e.g. mapping['class'][nwid].
mapping = df.to_dict()

In [3]:
# Root folder of the image dataset; handed to DILLEMADataset when the dataset is built.
data_dir = 'imagenet-gen/'

In [4]:
class DILLEMADataset(Dataset):
    """Thin ``Dataset`` wrapper that delegates everything to an ``ImageFolder``.

    Length, indexing and the ``classes`` / ``imgs`` / ``class_to_idx``
    metadata are all forwarded to the wrapped object stored in ``self.data``.
    """

    def __init__(self, data_dir, transform=None):
        """Create the wrapped ``ImageFolder``.

        Args:
            data_dir: Directory passed to ``ImageFolder`` as its root.
            transform: Optional callable forwarded unchanged to ``ImageFolder``.
        """
        self.data = ImageFolder(root=data_dir, transform=transform)

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        wrapped = self.data
        return len(wrapped)

    def __getitem__(self, idx):
        """Return whatever the wrapped dataset yields at position ``idx``."""
        wrapped = self.data
        return wrapped[idx]

    @property
    def classes(self):
        """The ``classes`` attribute of the wrapped dataset."""
        wrapped = self.data
        return wrapped.classes

    @property
    def imgs(self):
        """The ``imgs`` attribute of the wrapped dataset."""
        wrapped = self.data
        return wrapped.imgs

    @property
    def class_to_idx(self):
        """The ``class_to_idx`` attribute of the wrapped dataset."""
        wrapped = self.data
        return wrapped.class_to_idx

In [5]:
# Per-channel mean / std used to normalise the image tensor.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Resize, take a 224 centre crop, convert to a tensor, then normalise.
_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
preprocess = transforms.Compose(_preprocess_steps)

In [6]:
# Build the evaluation dataset: images under `data_dir`, each run through `preprocess`.
dataset = DILLEMADataset(data_dir, transform=preprocess)

In [7]:
# Number of samples available for evaluation.
len(dataset)

125000

In [8]:
# Fetch a single sample (position 50) to check the transformed tensor and its class index.
image, label = dataset[50]
print(label)
# NOTE(review): a bare `image` renders the whole tensor; `image.shape` would be a lighter check.
image

0


tensor([[[-0.5938,  0.0912, -0.0801,  ..., -0.8678, -0.5767, -0.5596],
         [-0.4397, -0.0972, -0.2684,  ...,  0.0569, -0.3198, -0.2856],
         [-0.2342, -0.3369, -0.0972,  ..., -0.3541, -0.4911, -0.5253],
         ...,
         [-0.9877, -1.3644, -1.6213,  ..., -0.4226, -0.7308, -0.7308],
         [-0.6281, -0.5767, -1.1932,  ...,  0.5022,  0.0398, -0.0972],
         [-1.1760, -0.6281, -0.9363,  ..., -0.2513, -0.0801,  0.0569]],

        [[-0.2150,  0.4678,  0.2927,  ..., -0.4601, -0.1450, -0.1099],
         [-0.0749,  0.2577,  0.0826,  ...,  0.5203,  0.1352,  0.2052],
         [ 0.1527,  0.0126,  0.2752,  ...,  0.1001, -0.0049, -0.0224],
         ...,
         [-0.8277, -1.1954, -1.4055,  ..., -0.1099, -0.4776, -0.5126],
         [-0.4601, -0.3901, -0.9853,  ...,  0.7654,  0.2577,  0.0651],
         [-1.0378, -0.4601, -0.7052,  ..., -0.0224,  0.1001,  0.2052]],

        [[-0.4973,  0.1651, -0.0092,  ..., -0.6541, -0.4275, -0.4450],
         [-0.3578, -0.0615, -0.2184,  ...,  0

In [9]:
# Invert class-name -> index so a predicted index can be mapped back to its class name.
idx_to_class = dict(
    (class_index, class_name)
    for class_name, class_index in dataset.class_to_idx.items()
)

In [10]:
# shuffle=False keeps batches in dataset order; later cells pair y_pred[i] with
# dataset.imgs[i] purely by position, so the order must not change.
dataloader = DataLoader(dataset, batch_size=100, num_workers=8, shuffle=False)

In [11]:
# Grab only the first batch for a quick sanity check. Unlike a `for ... break`
# loop, this fails loudly (StopIteration) if the loader yields nothing instead
# of silently leaving `images` / `labels` undefined.
images, labels = next(iter(dataloader))

In [12]:
# print(images.shape, labels.shape)
# Show the class indices contained in the batch fetched in the previous cell.
print(labels)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])


In [13]:
# Initialize model
# ResNet-18 loaded with torchvision's default pretrained weights.
weights = ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

# Switch to evaluation mode; as the cell's last expression this also displays the module repr.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [14]:
# Evaluate the model on every batch and record, in dataset order, the predicted
# and true class indices together with running correct / error / total counters.
y_pred = []
y_true = []
total_correct = 0
total_error = 0
total_datapoints = 0
model.to(device)
model.eval()
with torch.inference_mode():
    for inputs, labels in tqdm(dataloader, desc="Testing"):
        inputs, labels = inputs.to(device), labels.to(device)

        # The predicted class is the arg-max over the class dimension of the raw
        # model output. Exponentiating first cannot change the winner and only
        # risks overflow, so it is omitted.
        output = model(inputs).argmax(dim=1).cpu().numpy()
        y_pred.extend(output)

        labels = labels.cpu().numpy()
        y_true.extend(labels)

        # Vectorised numpy reduction instead of Python's builtin sum().
        correct_predictions = int((output == labels).sum())
        error_predictions = len(labels) - correct_predictions
        total_correct += correct_predictions
        total_error += error_predictions
        total_datapoints += len(inputs)

Testing:   0%|          | 0/1250 [00:00<?, ?it/s]

In [15]:
# Summarise the evaluation run: sklearn accuracy plus the counters accumulated in the test loop.
print('accuracy: ', accuracy_score(y_true, y_pred))
print('total correct: ', total_correct)
print('total error: ', total_error)
print('total datapoints: ', total_datapoints)

accuracy:  0.467048
total correct:  58381
total error:  66619
total datapoints:  125000


In [16]:
columns = ['y_true', 'y_pred']

# Build the frame straight from the two label lists (one row per evaluated
# sample) instead of appending row by row in a Python loop. pandas raises if
# the two lists differ in length.
df_true_pred = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred}, columns=columns)

# Persist the labels so later cells can reload them without re-running inference.
df_true_pred.to_excel("test_true_pred_aug_1.xlsx",
                      sheet_name='Sheet_name_1')
df_true_pred.head()

Unnamed: 0,y_true,y_pred
0,0,394
1,0,0
2,0,0
3,0,149
4,0,0


In [2]:
# Reload the saved true / predicted labels; the first sheet column is used as the index.
df_true_pred = pd.read_excel("test_true_pred_aug_1.xlsx", sheet_name='Sheet_name_1', index_col=0) 
y_true = df_true_pred.iloc[:,0].tolist()
y_pred = df_true_pred.iloc[:,1].tolist()
# NOTE(review): a bare `y_true` renders the entire list; `y_true[:10]` would be enough for a check.
y_true

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [3]:
# Overall accuracy plus macro-averaged precision, recall and F1.
print('accuracy: ', accuracy_score(y_true, y_pred))
print('precision: ', precision_score(y_true, y_pred, average="macro"))
print('recall: ', recall_score(y_true, y_pred, average="macro"))
print('f1: ', f1_score(y_true, y_pred, average="macro"))

accuracy:  0.467048
precision:  0.5044638026404964
recall:  0.467048
f1:  0.4600119098447453


In [17]:
columns = ['label', 'name', 'pred', 'T/F']

# Predictions are matched to images purely by position, so the two sequences
# must have the same length; fail early instead of mis-pairing or truncating.
if len(y_pred) != len(dataset.imgs):
    raise ValueError(
        f"expected one prediction per image, got {len(y_pred)} predictions "
        f"for {len(dataset.imgs)} images"
    )

# One row per image: class index, file path, predicted index and a
# 'True' / 'False' string telling whether the prediction matched.
lst = [
    [label, name, prediction, 'True' if label == prediction else 'False']
    for (name, label), prediction in zip(dataset.imgs, y_pred)
]

df_list = pd.DataFrame(lst, columns=columns)
df_list.to_excel("list_test_aug_1.xlsx",
                 sheet_name='Sheet_name_1')
df_list.head()

Unnamed: 0,label,name,pred,T/F
0,0,imagenet-gen/n01440764/n01440764_10026_0.JPEG,394,False
1,0,imagenet-gen/n01440764/n01440764_10026_1.JPEG,0,True
2,0,imagenet-gen/n01440764/n01440764_10026_2.JPEG,0,True
3,0,imagenet-gen/n01440764/n01440764_10026_3.JPEG,149,False
4,0,imagenet-gen/n01440764/n01440764_10026_4.JPEG,0,True


In [18]:
# Reload the saved labels from disk and preview the first rows.
df_true_pred = pd.read_excel("test_true_pred_aug_1.xlsx", sheet_name='Sheet_name_1', index_col=0) 
df_true_pred.head()

Unnamed: 0,y_true,y_pred
0,0,394
1,0,0
2,0,0
3,0,149
4,0,0


In [21]:
# First data column holds the true labels, the second the predictions.
y_true = df_true_pred.iloc[:,0].tolist()
y_pred = df_true_pred.iloc[:,1].tolist()
# y_pred

In [22]:
# Accuracy computed from the reloaded label lists.
print('Augmented_accuracy_score :', accuracy_score(y_true, y_pred))

Augmented_accuracy_score : 0.467048


In [23]:
# Order classes explicitly by their index so row/column k of the matrix is
# guaranteed to correspond to label_name[k], independent of dict iteration order.
classes_by_index = sorted(dataset.class_to_idx.items(), key=lambda item: item[1])

# Human-readable name for each class, looked up in the 'class' column of the mapping table.
label_name = [mapping['class'][class_key] for class_key, _ in classes_by_index]

# Pass the full list of class indices so the matrix always has one row/column
# per class, even if a class never occurs in y_true or y_pred; otherwise its
# shape could shrink and no longer match label_name.
cf_matrix = confusion_matrix(
    y_true,
    y_pred,
    labels=[class_index for _, class_index in classes_by_index],
)

In [24]:
# Raw-count confusion matrix with the class names on both axes, saved for later inspection.
df_cm = pd.DataFrame(
    cf_matrix,
    index=list(label_name),
    columns=list(label_name),
)
df_cm.to_excel("cm_aug_1.xlsx", sheet_name='Sheet_name_1')

In [25]:
# Normalise each row by its total so every entry is a percentage of that row's samples.
row_totals = np.sum(cf_matrix, axis=1, keepdims=True)
cm_percent = cf_matrix / row_totals * 100

df_cm_percentage = pd.DataFrame(
    cm_percent,
    index=list(label_name),
    columns=list(label_name),
)
df_cm_percentage.to_excel("cm_aug_percentage_1.xlsx", sheet_name='Sheet_name_1')

In [26]:
# Display the row-normalised (percentage) confusion matrix.
df_cm_percentage

Unnamed: 0,tench,goldfish,great_white_shark,tiger_shark,hammerhead,electric_ray,stingray,cock,hen,ostrich,...,buckeye,coral_fungus,agaric,gyromitra,stinkhorn,earthstar,hen-of-the-woods,bolete,ear,toilet_tissue
tench,50.4,1.6,0.0,1.6,1.6,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
goldfish,0.8,46.4,0.0,0.0,1.6,0.0,1.6,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
great_white_shark,0.0,1.6,40.0,20.0,9.6,0.0,8.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
tiger_shark,0.0,1.6,12.8,38.4,21.6,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
hammerhead,0.0,0.0,3.2,7.2,74.4,0.0,0.8,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
earthstar,0.0,0.0,0.0,0.0,0.0,0.8,0.0,0.0,0.8,0.0,...,2.4,0.0,0.0,0.0,2.4,21.6,1.6,0.8,0.8,0.0
hen-of-the-woods,0.0,0.0,0.0,0.0,0.0,0.0,1.6,0.0,0.0,0.0,...,0.0,0.8,0.0,4.8,0.0,0.8,30.4,0.0,0.0,0.0
bolete,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.6,0.0,0.0,0.0,1.6,2.4,0.8,35.2,0.0,0.0
ear,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.2,0.0


In [24]:
import pandas as pd

# Read the raw-count confusion matrix back in. No index_col is given, so the class-name
# column is loaded as an ordinary first column ('Unnamed: 0'); later cells rely on that layout.
excel_data_df = pd.read_excel("cm_aug_1.xlsx", sheet_name='Sheet_name_1') 

In [27]:
# Preview the first rows of the reloaded confusion matrix.
excel_data_df.head()

Unnamed: 0.1,Unnamed: 0,tench,goldfish,great_white_shark,tiger_shark,hammerhead,electric_ray,stingray,cock,hen,...,buckeye,coral_fungus,agaric,gyromitra,stinkhorn,earthstar,hen-of-the-woods,bolete,ear,toilet_tissue
0,tench,63,2,0,2,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,goldfish,1,58,0,0,2,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0
2,great_white_shark,0,2,50,25,12,0,10,0,0,...,0,0,0,0,0,0,0,0,0,0
3,tiger_shark,0,2,16,48,27,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,hammerhead,0,0,4,9,93,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
# For the first five classes, show how many samples were classified correctly
# (the diagonal entry) and how many were not (the rest of that row).
for i in range(5):
    label_n = excel_data_df.iloc[i, 0]
    # Column 0 holds the class name, so the diagonal of row i sits at column i + 1.
    true_number = excel_data_df.iloc[i, i + 1]
    # Derive the row total from the data instead of hard-coding 125 samples per class.
    row_total = excel_data_df.iloc[i, 1:].astype(int).sum()
    error_number = row_total - true_number
    df_TF = pd.DataFrame([(label_n, true_number, error_number)],
                         columns=['Label', 'True', 'False'])
    # Explicit display: a bare expression inside a loop is only rendered when
    # InteractiveShell.ast_node_interactivity has been set to "all" beforehand.
    display(df_TF)

Unnamed: 0,Label,True,False
0,tench,63,62


Unnamed: 0,Label,True,False
0,goldfish,58,67


Unnamed: 0,Label,True,False
0,great_white_shark,50,75


Unnamed: 0,Label,True,False
0,tiger_shark,48,77


Unnamed: 0,Label,True,False
0,hammerhead,93,32


In [41]:
from IPython.core.interactiveshell import InteractiveShell
# Make IPython display the value of every expression statement, not only a cell's last
# one, so a bare DataFrame expression inside a loop is shown on each iteration.
InteractiveShell.ast_node_interactivity = "all"

In [42]:
# For the first five classes, show only the columns of that class's row that
# contain a non-zero value.
for i in range(5):
    # Boolean mask over columns; named so it does not shadow the builtin `filter`.
    nonzero_cols = (excel_data_df.loc[i:i, :] != 0).any()
    df_sel = excel_data_df.loc[i:i, nonzero_cols].rename(columns={'Unnamed: 0': ' '})
    # Explicit display so the output does not depend on the
    # InteractiveShell.ast_node_interactivity setting.
    display(df_sel)

Unnamed: 0,Unnamed: 1,tench,goldfish,tiger_shark,hammerhead,tree_frog,killer_whale,dugong,whippet,Weimaraner,...,gar,bathing_cap,jean,jigsaw_puzzle,piggy_bank,swing,tub,umbrella,wig,ice_lolly
0,tench,63,2,2,2,3,1,2,1,1,...,9,1,1,1,1,1,1,1,1,1


Unnamed: 0,Unnamed: 1,tench,goldfish,hammerhead,stingray,axolotl,green_lizard,sea_snake,platypus,sea_slug,...,jigsaw_puzzle,piggy_bank,pinwheel,shower_cap,French_loaf,bell_pepper,cardoon,bubble,coral_reef,geyser
1,goldfish,1,58,2,2,1,2,1,1,1,...,2,1,1,1,1,1,2,2,14,1


Unnamed: 0,Unnamed: 1,goldfish,great_white_shark,tiger_shark,hammerhead,stingray,banded_gecko,jellyfish,pelican,killer_whale,...,ice_bear,sturgeon,baseball,bathing_cap,fireboat,maillot.1,snorkel,bubble,sandbar,scuba_diver
2,great_white_shark,2,50,25,12,10,1,1,2,3,...,1,1,3,2,1,1,3,1,1,1


Unnamed: 0,Unnamed: 1,goldfish,great_white_shark,tiger_shark,hammerhead,jay,killer_whale,dugong,sea_lion,flagpole,quill,snorkel,bubble,coral_reef,scuba_diver
3,tiger_shark,2,16,48,27,1,16,1,1,1,1,1,1,6,3


Unnamed: 0,Unnamed: 1,great_white_shark,tiger_shark,hammerhead,stingray,kite,crayfish,albatross,dugong,eel,rock_beauty,gar,bathing_cap,hand_blower,jigsaw_puzzle,pencil_box,quill,snorkel,warplane
4,hammerhead,4,9,93,1,1,1,1,1,5,1,1,1,1,1,1,1,1,1
