In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm
from torchvision.models import resnet50, ResNet50_Weights

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [2]:
df = pd.read_csv('map_clsloc.txt', sep=" ", names=["nwid", "id", "class"]).set_index('nwid')
mapping = df.to_dict()

In [3]:
data_dir = 'lim25-imagenet-image/'

In [4]:
class DILLEMADataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data = ImageFolder(data_dir, transform=transform)
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]
   
    @property
    def classes(self):
        return self.data.classes
    
    @property
    def imgs(self):
        return self.data.imgs
    
    @property
    def class_to_idx(self):
        return self.data.class_to_idx

In [5]:
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [6]:
dataset = DILLEMADataset(
    data_dir=data_dir,
    transform=preprocess
)

In [7]:
len(dataset)

25000

In [8]:
image, label = dataset[50]
print(label, image.shape)
image

2 torch.Size([3, 224, 224])


tensor([[[-0.0629, -0.0801, -0.1314,  ...,  0.6906,  0.4508,  0.3823],
         [-0.1314, -0.1486, -0.2513,  ...,  0.9474,  0.5364,  0.3138],
         [-0.3027, -0.3369, -0.4397,  ...,  0.5193,  0.3823,  0.3823],
         ...,
         [ 1.7352,  1.7523,  1.7523,  ...,  0.3481,  0.0398,  0.0741],
         [ 1.7180,  1.7180,  1.7180,  ...,  0.4508, -0.0116,  0.0227],
         [ 1.7009,  1.7180,  1.7009,  ...,  0.5364, -0.0972, -0.2342]],

        [[ 0.5378,  0.5203,  0.4678,  ...,  1.0105,  0.8179,  0.7829],
         [ 0.4853,  0.4678,  0.3627,  ...,  1.2906,  0.8880,  0.6954],
         [ 0.3102,  0.2752,  0.1702,  ...,  0.8354,  0.7479,  0.7829],
         ...,
         [ 1.8859,  1.9034,  1.9034,  ...,  0.5028,  0.1877,  0.2227],
         [ 1.8683,  1.8683,  1.8683,  ...,  0.6078,  0.1352,  0.1702],
         [ 1.8508,  1.8683,  1.8508,  ...,  0.6954,  0.0476, -0.0749]],

        [[ 0.7751,  0.7576,  0.7054,  ...,  1.2631,  1.0017,  0.9494],
         [ 0.7228,  0.7054,  0.6008,  ...,  1

In [9]:
idx_to_class = {v: k for k, v in dataset.class_to_idx.items()}

In [10]:
dataloader = DataLoader(dataset, batch_size=100, num_workers=8, shuffle=False)

In [11]:
for images, labels in dataloader:
    break
print(labels)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3])


In [12]:
# Initialize model
weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights)

In [13]:
y_pred = []
y_true = []
total_correct = 0
total_error = 0
total_datapoints = 0
model.to(device)
model.eval()
with torch.inference_mode():
    for inputs, labels in tqdm(dataloader, desc="Testing"):
            inputs, labels = inputs.to(device), labels.to(device)
            output = model(inputs)

            output = (torch.max(torch.exp(output), 1)[1]).data.cpu().numpy()
            y_pred.extend(output)

            labels = labels.data.cpu().numpy()
            y_true.extend(labels)
            
            correct_predictions = sum(output == labels).item()
            error_predictions = sum(output != labels).item()
            total_correct += correct_predictions
            total_error += error_predictions
            total_datapoints += len(inputs)       

Testing:   0%|          | 0/250 [00:00<?, ?it/s]

In [14]:
print('accuracy: ', accuracy_score(y_true, y_pred))
print('total correct: ', total_correct)
print('total error: ', total_error)
print('total datapoints: ', total_datapoints)

accuracy:  0.89796
total correct:  22449
total error:  2551
total datapoints:  25000


In [15]:
columns =['y_true', 'y_pred']
lst = []

for i in range(len(dataset)):
    true = y_true[i]
    pred = y_pred[i]
    lst.append([true, pred])
df_true_pred = pd.DataFrame(lst, columns=columns)
df_true_pred.to_excel("ResNet50_test_true_pred_ori_1.xlsx",
             sheet_name='Sheet_name_1') 
df_true_pred.head()

Unnamed: 0,y_true,y_pred
0,0,0
1,0,0
2,0,0
3,0,0
4,0,389


In [2]:
df_true_pred = pd.read_excel("ResNet50_test_true_pred_ori_1.xlsx", sheet_name='Sheet_name_1', index_col=0) 
y_true = df_true_pred.iloc[:,0].tolist()
y_pred = df_true_pred.iloc[:,1].tolist()

In [3]:
print('accuracy: ', accuracy_score(y_true, y_pred))
print('precision: ', precision_score(y_true, y_pred, average="macro"))
print('recall: ', recall_score(y_true, y_pred, average="macro"))
print('f1: ', f1_score(y_true, y_pred, average="macro"))

accuracy:  0.89796
precision:  0.9049736900580836
recall:  0.8979600000000001
f1:  0.8970495428765787


In [19]:
cf_matrix = confusion_matrix(y_true, y_pred)

label_name = []
for label in dataset.class_to_idx.items():
    label_n = mapping['class'][label[0]]
    label_name.append(label_n)

In [20]:
df_cm = pd.DataFrame(cf_matrix, index = [i for i in label_name],
                     columns = [i for i in label_name])

df_cm.to_excel("ResNet50_cm_original_1.xlsx",
             sheet_name='Sheet_name_1') 

In [21]:
df_cm_percentage = pd.DataFrame(cf_matrix / np.sum(cf_matrix, axis=1)[:, None]*100, index = [i for i in label_name],
                     columns = [i for i in label_name])

df_cm_percentage.to_excel("ResNet50_cm_original_percentage_1.xlsx",
             sheet_name='Sheet_name_1') 

In [22]:
import pandas as pd

excel_data_df = pd.read_excel("ResNet50_cm_original_1.xlsx", sheet_name='Sheet_name_1') 

In [23]:
excel_data_df.head()

Unnamed: 0.1,Unnamed: 0,tench,goldfish,great_white_shark,tiger_shark,hammerhead,electric_ray,stingray,cock,hen,...,buckeye,coral_fungus,agaric,gyromitra,stinkhorn,earthstar,hen-of-the-woods,bolete,ear,toilet_tissue
0,tench,23,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,goldfish,0,25,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,great_white_shark,0,0,24,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,tiger_shark,0,0,0,25,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,hammerhead,0,0,0,0,25,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
columns =['label', 'name', 'pred', 'T/F']
lst = []

for i in range(len(dataset)):
    label = dataset.imgs[i][1]
    name = dataset.imgs[i][0]
    prediction = y_pred[i]
    if label == prediction:
        value = 'True'
    else :
        value = 'False'
    lst.append([label, name, prediction, value])

df_list = pd.DataFrame(lst, columns=columns)
df_list.to_excel("ResNet50_list_test_ori_1.xlsx",
             sheet_name='Sheet_name_1') 
df_list.head()

Unnamed: 0,label,name,pred,T/F
0,0,lim25-imagenet-image/n01440764/n01440764_10026...,0,True
1,0,lim25-imagenet-image/n01440764/n01440764_10027...,0,True
2,0,lim25-imagenet-image/n01440764/n01440764_10029...,0,True
3,0,lim25-imagenet-image/n01440764/n01440764_10040...,0,True
4,0,lim25-imagenet-image/n01440764/n01440764_10042...,389,False
