In [17]:
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from PIL import Image
from torchvision.datasets import ImageFolder
from torchvision.transforms import Compose, ToTensor, Resize, Normalize, Grayscale
from torch.utils.data import DataLoader
import os
from sklearn.metrics import classification_report
import torch.nn.functional as F
import resnet
import pandas as pd

In [21]:
test_path = "./Data/cmnist/random/test/"

In [19]:
device = torch.device(f'cuda:0' if torch.cuda.is_available() else 'cpu')

In [130]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),

    transforms.RandomApply([transforms.RandomCrop(200)], p=0.5),    
    transforms.RandomHorizontalFlip(p=0.5),                        
    transforms.RandomRotation(degrees=15),                         
    transforms.ColorJitter(brightness=0.2, contrast=0.2, 
                            saturation=0.2, hue=0.1),               
    transforms.RandomApply([transforms.Lambda(
        lambda img: img + torch.randn_like(img) * 0.05)], p=0.5),  

    transforms.ToTensor()                                     
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])               
])

In [99]:
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize grayscale images (mean, std)
])
test_dataset = datasets.MNIST(root='./Data', train=False, transform=transform, download=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [131]:
train_dataset = ImageFolder(test_path, transform=transform_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, num_workers=4, shuffle=True)

In [22]:
dataset = ImageFolder(test_path, transform=Compose([Resize((224,224)), 
                                                                    ToTensor(),    
                                                                    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]))

In [23]:
data_loader = torch.utils.data.DataLoader(dataset, batch_size=128, num_workers=4, shuffle=True)

In [24]:
!ls output/models

best_resnet_18_cmnist.pth	 best_resnet_34_cmnist.pth
best_resnet_18_cmnist_fixed.pth


In [100]:
model_18 = resnet.ResNet18()
model_18.load_state_dict(torch.load('output_gray/models/best_resnet_18_mnist.pth'))
model_18.eval()

  model_18.load_state_dict(torch.load('output_gray/models/best_resnet_18_mnist.pth'))


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [101]:
all_preds = []
all_labels = []
test = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        model_18.to(device)
        outputs = model_18(images)

        softmax_scores = F.softmax(outputs, dim=1)
        
        max_values, preds = torch.max(softmax_scores, dim=1)
        
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        test.extend(max_values.cpu().numpy())

In [105]:
df = pd.DataFrame({'y_true':all_labels, 'y_pred':all_preds, 'score':test})

In [107]:
filtered_df = df[df.y_true == df.y_pred]

In [114]:
filtered_df.groupby('y_true')['score'].agg(['mean', 'std']).reset_index()

Unnamed: 0,y_true,mean,std
0,0,0.995369,0.030924
1,1,0.998843,0.016785
2,2,0.985709,0.068013
3,3,0.998087,0.021226
4,4,0.978837,0.074607
5,5,0.997109,0.027488
6,6,0.989252,0.051817
7,7,0.99193,0.045349
8,8,0.974121,0.089166
9,9,0.982569,0.072547


In [103]:
print(classification_report(all_preds, all_labels, target_names=dataset.classes))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       1.00      0.96      0.98      1181
           2       0.98      1.00      0.99      1019
           3       1.00      0.98      0.99      1029
           4       0.97      0.99      0.98       967
           5       0.99      0.95      0.97       928
           6       0.97      0.99      0.98       939
           7       0.99      0.99      0.99      1027
           8       0.97      1.00      0.98       943
           9       0.97      0.99      0.98       987

    accuracy                           0.98     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.98      0.98      0.98     10000



In [25]:
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 10)
model.load_state_dict(torch.load('output/models/best_resnet_18_cmnist_fixed.pth'))
model.eval()

  model.load_state_dict(torch.load('output/models/best_resnet_18_cmnist_fixed.pth'))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [26]:
results = []
y_true = []
test = []
with torch.no_grad():
    for images, labels in data_loader:
        model.to(device)
        images = images.to(device)
        outputs = model(images)

        softmax_scores = F.softmax(outputs, dim=1)
        
        max_values, preds = torch.max(softmax_scores, dim=1)
        
        preds = torch.argmax(outputs, dim=1)
        results.extend(preds.cpu().numpy())
        y_true.extend(labels.cpu().numpy())
        test.extend(max_values.cpu().numpy())


In [27]:
print(classification_report(y_true, results, target_names=dataset.classes))

              precision    recall  f1-score   support

           0       0.99      0.39      0.55       875
           1       0.44      0.34      0.38      1170
           2       0.32      0.76      0.45      1095
           3       0.46      0.47      0.47      1035
           4       0.07      0.04      0.05      1085
           5       0.32      0.69      0.44       895
           6       0.79      0.18      0.29       890
           7       0.16      0.15      0.15      1025
           8       0.05      0.01      0.01       960
           9       0.21      0.26      0.23       970

    accuracy                           0.33     10000
   macro avg       0.38      0.33      0.30     10000
weighted avg       0.37      0.33      0.30     10000



In [28]:
df = pd.DataFrame({'y_true':y_true, 'y_pred':results, 'score':test})
filtered_df = df[df.y_true == df.y_pred]
filtered_df.groupby('y_true')['score'].agg(['mean', 'std']).reset_index()

Unnamed: 0,y_true,mean,std
0,0,0.764925,0.189216
1,1,0.926951,0.133001
2,2,0.941718,0.123314
3,3,0.982778,0.067785
4,4,0.587959,0.148277
5,5,0.967887,0.095677
6,6,0.731159,0.172446
7,7,0.761096,0.17421
8,8,0.643452,0.080573
9,9,0.644023,0.192744
