# **İsmail Kağan Acar** - SE452 Assignment

# **Libraries**

(Development and training were initially done on Google Colab and later moved to a personal computer; some of the commented-out code snippets are leftovers from the Colab setup.)

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
#!pwd
#!ls
#!cd /content/drive/MyDrive/Colab\ Notebooks/
%pip install medmnist torchsummary
#!python -m medmnist save --flag=pathmnist --folder=/content/drive/MyDrive/Colab\ Notebooks/pathmnist/ --postfix=png --download=True --size=28

In [None]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
from torchsummary import summary

import medmnist
from medmnist import INFO, Evaluator

print(f"MedMNIST v{medmnist.__version__} @ {medmnist.HOMEPAGE}")

# **MedMNIST (PathMNIST) Dataset**

Official Explanation of Dataset: The PathMNIST is based on a prior study for predicting survival from colorectal cancer histology slides, providing a dataset (NCT-CRC-HE-100K) of 100,000 non-overlapping image patches from hematoxylin & eosin stained histological images, and a test dataset (CRC-VAL-HE-7K) of 7,180 image patches from a different clinical center. The dataset is comprised of 9 types of tissues, resulting in a multi-class classification task. We resize the source images of 3×224×224 into 3×28×28, and split NCT-CRC-HE-100K into training and validation set with a ratio of 9:1. The CRC-VAL-HE-7K is treated as the test set.

#### **Batch Size**
The batch size must be specified before the datasets are loaded.

In [None]:
# Mini-batch size shared by the train/val/test DataLoaders and recorded in the config dicts.
BATCH_SIZE = 64

#### **Dataset Initialization**

In [None]:
# Select the MedMNIST subset and read its metadata entry from the registry.
data_flag = 'pathmnist'
info = INFO[data_flag]

# Task type, channel count and number of label classes listed in the metadata.
task, n_channels = info['task'], info['n_channels']
n_classes = len(info['label'])

# Resolve the dataset class from the class name stored in the metadata.
DataClass = getattr(medmnist, info['python_class'])

#### **Data Preprocessing**

In [None]:
# Preprocessing pipeline applied to every image of every split.
#
# Steps that were tried and then disabled:
#   transforms.Resize((28, 28))  -> images are already preprocessed to 28x28
#   transforms.RandomHorizontalFlip(p=0.5)
#   transforms.RandomRotation(degrees=(-20, 20))
#   transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2)
#
# NOTE(review): 0.1307 / 0.3081 look like the commonly used MNIST grayscale
# statistics; a single mean/std pair is applied to all channels here.
# Consider per-channel PathMNIST statistics - confirm before changing, since
# the reported results were obtained with these values.
_preprocessing_steps = [
    transforms.ToTensor(),                       # image -> float tensor
    transforms.Normalize((0.1307,), (0.3081,)),  # (pixel - mean) / std
]
data_transform = transforms.Compose(_preprocessing_steps)

```Compose([,])```: Allows to chain the transformations.

```ToTensor()```: Turns images to PyTorch tensor.

```Normalize()```: normalized_pixel = (original_pixel - mean) / std


---



**Augmentation:**

*The dataset is pretty large, so augmentation may increase the computational cost dramatically.*

```RandomHorizontalFlip()```: Randomly flips the image horizontally (left to right); it may not be beneficial. (Experiment with it.)

```RandomRotation()```: Randomly rotates the image within the given range of degrees.

```ColorJitter()```: Randomly adjusts brightness, contrast, saturation, and hue.

**Data augmentation hurt this model's performance badly, so it is not used.**

#### **Load the Dataset**

(A random split of the training data affects validation badly.)

In [None]:
# Load the three official PathMNIST splits (downloaded on first use).
#
# Colab-only variant, kept for reference (works on Colab, not locally).
# It is written as real comments instead of a bare string literal because the
# "\ " sequence in the path is an invalid string escape and triggers a warning.
#   !ls /content/drive/MyDrive/Colab\ Notebooks/pathmnist
#   train_dataset = DataClass(root='./drive/MyDrive/Colab Notebooks/pathmnist/',split='train', transform=data_transform, download=True)
#   test_dataset = DataClass(root='./drive/MyDrive/Colab Notebooks/pathmnist/',split='test', transform=data_transform, download=True)
#   val_dataset = DataClass(root='./drive/MyDrive/Colab Notebooks/pathmnist/',split='val', transform=data_transform, download=True)

train_dataset = DataClass(split='train', transform=data_transform, download=True)
test_dataset = DataClass(split='test', transform=data_transform, download=True)
val_dataset = DataClass(split='val', transform=data_transform, download=True)

# A random split of the training set gave worse validation results, so it was
# dropped at an early stage in favour of the official validation split.
#train_size = int(len(train_dataset)*0.8)
#val_size = int(len(train_dataset) - train_size)
#train_dataset, val_dataset = data.random_split(train_dataset, [train_size, val_size])

# Wrap the datasets in DataLoaders.
train_loader = data.DataLoader(dataset=train_dataset,
                               # The data is divided into batches;
                               # batch_size is the number of samples per batch.
                               batch_size=BATCH_SIZE,

                               # Reshuffle the samples at every epoch so the
                               # batches differ from epoch to epoch.
                               shuffle=True,
                               num_workers=5,
                               pin_memory=True)  # page-locked host memory for faster host-to-GPU copies

# Validation and test data are evaluated in a fixed order.
val_loader = data.DataLoader(dataset=val_dataset,
                             batch_size=BATCH_SIZE,
                             num_workers=5,
                             shuffle=False)

test_loader = data.DataLoader(dataset=test_dataset,
                              batch_size=BATCH_SIZE,
                              num_workers=5,
                              shuffle=False)

train_size = len(train_dataset)
val_size = len(val_dataset)
test_size = len(test_dataset)

print("Train size: ",train_size," Val Size: ", val_size," Test Size: ", test_size)


# **Hyperparameters**

## Base Model Config
**This part was deactivated after the experiments ended and the best parameters were chosen.**

In [None]:
# Baseline configuration used as the starting point of the experiments.

# Augmentation switches (all off in the baseline).
base_augmentation_conf = {
    "randomrotation": False,
    "colorjitter": False,
    "randomhorizontalflip": False,
}

# Settings of the convolutional feature extractor.
base_conv_layer_conf = {
    "conv_layer_count": 2,
    "filters": 128,        # output channels of every conv layer
    "kernel_size": 3,
    "padding": "same",
    "pooling": {
        "kernel_size": 3,
        "stride": 2,
    },
    "dropout": 0.5,
    "activation": "relu",
}

# Settings of the fully connected classifier head.
base_classifier_conf = {
    "neurons": 512,
    "activation": "relu",
    "dropout": 0.5,
}

# Training-level settings.
base_general_conf = {
    "epochs": 20,
    "batch_size": BATCH_SIZE,
    "optimizer": "adam",
    "loss_function": "CrossEntropyLoss",
}

# Optimizer hyperparameters.
base_optimizer_conf = {
    "learning_rate": 0.0001,  # use a 10x larger value with SGD
    "momentum_for_sgd": 0.9,
    "weight_decay_for_sgd": 0.0001,
}

# Sizes of the loaded dataset splits.
base_dataset_info = {
    "train_size": train_size,
    "val_size": val_size,
    "test_size": len(test_dataset),
}

# Early-stopping settings (disabled in the baseline).
base_early_stop_conf = {
    "early_stop": False,
    "early_stop_patience": 5,
    "early_stop_min_delta": 0.001,
}

## Best model Hyperparameters
These are the hyperparameters that were chosen after detailed experiments.

In [None]:
# Active configuration: the hyperparameters chosen after the experiments.
# The model, the optimizer/loss helpers and the training loop read these dicts.

# Augmentation switches (all off).
augmentation_conf = {
    "randomrotation":False,
    "colorjitter": False,
    "randomhorizontalflip":False
}

# Settings of the convolutional feature extractor.
conv_layer_conf = {
    "conv_layer_count": 2,
    "filters": 128,
    "kernel_size": 5,
    "padding": 'same',
    "pooling": {
        "kernel_size": 4,
        "stride": 2
    },
    "dropout": 0.2,
    "activation": 'relu',
}

# Settings of the fully connected classifier head.
classifier_conf = {
    "neurons": 2048,
    "activation": 'relu',
    "dropout":0.2
}

# Training-level settings.
general_conf = {
    "epochs": 50,
    "batch_size": BATCH_SIZE,
    "optimizer": 'adam',  
    "loss_function":"CrossEntropyLoss",
}

# Optimizer hyperparameters.
optimizer_conf = {
    "learning_rate": 0.0001, # use a 10x larger value with SGD
    # The SGD branch of optimizer_function() reads these two keys; without
    # them, switching "optimizer" to 'sgd' fails with a KeyError.
    # The values mirror base_optimizer_conf.
    "momentum_for_sgd": 0.9,
    "weight_decay_for_sgd": 0.0001,
}

# Sizes of the loaded dataset splits.
dataset_info = {
    "train_size": train_size,
    "val_size": val_size,
    "test_size": len(test_dataset),
}

# Early-stopping settings (disabled).
early_stop_conf = {
    "early_stop": False,
    "early_stop_patience": 5,
    "early_stop_min_delta":0.001,
}

## Helper Functions for Automation

In [None]:
def optimizer_function(model):
    """Build the optimizer selected by ``general_conf['optimizer']`` for *model*.

    Supported names are ``'adam'``, ``'sgd'`` and ``'rmsprop'``.

    Args:
        model: Module whose parameters will be optimized.

    Returns:
        A ``torch.optim`` optimizer bound to ``model.parameters()``.

    Raises:
        ValueError: If the configured optimizer name is not supported.
    """
    optimizer_name = general_conf['optimizer']
    learning_rate = optimizer_conf['learning_rate']

    if optimizer_name == 'adam':
        return optim.Adam(model.parameters(), lr=learning_rate)

    if optimizer_name == 'sgd':
        # The active optimizer_conf may not define the SGD-only keys, so fall
        # back to the baseline values instead of failing with a KeyError.
        return optim.SGD(model.parameters(),
                         lr=learning_rate,
                         momentum=optimizer_conf.get('momentum_for_sgd', 0.9),
                         weight_decay=optimizer_conf.get('weight_decay_for_sgd', 0.0001),
                         nesterov=True)

    if optimizer_name == 'rmsprop':
        # NOTE: RMSprop uses fixed hyperparameters and ignores the configured
        # learning rate.
        return optim.RMSprop(model.parameters(), lr=0.001, alpha=0.99, eps=1e-08,
                             weight_decay=0, momentum=0.9, centered=False)

    # Fail loudly instead of silently returning None.
    raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")
    

def loss_function():
    """Return the loss module selected by ``general_conf['loss_function']``.

    Only ``'CrossEntropyLoss'`` is supported.

    Raises:
        ValueError: If the configured loss name is not supported.
    """
    loss_name = general_conf['loss_function']
    if loss_name == 'CrossEntropyLoss':
        return nn.CrossEntropyLoss()
    # Fail loudly instead of silently returning None.
    raise ValueError(f"Unsupported loss function: {loss_name!r}")

# **Model Architecture**

In [None]:
class Model(nn.Module):
    """CNN classifier built from ``conv_layer_conf`` and ``classifier_conf``.

    The feature extractor consists of four convolution blocks
    (conv -> batch norm -> activation -> [max pool] -> dropout); the second
    block has no pooling. The classifier is a two-layer fully connected head.

    Args:
        in_channels: Number of channels of the input images (default 3, RGB).
        num_classes: Number of output logits (default 9).
        image_size: Height/width of the square input images; only used to
            size the first fully connected layer (default 28).
    """

    def __init__(self, in_channels=3, num_classes=9, image_size=28):
        super().__init__()
        self._in_channels = in_channels
        self._image_size = image_size

        # Feature extraction.
        # NOTE: conv_layer_conf["conv_layer_count"] is currently not used; the
        # blocks are listed manually because pooling after every block shrinks
        # the feature map too much once more than three blocks are stacked.
        self.features = nn.Sequential(
            # Block 1 takes the raw image. Its layers are added flat (not as a
            # nested Sequential) so parameter names in the state dict stay
            # the same as before.
            *self._conv_block_layers(in_channels, pooling=True),

            self.additional_conv_layer_without_pooling(),  # block 2
            self.additional_conv_layer(),  # block 3
            self.additional_conv_layer(),  # block 4 (drop its pooling if a 5th block is added)
        )

        # Number of features after flattening the extractor output.
        self._to_linear = None
        self.calculate_maxpool()

        # Classification
        self.classifier = nn.Sequential(
            nn.Flatten(),  # (batch, C, H, W) -> (batch, C*H*W)

            nn.Linear(self._to_linear, classifier_conf['neurons']),  # FC1
            nn.BatchNorm1d(classifier_conf['neurons']),
            self.get_activation(classifier_conf['activation']),
            nn.Dropout(classifier_conf['dropout']),

            nn.Linear(classifier_conf['neurons'], num_classes),  # FC2 -> logits
        )

    def forward(self, x):
        """Return the class logits for a batch of images *x*."""
        x = self.features(x)
        x = self.classifier(x)
        return x

    def calculate_maxpool(self):
        """Store the flattened feature-extractor output size in ``self._to_linear``.

        A dummy image is passed through ``self.features`` in eval mode, so the
        probe neither updates the BatchNorm running statistics nor applies
        dropout. The previous training mode is restored afterwards.
        """
        was_training = self.features.training
        self.features.eval()
        try:
            with torch.no_grad():
                device = next(self.features.parameters()).device
                # (batch_size, channels, height, width)
                dummy_input = torch.zeros(1, self._in_channels,
                                          self._image_size, self._image_size,
                                          device=device)
                dummy_output = self.features(dummy_input)
        finally:
            self.features.train(was_training)

        self._to_linear = dummy_output.view(1, -1).size(1)

    def _conv_block_layers(self, in_channels, pooling):
        """Return the layers of one convolution block as a list."""
        filters = conv_layer_conf['filters']
        layers = [
            nn.Conv2d(in_channels,
                      filters,
                      kernel_size=conv_layer_conf['kernel_size'],
                      padding=conv_layer_conf['padding']),  # 'same' keeps the spatial size
            nn.BatchNorm2d(filters),
            self.get_activation(conv_layer_conf['activation']),
        ]
        if pooling:
            # Downsampling: reduces the spatial size.
            layers.append(nn.MaxPool2d(kernel_size=conv_layer_conf['pooling']['kernel_size'],
                                       stride=conv_layer_conf['pooling']['stride']))
        layers.append(nn.Dropout(conv_layer_conf['dropout']))
        return layers

    def additional_conv_layer(self):
        """Return a filters -> filters convolution block with max pooling."""
        return nn.Sequential(
            *self._conv_block_layers(conv_layer_conf['filters'], pooling=True)
        )

    def additional_conv_layer_without_pooling(self):
        """Return a filters -> filters convolution block without max pooling.

        Used where another pooling step would shrink the feature map too much.
        """
        return nn.Sequential(
            *self._conv_block_layers(conv_layer_conf['filters'], pooling=False)
        )

    def get_activation(self, activation_name):
        """Return a new activation module for ``'relu'``, ``'sigmoid'`` or ``'tanh'``.

        Raises:
            KeyError: If *activation_name* is not one of the supported names.
        """
        activations = {
            'relu': nn.ReLU,
            'sigmoid': nn.Sigmoid,
            'tanh': nn.Tanh,
        }
        return activations[activation_name]()

Print out model structure:

In [None]:
# Build a throwaway model on the available device and print its layer summary.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dummy_model = Model()
dummy_model = dummy_model.to(device)
summary(dummy_model, (3, 28, 28))

# **Training**

## **Early Stop**
This algorithm stops training when the validation loss has stopped improving.
It is probably unnecessary for this model because of the low epoch counts.

**Early stopping was implemented but never used after the experimentation guideline was published.**

In [None]:
class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    Args:
        patience: Number of counted non-improving checks that triggers a stop.
        min_delta: Margin above the best loss that a value must exceed to be
            counted as non-improving.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        """Record *validation_loss* and return True when training should stop."""
        best_so_far = self.min_validation_loss

        # New best value: remember it and reset the counter.
        if validation_loss < best_so_far:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # Worse than the best value by more than the margin: count it.
        if validation_loss > (best_so_far + self.min_delta):
            self.counter += 1
            return self.counter >= self.patience

        # Within the margin: neither an improvement nor a counted failure.
        return False

# Module-level stopper configured from early_stop_conf; the training loop
# consults it when early stopping is enabled.
early_stopper = EarlyStopper(
    patience=early_stop_conf['early_stop_patience'],
    min_delta=early_stop_conf['early_stop_min_delta'],
)

## **Start Training**

Calling this function starts training for active HyperParameter config.
It returns val. and train losses and accuracies along with the used model

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over *loader* and return ``(mean batch loss, accuracy)``.

    The model is trained when *optimizer* is given; otherwise it is evaluated
    in eval mode with gradient tracking disabled.
    """
    is_training = optimizer is not None
    model.train(is_training)

    running_loss = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(is_training):
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)

            # The loss function needs 1-D class indices. reshape(-1) flattens
            # the labels and, unlike squeeze(), also keeps a batch of one
            # sample 1-D.
            targets = targets.reshape(-1)

            if is_training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            if is_training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()

            # The class with the highest logit is the prediction.
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    return running_loss / len(loader), correct / total


def start_train():
    """Train a new ``Model`` with the active hyperparameter configuration.

    Reads ``general_conf`` and ``early_stop_conf``, trains on ``train_loader``
    and validates on ``val_loader`` after every epoch.

    Returns:
        Tuple ``(train_loss_list, val_loss_list, train_accuracy_list,
        val_accuracy_list, model)`` with one list entry per completed epoch.
    """
    # Use the GPU when available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    model = Model().to(device)
    criterion = loss_function()
    optimizer = optimizer_function(model)
    number_of_epochs = general_conf['epochs']

    use_early_stop = early_stop_conf['early_stop']
    if use_early_stop:
        # The stopper is shared by every run; reset it so the best loss and
        # counter of a previous run do not leak into this one.
        early_stopper.counter = 0
        early_stopper.min_validation_loss = float('inf')

    train_loss_list, val_loss_list = [], []
    train_accuracy_list, val_accuracy_list = [], []

    for epoch in range(number_of_epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
        train_loss_list.append(train_loss)
        train_accuracy_list.append(train_accuracy)

        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
        val_loss_list.append(val_loss)
        val_accuracy_list.append(val_acc)

        print(f"Epoch [{epoch+1}/{number_of_epochs}]",
            f"Train Loss: {train_loss_list[-1]:.4f}, Train Acc: {train_accuracy_list[-1]:.4f} |",
            f"Val Loss: {val_loss_list[-1]:.4f}, Val Acc: {val_accuracy_list[-1]:.4f}")

        if use_early_stop and early_stopper.early_stop(val_loss):
            print("Early Stopped Training.")
            break

    return train_loss_list,val_loss_list,train_accuracy_list,val_accuracy_list, model

This function is used for automation. It logs training metrics and related data to the json files

In [None]:
import json
import os
from datetime import datetime
import time

def train_and_log(log_name):
    """Train with the active configuration and write the metrics to a JSON log.

    The log is written to ``experiments/<log_name>.json``. A failure is
    reported and appended to ``errors.log`` instead of being raised, so that
    one failing experiment does not abort a whole series of runs.

    Args:
        log_name: Experiment name, used as the log file name.

    Returns:
        The trained model, or ``None`` if training itself failed.
    """
    import traceback

    # Bound up front so the final return works even when training fails.
    model = None
    try:
        print(log_name, "Started")

        start_time = time.time()
        train_loss, val_loss, train_acc, val_acc, model = start_train() # start training
        end_time = time.time()

        duration = end_time - start_time
        # Keep this exact "<minutes> min <seconds> sec" layout: the duration
        # analysis parses the string back by splitting on " min " and " sec".
        time_took = f"{duration // 60} min {duration % 60} sec"
        print(time_took, "Ended")

        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "time_took":time_took,
            "metrics": {
                "train_loss": [float(i) for i in train_loss],
                "val_loss": [float(i) for i in val_loss],
                "train_accuracy": [float(i) for i in train_acc],
                "val_accuracy": [float(i) for i in val_acc]
            }
        }

        # Create the output directory so a finished run is not lost because
        # the folder does not exist yet.
        os.makedirs("experiments", exist_ok=True)
        log_file = f"experiments/{log_name}.json"

        with open(log_file, "w") as file:
            json.dump(log_entry, file, indent=2)
            file.write("\n")

    except Exception:
        # Best-effort boundary: record what failed, with the full traceback.
        print(log_name, "- Failed")
        with open("errors.log", "a") as file:
            file.write(f"{datetime.now().isoformat()} {log_name}\n")
            file.write(traceback.format_exc())
            file.write("\n")
    return model
            

## **Customized Grid Search**

This algorithm runs the experiments automatically. Its biggest limitation is that it cannot change the optimizer or the batch size. Each parameter is varied individually while the other parameters keep their configured values.

In [None]:
# Search space: every parameter is paired with the values to try.
# (Training all of these takes a very long time.)
_search_space = {
    "kernel_size": [1, 2, 3, 5, 7, 9],
    "conv_dropout": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
    "conv_layer_count": [1, 2, 3, 4, 5, 6],
    "neurons": [32, 64, 128, 256, 512, 1024],
    "pool_kernel_size": [1, 2, 3, 4, 5, 6],
    "pool_stride": [1, 2, 3, 4, 5, 6],
    "classifier_dropout": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
    "epochs": [10, 15, 20, 25, 30, 50],
    "activation_function": ["relu", "tanh", "sigmoid"],
}

# One single-key dict per parameter, in the order listed above.
experiment_list = [{name: values} for name, values in _search_space.items()]

# Parameters that the automated search does not handle.
special_cases = ["batch_size", "optimizer_type"]

In [None]:
import copy 
# Without copy.deepcopy(), assigning a dict to another variable only creates
# a second reference to the same object, so the "backup" dict changes
# whenever the original dict is updated.
# I wasted 6 hours training 13 models before noticing that :'(


# Pristine copies of the active configuration, restored after every run.
conv_conf_backup = copy.deepcopy(conv_layer_conf)
classifier_conf_backup = copy.deepcopy(classifier_conf)
general_conf_backup = copy.deepcopy(general_conf)

# Automated experiments: set to True to run the whole search space.
experiment_check = False
if experiment_check:
    for parameter in experiment_list:
        # Every entry is a single-key dict: {parameter name: values to try}.
        key, value_list = list(parameter.items())[0]

        for value in value_list:
            # Configuration entries that each experiment key overrides.
            # Built per run because the conf dicts are re-created below.
            override_targets = {
                'kernel_size': [(conv_layer_conf, "kernel_size")],
                'conv_dropout': [(conv_layer_conf, "dropout")],
                'conv_layer_count': [(conv_layer_conf, "conv_layer_count")],
                'neurons': [(classifier_conf, "neurons")],
                'pool_kernel_size': [(conv_layer_conf["pooling"], "kernel_size")],
                'pool_stride': [(conv_layer_conf["pooling"], "stride")],
                'classifier_dropout': [(classifier_conf, "dropout")],
                'epochs': [(general_conf, "epochs")],
                'activation_function': [(conv_layer_conf, "activation"),
                                        (classifier_conf, "activation")],
            }
            for conf, field in override_targets.get(key, ()):
                conf[field] = value

            log_name = f"{key}-{value}"
            model = train_and_log(log_name=log_name)

            # Restore the untouched configuration before the next run.
            conv_layer_conf = copy.deepcopy(conv_conf_backup)
            classifier_conf = copy.deepcopy(classifier_conf_backup)
            general_conf = copy.deepcopy(general_conf_backup)

else:
    model = train_and_log("custom_model_name-0") # for manual training


# **Evaluation and Test**

## Loading Saved Experiment Results

In [None]:
from os import listdir
import json

# Group the saved experiment logs by experiment type.
# Result: {experiment type: [{experiment value: log content}, ...]}
results_grouped = dict()

# Sorted so the grouping (and thus plot/table order) is deterministic;
# listdir() returns the entries in arbitrary order.
for file in sorted(listdir("experiments")):
    # Only "<type>-<value>.json" logs are experiment results; skip anything
    # else that may live in the folder (hidden files, checkpoints, ...).
    if not file.endswith(".json"):
        continue
    exp_name = file[:-len(".json")]
    exp_type, separator, exp_value = exp_name.partition('-')
    if not separator:
        print(f"Skipping {file}: expected '<type>-<value>.json'")
        continue

    path = f"experiments/{file}"
    with open(path, "r") as file_data:
        exp_result = json.load(file_data)

    results_grouped.setdefault(exp_type, []).append({exp_value: exp_result})

#print(json.dumps(results_grouped,indent=2))

#print(json.dumps(results_grouped,indent=2))


## Plotting the Accuracy and Loss Graphs

In [None]:
import matplotlib.pyplot as plt

metrics_to_plot = ['train_loss', 'val_loss', 'train_accuracy', 'val_accuracy']

# One row of four panels (one per metric) for every experiment group.
group_count = len(results_grouped)
plt.figure(figsize=(5 * 4, 5 * group_count))

for row, (exp_group, experiments) in enumerate(results_grouped.items()):
    for col, metric in enumerate(metrics_to_plot):
        # Subplot indices are 1-based and run row by row.
        plt.subplot(group_count, 4, row * 4 + col + 1)

        # One curve per experiment value that recorded this metric.
        for experiment in experiments:
            for name, result in experiment.items():
                recorded = result['metrics']
                if metric in recorded:
                    plt.plot(recorded[metric], label=f'{name}')

        plt.ylabel("value")
        plt.xlabel("epochs")
        plt.title(f'{exp_group} - {metric}', fontsize=10)
        plt.grid(True)
        plt.legend(fontsize=8)

plt.tight_layout()
plt.show()


## Evaluation of Validation Metrics

(I got help from AI on how to create and display a pandas DataFrame for better visualization.)

In [None]:
import pandas as pd


# One table per experiment group, summarizing the validation metrics.
for exp_group, group_experiments in results_grouped.items():
    table_data = []
    for experiment in group_experiments:
        for exp_value, result in experiment.items():
            val_losses = result['metrics']['val_loss']
            val_accuracies = result['metrics']['val_accuracy']
            table_data.append({
                'experiments': f"{exp_group}-{exp_value}",
                'mean_val_loss': sum(val_losses)/len(val_losses),
                'mean_val_acc': sum(val_accuracies)/len(val_accuracies),
                'best_val_loss': min(val_losses),
                'best_val_acc': max(val_accuracies),
            })

    # Best accuracy first; ties are broken by the lower best loss.
    df = pd.DataFrame(table_data)
    df = df.sort_values(by=['best_val_acc', 'best_val_loss'], ascending=[False, True])

    column_formats = {
        'mean_val_loss': '{:.4f}',
        'mean_val_acc': '{:.2%}',
        'best_val_loss': '{:.4f}',
        'best_val_acc': '{:.2%}'
    }
    styled = df.style.format(column_formats)
    styled = styled.background_gradient(subset=['best_val_acc'], cmap='Greens')
    display(styled)


## Calculate and Plot Training Durations

In [None]:
import pandas as pd

# Collect the training duration of every experiment and the total time spent.
table_data = []
total = 0  # total training time in seconds
for exp_group in results_grouped:

    for experiment in results_grouped[exp_group]:
        for exp_value in experiment:
            # Logged as "<minutes> min <seconds> sec".
            string = experiment[exp_value]["time_took"]

            minute , rest = string.split(" min ")
            seconds, _ = rest.split(" sec")
            time_in_seconds= float(minute)*60 + float(seconds)
            total += time_in_seconds

            # "min.sec" display value (e.g. 100 s -> 1.4 = 1 min 40 s).
            # Round to whole seconds once and split with divmod, instead of
            # rounding the minutes to two decimals and converting back.
            whole_minutes, whole_seconds = divmod(round(time_in_seconds), 60)
            time_in_minutes = round(whole_minutes + whole_seconds / 100, 2)

            table_data.append({
                'experiments': f"{exp_group}-{exp_value}",

                'time_took (min.sec)': time_in_minutes

            })

df = pd.DataFrame(table_data)

# Longest runs first.
df = df.sort_values(by=['time_took (min.sec)'], ascending=[False])

# Total training time in hours.
print((total/60)/60)

display(df.style
    .format({
        'time_took (min.sec)': '{}',
    })
    .background_gradient(subset=['time_took (min.sec)'], cmap='Greens')
)


## Testing from Trained Best Model

In [None]:
# Evaluate the trained model on the test split and report the metrics.
is_best_model_trained = True 
if is_best_model_trained:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    #model = Model().to(device)
    #model.load_state_dict(torch.load("trained_model.pt"))
    #model = torch.load('trained_model.pt')
    criterion = loss_function()
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_targets = []
    num_classes = None  # taken from the width of the model output below

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            # 1-D class indices; reshape(-1) also keeps a batch of one sample
            # 1-D, which squeeze() would not.
            targets = targets.reshape(-1)

            outputs = model(inputs)
            num_classes = outputs.size(1)  # one logit per class
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            all_preds.extend(predicted.cpu().tolist())
            all_targets.extend(targets.cpu().tolist())

    test_loss = test_loss / len(test_loader)
    test_acc = correct / total
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

    from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, f1_score
    import numpy as np
    import matplotlib.pyplot as plt

    # Pass the full label set explicitly so the matrix and the report always
    # cover every class, even one that never appears in the test results.
    class_ids = list(range(num_classes))

    # Confusion Matrix
    cm = confusion_matrix(all_targets, all_preds, labels=class_ids)
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Confusion Matrix")
    plt.colorbar()
    # One tick per class (a fixed count of 10 does not match the 9 classes).
    tick_marks = np.arange(len(class_ids))
    plt.xticks(tick_marks, tick_marks)
    plt.yticks(tick_marks, tick_marks)
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    # Weighted-average classification metrics
    precision = precision_score(all_targets, all_preds, average='weighted')
    recall = recall_score(all_targets, all_preds, average='weighted')
    f1 = f1_score(all_targets, all_preds, average='weighted')

    print("\nClassification Metrics:")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

    print("\nClassification Report:")
    print(classification_report(all_targets, all_preds, labels=class_ids,
                                target_names=[str(i) for i in class_ids]))

In [None]:
#torch.save(model.state_dict(), "model") # saving model for later uses