# Import of libraries


In [1]:
import numpy as np
import pandas as pd

import os

from tqdm import tqdm

import shutil
from sklearn.model_selection import train_test_split

import scipy.io as sio

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

from torch.utils.data import Dataset, DataLoader
from torch.nn.parallel import DataParallel
from torchvision import datasets
from torchvision.transforms import ToTensor

import torch.nn.init as init

import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

import time

import copy

import random
from random import shuffle

import matplotlib.pyplot as plt

from PIL import Image

In [14]:
import os
import shutil

# Remove the weights produced by a previous run of this notebook.
# The folder does not exist on a fresh session, so only delete it when present
# (a plain rmtree would raise FileNotFoundError and stop the cell).
stale_weights_dir = '/kaggle/working/weights/vgg13_bn_29CL_full'
if os.path.isdir(stale_weights_dir):
    shutil.rmtree(stale_weights_dir)

# Split

In [None]:
# Folder holding the original training set (one sub-folder per class)
dataset_path = "C:/dataset_MC3/TrainingSet"

# Destination folders of the three partitions produced by the split
train_path = "C:/new_data/TRAIN"
val_path = "C:/new_data/VAL"
test_path = "C:/new_data/TEST"

# Create the training, validation and test folders if they don't already exist
for folder in [train_path, val_path, test_path]:
    os.makedirs(folder, exist_ok=True)

# Classes of the problem: only sub-directories count, so a stray file in the
# dataset root cannot break the per-class listing below.
classes = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)

# Split proportions and seed
train_perc = 0.6
val_perc = 0.2
test_perc = 0.2
random_seed = 42

for class_ in classes:
    input_folder = os.path.join(dataset_path, class_)
    train_output_folder = os.path.join(train_path, class_)
    val_output_folder = os.path.join(val_path, class_)
    test_output_folder = os.path.join(test_path, class_)

    # Create the class sub-folder inside each partition
    for output_folder in (train_output_folder, val_output_folder, test_output_folder):
        os.makedirs(output_folder, exist_ok=True)

    # PNG files of the class. os.listdir returns names in arbitrary order, so they
    # are sorted to make the seeded split reproducible across machines.
    files_list = sorted(name for name in os.listdir(input_folder) if name.endswith(".png"))

    # First carve out validation + test, then divide that remainder between the two
    train_files, test_val_files = train_test_split(files_list, test_size=val_perc + test_perc, random_state=random_seed)
    val_files, test_files = train_test_split(test_val_files, test_size=test_perc / (val_perc + test_perc),
                                             random_state=random_seed)

    # Copy every file into the folder of ITS OWN partition (previously all three
    # loops wrote into the validation folder).
    for partition_files, output_folder in (
        (train_files, train_output_folder),
        (val_files, val_output_folder),
        (test_files, test_output_folder),
    ):
        for file in partition_files:
            src = os.path.join(input_folder, file)
            dst = os.path.join(output_folder, file)
            shutil.copy(src, dst)

print("Training set, validation set and test set created successfully")


# Check Class Distribution

In [3]:
def count_photos_per_class(dataset_path):
    """
    Count the entries contained in each class folder of a dataset.

    Args:
        dataset_path (str): Root folder of the dataset, with one sub-folder per class.

    Returns:
        dict: Maps every sub-folder name to the number of entries found directly
        inside it. Keys are inserted in alphabetical order.
    """
    # Keep only the sub-directories of the root, then order them alphabetically
    subfolder_names = [
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    ]
    subfolder_names.sort()

    # One dictionary entry per class folder: name -> number of entries inside it
    return {
        name: len(os.listdir(os.path.join(dataset_path, name)))
        for name in subfolder_names
    }

In [5]:
# Locations of the three splits and of the original (unsplit) training set
train_path = "/kaggle/input/weatherdataset/TRAIN"
val_path = "/kaggle/input/weatherdataset/VAL"
test_path = "/kaggle/input/weatherdataset/TEST"
orig_train_path = "/kaggle/input/dataset-mc3/TrainingSet"

# Count the photos found in every class folder of each location
num_photos_per_class_train = count_photos_per_class(train_path)
num_photos_per_class_val = count_photos_per_class(val_path)
num_photos_per_class_test = count_photos_per_class(test_path)
num_photos_per_class_orig_train = count_photos_per_class(orig_train_path)

# Print one section per location, one line per class
for section_title, counts_by_class in (
    ("TRAIN:", num_photos_per_class_train),
    ("VAL:", num_photos_per_class_val),
    ("TEST:", num_photos_per_class_test),
    ("ORIGINAL_TRAIN:", num_photos_per_class_orig_train),
):
    print(section_title)
    for class_name, num_photos in counts_by_class.items():
        print(f"{class_name}: {num_photos} foto")
    

TRAIN:
BR: 502 foto
CL: 511 foto
DA: 495 foto
RA: 510 foto
SF: 494 foto
SH: 485 foto
VAL:
BR: 167 foto
CL: 171 foto
DA: 165 foto
RA: 170 foto
SF: 165 foto
SH: 162 foto
TEST:
BR: 168 foto
CL: 171 foto
DA: 165 foto
RA: 171 foto
SF: 165 foto
SH: 162 foto
ORIGINAL_TRAIN:
BR: 837 foto
CL: 853 foto
DA: 825 foto
RA: 851 foto
SF: 824 foto
SH: 809 foto


# Check Device

In [3]:
# Report the installed PyTorch version and whether CUDA can be used
print(torch.__version__)
print(torch.cuda.is_available())

# get_device_name() raises on a machine without CUDA, so query it only when a
# GPU is actually present; otherwise the CPU fallback below would never be reached.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

# Device used by the rest of the notebook: first GPU when available, CPU otherwise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

2.1.2
True
Tesla T4
cuda:0


# Dataset Definition

In [2]:
class CustomDataset(Dataset):
    """
    Image-classification dataset stored as one sub-folder per class.

    Every sub-directory of ``root_dir`` is a class and every file inside it is
    a sample of that class. Labels are integer indices: the classes listed in
    ``_LEADING_CLASSES`` come first (in that order), followed by the remaining
    classes in alphabetical order.

    Attributes set by the constructor:
        root_dir: Dataset root folder.
        transform: Optional callable applied to each PIL image.
        classes_list: Alphabetically sorted class (sub-folder) names.
        class_to_idx: Mapping class name -> integer label.
        images: List of ``(image_path, label)`` pairs.
    """

    # Classes that must receive label 0 and 1 respectively. This reproduces the
    # label order of the `classes` list used elsewhere in this notebook to turn
    # a predicted index back into a class name.
    _LEADING_CLASSES = ('CL', 'BR')

    def __init__(self, root_dir, transform = None):
        """
        Args:
            root_dir (str): Folder containing one sub-folder per class.
            transform (callable, optional): Applied to the RGB PIL image in
                ``__getitem__``; the raw PIL image is returned when ``None``.
        """
        self.root_dir = root_dir
        self.transform = transform

        # Only sub-directories are classes: a stray file in the root (README,
        # .DS_Store, ...) would otherwise make os.listdir fail in load_images.
        self.classes_list = sorted(
            entry for entry in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, entry))
        )

        # Leading classes first, the others after them. Building the order
        # explicitly (instead of overwriting two entries of an alphabetical
        # mapping) guarantees that no two classes share the same index.
        leading = [name for name in self._LEADING_CLASSES if name in self.classes_list]
        trailing = [name for name in self.classes_list if name not in leading]
        self.class_to_idx = {
            cls_name: index for index, cls_name in enumerate(leading + trailing)
        }

        self.images = self.load_images()

    def load_images(self):
        """Return the list of ``(image_path, label)`` pairs, in deterministic order."""
        images = []

        for cls_name in self.classes_list:
            cls_dir = os.path.join(self.root_dir, cls_name)
            class_idx = self.class_to_idx[cls_name]

            # Sorted so that a given index always refers to the same sample
            for img_name in sorted(os.listdir(cls_dir)):
                img_path = os.path.join(cls_dir, img_name)
                if os.path.isfile(img_path):
                    images.append((img_path, class_idx))

        return images

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        img_path, label = self.images[idx]

        # convert() returns a fully loaded copy, so the file can be closed right away
        with Image.open(img_path) as image_file:
            img = image_file.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label
                                

# CustomDataset Test

In [None]:
# Smoke test of CustomDataset on the training split, without any transform
dataset = CustomDataset(root_dir="/kaggle/input/weatherdataset/TRAIN", transform=None)
print("Numero di immagini del dataset : ", len(dataset))

# Fetch the sample at index 1 and report its size and label
sample = dataset[1]
img, label = sample
print("Dimensione immagine : ", img.size)
print("Label : ", label)

# Display the fetched image
plt.figure()
plt.imshow(img, clim=[0, 1])



# Hyperparameters

In [3]:
# Make the execution deterministic
torch.manual_seed(0)
np.random.seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# The number of classes is the number of sub-directories of the training folder
folder_path = '/kaggle/input/weatherdataset/TRAIN'
num_subdirectories = sum(
    1
    for entry in os.listdir(folder_path)
    if os.path.isdir(os.path.join(folder_path, entry))
)

print("Classes' number of the problem : ", num_subdirectories)

# Training hyperparameters
learning_rate = 0.0001
batch_size = 16
set_classes_number = num_subdirectories
num_epoch = 20

Classes' number of the problem :  6


# Dataloader Definition

In [4]:
# Locations of the three splits and of the original (unsplit) training set
train_set_path = "/kaggle/input/weatherdataset/TRAIN"
val_set_path = "/kaggle/input/weatherdataset/VAL"
test_set_path = "/kaggle/input/weatherdataset/TEST"
full_dataset_path = "/kaggle/input/dataset-mc3/TrainingSet"


def _build_transform(with_flip):
    """Resize to 224x224, convert to tensor, optionally flip at random, then normalize."""
    pipeline = [
        transforms.Resize((224,224), interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.ToTensor(),
    ]
    if with_flip:
        pipeline.append(transforms.RandomHorizontalFlip(p=0.5))
    pipeline.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    return transforms.Compose(pipeline)


def _build_loader(dataset, shuffle):
    """Wrap a dataset in a DataLoader sharing the batch size and worker settings."""
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                      num_workers=3, prefetch_factor=3, persistent_workers=True)


# Only the training pipeline contains the random horizontal flip
data_transform_train = _build_transform(with_flip=True)
data_transform_val = _build_transform(with_flip=False)
data_transform_test = _build_transform(with_flip=False)

# The full (unsplit) dataset reuses the training pipeline
train_dataset = CustomDataset(train_set_path, transform=data_transform_train)
val_dataset = CustomDataset(val_set_path, transform=data_transform_val)
test_dataset = CustomDataset(test_set_path, transform=data_transform_test)
full_dataset = CustomDataset(full_dataset_path, transform=data_transform_train)

# Training-type loaders are shuffled, evaluation loaders are not
train_dataloader = _build_loader(train_dataset, shuffle=True)
val_dataloader = _build_loader(val_dataset, shuffle=False)
test_dataloader = _build_loader(test_dataset, shuffle=False)
full_dataloader = _build_loader(full_dataset, shuffle=True)

# Learning Rate Scheduler: Cosine Annealing Warm Restart

In [6]:
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

class CustomCosineAnnealingWarmRestarts(CosineAnnealingWarmRestarts):
    """
    Cosine annealing with warm restarts that reports every restart to a callback.

    Whenever a call to ``step()`` brings the schedule back to the start of a
    cycle (``T_cur == 0``), ``save_model_callback(last_epoch, counter)`` is
    invoked, where ``counter`` is the number of callbacks issued so far.

    Args:
        optimizer: Wrapped optimizer.
        T_0 (int): Number of ``step()`` calls in the first cycle.
        T_mult (int): Factor by which the cycle length grows after a restart.
        eta_min (float): Minimum learning rate.
        save_model_callback (callable, optional): Called as
            ``callback(epoch, counter)`` at every restart; ignored when ``None``.
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_min=0, save_model_callback=None):
        # These attributes must exist before the parent constructor runs,
        # because that constructor itself performs an initial call to step().
        self.save_model_callback = save_model_callback
        self.counter = 0
        self._construction_done = False
        super(CustomCosineAnnealingWarmRestarts, self).__init__(optimizer, T_0, T_mult, eta_min)
        self._construction_done = True

    def step(self, epoch=None):
        """Advance the schedule and fire the callback when a cycle has just restarted."""
        super(CustomCosineAnnealingWarmRestarts, self).step(epoch)

        # The initial step performed during construction also leaves T_cur at 0;
        # it is not the end of a cycle, so it must not trigger a snapshot of the
        # still untrained model.
        if not self._construction_done:
            return

        # T_cur is the parent's counter of steps since the last restart
        if self.T_cur == 0 and self.save_model_callback is not None:
            self.save_model_callback(self.last_epoch, self.counter)
            self.counter += 1

            
def save_model(epoch, counter, model=None, folder_path='/kaggle/working/cosineWeights/'):
    """
    Save a snapshot of the model weights as ``<counter>_model_weights.pth``.

    Args:
        epoch (int): Epoch at which the snapshot is taken (only printed).
        counter (int): Progressive number of the snapshot, used in the file name.
        model (nn.Module, optional): Network to save. When omitted, the
            notebook-level network ``model_conv`` is used.
        folder_path (str): Destination folder, created if it does not exist.
    """
    if model is None:
        # The notebook keeps its network in the global `model_conv`; no global
        # named `model` exists while training runs.
        model = model_conv

    # torch.save does not create missing folders
    os.makedirs(folder_path, exist_ok=True)

    file_name = f'{counter}_model_weights.pth'
    file_path = os.path.join(folder_path, file_name)
    print(f"Saving model state at the end of epoch {epoch}")
    torch.save(model.state_dict(), file_path)

# Train Function (with Validation Loop)

In [5]:
def train_loop_validation(train_dataloader, val_dataloader, startEpoch, numEpochs, model_conv, criterionCNN, optimizer_conv, scheduler,
                          best_acc, best_loss, best_epoca, outputPath):
    """
    Train ``model_conv`` and evaluate it on the validation set after every epoch.

    Files written into ``outputPath`` at every epoch:
      - ``train_weights.pth``: weights at the end of the epoch (to resume training);
      - ``best_model_weights.pth``: rewritten when validation accuracy improves,
        or when it ties and validation loss decreases;
      - ``lossTrain.txt``, ``AccTrain.txt``, ``lossVal.txt``, ``AccVal.txt``:
        one value appended per epoch;
      - ``check_point.mat``: best accuracy / loss / epoch and last completed epoch.

    Args:
        train_dataloader: Iterable yielding ``(inputs, labels)`` training batches.
        val_dataloader: Iterable yielding ``(inputs, labels)`` validation batches.
        startEpoch (int): First epoch number (inclusive).
        numEpochs (int): Last epoch number (inclusive).
        model_conv (nn.Module): Network to train; batches are moved to the
            device its parameters live on.
        criterionCNN: Loss callable invoked as ``criterionCNN(outputs, labels)``.
        optimizer_conv: Optimizer updating the trainable parameters.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``.
        best_acc (float): Best validation accuracy reached so far.
        best_loss (float): Validation loss associated with ``best_acc``.
        best_epoca (int): Epoch at which ``best_acc`` was reached.
        outputPath (str): Folder receiving weights and logs (created if missing).

    Returns:
        tuple: ``(best_acc, best_loss, best_epoca)`` after the last epoch.
    """
    print("Starting train...")

    if outputPath:
        os.makedirs(outputPath, exist_ok=True)

    # Batches follow the model instead of assuming that a GPU is present
    device = next(model_conv.parameters()).device

    # `network` is what gets executed; `model_conv` stays the caller's own module
    # so that saved weights never carry DataParallel's "module." key prefix and
    # can be loaded back into the plain model.
    network = model_conv
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs!")
        network = DataParallel(model_conv)

    def _append_value(file_name, value):
        # Append one metric value as a new line of the given log file
        with open(os.path.join(outputPath, file_name), "a") as file_object:
            file_object.write(str(value) + '\n')

    for epochs in range(startEpoch, numEpochs + 1):
        since = time.time()

        # ---- training pass ----
        modelLoss_train = 0.0
        modelAcc_train = 0.0
        totalSize = 0

        network.train()

        for inputs, labels in tqdm(train_dataloader):
            inputs = inputs.to(device=device, dtype=torch.float32)  # batch of pictures
            labels = labels.to(device)  # corresponding labels

            optimizer_conv.zero_grad()
            # also clears gradients of parameters not handed to the optimizer
            network.zero_grad()

            y = network(inputs)
            _, preds = torch.max(y, 1)

            lossCNN = criterionCNN(y, labels)
            modelLoss_train += lossCNN.item() * inputs.size(0)
            totalSize += inputs.size(0)
            modelAcc_train += torch.sum(preds == labels).item()

            lossCNN.backward()  # compute gradients
            optimizer_conv.step()  # update weights

        modelLoss_epoch_train = modelLoss_train / totalSize
        modelAcc_epoch_train = modelAcc_train / totalSize

        if scheduler is not None:
            scheduler.step()

        # Weights of the latest epoch, to resume an interrupted training
        torch.save(model_conv.state_dict(), os.path.join(outputPath, 'train_weights.pth'))

        # ---- validation pass ----
        network.eval()
        totalSize_val = 0
        modelLoss_val = 0.0
        modelAcc_val = 0.0

        # No gradients are needed for evaluation: skipping the autograd graph
        # saves memory and time without changing the results.
        with torch.no_grad():
            for inputs, labels in val_dataloader:
                inputs = inputs.to(device=device, dtype=torch.float32)
                labels = labels.to(device)

                y = network(inputs)
                _, preds = torch.max(y, 1)
                lossCNN = criterionCNN(y, labels)

                modelLoss_val += lossCNN.item() * inputs.size(0)
                totalSize_val += inputs.size(0)
                modelAcc_val += torch.sum(preds == labels).item()

        modelLoss_epoch_val = modelLoss_val / totalSize_val
        modelAcc_epoch_val = modelAcc_val / totalSize_val
        time_elapsed = time.time() - since

        print('[Epoch %d][TRAIN on %d [Loss: %.4f  ACC: %.4f]][VAL on %d [Loss: %.4f  ACC: %.4f]][TIME: %.0f m %.0f s]'
              % (epochs, totalSize, modelLoss_epoch_train, modelAcc_epoch_train, totalSize_val, modelLoss_epoch_val,
                 modelAcc_epoch_val, time_elapsed // 60, time_elapsed % 60))

        # Keep the weights when validation accuracy increases or, at equal
        # accuracy, when validation loss decreases
        if (modelAcc_epoch_val > best_acc) or (modelAcc_epoch_val == best_acc and modelLoss_epoch_val < best_loss):
            print('     .... Saving best weights ....')
            best_acc = modelAcc_epoch_val
            best_loss = modelLoss_epoch_val
            best_epoca = epochs
            torch.save(model_conv.state_dict(), os.path.join(outputPath, 'best_model_weights.pth'))

        _append_value('lossTrain.txt', modelLoss_epoch_train)
        _append_value('AccTrain.txt', modelAcc_epoch_train)
        _append_value('lossVal.txt', modelLoss_epoch_val)
        _append_value('AccVal.txt', modelAcc_epoch_val)

        sio.savemat(os.path.join(outputPath, 'check_point.mat'), {'best_acc': best_acc,
                                                                  'best_loss': best_loss,
                                                                  'best_epoca': best_epoca,
                                                                  'last_epoch': epochs})

    print("Ending train...")
    return best_acc, best_loss, best_epoca

# Train Function (without Validation)

In [None]:
def train_full_dataset(train_dataloader, startEpoch, numEpochs, model_conv, criterionCNN, optimizer_conv, scheduler, best_acc, best_loss, best_epoca, outputPath):
    """
    Train ``model_conv`` without a validation pass.

    Files written into ``outputPath`` at every epoch:
      - ``train_weights.pth``: weights at the end of the epoch (to resume training);
      - ``best_model_weights.pth``: rewritten when TRAINING accuracy improves,
        or when it ties and training loss decreases;
      - ``lossTrain.txt``, ``AccTrain.txt``: one value appended per epoch;
      - ``check_point.mat``: best accuracy / loss / epoch and last completed epoch.

    Args:
        train_dataloader: Iterable yielding ``(inputs, labels)`` training batches.
        startEpoch (int): First epoch number (inclusive).
        numEpochs (int): Last epoch number (inclusive).
        model_conv (nn.Module): Network to train; batches are moved to the
            device its parameters live on.
        criterionCNN: Loss callable invoked as ``criterionCNN(outputs, labels)``.
        optimizer_conv: Optimizer updating the trainable parameters.
        scheduler: Learning-rate scheduler stepped once per epoch, or ``None``.
        best_acc (float): Best training accuracy reached so far.
        best_loss (float): Training loss associated with ``best_acc``.
        best_epoca (int): Epoch at which ``best_acc`` was reached.
        outputPath (str): Folder receiving weights and logs (created if missing).

    Returns:
        tuple: ``(best_acc, best_loss, best_epoca)`` after the last epoch.
    """
    print("Starting train...")

    if outputPath:
        os.makedirs(outputPath, exist_ok=True)

    # Batches follow the model instead of assuming that a GPU is present
    device = next(model_conv.parameters()).device

    # `network` is what gets executed; `model_conv` stays the caller's own module
    # so that saved weights never carry DataParallel's "module." key prefix and
    # can be loaded back into the plain model.
    network = model_conv
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs!")
        network = DataParallel(model_conv)

    def _append_value(file_name, value):
        # Append one metric value as a new line of the given log file
        with open(os.path.join(outputPath, file_name), "a") as file_object:
            file_object.write(str(value) + '\n')

    for epochs in range(startEpoch, numEpochs + 1):
        since = time.time()

        modelLoss_train = 0.0
        modelAcc_train = 0.0
        totalSize = 0

        network.train()

        # for each batch
        for inputs, labels in tqdm(train_dataloader):
            inputs = inputs.to(device=device, dtype=torch.float32)  # batch of pictures
            labels = labels.to(device)

            optimizer_conv.zero_grad()
            # also clears gradients of parameters not handed to the optimizer
            network.zero_grad()

            y = network(inputs)
            _, preds = torch.max(y, 1)

            lossCNN = criterionCNN(y, labels)

            lossCNN.backward()
            optimizer_conv.step()

            modelLoss_train += lossCNN.item() * inputs.size(0)
            totalSize += inputs.size(0)
            modelAcc_train += torch.sum(preds == labels).item()

        modelLoss_epoch_train = modelLoss_train / totalSize
        modelAcc_epoch_train = modelAcc_train / totalSize

        # Weights of the latest epoch, saved every epoch so that an interrupted
        # training can be resumed
        torch.save(model_conv.state_dict(), os.path.join(outputPath, 'train_weights.pth'))

        time_elapsed = time.time() - since

        # Same None guard as train_loop_validation, so both accept scheduler=None
        if scheduler is not None:
            scheduler.step()
            print('Learning Rate :', scheduler.get_last_lr())

        print('[Epoch %d][TRAIN on %d [Loss: %.4f  ACC: %.4f]][TIME: %.0f m %.0f s]'
              % (epochs, totalSize, modelLoss_epoch_train, modelAcc_epoch_train,
                 time_elapsed // 60, time_elapsed % 60))

        # There is no validation set here: "best" refers to the training metrics
        if (modelAcc_epoch_train > best_acc) or (modelAcc_epoch_train == best_acc and modelLoss_epoch_train < best_loss):
            print('     .... Saving best weights ....')
            best_acc = modelAcc_epoch_train
            best_loss = modelLoss_epoch_train
            best_epoca = epochs
            torch.save(model_conv.state_dict(), os.path.join(outputPath, 'best_model_weights.pth'))

        _append_value('lossTrain.txt', modelLoss_epoch_train)
        _append_value('AccTrain.txt', modelAcc_epoch_train)

        sio.savemat(os.path.join(outputPath, 'check_point.mat'), {'best_acc': best_acc,
                                                                  'best_loss': best_loss,
                                                                  'best_epoca': best_epoca,
                                                                  'last_epoch': epochs})

    print("Ending train...")
    return best_acc, best_loss, best_epoca


# Train Model

In [6]:
networkName = 'vgg13_bn'
WeightPath = '/kaggle/working/weights/' + networkName + '_29CL_full/'

# exist_ok tolerates an already existing folder while still reporting real
# failures (permissions, invalid path), which a bare `except: pass` would hide.
os.makedirs(WeightPath, exist_ok=True)

# VGG13 with batch normalization, initialised with the IMAGENET1K_V1 weights
model_ft = torchvision.models.vgg13_bn(weights='IMAGENET1K_V1')

# Replace the last fully connected layer with one sized for this problem; the
# input width is read from the layer being replaced instead of being hard-coded.
model_ft.classifier[-1] = nn.Linear(model_ft.classifier[-1].in_features, set_classes_number)

# Freeze every parameter of the network...
for param in model_ft.parameters():
    param.requires_grad = False

# ...then make the fully connected (classifier) layers trainable again
for param in model_ft.classifier.parameters():
    param.requires_grad = True
    
def init_weights(m):
    """
    Re-initialise an ``nn.Linear`` module in place; any other module is left untouched.

    The weight matrix is filled with Xavier-uniform values and the bias, when
    the layer has one, is set to zero.
    """
    if not isinstance(m, nn.Linear):
        return

    init.xavier_uniform_(m.weight)

    if m.bias is None:
        return
    init.constant_(m.bias, 0)

#model_ft.classifier[-1].apply(init_weights)

#print(model_ft)


Downloading: "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth" to /root/.cache/torch/hub/checkpoints/vgg13_bn-abd245e5.pth
100%|██████████| 508M/508M [00:03<00:00, 156MB/s]  


In [None]:
model_conv = model_ft.cuda()  # move the model to the GPU
criterion = nn.CrossEntropyLoss()  # loss definition
# Only the classifier parameters are handed to the optimizer
optimizer_conv = optim.Adam(model_conv.classifier.parameters(), lr=learning_rate)  # optimizer definition
# Multiply the learning rate by 0.1 every 5 scheduler steps
scheduler = torch.optim.lr_scheduler.StepLR(optimizer_conv, step_size=5, gamma=0.1)  # scheduler definition

# Alternative scheduler, currently disabled. Recorded rationale for T_0 = 96:
# the training set has 2997 images and, with a batch size of 128, one epoch
# takes about 24 iterations; a warm restart every 4 epochs gives 24 * 4 = 96.
# NOTE(review): that arithmetic counts iterations and assumes a batch size of
# 128, whereas this notebook sets batch_size = 16 and train_loop_validation
# steps the scheduler once per epoch - re-derive T_0 before enabling this.

#scheduler = CustomCosineAnnealingWarmRestarts(optimizer_conv, T_0=96, T_mult=1, eta_min=1e-8, save_model_callback=save_model)

# Initial state of the "best so far" tracking.
# best_loss starts at +inf (not 0): the tie-break `loss < best_loss` used by the
# training loop can never hold against 0, because a loss is never negative.
startEpoch = 1
best_acc = 0
best_loss = float('inf')
best_epoca = 0

# train model
train_loop_validation(train_dataloader, val_dataloader, startEpoch, num_epoch, model_conv, criterion, optimizer_conv, scheduler, best_acc, best_loss, best_epoca, WeightPath)

# Plots

In [None]:
def _read_metric_log(file_name):
    """Return the values stored one per line in a metric log inside WeightPath."""
    # The context manager closes the file; blank lines (e.g. a trailing newline
    # added by hand) are skipped instead of breaking float().
    with open(os.path.join(WeightPath, file_name), 'r') as log_file:
        return [float(line) for line in log_file if line.strip()]


def _plot_train_vs_val(title, y_label, train_values, val_values, train_label, val_label, image_name):
    """Plot a training curve against a validation curve and save the figure in WeightPath."""
    plt.figure()
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    # Epochs are numbered from 1
    plt.plot(list(range(1, len(train_values) + 1)), train_values, color='r', label=train_label)
    plt.plot(list(range(1, len(val_values) + 1)), val_values, color='g', label=val_label)
    plt.legend()
    plt.savefig(os.path.join(WeightPath, image_name))


#WeightPath = 'alexnet/'

# Per-epoch metrics written by the training loop
lossModel_Train = _read_metric_log('lossTrain.txt')
lossModel_val = _read_metric_log('lossVal.txt')
accModel_Train = _read_metric_log('AccTrain.txt')
accModel_val = _read_metric_log('AccVal.txt')

_plot_train_vs_val("Model: Training Vs Validation Losses", 'Loss',
                   lossModel_Train, lossModel_val,
                   "Training Loss", "Validation Loss", 'LossTrainVal.png')

_plot_train_vs_val("Training Vs Validation Accuracies", 'Accuracy',
                   accModel_Train, accModel_val,
                   "Training Accuracy", "Validation Accuracy", 'AccTrainVal.png')

# Prediction Test

In [None]:
# Evaluate the current model on the held-out test split
model_conv.eval()
totalSize_test = 0
modelLoss_test = 0.0
modelAcc_test = 0.0

# Gradients are not needed for evaluation: disabling autograd avoids building
# the graph for every batch (less memory, faster) without changing the results.
with torch.no_grad():
    for inputs, labels in test_dataloader:
        inputs = inputs.float().cuda()
        labels = labels.cuda()

        y = model_conv(inputs)
        _, preds = torch.max(y, 1)
        lossCNN = criterion(y, labels)

        # Weight the batch loss by the batch size so that the final division
        # by the number of samples stays correct with a smaller last batch
        modelLoss_test += lossCNN.item() * inputs.size(0)
        totalSize_test += inputs.size(0)
        modelAcc_test += torch.sum(preds == labels).item()

modelLoss_epoch_test = modelLoss_test/totalSize_test
modelAcc_epoch_test = modelAcc_test/totalSize_test

print(f"Loss: {modelLoss_epoch_test}")
print(f"Acc: {modelAcc_epoch_test}")


# Model Ensemble

#### Calculation of the average loss and accuracy across the various models

In [None]:
# Test loss and accuracy of every saved snapshot, then their arithmetic mean.
# Note: this is the mean of the individual metrics, not the metric of a combined
# (voted) prediction.
losses = []
accuracies = []

model_weights_folder = '/kaggle/working/cosineWeights/'
# Sorted for a reproducible order; only weight files are considered
model_files = sorted(
    file_name for file_name in os.listdir(model_weights_folder)
    if file_name.endswith('.pth')
)

if not model_files:
    # Fail with a clear message instead of a ZeroDivisionError further down
    raise FileNotFoundError(f"No model weights found in {model_weights_folder}")

for model_file in model_files:
    model_path = os.path.join(model_weights_folder, model_file)

    # Load the snapshot into the network
    model_conv.load_state_dict(torch.load(model_path))
    model_conv.cuda()
    model_conv.eval()

    totalSize_test = 0
    modelLoss_test = 0.0
    modelAcc_test = 0.0

    # No gradients are needed for evaluation
    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.float().cuda()
            labels = labels.cuda()

            y = model_conv(inputs)
            _, preds = torch.max(y, 1)
            lossCNN = criterion(y, labels)

            modelLoss_test += lossCNN.item() * inputs.size(0)
            totalSize_test += inputs.size(0)
            modelAcc_test += torch.sum(preds == labels).item()

    # Loss and accuracy of the current snapshot
    modelLoss_epoch_test = modelLoss_test / totalSize_test
    modelAcc_epoch_test = modelAcc_test / totalSize_test

    losses.append(modelLoss_epoch_test)
    accuracies.append(modelAcc_epoch_test)

ensemble_loss = sum(losses) / len(losses)
ensemble_accuracy = sum(accuracies) / len(accuracies)

print(f"Ensemble Loss: {ensemble_loss}")
print(f"Ensemble Accuracy: {ensemble_accuracy}")


#### Prediction Example Using Most Voted Class

In [None]:
final_test_dir = "/kaggle/input/dataset-mc3/TestSet"
listImage = os.listdir(final_test_dir)
# Position i is the name of the class with label i (same order as CustomDataset)
classes = ['CL', 'BR', 'DA', 'RA', 'SF', 'SH']

model_weights_folder = '/kaggle/working/cosineWeights/'
model_files = sorted(
    file_name for file_name in os.listdir(model_weights_folder)
    if file_name.endswith('.pth')
)

if not model_files:
    raise FileNotFoundError(f"No model weights found in {model_weights_folder}")

# One independent network per snapshot. Each member is a deep copy: loading
# every snapshot into the same object would leave a list of N references to a
# single network holding only the last weights. The list is deliberately not
# called `models`, which would shadow the torchvision module imported above.
# Memory note: every member holds its own full copy of the weights on `device`.
ensemble_members = []
for model_file in model_files:
    member = copy.deepcopy(model_conv)
    # The file name must be joined with its folder to be found
    member.load_state_dict(torch.load(os.path.join(model_weights_folder, model_file)))
    member = member.to(device)
    member.eval()
    ensemble_members.append(member)

predictions = []

# No gradients are needed for inference
with torch.no_grad():
    for image_name in listImage:
        # Same RGB conversion as CustomDataset, so grayscale / RGBA files also
        # match the 3-channel normalization of the transform
        with Image.open(os.path.join(final_test_dir, image_name)) as image_file:
            img = image_file.convert('RGB')
        img = data_transform_test(img).to(device).unsqueeze(0)

        # Class predicted by each member of the ensemble
        individual_predictions = []
        for member in ensemble_members:
            outputs = member(img)
            _, predicted = torch.max(outputs, 1)
            individual_predictions.append(predicted.item())

        # Most voted class; on a tie the earliest member's vote wins, which is
        # deterministic (iterating over a set is not guaranteed to be)
        final_prediction = max(individual_predictions, key=individual_predictions.count)

        predictions.append({'RowID': image_name.split('.')[0], 'Class': classes[final_prediction]})

test_df = pd.DataFrame(predictions)
print(test_df)


# Retraining on the whole dataset

In [None]:
# Destination of the weights of the full-dataset training. The trailing separator
# matters: the training function builds file names from this prefix, and the
# submission cell loads `.../full_training_weights/best_model_weights.pth`.
# NOTE(review): the folder is named "resnet50" although the network built in
# this notebook is vgg13_bn - confirm the intended name.
outputPath = "/kaggle/working/weights/resnet50/full_training_weights/"
os.makedirs(outputPath, exist_ok=True)

startEpoch = 1
best_acc = 0
best_loss = 0
best_epoca = 0

# Fixes with respect to the previous call:
#   - full_dataloader (the whole original training set) instead of train_dataloader;
#   - num_epoch and criterion, the names actually defined in this notebook
#     (numEpochs and criterionCNN only exist as function parameters).
# NOTE(review): model_conv, optimizer_conv and scheduler are reused as left by
# the previous training, so the learning rate starts from its already decayed
# value - re-create them first if a fresh training is intended.
train_full_dataset(full_dataloader, startEpoch, num_epoch, model_conv, criterion, optimizer_conv, scheduler, best_acc, best_loss, best_epoca, outputPath)

# Kaggle Final Submission

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the best weights of the full-dataset training and switch to inference mode
model_conv.load_state_dict(torch.load('/kaggle/working/weights/resnet50/full_training_weights/best_model_weights.pth',
                                      map_location=device))
model_conv = model_conv.to(device)
model_conv.eval()

final_test_dir = "/kaggle/input/dataset-mc3/TestSet"

listImage = os.listdir(final_test_dir)
# Position i is the name of the class with label i (same order as CustomDataset)
classes = ['CL','BR','DA','RA','SF','SH']

# Rows are collected in a list and turned into a DataFrame once: this avoids the
# private DataFrame._append API and re-copying the frame for every image.
rows = []

# No gradients are needed for inference
with torch.no_grad():
    for image_name in listImage:
        # os.path.join inserts the separator that plain concatenation of the
        # folder and the file name was missing
        with Image.open(os.path.join(final_test_dir, image_name)) as image_file:
            # Same RGB conversion as CustomDataset, so grayscale / RGBA files
            # also match the 3-channel normalization of the transform
            img = image_file.convert('RGB')
        img = data_transform_test(img).to(device).unsqueeze(0)

        outputs = model_conv(img)
        _, predicted = torch.max(outputs, 1)
        rows.append({'RowID': image_name.split('.')[0],
                     'Class': classes[predicted.item()]})

Test = pd.DataFrame(rows, columns=['RowID', 'Class'])

print(Test)
    