### Imports

In [None]:
# Mount Google Drive (Colab only) so the paths under /content/drive used by the
# cells below are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from urllib.request import urlopen
from PIL import Image
import torch
from transformers import AutoFeatureExtractor, ResNetForImageClassification, ResNetModel
import os
from PIL import Image
from torch.utils.data import TensorDataset, DataLoader
import time
import numpy as np
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing at the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### Load Data

In [None]:
# Root directory of the image dataset; images are grouped by parent folder name
root_dir = '/content/drive/MyDrive/ColabNotebooks/Dataset_1'

# Maps a folder name to the list of image arrays found directly inside it
image_dict = {}

# Walk every sub-folder of the root directory
for root, dirs, files in os.walk(root_dir):
    # os.walk yields entries in arbitrary order; sorting makes the per-folder
    # lists (and every slice taken from them later) reproducible across runs.
    dirs.sort()
    for file in sorted(files):
        file_path = os.path.join(root, file)

        # Only keep files whose extension marks them as images
        if not file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            continue

        # Always normalise the pixel mode to 3-channel RGB. The file format
        # says nothing about the mode (a JPEG can be greyscale or CMYK), and
        # arrays with differing channel counts cannot be concatenated later.
        # The context manager releases the file handle deterministically.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB")

        # Name of the folder that directly contains the file
        parent_dir = os.path.basename(os.path.dirname(file_path))

        # Crop box is (left, upper, right, lower), i.e. a 336 x 219 region;
        # presumably this trims the plot margins -- TODO confirm.
        image = image.crop((54, 34, 390, 253))
        image_dict.setdefault(parent_dir, []).append(np.array(image))


In [None]:
# Pull one list of image arrays per genre folder out of image_dict.
# The classical list carries a _GTZAN suffix; the other names are <genre>_list.
(
    blues_list,
    classical_list_GTZAN,
    country_list,
    disco_list,
    hiphop_list,
    jazz_list,
    metal_list,
    pop_list,
    reggae_list,
    rock_list,
) = (
    image_dict[genre]
    for genre in ('blues', 'classical', 'country', 'disco', 'hiphop',
                  'jazz', 'metal', 'pop', 'reggae', 'rock')
)


### Load the model

In [None]:
# Both the preprocessing pipeline and the classifier come from the same
# Hugging Face checkpoint.
RESNET_CHECKPOINT = "microsoft/resnet-18"
image_processor = AutoFeatureExtractor.from_pretrained(RESNET_CHECKPOINT)
model = ResNetForImageClassification.from_pretrained(RESNET_CHECKPOINT)

### Testing the model's prediction on data

In [None]:
# Preprocess the first blues image into the tensors the model expects.
inputs = image_processor(blues_list[0], return_tensors="pt")

print(inputs)

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

# The pretrained head predicts one of the 1000 ImageNet classes.
predicted_label = logits.argmax(-1).item()
predicted_name = model.config.id2label[predicted_label]
print(predicted_name)

Of course it is a theater curtain.

### Fine tuning the model for our classes

In [None]:
# Class order used for the labels below: the position in this tuple is the
# class index. Note that disco comes before country (disco -> 2, country -> 3).
genre_lists = (
    blues_list,
    classical_list_GTZAN,
    disco_list,
    country_list,
    hiphop_list,
    jazz_list,
    metal_list,
    pop_list,
    reggae_list,
    rock_list,
)

# Use the same number of samples for every class (size of the smallest one).
num_each_class_sample = min(len(samples) for samples in genre_lists)

# Stack the first num_each_class_sample images of every class along axis 0.
tensor_x = torch.Tensor(
    np.concatenate([samples[:num_each_class_sample] for samples in genre_lists], axis=0)
).to(device)

# Labels 0..9, each repeated num_each_class_sample times, in the order above.
tensor_y = torch.Tensor(
    np.repeat(np.arange(len(genre_lists)), num_each_class_sample)
).to(device)

my_dataset = TensorDataset(tensor_x, tensor_y)  # pairs every image with its label
my_dataloader = DataLoader(my_dataset, batch_size=10, shuffle=True)  # shuffled mini-batches of 10


In [None]:
def train(net, train_dataloader, criterion, optimizer, scheduler=None, epochs=10, device=device, checkpoint_epochs=2):
    """Train ``net`` with mini-batch gradient descent and return it.

    Args:
        net: model called as ``net(X)``; its parameters are updated in place.
        train_dataloader: iterable yielding ``(X, y)`` batches.
        criterion: callable ``criterion(preds, y)`` returning a scalar loss.
            The epoch loss printed here assumes it is a mean over the batch.
        optimizer: optimizer stepped once per batch.
        scheduler: optional learning-rate scheduler, stepped once per epoch.
        epochs: number of passes over ``train_dataloader``.
        device: device the batches and the running metrics are placed on.
        checkpoint_epochs: a checkpoint is written to ``./checkpoint.pth.tar``
            every ``checkpoint_epochs`` epochs (each one overwrites the last).

    Returns:
        The trained ``net``.
    """
    start = time.time()
    print(f'Training for {epochs} epochs on {device}')

    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")

        net.train()  # put network in train mode for Dropout and Batch Normalization
        # Running metrics stay on `device` to avoid a host transfer per batch.
        train_loss = torch.tensor(0., device=device)
        train_accuracy = torch.tensor(0., device=device)
        samples_seen = 0
        for X, y in train_dataloader:
            X = X.to(device)
            # Cast on the target device directly; `.type(torch.LongTensor)`
            # would first copy the labels back to the CPU.
            y = y.to(device=device, dtype=torch.long)
            preds = net(X)
            loss = criterion(preds, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                # Weight by the real batch length: the last batch can be
                # shorter than `batch_size`, which would inflate the average.
                batch_len = y.size(0)
                train_loss += loss * batch_len
                train_accuracy += (torch.argmax(preds, dim=1) == y).sum()
                samples_seen += batch_len

        if scheduler is not None:
            scheduler.step()

        # Average over the samples actually processed (also correct with
        # drop_last=True); the max() guards against an empty dataloader.
        denominator = max(samples_seen, 1)
        print(f'Training loss: {train_loss/denominator:.2f}')
        print(f'Training accuracy: {100*train_accuracy/denominator:.2f}')

        if epoch % checkpoint_epochs == 0:
            torch.save({
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, './checkpoint.pth.tar')

        print()

    end = time.time()
    print(f'Total training time: {end-start:.1f} seconds')
    return net

In [None]:
from transformers import ResNetModel
import torch

# model definition
class Classifier_model(torch.nn.Module):
    """ResNet-18 backbone followed by one linear layer scoring 10 classes.

    Attributes set in ``__init__``:
        device: copy of the module-level ``device`` value.
        image_processor: ``AutoFeatureExtractor`` for "microsoft/resnet-18".
        pre_trained_model: ``ResNetModel`` loaded from "microsoft/resnet-18".
        fc: linear layer mapping 25088 flattened values to 10 outputs.
        activation: ReLU applied to the flattened backbone output in ``forward``.

    NOTE(review): ``forward`` returns softmax probabilities, not logits, so it
    must be paired with a loss that expects probabilities.
    """

    def __init__(self):
        super().__init__()
        checkpoint = "microsoft/resnet-18"
        # Number of values per image after flattening the backbone output;
        # presumably 512 x 7 x 7 for the processor's default size -- TODO confirm.
        flat_feature_count = 25088
        self.device = device
        self.image_processor = AutoFeatureExtractor.from_pretrained(checkpoint, device=self.device)
        self.pre_trained_model = ResNetModel.from_pretrained(checkpoint)
        self.fc = torch.nn.Linear(flat_feature_count, 10)
        self.activation = torch.nn.ReLU()

    def _flat_backbone_output(self, X):
        """Preprocess ``X``, run the backbone, flatten ``last_hidden_state`` from dim 1."""
        batch = self.image_processor(X, return_tensors="pt").to(self.device)
        hidden = self.pre_trained_model(**batch).last_hidden_state
        return hidden.flatten(start_dim=1)

    def forward(self, X):
        """Return softmax-normalised class scores, one row per input."""
        flat = self._flat_backbone_output(X)
        scores = self.fc(self.activation(flat))
        return scores.softmax(dim=1)

    def features_extractor(self, X):
        """Return the flattened backbone output, softmax-normalised along dim 1."""
        return self._flat_backbone_output(X).softmax(dim=1)

In [None]:
from transformers import ResNetModel
import torch

# model definition
class Classifier_model_2(torch.nn.Module):
    """Two-class head on top of another model's ``features_extractor`` output.

    Attributes set in ``__init__``:
        device: copy of the module-level ``device`` value.
        f_model: object whose ``features_extractor(X)`` provides the features.
        fc: linear layer mapping 25088 features to 2 outputs.
        activation: ReLU applied to the features before ``fc``.

    NOTE(review): ``forward`` returns softmax probabilities, not logits, so it
    must be paired with a loss that expects probabilities.
    """

    def __init__(self, f_model):
        super().__init__()
        flat_feature_count = 25088  # must match the width of f_model.features_extractor output
        self.device = device
        self.f_model = f_model
        self.fc = torch.nn.Linear(flat_feature_count, 2)
        self.activation = torch.nn.ReLU()

    def forward(self, X):
        """Return softmax-normalised scores for the two classes, one row per input."""
        # Preprocessing is done inside f_model.features_extractor.
        features = self.f_model.features_extractor(X)
        scores = self.fc(self.activation(features))
        return scores.softmax(dim=1)

### First Training

Entraînement du modèle sur les données GTZAN : 10 classes, 100 fichiers par classe.

In [None]:
# TRAINING 1

lr, weight_decay, epochs = 1e-5, 5e-4, 10

net = Classifier_model().to(device)


def probability_cross_entropy(probs, targets, eps=1e-12):
    """Cross-entropy for a model that already outputs class probabilities.

    ``Classifier_model.forward`` ends with a softmax. ``torch.nn.CrossEntropyLoss``
    expects raw logits and applies (log-)softmax itself, so using it here would
    normalise twice and flatten the gradients. Taking the negative
    log-likelihood of the probabilities gives the intended loss.

    Args:
        probs: tensor of per-class probabilities, one row per sample.
        targets: tensor of integer class indices.
        eps: lower clamp applied before the log to avoid ``log(0)``.

    Returns:
        Scalar mean loss over the batch.
    """
    return torch.nn.functional.nll_loss(torch.log(probs.clamp_min(eps)), targets)


# Kept under the name `loss`, which this cell has always exposed.
loss = probability_cross_entropy

optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)

net = train(net, my_dataloader, loss, optimizer, None, epochs, device)

In [None]:
# Persist the whole trained model object (not only its state_dict) to Drive;
# a later cell reloads this file with torch.load.
torch.save(net, "/content/drive/MyDrive/ColabNotebooks/model_10classes_Clem_Colab.pt")

In [None]:

# (genre list, number of leading samples to take). Rock contributes 12 and the
# eight other genres 11 each, i.e. 100 non-classical samples in total.
non_classical_sources = (
    (blues_list, 11),
    (rock_list, 12),
    (reggae_list, 11),
    (country_list, 11),
    (disco_list, 11),
    (hiphop_list, 11),
    (jazz_list, 11),
    (metal_list, 11),
    (pop_list, 11),
)
non_classical_list = np.concatenate([samples[:count] for samples, count in non_classical_sources])
classical_list = classical_list_GTZAN

# Stack classical samples first, then non-classical ones (along axis 0).
tensor_x_2 = torch.Tensor(
    np.concatenate((classical_list, non_classical_list), axis=0)
).to(device)

# Label 0 = classical, label 1 = non-classical, in the same order as tensor_x_2.
tensor_y_2 = torch.Tensor(
    np.repeat([0, 1], [len(classical_list), len(non_classical_list)])
).to(device)

my_dataset_2 = TensorDataset(tensor_x_2, tensor_y_2)  # pairs every image with its label
my_dataloader_2 = DataLoader(my_dataset_2, batch_size=10, shuffle=True)  # shuffled mini-batches of 10

In [None]:
# Root directory of the second dataset; images are grouped by parent folder name
import random
root_dir = '/content/drive/MyDrive/ColabNotebooks/mel'

# Maps a folder name to the list of image arrays found directly inside it
image_dict = {}

# Walk every sub-folder of the root directory
for root, dirs, files in os.walk(root_dir):
    # os.walk yields entries in arbitrary order; sorting makes the 20 % / 80 %
    # split below select the same samples on every run.
    dirs.sort()
    for file in sorted(files):
        file_path = os.path.join(root, file)

        # Only keep files whose extension marks them as images
        if not file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            continue

        # Always normalise the pixel mode to 3-channel RGB. The file format
        # says nothing about the mode (a JPEG can be greyscale or CMYK), and
        # arrays with differing channel counts cannot be stacked later.
        # The context manager releases the file handle deterministically.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB").resize((336, 219))

        # Name of the folder that directly contains the file
        parent_dir = os.path.basename(os.path.dirname(file_path))
        image_dict.setdefault(parent_dir, []).append(np.array(image))

classical_list_MG = image_dict['classic']
non_classical_list_MG = image_dict['non_classic']

# Number of leading samples of each class held out for evaluation (20 %)
percent_classical_MG = int(0.2*len(classical_list_MG))
percent_non_classical_MG = int(0.2*len(non_classical_list_MG))

# [sample, label] pairs of the hold-out set: 0 = classic, 1 = non_classic
evaluation_list_dic = (
    [[sample, 0] for sample in classical_list_MG[:percent_classical_MG]]
    + [[sample, 1] for sample in non_classical_list_MG[:percent_non_classical_MG]]
)

# Shuffle the pairs, then keep the samples alone in the same order so that
# evaluation_list[i] is labelled by evaluation_list_dic[i][1].
random.shuffle(evaluation_list_dic)
evaluation_list = [sample for sample, _ in evaluation_list_dic]

# Remaining 80 % of each class: the pool available for training
activeL_classical_list = classical_list_MG[percent_classical_MG:]
activeL_non_classical_list = non_classical_list_MG[percent_non_classical_MG:]


In [None]:
lr, weight_decay, epochs = 1e-5, 5e-4, 10

# Reload the full 10-class model object saved earlier and reuse it as the
# feature extractor of the two-class model.
net = torch.load("/content/drive/MyDrive/ColabNotebooks/model_10classes_Clem_Colab.pt")

net2 = Classifier_model_2(net).to(device)


def probability_cross_entropy(probs, targets, eps=1e-12):
    """Cross-entropy for a model that already outputs class probabilities.

    ``Classifier_model_2.forward`` ends with a softmax. ``torch.nn.CrossEntropyLoss``
    expects raw logits and applies (log-)softmax itself, so using it here would
    normalise twice and flatten the gradients. Taking the negative
    log-likelihood of the probabilities gives the intended loss.

    Args:
        probs: tensor of per-class probabilities, one row per sample.
        targets: tensor of integer class indices.
        eps: lower clamp applied before the log to avoid ``log(0)``.

    Returns:
        Scalar mean loss over the batch.
    """
    return torch.nn.functional.nll_loss(torch.log(probs.clamp_min(eps)), targets)


# Kept under the name `loss`, which this cell has always exposed.
loss = probability_cross_entropy

optimizer = torch.optim.Adam(net2.parameters(), lr=lr, weight_decay=weight_decay)

net2 = train(net2, my_dataloader_2, loss, optimizer, None, epochs, device)

In [None]:
# Persist the whole two-class model object (not only its state_dict) to Drive;
# the random-picking experiment further down starts from this file.
torch.save(net2, "/content/drive/MyDrive/ColabNotebooks/model_2_Clem_trainings_longEgale_Colab.pt")

### Evaluation

In [None]:
def Activedataloader(percent):
    """Build a shuffled DataLoader over the first ``percent`` of the training pool.

    Samples come from the module-level ``activeL_classical_list`` (label 0) and
    ``activeL_non_classical_list`` (label 1). Both slices are taken from these
    pools only: slicing ``non_classical_list_MG`` instead would return its
    leading samples, which are the ones held out for evaluation.

    Args:
        percent: fraction of each pool to use, e.g. ``0.2`` for the first 20 %.

    Returns:
        ``DataLoader`` with batch size 10 and shuffling enabled, yielding
        ``(image, label)`` batches placed on ``device``.
    """
    classical_batch = list(activeL_classical_list[:int(percent * len(activeL_classical_list))])
    non_classical_batch = list(activeL_non_classical_list[:int(percent * len(activeL_non_classical_list))])

    # Stacking one combined list also works when one of the two slices is empty.
    tensor_x = torch.Tensor(np.array(classical_batch + non_classical_batch)).to(device)

    # Labels follow the same order as tensor_x: classical first, then non-classical.
    tensor_y = torch.Tensor(np.concatenate((np.full(len(classical_batch), 0),
                                            np.full(len(non_classical_batch), 1)), axis=0)).to(device)

    my_dataset = TensorDataset(tensor_x, tensor_y)  # pairs every image with its label
    my_dataloader = DataLoader(my_dataset, batch_size=10, shuffle=True)
    return my_dataloader


In [None]:
torch.cuda.empty_cache()

# Load from the Drive location the two-class model was saved to; a bare file
# name only resolves if the working directory happens to contain a copy.
model = torch.load("/content/drive/MyDrive/ColabNotebooks/model_2_Clem_trainings_longEgale_Colab.pt")
model = model.eval()

# The whole hold-out set is sent through the model as a single batch.
test_data = torch.Tensor(np.array(evaluation_list)).to(device)

# Inference only: skip autograd bookkeeping so no graph is kept in memory.
with torch.no_grad():
    preds = model(test_data)

output = torch.argmax(preds, dim=1)

# evaluation_list was derived from evaluation_list_dic in the same order, so
# prediction i is compared with the label stored at evaluation_list_dic[i][1].
count = 0
for i in range(0, len(output)):
    if output[i].item() == evaluation_list_dic[i][1]:
        count += 1
print(f"Rand 20% accuracy : {(count/len(output))*100} %")
print(output)

# Active Learning

Dans cette partie, on cherche à déterminer quelle sera la stratégie la plus intéressante à utiliser en fonction du budget que l'on souhaite allouer au projet. Nous allons donc nous intéresser à plusieurs stratégies d'Active Learning. En parallèle, nous allons nous intéresser au pourcentage nécessaire, en fonction des stratégies sélectionnées, pour obtenir un taux de prédiction intéressant. Ceci aura pour effet de simuler le budget associé à chaque entraînement.  

In [None]:
from urllib.request import urlopen
from PIL import Image
import torch
from transformers import AutoFeatureExtractor, ResNetForImageClassification, ResNetModel
import os
from PIL import Image
from torch.utils.data import TensorDataset, DataLoader
import time
import numpy as np
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing at the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from transformers import ResNetModel
import torch

def train(net, train_dataloader, criterion, optimizer, scheduler=None, epochs=100, device=device, checkpoint_epochs=2, timeout=45):
    """Train ``net`` until ``epochs`` or the time budget is reached, and return it.

    Args:
        net: model called as ``net(X)``; its parameters are updated in place.
        train_dataloader: iterable yielding ``(X, y)`` batches.
        criterion: callable ``criterion(preds, y)`` returning a scalar loss.
            The epoch loss printed here assumes it is a mean over the batch.
        optimizer: optimizer stepped once per batch.
        scheduler: optional learning-rate scheduler, stepped once per epoch.
        epochs: maximum number of passes over ``train_dataloader``.
        device: device the batches and the running metrics are placed on.
        checkpoint_epochs: a checkpoint is written to ``./checkpoint.pth.tar``
            every ``checkpoint_epochs`` epochs (each one overwrites the last).
        timeout: wall-clock budget in seconds. It is checked at the end of
            each epoch, so the epoch in progress always completes.

    Returns:
        The trained ``net``.
    """
    start = time.time()
    print(f'Training for {epochs} epochs on {device}')

    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")

        net.train()  # put network in train mode for Dropout and Batch Normalization
        # Running metrics stay on `device` to avoid a host transfer per batch.
        train_loss = torch.tensor(0., device=device)
        train_accuracy = torch.tensor(0., device=device)
        samples_seen = 0
        for X, y in train_dataloader:
            X = X.to(device)
            # Cast on the target device directly; `.type(torch.LongTensor)`
            # would first copy the labels back to the CPU.
            y = y.to(device=device, dtype=torch.long)
            preds = net(X)
            loss = criterion(preds, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                # Weight by the real batch length: the last batch can be
                # shorter than `batch_size`, which would inflate the average.
                batch_len = y.size(0)
                train_loss += loss * batch_len
                train_accuracy += (torch.argmax(preds, dim=1) == y).sum()
                samples_seen += batch_len

        if scheduler is not None:
            scheduler.step()

        # Average over the samples actually processed (also correct with
        # drop_last=True); the max() guards against an empty dataloader.
        denominator = max(samples_seen, 1)
        print(f'Training loss: {train_loss/denominator:.2f}')
        print(f'Training accuracy: {100*train_accuracy/denominator:.2f}')

        if epoch % checkpoint_epochs == 0:
            torch.save({
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, './checkpoint.pth.tar')

        print()
        # Stop once the time budget is spent.
        if (time.time() - start) >= timeout:
            break

    end = time.time()
    print(f'Total training time: {end-start:.1f} seconds')
    return net

# model definition
class Classifier_model(torch.nn.Module):
    """ResNet-18 backbone followed by one linear layer scoring 10 classes.

    Attributes set in ``__init__``:
        device: copy of the module-level ``device`` value.
        image_processor: ``AutoFeatureExtractor`` for "microsoft/resnet-18".
        pre_trained_model: ``ResNetModel`` loaded from "microsoft/resnet-18".
        fc: linear layer mapping 25088 flattened values to 10 outputs.
        activation: ReLU applied to the flattened backbone output in ``forward``.

    NOTE(review): ``forward`` returns softmax probabilities, not logits, so it
    must be paired with a loss that expects probabilities.
    """

    def __init__(self):
        super().__init__()
        checkpoint = "microsoft/resnet-18"
        # Number of values per image after flattening the backbone output;
        # presumably 512 x 7 x 7 for the processor's default size -- TODO confirm.
        flat_feature_count = 25088
        self.device = device
        self.image_processor = AutoFeatureExtractor.from_pretrained(checkpoint, device=self.device)
        self.pre_trained_model = ResNetModel.from_pretrained(checkpoint)
        self.fc = torch.nn.Linear(flat_feature_count, 10)
        self.activation = torch.nn.ReLU()

    def _flat_backbone_output(self, X):
        """Preprocess ``X``, run the backbone, flatten ``last_hidden_state`` from dim 1."""
        batch = self.image_processor(X, return_tensors="pt").to(self.device)
        hidden = self.pre_trained_model(**batch).last_hidden_state
        return hidden.flatten(start_dim=1)

    def forward(self, X):
        """Return softmax-normalised class scores, one row per input."""
        flat = self._flat_backbone_output(X)
        scores = self.fc(self.activation(flat))
        return scores.softmax(dim=1)

    def features_extractor(self, X):
        """Return the flattened backbone output, softmax-normalised along dim 1."""
        return self._flat_backbone_output(X).softmax(dim=1)

# model definition
class Classifier_model_2(torch.nn.Module):
    """Two-class head on top of another model's ``features_extractor`` output.

    Attributes set in ``__init__``:
        device: copy of the module-level ``device`` value.
        f_model: object whose ``features_extractor(X)`` provides the features.
        fc: linear layer mapping 25088 features to 2 outputs.
        activation: ReLU applied to the features before ``fc``.

    NOTE(review): ``forward`` returns softmax probabilities, not logits, so it
    must be paired with a loss that expects probabilities.
    """

    def __init__(self, f_model):
        super().__init__()
        flat_feature_count = 25088  # must match the width of f_model.features_extractor output
        self.device = device
        self.f_model = f_model
        self.fc = torch.nn.Linear(flat_feature_count, 2)
        self.activation = torch.nn.ReLU()

    def forward(self, X):
        """Return softmax-normalised scores for the two classes, one row per input."""
        # Preprocessing is done inside f_model.features_extractor.
        features = self.f_model.features_extractor(X)
        scores = self.fc(self.activation(features))
        return scores.softmax(dim=1)

def Activedataloader(percent, previousPercent):
    """Build a shuffled DataLoader over the pool slice ``[previousPercent, percent)``.

    Samples come from the module-level ``activeL_classical_list`` (label 0) and
    ``activeL_non_classical_list`` (label 1). Both slices are taken from these
    pools only: slicing ``non_classical_list_MG`` instead would return its
    leading samples, which are the ones held out for evaluation.

    Args:
        percent: upper bound of the slice, as a fraction of each pool.
        previousPercent: lower bound of the slice, as a fraction of each pool.

    Returns:
        ``DataLoader`` with batch size 10 and shuffling enabled, yielding
        ``(image, label)`` batches placed on ``device``.
    """
    def _window(pool):
        # Same start/stop arithmetic for both classes.
        return list(pool[int(previousPercent * len(pool)):int(percent * len(pool))])

    classical_batch = _window(activeL_classical_list)
    non_classical_batch = _window(activeL_non_classical_list)

    # Stacking one combined list also works when one of the two slices is empty.
    tensor_x = torch.Tensor(np.array(classical_batch + non_classical_batch)).to(device)

    # Labels follow the same order as tensor_x: classical first, then non-classical.
    tensor_y = torch.Tensor(np.concatenate((np.full(len(classical_batch), 0),
                                            np.full(len(non_classical_batch), 1)), axis=0)).to(device)

    my_dataset = TensorDataset(tensor_x, tensor_y)  # pairs every image with its label
    my_dataloader = DataLoader(my_dataset, batch_size=10, shuffle=True)
    return my_dataloader

# Root directory of the second dataset; images are grouped by parent folder name
import random
root_dir = '/content/drive/MyDrive/ColabNotebooks/mel'

# Maps a folder name to the list of image arrays found directly inside it
image_dict = {}

# Walk every sub-folder of the root directory
for root, dirs, files in os.walk(root_dir):
    # os.walk yields entries in arbitrary order; sorting makes the 20 % / 80 %
    # split below select the same samples on every run.
    dirs.sort()
    for file in sorted(files):
        file_path = os.path.join(root, file)

        # Only keep files whose extension marks them as images
        if not file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            continue

        # Always normalise the pixel mode to 3-channel RGB. The file format
        # says nothing about the mode (a JPEG can be greyscale or CMYK), and
        # arrays with differing channel counts cannot be stacked later.
        # The context manager releases the file handle deterministically.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB").resize((336, 219))

        # Name of the folder that directly contains the file
        parent_dir = os.path.basename(os.path.dirname(file_path))
        image_dict.setdefault(parent_dir, []).append(np.array(image))

classical_list_MG = image_dict['classic']
non_classical_list_MG = image_dict['non_classic']

# Number of leading samples of each class held out for evaluation (20 %)
percent_classical_MG = int(0.2*len(classical_list_MG))
percent_non_classical_MG = int(0.2*len(non_classical_list_MG))

# [sample, label] pairs of the hold-out set: 0 = classic, 1 = non_classic
evaluation_list_dic = (
    [[sample, 0] for sample in classical_list_MG[:percent_classical_MG]]
    + [[sample, 1] for sample in non_classical_list_MG[:percent_non_classical_MG]]
)

# Shuffle the pairs, then keep the samples alone in the same order so that
# evaluation_list[i] is labelled by evaluation_list_dic[i][1].
random.shuffle(evaluation_list_dic)
evaluation_list = [sample for sample, _ in evaluation_list_dic]

# Remaining 80 % of each class: the pool available for training
activeL_classical_list = classical_list_MG[percent_classical_MG:]
activeL_non_classical_list = non_classical_list_MG[percent_non_classical_MG:]


### Random picking

In [None]:
import os
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# La fonction pour le random picking et l'évaluation
def random_picking(modelToTrain, evaluation_list,save_dir='/content/drive/MyDrive/ColabNotebooks/models/randomPicking'):
    """Fine-tune a saved model on successive slices of the pool and score each stage.

    For every cumulative fraction in ``percentages`` the function trains on the
    batch returned by ``Activedataloader(percent, previousPercent)``, saves the
    resulting model under ``save_dir``, evaluates it on ``evaluation_list`` and
    starts the next stage from the file it just saved.

    Args:
        modelToTrain: path of a model saved with ``torch.save(model, path)``.
        evaluation_list: evaluation samples. Their labels are read from the
            module-level ``evaluation_list_dic`` at the same index, so both
            must be in the same order.
        save_dir: directory receiving the per-stage models and ``results.csv``.
            It is created if missing.

    Returns:
        ``pandas.DataFrame`` with columns ``Percentage`` and ``Accuracy``; the
        same table is written to ``<save_dir>/results.csv``.
    """
    os.makedirs(save_dir, exist_ok=True)

    def probability_cross_entropy(probs, targets, eps=1e-12):
        # The model's forward already ends with a softmax, so take the negative
        # log-likelihood of its probabilities. torch.nn.CrossEntropyLoss expects
        # raw logits and would apply softmax a second time.
        return torch.nn.functional.nll_loss(torch.log(probs.clamp_min(eps)), targets)

    results = []

    # Cumulative fractions of the pool; each stage trains on the part that the
    # previous stage has not seen yet.
    percentages = [0.02, 0.05, 0.10, 0.20, 0.50, 0.70, 1.00]
    previousPercent = 0

    # `epochs` is only an upper bound: `timeout` (seconds) ends training first.
    lr, weight_decay, epochs, timeout = 1e-5, 5e-4, 1000, 30

    for percent in percentages:
        dataloader = Activedataloader(percent, previousPercent)

        # Continue training from the model produced by the previous stage.
        net = torch.load(modelToTrain)
        optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=weight_decay)
        net2 = train(net, dataloader, probability_cross_entropy, optimizer, None, epochs, device, timeout=timeout)

        # Honour `save_dir` instead of a hard-coded directory.
        checkpoint_path = os.path.join(save_dir, "random_pick_" + str(percent) + "_percent.pt")
        torch.save(net2, checkpoint_path)

        # Predict on the evaluation samples (whole set in one batch).
        model = net2
        model.eval()
        test_data = torch.Tensor(np.array(evaluation_list)).to(device)
        with torch.no_grad():
            preds = model(test_data)
        output = torch.argmax(preds, dim=1)

        # Count predictions that match the label stored at the same index.
        count = 0
        for i in range(len(output)):
            if output[i].item() == evaluation_list_dic[i][1]:
                count += 1
        accuracy_percent = (count / len(output)) * 100
        print(f"Rand {percent:.0%} accuracy : {accuracy_percent} %")
        print(output)

        # NOTE(review): the 'Accuracy' column stores the tuple
        # (accuracy in %, epoch limit), as before. `epochs` is the configured
        # cap, not the number of epochs actually run -- confirm this is wanted.
        accuracy = accuracy_percent, epochs

        results.append([percent, accuracy])
        previousPercent = percent
        modelToTrain = checkpoint_path

    results_df = pd.DataFrame(results, columns=['Percentage', 'Accuracy'])

    results_csv_path = os.path.join(save_dir, 'results.csv')
    results_df.to_csv(results_csv_path, index=False)

    return results_df

# Example usage: pass the path of a model saved with torch.save and the evaluation samples.
# random_picking(path_to_saved_model, evaluation_list)



In [None]:
# Run the random-picking experiment starting from the saved two-class model.
# Labels for evaluation_list are read from evaluation_list_dic inside the function.
random_picking("/content/drive/MyDrive/ColabNotebooks/model_2_Clem_trainings_longEgale_Colab.pt", evaluation_list)

### Margin of Confidence

In [None]:
# Mount Google Drive (Colab only) so the paths under /content/drive used below
# are reachable. This repeats the mount done in the notebook's first cell.
from google.colab import drive
drive.mount('/content/drive')

from urllib.request import urlopen
from PIL import Image
import torch
from transformers import AutoFeatureExtractor, ResNetForImageClassification, ResNetModel
import os
from PIL import Image
from torch.utils.data import TensorDataset, DataLoader
import time
import numpy as np
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing at the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from transformers import ResNetModel
import torch

def train(net, train_dataloader, criterion, optimizer, scheduler=None, epochs=100, device=device, checkpoint_epochs=2, timeout=45):
    """Train ``net`` until ``epochs`` or the time budget is reached, and return it.

    Args:
        net: model called as ``net(X)``; its parameters are updated in place.
        train_dataloader: iterable yielding ``(X, y)`` batches.
        criterion: callable ``criterion(preds, y)`` returning a scalar loss.
            The epoch loss printed here assumes it is a mean over the batch.
        optimizer: optimizer stepped once per batch.
        scheduler: optional learning-rate scheduler, stepped once per epoch.
        epochs: maximum number of passes over ``train_dataloader``.
        device: device the batches and the running metrics are placed on.
        checkpoint_epochs: a checkpoint is written to ``./checkpoint.pth.tar``
            every ``checkpoint_epochs`` epochs (each one overwrites the last).
        timeout: wall-clock budget in seconds. It is checked at the end of
            each epoch, so the epoch in progress always completes.

    Returns:
        The trained ``net``.
    """
    start = time.time()
    print(f'Training for {epochs} epochs on {device}')

    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")

        net.train()  # put network in train mode for Dropout and Batch Normalization
        # Running metrics stay on `device` to avoid a host transfer per batch.
        train_loss = torch.tensor(0., device=device)
        train_accuracy = torch.tensor(0., device=device)
        samples_seen = 0
        for X, y in train_dataloader:
            X = X.to(device)
            # Cast on the target device directly; `.type(torch.LongTensor)`
            # would first copy the labels back to the CPU.
            y = y.to(device=device, dtype=torch.long)
            preds = net(X)
            loss = criterion(preds, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                # Weight by the real batch length: the last batch can be
                # shorter than `batch_size`, which would inflate the average.
                batch_len = y.size(0)
                train_loss += loss * batch_len
                train_accuracy += (torch.argmax(preds, dim=1) == y).sum()
                samples_seen += batch_len

        if scheduler is not None:
            scheduler.step()

        # Average over the samples actually processed (also correct with
        # drop_last=True); the max() guards against an empty dataloader.
        denominator = max(samples_seen, 1)
        print(f'Training loss: {train_loss/denominator:.2f}')
        print(f'Training accuracy: {100*train_accuracy/denominator:.2f}')

        if epoch % checkpoint_epochs == 0:
            torch.save({
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, './checkpoint.pth.tar')

        print()
        # Stop once the time budget is spent.
        if (time.time() - start) >= timeout:
            break

    end = time.time()
    print(f'Total training time: {end-start:.1f} seconds')
    return net

# model definition
class Classifier_model(torch.nn.Module):
    """ResNet-18 backbone followed by one linear layer scoring 10 classes.

    Attributes set in ``__init__``:
        device: copy of the module-level ``device`` value.
        image_processor: ``AutoFeatureExtractor`` for "microsoft/resnet-18".
        pre_trained_model: ``ResNetModel`` loaded from "microsoft/resnet-18".
        fc: linear layer mapping 25088 flattened values to 10 outputs.
        activation: ReLU applied to the flattened backbone output in ``forward``.

    NOTE(review): ``forward`` returns softmax probabilities, not logits, so it
    must be paired with a loss that expects probabilities.
    """

    def __init__(self):
        super().__init__()
        checkpoint = "microsoft/resnet-18"
        # Number of values per image after flattening the backbone output;
        # presumably 512 x 7 x 7 for the processor's default size -- TODO confirm.
        flat_feature_count = 25088
        self.device = device
        self.image_processor = AutoFeatureExtractor.from_pretrained(checkpoint, device=self.device)
        self.pre_trained_model = ResNetModel.from_pretrained(checkpoint)
        self.fc = torch.nn.Linear(flat_feature_count, 10)
        self.activation = torch.nn.ReLU()

    def _flat_backbone_output(self, X):
        """Preprocess ``X``, run the backbone, flatten ``last_hidden_state`` from dim 1."""
        batch = self.image_processor(X, return_tensors="pt").to(self.device)
        hidden = self.pre_trained_model(**batch).last_hidden_state
        return hidden.flatten(start_dim=1)

    def forward(self, X):
        """Return softmax-normalised class scores, one row per input."""
        flat = self._flat_backbone_output(X)
        scores = self.fc(self.activation(flat))
        return scores.softmax(dim=1)

    def features_extractor(self, X):
        """Return the flattened backbone output, softmax-normalised along dim 1."""
        return self._flat_backbone_output(X).softmax(dim=1)

# model definition
class Classifier_model_2(torch.nn.Module):
    """Two-class head on top of another model's ``features_extractor`` output.

    Attributes set in ``__init__``:
        device: copy of the module-level ``device`` value.
        f_model: object whose ``features_extractor(X)`` provides the features.
        fc: linear layer mapping 25088 features to 2 outputs.
        activation: ReLU applied to the features before ``fc``.

    NOTE(review): ``forward`` returns softmax probabilities, not logits, so it
    must be paired with a loss that expects probabilities.
    """

    def __init__(self, f_model):
        super().__init__()
        flat_feature_count = 25088  # must match the width of f_model.features_extractor output
        self.device = device
        self.f_model = f_model
        self.fc = torch.nn.Linear(flat_feature_count, 2)
        self.activation = torch.nn.ReLU()

    def forward(self, X):
        """Return softmax-normalised scores for the two classes, one row per input."""
        # Preprocessing is done inside f_model.features_extractor.
        features = self.f_model.features_extractor(X)
        scores = self.fc(self.activation(features))
        return scores.softmax(dim=1)

def Activedataloader(percent, previousPercent):
    """Build a shuffled DataLoader over the pool slice ``[previousPercent, percent)``.

    Samples come from the module-level ``activeL_classical_list`` (label 0) and
    ``activeL_non_classical_list`` (label 1). Both slices are taken from these
    pools only: slicing ``non_classical_list_MG`` instead would return its
    leading samples, which are the ones held out for evaluation.

    Args:
        percent: upper bound of the slice, as a fraction of each pool.
        previousPercent: lower bound of the slice, as a fraction of each pool.

    Returns:
        ``DataLoader`` with batch size 10 and shuffling enabled, yielding
        ``(image, label)`` batches placed on ``device``.
    """
    def _window(pool):
        # Same start/stop arithmetic for both classes.
        return list(pool[int(previousPercent * len(pool)):int(percent * len(pool))])

    classical_batch = _window(activeL_classical_list)
    non_classical_batch = _window(activeL_non_classical_list)

    # Stacking one combined list also works when one of the two slices is empty.
    tensor_x = torch.Tensor(np.array(classical_batch + non_classical_batch)).to(device)

    # Labels follow the same order as tensor_x: classical first, then non-classical.
    tensor_y = torch.Tensor(np.concatenate((np.full(len(classical_batch), 0),
                                            np.full(len(non_classical_batch), 1)), axis=0)).to(device)

    my_dataset = TensorDataset(tensor_x, tensor_y)  # pairs every image with its label
    my_dataloader = DataLoader(my_dataset, batch_size=10, shuffle=True)
    return my_dataloader

# Root directory of the second dataset; images are grouped by parent folder name
import random
root_dir = '/content/drive/MyDrive/ColabNotebooks/mel'

# Maps a folder name to the list of image arrays found directly inside it
image_dict = {}

# Walk every sub-folder of the root directory
for root, dirs, files in os.walk(root_dir):
    # os.walk yields entries in arbitrary order; sorting makes the 20 % / 80 %
    # split below select the same samples on every run.
    dirs.sort()
    for file in sorted(files):
        file_path = os.path.join(root, file)

        # Only keep files whose extension marks them as images
        if not file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            continue

        # Always normalise the pixel mode to 3-channel RGB. The file format
        # says nothing about the mode (a JPEG can be greyscale or CMYK), and
        # arrays with differing channel counts cannot be stacked later.
        # The context manager releases the file handle deterministically.
        with Image.open(file_path) as raw_image:
            image = raw_image.convert("RGB").resize((336, 219))

        # Name of the folder that directly contains the file
        parent_dir = os.path.basename(os.path.dirname(file_path))
        image_dict.setdefault(parent_dir, []).append(np.array(image))

classical_list_MG = image_dict['classic']
non_classical_list_MG = image_dict['non_classic']

# Number of leading samples of each class held out for evaluation (20 %)
percent_classical_MG = int(0.2*len(classical_list_MG))
percent_non_classical_MG = int(0.2*len(non_classical_list_MG))

# [sample, label] pairs of the hold-out set: 0 = classic, 1 = non_classic
evaluation_list_dic = (
    [[sample, 0] for sample in classical_list_MG[:percent_classical_MG]]
    + [[sample, 1] for sample in non_classical_list_MG[:percent_non_classical_MG]]
)

# Shuffle the pairs, then keep the samples alone in the same order so that
# evaluation_list[i] is labelled by evaluation_list_dic[i][1].
random.shuffle(evaluation_list_dic)
evaluation_list = [sample for sample, _ in evaluation_list_dic]

# Remaining 80 % of each class: the pool available for training
activeL_classical_list = classical_list_MG[percent_classical_MG:]
activeL_non_classical_list = non_classical_list_MG[percent_non_classical_MG:]


In [None]:
# Margin Of Confidence
  # Prendre le full dataset
  # Avec le baseline, faire les prédictions
  # Une fois que les prédictions sont faites, Margin of Confidence
  # Grâce à Margin of Confidence, on obtient le dataset trié par intérêt.
  # On prend alors percent % de ce dataset trié et on entraîne le modèle.
  # On supprime ensuite ces samples du dataset original et on réitère pour le nouveau modèle obtenu.

# Fonctions à (ré)écrire :
  # Active Dataloader : Doit fournir les données sélectionnées par Margin Of Confidence au modèle pour l'entraînement
  # MarginOfConfidence : Calcule la margin of confidence à partir d'un dataset donné
  # prepareData : Prépare un nouveau dataset en fonction du pourcentage à voir pour le modèle


In [None]:
def MarginDataLoader(percent, previousPercent, uncertainties, dataset):
  """
  Select the next batch of most-uncertain samples and wrap them in a DataLoader.

  This function is used after the Margin of Confidence of the unseen samples
  has been computed. It relies on the notebook-level names
  activeL_classical_list (to recover the label of a sample) and device.

  Keywords:
    percent         : Cumulative fraction of the base dataset that must have been seen
                      once this round is over
    previousPercent : Cumulative fraction that had been seen before this round
    uncertainties   : A dictionary {index in dataset: uncertainty score}, already ordered
                      with the most interesting samples first
    dataset         : The dataset containing only unseen samples
  Returns:
    my_dataloader   : pytorch DataLoader (batch size 10, shuffled) over the selected
                      samples; label 0 = classical, 1 = non classical
    dataset         : A copy of dataset without the selected samples
  """

  # How many samples do I have to care about this time ?
  # `percent` is a share of the BASE dataset while `uncertainties` only covers
  # what is still unseen, i.e. (1 - previousPercent) of the base dataset, so the
  # share is rescaled to the remaining pool. round(..., 9) removes float noise
  # such as 0.7 - 0.5 == 0.19999999999999996 before truncating.
  pool_size = len(uncertainties)
  remaining_fraction = 1.0 - previousPercent
  if pool_size == 0 or remaining_fraction <= 0 or percent <= previousPercent:
    lastPoint_uncertainties = 0
  else:
    share_of_pool = (percent - previousPercent) / remaining_fraction
    lastPoint_uncertainties = int(round(share_of_pool * pool_size, 9))
    lastPoint_uncertainties = max(0, min(lastPoint_uncertainties, pool_size))

  # Indices (inside dataset) of the most uncertain samples
  samples_to_take = list(uncertainties.keys())[:lastPoint_uncertainties]
  data = [dataset[index] for index in samples_to_take]

  # np.delete returns a new array: the caller's dataset object is left untouched
  dataset = np.delete(dataset, samples_to_take, axis=0)

  # Recover the class of each selected sample: it is classical when its content
  # matches one of the arrays of activeL_classical_list, non classical otherwise.
  new_active_classical = []
  new_active_non_classical = []
  for item in data:
    if any(np.array_equal(item, reference) for reference in activeL_classical_list):
      new_active_classical.append(item)
    else:
      new_active_non_classical.append(item)

  # Join the two python lists BEFORE converting to an array: concatenating an
  # empty list with an N-d array raises, which happened whenever every selected
  # sample belonged to the same class.
  selected_samples = new_active_classical + new_active_non_classical
  selected_labels = [0] * len(new_active_classical) + [1] * len(new_active_non_classical)

  tensor_x = torch.as_tensor(np.array(selected_samples), dtype=torch.float32).to(device)
  # NOTE(review): labels are float32 tensors; CrossEntropyLoss expects integer
  # class indices, so the training loop presumably casts them - confirm.
  tensor_y = torch.as_tensor(selected_labels, dtype=torch.float32).to(device)

  my_dataset = TensorDataset(tensor_x, tensor_y)
  my_dataloader = DataLoader(my_dataset, batch_size=10, shuffle=True)
  return my_dataloader, dataset

In [None]:
def MarginOfConfidence(model, test_data):
  """
  Rank the samples of test_data by Margin of Confidence.

  Keywords:
    model     : Callable giving the predictions; preds[0] of its output is read as the
                vector of per-class scores of the sample
    test_data : Indexable collection of samples; test_data[i] is handed to the model
                unchanged, so each entry must already be an input the model accepts

  Returns:
    A dictionary {index in test_data: score} sorted with the most uncertain sample
    first. The score is (second highest output - highest output): it is always <= 0
    and the closer it is to 0, the less confident the model is.
  """
  # Scoring is pure inference: switch a torch module to eval mode for the
  # duration of the call and put it back in training mode afterwards if needed.
  is_module = isinstance(model, torch.nn.Module)
  was_training = is_module and model.training
  if is_module:
    model.eval()

  uncertainty_dict = {}
  try:
    with torch.no_grad():
      for i in range(len(test_data)):
        preds = model(test_data[i])
        preds_sorted = np.sort(preds[0].cpu().detach().numpy())
        # Ascending sort: the two best classes are the LAST two entries. Using
        # the first two only gives the margin when there are exactly 2 classes.
        uncertainty_dict[i] = (preds_sorted[-2] - preds_sorted[-1])
  finally:
    if was_training:
      model.train()

  # Scores are negative margins, so descending order puts the smallest margin first
  res = dict(sorted(uncertainty_dict.items(),
                    key = lambda x: x[1], reverse = True))
  for key in list(res.keys())[:10]:
      print(f"{key}: {res[key]}")
  return res

In [None]:
import os
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def marginOfConfidence_Train(modelToTrain, evaluation_list,save_dir='/content/drive/MyDrive/ColabNotebooks/models/MarginOfConfidence'):
    """
    Run the Margin-of-Confidence active-learning schedule and evaluate every round.

    For each cumulative percentage of the schedule the function reloads the latest
    checkpoint, scores the still-unseen samples with MarginOfConfidence, trains on
    the samples picked by MarginDataLoader, saves the model in save_dir and measures
    its accuracy on the evaluation samples.

    Relies on notebook-level names: activeL_classical_list,
    activeL_non_classical_list, evaluation_list_dic, device and train.

    Keywords:
      modelToTrain    : Path of the checkpoint given to torch.load for the first round
      evaluation_list : Evaluation samples, in the same order as evaluation_list_dic
                        (labels are read by position from evaluation_list_dic)
      save_dir        : Directory receiving the per-round checkpoints and results.csv

    Returns:
      results_df : DataFrame with the columns 'Percentage' and 'Accuracy'. Each
                   'Accuracy' cell is the pair (accuracy in %, epoch budget).
    """

    # Create the directory used to store the models if it does not exist yet
    os.makedirs(save_dir, exist_ok=True)

    results = []

    # Cumulative fractions of the dataset to select
    percentages = [0.02, 0.05,0.10, 0.20, 0.50, 0.70, 1.00]
    previousPercent = 0

    # One entry per unseen sample (classical samples first). A list holding the
    # two class lists would only have length 2 and could not be indexed sample
    # by sample by MarginOfConfidence / MarginDataLoader.
    dataset = np.array(list(activeL_classical_list) + list(activeL_non_classical_list))

    # Ground-truth labels of the evaluation samples, by position
    evaluation_labels = [entry[1] for entry in evaluation_list_dic]

    for percent in percentages:
        print(f"""
        #############################
        #         NEW ROUND         #
        #     percent = {percent}   #
        #############################
        """)
        lr, weight_decay, epochs,timeout = 1e-5, 5e-4, 1000, 30
        net = torch.load(modelToTrain)
        loss = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(net.parameters(),lr=lr, weight_decay=weight_decay)

        # Active Learning
        print(f"""
        #############################
        #    Calculating Margin     #
        #############################
        """)
        # MarginOfConfidence feeds candidates[i] straight to the model, so every
        # entry is turned into a batch holding a single sample, on the model device.
        candidates = torch.Tensor(dataset).unsqueeze(1).to(device)
        uncertainties = MarginOfConfidence(net, candidates)
        del candidates  # release the scoring copy before training
        dataloader, dataset = MarginDataLoader(percent, previousPercent, uncertainties, dataset)

        print(f"""
        #############################
        #       Begin Training      #
        #############################
        """)

        net2 = train(net, dataloader, loss, optimizer, None, epochs, device, timeout=timeout)
        # Checkpoints go to save_dir, next to results.csv
        checkpoint_path = os.path.join(save_dir, "margin_confidence_" + str(percent) + "_percent.pt")
        torch.save(net2, checkpoint_path)

        # Predict on the evaluation list
        model = net2
        model.eval()
        test_data=torch.Tensor(np.array(evaluation_list)).to(device)
        with torch.no_grad():
          preds = model(test_data)
        output=torch.argmax(preds, dim=1)

        # Model evaluation: share of predictions equal to the stored label
        count = sum(1 for predicted, label in zip(output.tolist(), evaluation_labels)
                    if predicted == label)
        accuracy_percent = (count/len(output))*100
        print(f"Margin_Confidence {percent} % accuracy : {accuracy_percent} %")
        print(output)

        # Stored as the pair (accuracy in %, epoch budget); this is the layout
        # that ends up in the 'Accuracy' column of results.csv.
        accuracy = accuracy_percent, epochs

        # Add the results of this round
        results.append([percent, accuracy])
        previousPercent = percent
        # The next round continues the training from the model just saved
        modelToTrain = checkpoint_path

    # Build a DataFrame from the results
    results_df = pd.DataFrame(results, columns=['Percentage', 'Accuracy'])

    # Save the results to a CSV file
    results_csv_path = os.path.join(save_dir, 'results.csv')
    results_df.to_csv(results_csv_path, index=False)

    return results_df


In [None]:
marginOfConfidence_Train("/content/drive/MyDrive/ColabNotebooks/model_2_Clem_trainings_longEgale_Colab.pt", evaluation_list)

### Ratio Of Confidence