In [None]:
import torch
import torchvision
import numpy as np
from sklearn.model_selection import train_test_split
import sys
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
import time
import copy
import matplotlib.pyplot as plt
import os
import random

# Seed every random number generator the notebook relies on, as early as possible,
# so that a fresh "Restart & Run All" starts from a known state.
seed=42
random.seed(seed)
# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this assignment only
# affects Python processes launched after this point, not the running kernel.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
# PyTorch has its own generator (weight initialisation, DataLoader shuffling, dropout);
# seed it here too instead of relying on a later cell having been executed.
torch.manual_seed(seed)

In [None]:
# Mount the user's Google Drive at /content/gdrive; the dataset folders and the
# saved model files referenced in this notebook all live under that mount point.
# NOTE(review): `google.colab` is presumably only available on Google Colab, so this
# cell is expected to fail in any other environment — confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/gdrive')

KeyboardInterrupt: ignored

In [None]:
# Image normalisation for PyTorch pre-trained models:
# one mean and one standard deviation per image channel.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Preprocessing applied to every image, in order:
#   1. resize to 224 x 224,
#   2. convert to a tensor,
#   3. standardise each channel with the statistics above.
data_transforms = transforms.Compose(
    [
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

In [None]:
# Image folders on the mounted Drive (one sub-folder per class, as ImageFolder expects).
train_directory = "/content/gdrive/MyDrive/GitHub/ProjetFromage/augmentated_data/train/"
test_directory = "/content/gdrive/MyDrive/GitHub/ProjetFromage/augmentated_data/test/"
dataset_train_full = datasets.ImageFolder(train_directory, data_transforms)
dataset_test = datasets.ImageFolder(test_directory, data_transforms)

# Each ImageFolder builds its own class-name -> index mapping from its root directory.
# If the two roots disagreed, test labels would silently refer to different classes,
# so fail loudly here rather than report a meaningless test accuracy later.
assert dataset_train_full.class_to_idx == dataset_test.class_to_idx, (
    "train and test folders do not define the same classes"
)

# Split the (path, label) list of the training folder into train and validation parts.
# The seed is passed explicitly so the split does not depend on whatever state the
# global NumPy RNG happens to be in when this cell is (re-)run.
# NOTE(review): the directory name suggests the images were augmented before this
# split; if so, variants of one source image may end up in both train and validation,
# which would make the validation accuracy optimistic — confirm.
np.random.seed(seed)
samples_train, samples_val = train_test_split(
    dataset_train_full.samples, test_size=0.2, random_state=seed
)
samples_test = dataset_test.samples

# The split must be a partition of the full training list.
assert len(samples_train) + len(samples_val) == len(dataset_train_full.samples)

print("Nombre d'images de train : %i" % len(samples_train))
print("Nombre d'images de val : %i" % len(samples_val))
print("Nombre d'images de test : %i" % len(samples_test))

In [None]:
BATCH_SIZE = 16


def _image_folder_subset(base_dataset, samples):
    """Return a shallow copy of ``base_dataset`` that only exposes ``samples``.

    Args:
        base_dataset: an already-built ``ImageFolder``; it is not modified.
        samples: list of ``(path, class_index)`` pairs to keep.

    Returns:
        A new dataset object sharing the transform and class mapping of
        ``base_dataset``, with ``samples``, ``imgs`` and ``targets`` all describing
        the same subset.
    """
    # Copying avoids scanning the image directory again for every subset.
    subset = copy.copy(base_dataset)
    subset.samples = samples
    subset.imgs = samples
    # Keep `targets` in sync; leaving it untouched would still describe the full folder.
    subset.targets = [label for _, label in samples]
    return subset


# Build the PyTorch datasets / loader from the train and validation sample lists.
dataset_train = _image_folder_subset(dataset_train_full, samples_train)
loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

dataset_val = _image_folder_subset(dataset_train_full, samples_val)

torch.manual_seed(42)

In [None]:
# Determine the number of classes from the training labels, after checking that the
# labels are exactly the integers 0 .. nb_classes - 1 (no gap, starting at 0).
labels = [label for _, label in samples_train]
unique_labels = np.unique(labels)

# Raise instead of print + sys.exit(): in a notebook an exception stops the cell with
# a normal traceback, whereas sys.exit() raises SystemExit inside the kernel.
if np.min(labels) != 0:
    raise ValueError("Error: labels should start at 0 (min is %i)" % np.min(labels))
if np.max(labels) != (len(unique_labels) - 1):
    raise ValueError(
        "Error: labels should go from 0 to Nclasses-1 (max label = {}; Nclasses = {})".format(
            np.max(labels), len(unique_labels)
        )
    )

# Plain Python int (not a NumPy scalar) so it can be passed around as a layer size.
nb_classes = int(np.max(labels)) + 1
print("Apprentissage sur {} classes".format(nb_classes))

Apprentissage sur 43 classes


In [None]:
# Select the compute device: the first CUDA GPU when one is available, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Show whether CUDA was detected.
print(torch.cuda.is_available())

True


In [None]:
# Evaluation function
def evaluate(model, dataset, batch_size=16, num_workers=2):
    """Compute the average loss and the accuracy of ``model`` over ``dataset``.

    The loss function and the compute device are taken from the module-level
    names ``criterion`` and ``device``, which must be defined before calling.

    Args:
        model: network mapping a batch of inputs to per-class scores.
        dataset: dataset yielding ``(input, label)`` pairs.
        batch_size: number of samples per evaluation batch.
        num_workers: number of DataLoader worker processes.

    Returns:
        ``(mean_loss, accuracy)`` where ``mean_loss`` is the loss averaged per
        sample and ``accuracy`` is the fraction of correct predictions in [0, 1].

    Raises:
        ValueError: if ``dataset`` is empty.
    """
    n_samples = len(dataset)
    if n_samples == 0:
        raise ValueError("cannot evaluate on an empty dataset")

    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
    )

    # A criterion with the default 'mean' reduction returns the batch average, so it
    # has to be weighted by the batch size before dividing by the number of samples.
    # With reduction='sum' the value is already a per-batch total.
    loss_is_batch_sum = getattr(criterion, "reduction", "mean") == "sum"

    total_loss = 0.0
    total_correct = 0

    # Evaluate in eval mode and without building autograd graphs, then restore
    # whatever mode the caller had set.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)

                batch_loss = criterion(outputs, labels).item()
                if not loss_is_batch_sum:
                    batch_loss *= labels.size(0)
                total_loss += batch_loss

                _, preds = torch.max(outputs, 1)
                # .item() keeps the running count a Python int instead of a device tensor.
                total_correct += torch.sum(preds == labels).item()
    finally:
        model.train(was_training)

    return total_loss / n_samples, total_correct / n_samples

# Model training function
# When True, a full validation pass is run and printed after every training batch.
PRINT_LOSS = False

def train_model(model, loader_train, data_val, optimizer, criterion, n_epochs=10, scheduler=None):
    """Train ``model`` and restore the weights of the best validation epoch.

    After each epoch the model is scored on ``data_val`` with ``evaluate``; the
    weights giving the highest validation accuracy are kept and loaded back into
    ``model`` before returning. All epochs are always run.

    Inputs are moved to the module-level ``device``. Note that ``evaluate`` uses
    the module-level ``criterion`` for the validation loss, not the ``criterion``
    argument, which is only used for the training loss.

    Args:
        model: network to train (modified in place; left in training mode).
        loader_train: iterable of ``(inputs, labels)`` training batches.
        data_val: validation dataset passed to ``evaluate``.
        optimizer: optimizer already bound to the parameters to update.
        criterion: training loss function.
        n_epochs: number of passes over ``loader_train``.
        scheduler: optional learning-rate scheduler, stepped after every batch.

    Returns:
        The best validation accuracy observed (0 if no epoch improved on 0).
    """
    best_acc = 0
    best_model_wts = copy.deepcopy(model.state_dict())

    # Do not rely on the caller having switched the model to training mode.
    model.train(True)

    for epoch in range(n_epochs):
        print("EPOCH % i" % epoch)
        last_batch = -1     # stays -1 when the loader yields no batch
        loss_sum = 0.0      # running sum of the per-batch training losses

        for last_batch, (inputs, labels) in enumerate(loader_train):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)

            loss = criterion(outputs, labels)
            if PRINT_LOSS:
                model.train(False)
                # Validation needs no gradients; this does not affect `loss`'s graph.
                with torch.no_grad():
                    loss_val, accuracy = evaluate(model, data_val)
                model.train(True)
                print("{} loss train: {:1.4f}\t val {:1.4f}\tAcc (val): {:.1%}".format(last_batch, loss.item(), loss_val, accuracy   ))

            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            # Accumulate on the tensor's device; converted to float once per epoch.
            loss_sum = loss_sum + loss.detach()

        n_batches = last_batch + 1
        # Report the epoch average rather than the (noisy) loss of the last batch.
        mean_train_loss = float(loss_sum) / n_batches if n_batches else float("nan")

        model.train(False)
        with torch.no_grad():
            loss_val, accuracy = evaluate(model, data_val)

        # Best-checkpoint selection: remember the weights of the best epoch so far.
        if accuracy > best_acc:
            best_acc = accuracy
            best_model_wts = copy.deepcopy(model.state_dict())
        model.train(True)
        print("{} loss train: {:1.4f}\t val {:1.4f}\tAcc (val): {:.1%}".format(last_batch, mean_train_loss, loss_val, accuracy   ))

    model.load_state_dict(best_model_wts)
    return best_acc


In [None]:
#===== "Plain" transfer learning (no fine-tuning) =====

weight_decay = 0

# Seed before building the model: the replacement classifier layer below is randomly
# initialised, so without this the cell gives a different starting point each time it
# is re-run on its own.
torch.manual_seed(seed)

my_net = models.mobilenet_v2(pretrained=True)

# Freeze every pre-trained parameter ...
for param in my_net.parameters():
    param.requires_grad = False

# ... and replace the final classifier layer by a new, trainable one with one output
# per class (parameters of a freshly created layer require gradients by default).
my_net.classifier[1] = nn.Linear(in_features=my_net.classifier[1].in_features, out_features=nb_classes, bias=True)
my_net.to(device)

criterion = nn.CrossEntropyLoss()
# Only the new layer's parameters are handed to the optimizer.
optimizer = optim.Adam(my_net.classifier[1].parameters(), lr=0.0015, weight_decay=weight_decay)
# Exponential learning-rate decay. train_model steps the scheduler after every batch,
# so x counts batches here, not epochs.
lambda_ = lambda x: 0.9985 ** x
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_)

print("Apprentissage en transfer learning")
my_net.train(True)
# Re-seed right before training and ask cuDNN for deterministic behaviour.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
best_acc=train_model(my_net, loader_train, dataset_val, optimizer, criterion, n_epochs=10, scheduler=scheduler)
print(f'Best Acc Val = {best_acc}\n')

# Evaluation on the held-out test set
my_net.train(False)
loss, accuracy = evaluate(my_net, dataset_test)
print("Accuracy (test): %.1f%%" % (100 * accuracy))

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

Apprentissage en transfer learning
EPOCH  0
857 loss train: 2.2705	 val 0.0784	Acc (val): 68.2%
EPOCH  1
857 loss train: 1.2318	 val 0.0702	Acc (val): 71.8%
EPOCH  2
857 loss train: 1.0618	 val 0.0679	Acc (val): 72.4%
EPOCH  3
857 loss train: 1.2006	 val 0.0673	Acc (val): 72.3%
EPOCH  4
857 loss train: 1.3226	 val 0.0674	Acc (val): 72.1%
EPOCH  5
857 loss train: 0.8340	 val 0.0677	Acc (val): 72.3%
EPOCH  6
857 loss train: 0.8182	 val 0.0676	Acc (val): 72.1%
EPOCH  7
857 loss train: 1.0705	 val 0.0677	Acc (val): 72.5%
EPOCH  8
857 loss train: 1.9723	 val 0.0672	Acc (val): 72.3%
EPOCH  9
857 loss train: 0.7195	 val 0.0677	Acc (val): 72.2%
Best Acc Val = 0.7249417249417249

Accuracy (test): 68.6%


In [None]:
torch.cuda.empty_cache()

#===== Fine tuning =====

weight_decay = 0.0001

# Seed before building the model: the new classifier layer is randomly initialised,
# and without this its starting weights depend on how much randomness earlier cells
# consumed (or on how many times this cell has been re-run).
torch.manual_seed(seed)

# Start again from a fresh pre-trained MobileNet with a new classifier layer
my_net_ft = models.mobilenet_v2(pretrained=True)
my_net_ft.classifier[1] = nn.Linear(in_features=my_net_ft.classifier[1].in_features, out_features=nb_classes, bias=True)
my_net_ft.to(device)

# This time every parameter is updated: nothing is frozen and the optimizer
# receives all of the model's parameters.

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(my_net_ft.parameters(), lr=0.0015, momentum=0.9, weight_decay=weight_decay)

# Exponential learning-rate decay. train_model steps the scheduler after every batch,
# so x counts batches here, not epochs.
lambda_ = lambda x: 0.9985 ** x
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_)

# Train again
my_net_ft.train(True)
# Re-seed right before training and ask cuDNN for deterministic behaviour.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
best_acc=train_model(my_net_ft, loader_train, dataset_val, optimizer, criterion, n_epochs=10, scheduler=scheduler)
# Report the best validation accuracy, as the transfer-learning cell does.
print(f'Best Acc Val = {best_acc}\n')

# Re-evaluate the performance on the test set
my_net_ft.train(False)
loss, accuracy = evaluate(my_net_ft, dataset_test)
print("Accuracy (test): %.1f%%" % (100 * accuracy))

# Save the weights (state_dict) and, separately, the whole model object.
# NOTE: the second file pickles the module itself; prefer the state_dict file for
# anything long-lived, since a pickled model can only be loaded where the same model
# code is importable.
PATH="/content/gdrive/MyDrive/GitHub/ProjetFromage/MobileNetFT_weights.pth"
torch.save(my_net_ft.state_dict(), PATH)
torch.save(my_net_ft, "/content/gdrive/MyDrive/GitHub/ProjetFromage/MobileNetFT_model.pth")

EPOCH  0
857 loss train: 1.3494	 val 0.0472	Acc (val): 80.9%
EPOCH  1
857 loss train: 0.7327	 val 0.0352	Acc (val): 85.8%
EPOCH  2
857 loss train: 0.5113	 val 0.0323	Acc (val): 87.3%
EPOCH  3
857 loss train: 0.5597	 val 0.0323	Acc (val): 87.2%
EPOCH  4
857 loss train: 0.4426	 val 0.0320	Acc (val): 87.4%
EPOCH  5
857 loss train: 0.3108	 val 0.0322	Acc (val): 87.1%
EPOCH  6
857 loss train: 0.1863	 val 0.0317	Acc (val): 87.6%
EPOCH  7
857 loss train: 0.4172	 val 0.0320	Acc (val): 87.3%
EPOCH  8
857 loss train: 0.9946	 val 0.0315	Acc (val): 87.5%
EPOCH  9
857 loss train: 0.2549	 val 0.0322	Acc (val): 87.1%
Accuracy (test): 78.6%


In [None]:
# Evaluate previously saved whole-model checkpoints on the test set.
# NOTE(review): neither file is written by the cells visible in this notebook —
# presumably they come from other runs; confirm they exist before running.
# NOTE(security): torch.load unpickles arbitrary Python objects; only load files
# you created yourself or otherwise trust.
saved_model_paths = [
    "/content/gdrive/MyDrive/GitHub/ProjetFromage/ResNetTL_model.pth",
    "/content/gdrive/MyDrive/GitHub/ProjetFromage/MobileNetTL_model.pth",
]

criterion = nn.CrossEntropyLoss()
for saved_model_path in saved_model_paths:
    # map_location lets a checkpoint saved on GPU be loaded when only a CPU is available.
    new_model = torch.load(saved_model_path, map_location=device)
    new_model.to(device)
    # The train/eval flag is stored with the pickled model; force inference mode so
    # the reported accuracy does not depend on the state it was saved in.
    new_model.eval()
    loss, accuracy = evaluate(new_model, dataset_test)
    print("Accuracy (test): %.1f%%" % (100 * accuracy))

Accuracy (test): 64.2%
Accuracy (test): 68.7%
