In [1]:
import torch
import torchvision
import numpy as np
from sklearn.model_selection import train_test_split
import sys
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
import time
import copy
import matplotlib.pyplot as plt
import os
import random

seed=42
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
# Normalisation des images pour les modèles pré-entraînés PyTorch

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

data_transforms = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [5]:
train_directory = "/content/gdrive/MyDrive/GitHub/ProjetFromage/augmentated_data/train/"
test_directory = "/content/gdrive/MyDrive/GitHub/ProjetFromage/augmentated_data/test/"
dataset_train_full = datasets.ImageFolder(train_directory, data_transforms)
dataset_test = datasets.ImageFolder(test_directory, data_transforms)

# split en train, val et test à partir de la liste complète
np.random.seed(42)
samples_train, samples_val = train_test_split(dataset_train_full.samples,test_size=0.2)
samples_test = dataset_test.samples

print("Nombre d'images de train : %i" % len(samples_train))
print("Nombre d'images de val : %i" % len(samples_val))
print("Nombre d'images de test : %i" % len(samples_test))

Nombre d'images de train : 13728
Nombre d'images de val : 3432
Nombre d'images de test : 954


In [6]:
BATCH_SIZE = 16

# on définit les datasets et loaders pytorch à partir des listes d'images de train / val / test
dataset_train = datasets.ImageFolder(train_directory, data_transforms)
dataset_train.samples = samples_train
dataset_train.imgs = samples_train
loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

dataset_val = datasets.ImageFolder(train_directory, data_transforms)
dataset_val.samples = samples_val
dataset_val.imgs = samples_val

torch.manual_seed(42)

<torch._C.Generator at 0x7ff7ad2beeb0>

In [7]:
# détermination du nombre de classes
# vérification que les labels sont bien dans [0, nb_classes]
labels=[x[1] for x in samples_train]
if np.min(labels) != 0:
    print("Error: labels should start at 0 (min is %i)" % np.min(labels))
    sys.exit(-1)
if np.max(labels) != (len(np.unique(labels))-1):
    print("Error: labels should go from 0 to Nclasses (max label = {}; Nclasse = {})".format(np.max(labels),len(np.unique(labels)))  )
    sys.exit(-1)
nb_classes = np.max(labels)+1
print("Apprentissage sur {} classes".format(nb_classes))

Apprentissage sur 43 classes


In [8]:
# on définit le device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())

True


In [20]:
# on définit une fonction d'évaluation
def evaluate(model, dataset):
    avg_loss = 0.
    avg_accuracy = 0
    loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=False, num_workers=2)
    for data in loader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs, 1)
        n_correct = torch.sum(preds == labels)
        
        avg_loss += loss.item()
        avg_accuracy += n_correct
        
    return avg_loss / len(dataset), float(avg_accuracy) / len(dataset)

# fonction d'entraînement du modèle
PRINT_LOSS = False

def train_model(model, loader_train, data_val, optimizer, criterion, n_epochs=10, scheduler=None):
  
    since = time.time()
    best_acc=0
    best_model_wts = copy.deepcopy(model.state_dict())
    saved_acc=[]

    for epoch in range(n_epochs): 
        batch=0
        print("EPOCH % i" % epoch)
        for i, data in enumerate(loader_train):
            batch+=1
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device) 
            optimizer.zero_grad() 
            outputs = model(inputs) 
            
            loss = criterion(outputs, labels) 
            if PRINT_LOSS:
                model.train(False)
                loss_val, accuracy = evaluate(model, data_val)
                model.train(True)
                print("{} loss train: {:1.4f}\t val {:1.4f}\tAcc (val): {:.1%}".format(i, loss.item(), loss_val, accuracy   ))
            
            loss.backward() 
            optimizer.step()
            if scheduler:
              scheduler.step()
            #print(f'Batch {i} done')

        model.train(False)
        loss_val, accuracy = evaluate(model, data_val)
        saved_acc.append(accuracy)

        #early stopping
        if accuracy>best_acc:
          best_acc=accuracy
          best_model_wts = copy.deepcopy(model.state_dict())
        model.train(True)
        print("{} loss train: {:1.4f}\t val {:1.4f}\tAcc (val): {:.1%}".format(i, loss.item(), loss_val, accuracy   ))

    model.load_state_dict(best_model_wts)
    return best_acc


In [21]:
#===== Transfer learning "simple" (sans fine tuning) =====

weight_decay = 0

my_net = models.mobilenet_v2(pretrained=True)

for param in my_net.parameters():
    param.requires_grad = False

my_net.classifier[1] = nn.Linear(in_features=my_net.classifier[1].in_features, out_features=nb_classes, bias=True)
my_net.to(device) 
my_net.train(True) 

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(my_net.classifier[1].parameters(), lr=0.0015, weight_decay=weight_decay)
lambda_ = lambda x: 0.9985 ** x
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_)

print("Apprentissage en transfer learning")
my_net.train(True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
best_acc=train_model(my_net, loader_train, dataset_val, optimizer, criterion, n_epochs=10, scheduler=scheduler)
print(f'Best Acc Val = {best_acc}\n')

# évaluation
my_net.train(False)
loss, accuracy = evaluate(my_net, dataset_test)
print("Accuracy (test): %.1f%%" % (100 * accuracy))

PATH="/content/gdrive/MyDrive/GitHub/ProjetFromage/MobileNetTL_weights.pth"
torch.save(my_net.state_dict(), PATH)
torch.save(my_net, "/content/gdrive/MyDrive/GitHub/ProjetFromage/MobileNetTL_model.pth")

Apprentissage en transfer learning
EPOCH  0
857 loss train: 2.3243	 val 0.0783	Acc (val): 68.3%
EPOCH  1
857 loss train: 1.2558	 val 0.0700	Acc (val): 71.9%
EPOCH  2
857 loss train: 1.0760	 val 0.0678	Acc (val): 72.9%
EPOCH  3
857 loss train: 1.2354	 val 0.0672	Acc (val): 72.6%
EPOCH  4
857 loss train: 1.3299	 val 0.0673	Acc (val): 73.0%
EPOCH  5
857 loss train: 0.8817	 val 0.0676	Acc (val): 72.8%
EPOCH  6
857 loss train: 0.7641	 val 0.0675	Acc (val): 72.6%
EPOCH  7
857 loss train: 1.0760	 val 0.0676	Acc (val): 73.0%
EPOCH  8
857 loss train: 1.9474	 val 0.0671	Acc (val): 72.5%
EPOCH  9
857 loss train: 0.7266	 val 0.0676	Acc (val): 72.5%
Best Acc Val = 0.7298951048951049

Accuracy (test): 68.7%


In [27]:
new_model=torch.load("/content/gdrive/MyDrive/GitHub/ProjetFromage/MobileNetTL_model.pth")
loss, accuracy = evaluate(new_model, dataset_test)
print("Accuracy (test): %.1f%%" % (100 * accuracy))


Accuracy (test): 68.7%


In [29]:
new_model2=torch.load("/content/gdrive/MyDrive/GitHub/ProjetFromage/ResNetTL_model.pth")
loss, accuracy = evaluate(new_model2, dataset_test)
print("Accuracy (test): %.1f%%" % (100 * accuracy))



Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff7ac28f680>
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff7ac28f680>
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child

Accuracy (test): 4.2%


In [None]:
torch.cuda.empty_cache()

#===== Fine tuning =====

weight_decay = 0

# on réinitialise MobileNet
my_net_ft = models.mobilenet_v2(pretrained=True)
my_net_ft.classifier[1] = nn.Linear(in_features=my_net_ft.classifier[1].in_features, out_features=nb_classes, bias=True)
my_net_ft.to(device)

# cette fois on veut updater tous les paramètres

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(my_net_ft.parameters(), lr=0.0015, momentum=0.9, weight_decay=weight_decay)

lambda_ = lambda x: 0.9985 ** x
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_)

# on ré-entraîne
my_net_ft.train(True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
best_acc=train_model(my_net_ft, loader_train, dataset_val, optimizer, criterion, n_epochs=10, scheduler=scheduler)

# on ré-évalue les performances
my_net_ft.train(False)
loss, accuracy = evaluate(my_net_ft, dataset_test)
print("Accuracy (test): %.1f%%" % (100 * accuracy))

EPOCH  0
857 loss train: 1.3451	 val 0.0477	Acc (val): 80.7%
EPOCH  1
857 loss train: 0.5658	 val 0.0355	Acc (val): 85.7%
EPOCH  2
857 loss train: 0.5634	 val 0.0326	Acc (val): 86.7%
EPOCH  3
857 loss train: 0.4309	 val 0.0326	Acc (val): 87.3%
EPOCH  4
857 loss train: 0.5283	 val 0.0322	Acc (val): 87.1%
EPOCH  5
857 loss train: 0.4434	 val 0.0324	Acc (val): 87.0%
EPOCH  6
857 loss train: 0.2313	 val 0.0320	Acc (val): 87.0%
EPOCH  7
857 loss train: 0.3795	 val 0.0321	Acc (val): 87.0%
EPOCH  8
857 loss train: 0.9986	 val 0.0321	Acc (val): 87.6%
EPOCH  9
857 loss train: 0.2658	 val 0.0323	Acc (val): 86.7%
Accuracy (test): 79.5%
