# 102 flowers features

Le but est d'utiliser un modèle pré-entraîné afin d'extraire les features (qui contiennent plus d'informations que les prédictions finales) du dataset 102flowers, puis d'entraîner un classifieur sur ces features.

### Import modules

In [6]:
import torch
import torchvision
from torch.utils.data import Dataset
from PIL import Image
from sklearn.manifold import TSNE
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

ModuleNotFoundError: No module named 'torch'

### Load Data

In [4]:
# Per-split preprocessing. The training pipeline applies a random crop and
# flip; the validation pipeline is deterministic (resize + center crop).
# Both end with the same per-channel normalization (presumably the ImageNet
# mean/std expected by the pretrained backbone -- TODO confirm).
train_pipeline = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
valid_pipeline = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
data_transforms = {'train': train_pipeline, 'valid': valid_pipeline}

# Each split is read from its own sub-folder of `data_dir`.
data_dir = 'data'
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), pipeline)
    for split, pipeline in data_transforms.items()
}
dataset_sizes = {split: len(folder) for split, folder in image_datasets.items()}


NameError: name 'transforms' is not defined

### Création des dataloaders (images)

In [5]:
# One loader per split. shuffle=False keeps the iteration order fixed, which
# matters because the features and the labels are collected in two separate
# passes over these loaders and must line up index by index.
dataloaders = {
    split: torch.utils.data.DataLoader(
        folder, batch_size=4, shuffle=False, num_workers=4
    )
    for split, folder in image_datasets.items()
}
class_names = image_datasets['train'].classes

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(dataloaders)

NameError: name 'torch' is not defined

### Features Extractor

In [4]:
# Pretrained ResNet-18 used purely as a frozen feature extractor: no
# parameter receives gradients.
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Width of the vector that feeds the original classification head.
num_ftrs = model_conv.fc.in_features
# Replace the head with a pass-through so the forward pass returns the
# features themselves. nn.Identity ignores any constructor arguments, so
# none are passed.
model_conv.fc = nn.Identity()
model_conv = model_conv.to(device)



### Extracting the features with the frozen model

In [2]:
def visualize_model(model, dataloader, device=None):
    """Run ``model`` over ``dataloader`` and collect its outputs batch by batch.

    Despite its name, this function does not plot anything: it is the
    feature-extraction pass.

    Args:
        model: a ``torch.nn.Module``; it is switched to eval mode.
        dataloader: iterable yielding ``(inputs, labels)`` pairs. The labels
            are ignored here.
        device: device the inputs are moved to before the forward pass.
            Defaults to the device of the model's first parameter (CPU when
            the model has no parameters), so inputs and weights always match.

    Returns:
        A list with one output tensor per batch, in iteration order. The
        tensors are computed under ``torch.no_grad()`` and therefore carry no
        autograd history.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    L_features = []
    model.eval()
    with torch.no_grad():
        # Labels are not needed for the forward pass, so they are neither
        # moved to the device nor kept.
        for inputs, _ in dataloader:
            L_features.append(model(inputs.to(device)))
    return L_features
# Extract the per-batch feature tensors for both splits (train first).
L_features_train, L_features_val = (
    visualize_model(model_conv, dataloaders[split]) for split in ('train', 'valid')
)

NameError: name 'model_conv' is not defined

### Création des vecteurs de features

In [17]:
L_features_train[0].shape # first training batch: 4 images, 512 features each

torch.Size([4, 512])

In [18]:
L_features_val[0].shape  # first validation batch: 4 images, 512 features each

torch.Size([4, 512])

In [19]:
# Stack the per-batch outputs into one (num_samples, num_features) tensor
# per split.
features_val = torch.vstack(L_features_val)
features_train = torch.vstack(L_features_train)
# Reuse the stacked tensor instead of stacking the whole list a second time
# just to look at its shape.
features_train.shape  # 6552 training feature vectors

torch.Size([6552, 512])

In [9]:
# `features_val` already holds the stacked validation features; no need to
# stack the list again.
features_val.shape  # 818 validation feature vectors

torch.Size([818, 512])

### Targets

In [1]:
def get_targets(dataloader):
    """Collect every label yielded by ``dataloader`` into one flat list.

    Args:
        dataloader: iterable yielding ``(inputs, labels)`` pairs where
            ``labels`` is a tensor. The inputs are ignored.

    Returns:
        A list of plain Python values, in iteration order.
    """
    target = []
    for _, labels in dataloader:
        # The labels are only converted to Python values, so no device
        # transfer is needed for them or for the inputs.
        target.extend(labels.tolist())
    return target
# Gather the labels of both splits, in the same order the loaders yield them.
target_train, target_val = map(
    get_targets, (dataloaders['train'], dataloaders['valid'])
)

NameError: name 'dataloaders' is not defined

In [11]:
len(target_train) # 6552 labels, same count as the stacked training features: ok

6552

In [12]:
len(target_val) # 818 labels, same count as the stacked validation features: ok

818

### Création des dataloaders (features)

In [13]:
class FeaturesDataset(torch.utils.data.Dataset):
    """Dataset pairing precomputed feature vectors with their labels.

    Item ``i`` is the tuple ``(features[i], targets[i])``.
    """

    def __init__(self, features, targets):
        """Store the two parallel, indexable containers.

        Raises:
            ValueError: if ``features`` and ``targets`` differ in length,
                which would otherwise surface later as an IndexError or as
                silently ignored targets.
        """
        if len(features) != len(targets):
            raise ValueError(
                f"features and targets must have the same length, "
                f"got {len(features)} and {len(targets)}"
            )
        self.features = features
        self.targets = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, i):
        """Return the ``(feature, target)`` pair at index ``i``."""
        return self.features[i], self.targets[i]

    
# Wrap the stacked features and their labels (converted to a tensor) as
# datasets, one per split.
x1 = FeaturesDataset(features_train, torch.tensor(target_train))
x2 = FeaturesDataset(features_val, torch.tensor(target_val))
dataset_features = dict(train=x1, valid=x2)
dataset_sizes = {split: len(subset) for split, subset in dataset_features.items()}

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = torch.utils.data.DataLoader(x1, batch_size=4, shuffle=True, num_workers=0)
valid_loader = torch.utils.data.DataLoader(x2, batch_size=4, shuffle=False, num_workers=0)
dataloaders_features = {'train': train_loader, 'valid': valid_loader}

# Peek at one training batch: [features, labels].
next(iter(dataloaders_features['train']))

[tensor([[0.9124, 0.8139, 1.1766,  ..., 0.4598, 0.5962, 1.4540],
         [0.1968, 0.3435, 0.2905,  ..., 0.1029, 0.2143, 2.4981],
         [0.1545, 0.1060, 0.1457,  ..., 0.5070, 2.4660, 0.3419],
         [0.2493, 0.0217, 1.0103,  ..., 0.1774, 0.6930, 0.7358]]),
 tensor([85, 30, 43, 51])]

### Fonction d'entraînement

In [14]:
# Linear classifier trained on the extracted features: 512 inputs (the
# feature width shown by the shapes above) and 102 outputs.
num_ftrs = 512
# The model must live on the same device the training loop moves its batches
# to; otherwise a CUDA run fails with a device mismatch.
model = nn.Linear(num_ftrs, 102).to(device)
criterion = nn.CrossEntropyLoss()

# SGD with momentum over the classifier's parameters (the feature extractor
# is not part of this model).
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

def train_model(model, criterion, optimizer, scheduler, dataloaders, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a ``'train'`` phase (forward, backward, optimizer step)
    followed by a ``'valid'`` phase (forward only). The scheduler is stepped
    once per epoch, after the training phase. The weights giving the highest
    validation accuracy are kept and restored at the end.

    Args:
        model: ``torch.nn.Module`` to train. Batches are moved to the device
            of its first parameter (CPU if it has none).
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer over the model's parameters.
        scheduler: learning-rate scheduler exposing ``step()``.
        dataloaders: mapping with ``'train'`` and ``'valid'`` keys, each an
            iterable of ``(inputs, labels)`` tensor batches.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, with the best validation weights loaded.

    Raises:
        ValueError: if a phase's dataloader yields no samples.
    """
    since = time.time()

    # Follow the model's own device instead of a notebook-level global so
    # inputs and weights can never end up on different devices.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Count samples as they are seen rather than relying on an
            # external size table that may be stale or missing.
            seen = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is a batch mean, so weight it by the
                # batch size to obtain a per-sample average later
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += int(torch.sum(preds == labels))
                seen += batch_size
            if phase == 'train':
                scheduler.step()

            if seen == 0:
                raise ValueError(f"dataloaders[{phase!r}] yielded no samples")

            epoch_loss = running_loss / seen
            epoch_acc = running_corrects / seen

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
# Train the linear classifier on the feature loaders, then print the
# returned model.
trained_model = train_model(
    model,
    criterion,
    optimizer,
    exp_lr_scheduler,
    dataloaders_features,
    num_epochs=25,
)
print(trained_model)


Epoch 0/24
----------
train Loss: 2.5005 Acc: 0.4605
valid Loss: 1.3140 Acc: 0.7262

Epoch 1/24
----------
train Loss: 1.0757 Acc: 0.7842
valid Loss: 0.8083 Acc: 0.8386

Epoch 2/24
----------
train Loss: 0.7470 Acc: 0.8539
valid Loss: 0.6416 Acc: 0.8692

Epoch 3/24
----------
train Loss: 0.5871 Acc: 0.8871
valid Loss: 0.5327 Acc: 0.8851

Epoch 4/24
----------
train Loss: 0.4875 Acc: 0.9083
valid Loss: 0.4680 Acc: 0.9010

Epoch 5/24
----------
train Loss: 0.4187 Acc: 0.9264
valid Loss: 0.4214 Acc: 0.9059

Epoch 6/24
----------
train Loss: 0.3690 Acc: 0.9364
valid Loss: 0.4080 Acc: 0.9095

Epoch 7/24
----------
train Loss: 0.3000 Acc: 0.9605
valid Loss: 0.3752 Acc: 0.9230

Epoch 8/24
----------
train Loss: 0.2920 Acc: 0.9640
valid Loss: 0.3723 Acc: 0.9242

Epoch 9/24
----------
train Loss: 0.2884 Acc: 0.9638
valid Loss: 0.3729 Acc: 0.9267

Epoch 10/24
----------
train Loss: 0.2858 Acc: 0.9657
valid Loss: 0.3721 Acc: 0.9254

Epoch 11/24
----------
train Loss: 0.2829 Acc: 0.9664
valid Loss