# Projet 7 du Parcours Machine Learning (1/2)

# Thème : Vision transformers

## 1. Baseline
    
    1.1 Import des données
    1.2 Fine-tuning des hyperparamètres du modèle
    1.3 Evaluation sur le jeu de test

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
from tensorflow.keras import regularizers
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from scipy.io import loadmat
import pandas as pd
from sklearn.model_selection import train_test_split


### 1.1 Import des données


Nous allons extraire 6 classes du jeu d'entraînement originel puis fine-tuner le modèle sur ces classes

In [3]:
%mkdir 'echantillon'

In [4]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/Hyundai Santa Fe SUV 2012' '/root/echantillon'

In [5]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/Aston Martin V8 Vantage Coupe 2012' '/root/echantillon'

In [6]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/BMW 3 Series Sedan 2012' '/root/echantillon'

In [7]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/Audi TTS Coupe 2012' '/root/echantillon'

In [8]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/Chevrolet Corvette ZR1 2012' '/root/echantillon'

In [9]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train/Jeep Patriot SUV 2012' '/root/echantillon'

In [17]:
import glob

# List every image as <root>/<class folder>/<file>.jpg.
# glob returns paths in arbitrary (filesystem) order, so the lists are sorted:
# otherwise the seeded train/validation split below is not reproducible.
train_list = sorted(glob.glob('/root/echantillon/*/*.jpg'))
# NOTE(review): no cell shown before this one creates /root/test, so this list
# may be empty on a fresh run - confirm where the test folder comes from.
test_list = sorted(glob.glob('/root/test/*/*.jpg'))

In [18]:
# The class label of an image is the name of its parent folder.
labels = [image_path.split('/')[-2] for image_path in train_list]

In [19]:
# Number of image paths collected by the glob above (before the train/validation split).
len(train_list)

In [20]:
# Split the image paths into a training part (200 paths) and a validation part
# (the remainder), stratified on the class labels.
train_list, valid_list = train_test_split(
    train_list,
    stratify=labels,
    train_size=200,
    random_state=42,
)

In [21]:
# Recompute the labels for the paths that remain in the training list after the split.
labels = [image_path.split('/')[-2] for image_path in train_list]

In [23]:
mkdir 'valid'

In [24]:
import pathlib

# Root folder that receives the validation images, one sub-folder per class.
valid_path = pathlib.Path('/root') / 'valid'

In [25]:
# Create one validation sub-folder per class present in the training labels.
# `os` is not imported by any cell shown, so the original `os.mkdir` call raised
# a NameError; Path.mkdir needs no extra import. `parents`/`exist_ok` make the
# cell safe to re-run and independent of whether /root/valid already exists.
for label in set(labels):
    (valid_path / label).mkdir(parents=True, exist_ok=True)

In [26]:
import shutil

In [27]:
# Move each validation image into the sub-folder of `valid_path` named after its class
# (the class is the name of the image's current parent folder).
for image_path in valid_list:
    class_name = image_path.split('/')[-2]
    shutil.move(image_path, valid_path / class_name)

In [28]:
# Dataset preparation: every split is read from a directory with one sub-folder
# per class, resized to 180x180 and served in batches of 32.
def _load_image_dataset(directory):
    """Return the batched image dataset built from `directory`."""
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        image_size=(180, 180),
        batch_size=32)


train_dataset = _load_image_dataset('/root/echantillon')
val_dataset = _load_image_dataset('/root/valid')
# NOTE(review): no cell shown before this one creates /root/test - confirm it
# exists at this point, otherwise this call fails on a fresh kernel.
test_dataset = _load_image_dataset('/root/test')

In [29]:
!pip install keras-tuner -q

In [30]:
# Data augmentation is used to limit overfitting: random flips, rotations,
# zooms and translations are applied to the input images.
augmentation_layers = [
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
]
data_augmentation = keras.Sequential(augmentation_layers)

### 1.2 Fine-tuning des hyperparamètres du modèle


In [31]:
# `keras_tuner` is the current import name of the keras-tuner package
# (`kerastuner` is the legacy alias).
import keras_tuner as kt


def build_model(hp, num_classes=6):
    """Build and compile the baseline CNN for one hyperparameter configuration.

    The network is: augmentation -> rescaling to [0, 1] -> two identical
    Conv2D / MaxPooling2D / Dense / Dropout stages -> Flatten -> softmax.

    Args:
        hp: keras-tuner ``HyperParameters`` object. The hyperparameters read
            are ``filter_nb``, ``unit``, ``drop_rate`` and ``learning_rate``.
        num_classes: number of output classes (defaults to the 6 classes
            copied into the sample directory).

    Returns:
        A compiled ``keras.Model`` taking 180x180 RGB images and producing
        class probabilities.
    """
    # Each hyperparameter is registered once. The original requested the same
    # names a second time for the second stage, so both stages already shared
    # one value per name; reading them once makes that explicit.
    filter_nb = hp.Int(name="filter_nb", min_value=16, max_value=512, step=64)
    unit = hp.Int(name="unit", min_value=64, max_value=512, step=64)
    drop_rate = hp.Choice(name='drop_rate', values=[0.5, 0.6, 0.7])
    # The original list contained 0.0005 twice; the duplicate is removed so
    # that every candidate learning rate appears exactly once.
    hp_learning_rate = hp.Choice('learning_rate', values=[0.0001, 0.00001, 0.0005], ordered=False)

    inputs = keras.Input(shape=(180, 180, 3))
    x = data_augmentation(inputs)
    x = layers.Rescaling(1./255)(x)
    for _ in range(2):
        x = layers.Conv2D(filters=filter_nb, kernel_size=3, activation="relu")(x)
        x = layers.MaxPooling2D(pool_size=2)(x)
        # Dense is applied before Flatten, i.e. on the last (channel) axis of the feature map.
        x = layers.Dense(unit)(x)
        x = layers.Dropout(drop_rate)(x)
    x = layers.Flatten()(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
        # The output layer already applies softmax, so the loss receives
        # probabilities, not logits (the original passed from_logits=True).
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'])
    return model


In [32]:
# `keras_tuner` is the current import name of the keras-tuner package
# (`kerastuner` is the legacy alias).
import keras_tuner as kt

# Bayesian search over the hyperparameters declared in build_model, maximising
# validation accuracy. Each of the 20 trials is trained twice.
# The seed makes the sequence of sampled configurations repeatable.
tuner = kt.BayesianOptimization(
    build_model,
    objective="val_accuracy",
    max_trials=20,
    executions_per_trial=2,
    seed=42,
    directory="cars_classifications",
    overwrite=True,
)

In [33]:
# Stop a training run once the validation loss has not improved for 5 epochs.
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)
callbacks = [early_stopping]

In [34]:
# Run the hyperparameter search. The datasets are already batched (batch_size=32
# when they were created), so `batch_size` must not be passed again here.
tuner.search(train_dataset, epochs=40, validation_data=val_dataset,
             callbacks=callbacks, verbose=2)

In [35]:
best_hps= tuner.get_best_hyperparameters()[0]

In [36]:
best_hps.values

In [37]:
def get_best_epoch(hp):
    """Train a fresh model and return the epoch with the lowest validation loss.

    A model is built from `hp` with build_model and trained on the notebook-level
    `train_dataset`, validated on `val_dataset`, for at most 30 epochs with
    early stopping on the validation loss (patience 10).

    Args:
        hp: hyperparameters passed to build_model.

    Returns:
        The 1-based index of the epoch whose validation loss is minimal.
    """
    model = build_model(hp)
    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss", mode="min", patience=10)

    # The datasets are already batched, so `batch_size` is not passed to fit().
    history = model.fit(train_dataset, epochs=30, validation_data=val_dataset,
                        callbacks=[early_stopping], verbose=2)
    val_loss_per_epoch = history.history["val_loss"]
    # +1 because list indices start at 0 while epochs are counted from 1.
    best_epoch = val_loss_per_epoch.index(min(val_loss_per_epoch)) + 1
    print(f"Best epoch: {best_epoch}")
    return best_epoch

In [38]:
# get_best_epoch returns an epoch number, not a model: keep it under its own
# name so the value is not overwritten when `best_model` is built afterwards.
best_epoch = get_best_epoch(best_hps)

In [40]:
# Fresh (untrained) model built from the best hyperparameters found by the tuner.
best_model = build_model(hp=best_hps)

### 1.3 Evaluation sur le jeu de test

In [41]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train' '/root/'

In [42]:
cp -R '/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/test' '/root/'

In [44]:
# Rebuild the training and test datasets from the directories copied just above,
# with the same image size and batch size as before.
# NOTE(review): build_model ends with a 6-unit output layer, while /root/train is
# the complete training directory - confirm the number of class folders matches.
full_data_options = dict(image_size=(180, 180), batch_size=32)
train_dataset = tf.keras.utils.image_dataset_from_directory(
    '/root/train', **full_data_options)
test_dataset = tf.keras.utils.image_dataset_from_directory(
    '/root/test/', **full_data_options)

In [45]:
# Checkpoint the model during the final training run.
# `monitor` only influences saving when `save_best_only=True`; without it the
# checkpoint was overwritten at every epoch regardless of val_loss.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="cars_classification",
        monitor="val_loss",
        save_best_only=True,
    )
]

In [46]:
# Final training run of the best model for 40 epochs, with checkpointing.
# NOTE(review): the test dataset is used as validation data here, so the
# checkpoint selection sees the test set - confirm this is intended.
history = best_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=40,
    callbacks=callbacks,
)