<a href="https://colab.research.google.com/github/VincentNFR/openclassrooms_p06/blob/VGG16/VGG16_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import tensorflow as tf
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# Import des donneés

In [15]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [16]:
raw_image_tensor = np.load('/content/drive/MyDrive/Colab Notebooks/OpenClassRooms/6/data/images_reduced.npy', allow_pickle=True)
raw_label_tensor = np.load('/content/drive/MyDrive/Colab Notebooks/OpenClassRooms/6/data/label_reduced.npy', allow_pickle=True)

In [17]:
raw_image_tensor.shape, raw_label_tensor.shape

((1124,), (1124,))

# Preparation des sets

In [18]:
from numpy import expand_dims
import tensorflow as tf
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from skimage.transform import resize
from tqdm.notebook import trange, tqdm

np.random.seed(1)

image_generator = ImageDataGenerator(
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2
    )

def generate_new_image(image, target_size=(224,224), seed=1):
    it = image_generator.flow(expand_dims(image, 0), batch_size=1, seed=seed).next()[0].astype('uint8')
    new_image = resize(it, target_size, anti_aliasing=True)
    return new_image

def normalize(x):
    return x/255.

def generate_dataframe(raw_label_tensor, raw_image_tensor, nb_images_per_class=100, target_size=(224,224)):
    """
    Genere une image à partir du jeu de données
    - Image aléatoire
    - Image resize
    - Image normalisee
    return df['Images','Labels'], Dict(Label: Breed)
    """
    label_unique = np.unique(raw_label_tensor)
    df = pd.DataFrame(columns=['Images', 'Labels'])
    convert_labels = dict()
    for i in trange(len(label_unique), desc='Total set'):
        breed = label_unique[i]
        convert_labels[i] = breed
        n_image_to_create = nb_images_per_class
        samples = np.where(raw_label_tensor==breed)[0]
        label_arr = np.zeros((len(label_unique)))
        label_arr[i] = 1
        for j in tqdm(range(n_image_to_create), desc=breed):
            sample_image = raw_image_tensor[np.random.choice(samples)]
            new_image = generate_new_image(sample_image)
            
            df = df.append(dict(zip(df.columns,[new_image, label_arr])), ignore_index=True)
    df['Images'].apply(normalize)
    return df, convert_labels

In [19]:
d, convert_labels = generate_dataframe(raw_label_tensor, raw_image_tensor, nb_images_per_class=500, target_size=(224,224))

Total set:   0%|          | 0/5 [00:00<?, ?it/s]

afghan_hound:   0%|          | 0/500 [00:00<?, ?it/s]

maltese_dog:   0%|          | 0/500 [00:00<?, ?it/s]

pomeranian:   0%|          | 0/500 [00:00<?, ?it/s]

samoyed:   0%|          | 0/500 [00:00<?, ?it/s]

scottish_deerhound:   0%|          | 0/500 [00:00<?, ?it/s]

In [20]:
from sklearn.model_selection import train_test_split

def split_dataframe(df, x_cols, y_cols, train_split=0.7, test_split=0.5, random_state=0):
    """
    Split du dataset en 3 parties avec mélange :
        - Train set
        - Validation set
        - Test set
    """
    X_train, X_tmp, y_train, y_tmp = train_test_split(df[x_cols], df[y_cols], test_size=1-train_split, random_state=random_state, shuffle=True)
    X_val, X_test, y_val, y_test = train_test_split(X_tmp, y_tmp, test_size=test_split, random_state=random_state)

    return np.array([im for im in X_train.values]), np.array([im for im in y_train.values]), np.array([im for im in X_val.values]), np.array([im for im in y_val.values]), np.array([im for im in X_test.values]), np.array([im for im in y_test.values])

In [21]:
X_train, y_train, X_val, y_val, X_test, y_test = split_dataframe(d, x_cols='Images', y_cols='Labels', train_split=0.7, test_split=0.5, random_state=0)

In [22]:
X_train.shape, X_val.shape, X_test.shape

((1749, 224, 224, 3), (375, 224, 224, 3), (376, 224, 224, 3))

# Import du modèle d'apprentissage

In [23]:
model = tf.keras.applications.VGG16(
    include_top=True,
    weights=None,
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=5,
    classifier_activation="softmax",
)
model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [24]:
model.compile(
    optimizer= tf.keras.optimizers.Adam(learning_rate=1e-3), 
    loss= 'categorical_crossentropy',
    metrics= ['accuracy']
    )

In [25]:
epochs = 10
batch_size = 20
path = '/content/drive/MyDrive/Colab Notebooks/OpenClassRooms/6/saved_model/vgg'
checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath=f'{path}/weights.bestaugmented.from_scratch.hdf5', 
                               verbose=1, save_best_only=True)

model.fit(
    x=X_train, y=y_train,
    validation_data=(X_val, y_val), 
    steps_per_epoch=X_train.shape[0]//batch_size, 
    epochs=epochs, 
    callbacks=[checkpointer], 
    verbose=1
    )

Epoch 1/10
Epoch 1: val_loss improved from inf to 1.61113, saving model to /content/drive/MyDrive/Colab Notebooks/OpenClassRooms/6/saved_model/vgg/weights.bestaugmented.from_scratch.hdf5
Epoch 2/10
Epoch 2: val_loss improved from 1.61113 to 1.60963, saving model to /content/drive/MyDrive/Colab Notebooks/OpenClassRooms/6/saved_model/vgg/weights.bestaugmented.from_scratch.hdf5
Epoch 3/10
Epoch 3: val_loss did not improve from 1.60963
Epoch 4/10
Epoch 4: val_loss improved from 1.60963 to 1.60949, saving model to /content/drive/MyDrive/Colab Notebooks/OpenClassRooms/6/saved_model/vgg/weights.bestaugmented.from_scratch.hdf5
Epoch 5/10
Epoch 5: val_loss improved from 1.60949 to 1.60940, saving model to /content/drive/MyDrive/Colab Notebooks/OpenClassRooms/6/saved_model/vgg/weights.bestaugmented.from_scratch.hdf5
Epoch 6/10
Epoch 6: val_loss did not improve from 1.60940
Epoch 7/10
Epoch 7: val_loss improved from 1.60940 to 1.60939, saving model to /content/drive/MyDrive/Colab Notebooks/OpenCl

<keras.callbacks.History at 0x7fdd322d4310>

In [26]:
model.load_weights(f'{path}/weights.bestaugmented.from_scratch.hdf5')

# get index of predicted dog breed for each image in test set
dog_breed_predictions = [np.argmax(model.predict(np.expand_dims(tensor, axis=0))) for tensor in X_test]

# report test accuracy
test_accuracy = 100*np.sum(np.array(dog_breed_predictions)==np.argmax(y_test, axis=1))/len(dog_breed_predictions)
print(f'Test accuracy: {test_accuracy} %')

Test accuracy: 17.5531914893617 %
