# Fine tuning de redes preentrenadas

Los experimentos realizados en este notebook se basan en las indicaciones de este [blog](https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html)

La idea, básicamente, consiste en:
1. coger una red ya entrenada previamente y quitarle la capa superior
2. procesar nuestro conjunto de datos con la red resultante del paso anterior para extraer sus características
3. diseñar un modelo sencillo cuyo input es el output del punto 2 y entrenarlo

Aparentemente con muy poco cálculo se pueden obtener buenos resultados.

En los siguientes experimentos voy a probar el planteamiento anterior utilizando las redes preentrenadas que vienen por defecto con Keras para ver cuál de ellas ofrece mejores resultados.

Después, una vez seleccionada una, intentaré optimizar el diseño del modelo superior.

## Parámetros comunes para todos los experimentos

In [1]:
# %matplotlib inline

import numpy as np
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.layers import Input
import os.path


# Image directories read through flow_from_directory by the data-generation
# functions.
train_data_dir = '../data/train'
validation_data_dir = '../data/validation'

# File-name templates; '{}' is filled in with the experiment name.
# -- cached features and labels produced by the pre-trained network
train_features_path = '{}_train_features.npy'
train_labels_path = '{}_train_labels.npy'
validation_features_path = '{}_validation_features.npy'
validation_labels_path = '{}_validation_labels.npy'
# -- trained top model and its training history
top_model_path = '{}_top_model.h5'
history_path = '{}_history.json'

# TODO: set these values properly for the actual dataset.
# Image size requested from the generators and used for the network input.
width = 200
height = 200
# Sample counts; divided by batch_size to get the number of batches to read.
train_samples = 1152
validation_samples = 288
# Width of the softmax output layer of the top model.
categories = 21
# Shared by feature extraction and by the top-model training.
batch_size = 4
epochs = 20

Using TensorFlow backend.


## Generación de datos

Definimos unas funciones que, dado un modelo preentrenado, permiten traducir nuestros datos en características y etiquetas para utilizarse en el top model.

Primero para los datos de entrenamiento:

In [2]:
def generate_train_data(name, model):
    """Run the training images through ``model`` and cache the results on disk.

    Images are read from ``train_data_dir`` without shuffling, rescaled by
    1/255 and passed to ``model.predict`` one batch at a time.  The stacked
    predictions and the label arrays yielded by the generator are saved to
    ``train_features_path`` and ``train_labels_path`` formatted with ``name``.

    Args:
        name: Experiment identifier used to build the output file names.
        model: Object exposing ``predict(batch)``; the callers in this file
            pass a pre-trained network built with ``include_top=False``.

    Raises:
        ValueError: If ``train_samples // batch_size`` is zero, i.e. there is
            no full batch to process and nothing meaningful to save.
    """
    # Local import: sys.stdout.write gives the same progress output on
    # Python 2 and Python 3 without relying on the print statement.
    import sys

    naive_datagen = ImageDataGenerator(rescale=1. / 255)
    dataflow = naive_datagen.flow_from_directory(train_data_dir,
                                                 batch_size=batch_size,
                                                 class_mode='categorical',
                                                 target_size=(width, height),
                                                 shuffle=False)

    rounds = train_samples // batch_size
    if rounds == 0:
        raise ValueError(
            'train_samples ({}) is smaller than batch_size ({})'.format(
                train_samples, batch_size))
    print('running {} rounds'.format(rounds))

    # Collect the batches and join them once at the end; appending to a
    # growing array would copy everything accumulated so far on every round.
    feature_batches = []
    label_batches = []
    for i in range(rounds):
        if i % 50 == 0:
            # Start a new progress line every 50 batches.
            sys.stdout.write('\n{} / {} .'.format(i, rounds))
        else:
            sys.stdout.write(' .')
        batch = dataflow.next()
        feature_batches.append(model.predict(batch[0]))
        label_batches.append(batch[1])
    sys.stdout.write('\n')
    sys.stdout.flush()

    features = np.concatenate(feature_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)

    # Passing the path lets numpy open the file in binary mode and close it;
    # the templates already end in '.npy', so the name is used unchanged.
    np.save(train_features_path.format(name), features)
    np.save(train_labels_path.format(name), labels)

Y ahora para los datos de validación:

In [3]:
def generate_validation_data(name, model):
    """Run the validation images through ``model`` and cache the results.

    Images are read from ``validation_data_dir`` without shuffling, rescaled
    by 1/255 and passed to ``model.predict`` one batch at a time.  The stacked
    predictions and the label arrays yielded by the generator are saved to
    ``validation_features_path`` and ``validation_labels_path`` formatted
    with ``name``.

    Args:
        name: Experiment identifier used to build the output file names.
        model: Object exposing ``predict(batch)``; the callers in this file
            pass a pre-trained network built with ``include_top=False``.

    Raises:
        ValueError: If ``validation_samples // batch_size`` is zero, i.e.
            there is no full batch to process and nothing meaningful to save.
    """
    # Local import: sys.stdout.write gives the same progress output on
    # Python 2 and Python 3 without relying on the print statement.
    import sys

    naive_datagen = ImageDataGenerator(rescale=1. / 255)
    dataflow = naive_datagen.flow_from_directory(validation_data_dir,
                                                 batch_size=batch_size,
                                                 class_mode='categorical',
                                                 target_size=(width, height),
                                                 shuffle=False)

    rounds = validation_samples // batch_size
    if rounds == 0:
        raise ValueError(
            'validation_samples ({}) is smaller than batch_size ({})'.format(
                validation_samples, batch_size))
    print('running {} rounds'.format(rounds))

    # Collect the batches and join them once at the end; appending to a
    # growing array would copy everything accumulated so far on every round.
    feature_batches = []
    label_batches = []
    for i in range(rounds):
        if i % 50 == 0:
            # Start a new progress line every 50 batches.
            sys.stdout.write('\n{} / {} .'.format(i, rounds))
        else:
            sys.stdout.write(' .')
        batch = dataflow.next()
        feature_batches.append(model.predict(batch[0]))
        label_batches.append(batch[1])
    sys.stdout.write('\n')
    sys.stdout.flush()

    features = np.concatenate(feature_batches, axis=0)
    labels = np.concatenate(label_batches, axis=0)

    # Passing the path lets numpy open the file in binary mode and close it;
    # the templates already end in '.npy', so the name is used unchanged.
    np.save(validation_features_path.format(name), features)
    np.save(validation_labels_path.format(name), labels)

Una función que previene que se repitan experimentos:

In [76]:
from keras.models import load_model
import matplotlib.pyplot as plt

def is_already_done(name):
    """Tell whether any cached artefact of experiment ``name`` is on disk.

    Returns True as soon as one of the feature, label or top-model files
    built from ``name`` exists, and False when none of them does.  The
    history file is not part of the check.
    """
    artefact_templates = (
        validation_features_path,
        validation_labels_path,
        train_features_path,
        train_labels_path,
        top_model_path,
    )
    for template in artefact_templates:
        if os.path.isfile(template.format(name)):
            return True
    return False

def plot_history(name):
    """Plot the accuracy and loss curves stored for experiment ``name``.

    Reads the JSON file at ``history_path.format(name)`` and draws two
    side-by-side charts, accuracy on the left and loss on the right.  Each
    chart shows the training series and its ``val_``-prefixed counterpart.

    Args:
        name: Experiment identifier; selects the history file and prefixes
            the chart titles.
    """
    # Imported locally so the function does not depend on a different
    # notebook cell having imported json beforehand.
    import json

    # The context manager closes the file as soon as it has been parsed.
    with open(history_path.format(name)) as history_file:
        history = json.load(history_file)

    def draw_panel(position, metric, label):
        # One chart: the training series and the matching 'val_' series.
        plt.subplot(position)
        plt.plot(history[metric])
        plt.plot(history['val_' + metric])
        plt.title(name + ' model ' + label)
        plt.ylabel(label)
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.grid(True)

    plt.figure(figsize=(12, 4))
    draw_panel(121, 'acc', 'accuracy')
    draw_panel(122, 'loss', 'loss')

    plt.tight_layout()
    plt.show()

## Top model común

In [5]:
def common_top_model(input_shape):
    """Build the small classifier that is trained on the cached features.

    The stack is: flatten the input of shape ``input_shape``, a 256-unit
    ReLU dense layer, dropout at rate 0.5, and a softmax dense layer with
    ``categories`` outputs.  The model is returned uncompiled.
    """
    stack = (
        Flatten(input_shape=input_shape),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(categories, activation='softmax'),
    )
    classifier = Sequential()
    for layer in stack:
        classifier.add(layer)
    return classifier

## Ejecutor de experimentos

In [6]:
import json

def run_experiment(name, model):
    """Extract features with ``model``, train the top model and save results.

    Steps, in order:
      1. Refuse to run if ``is_already_done(name)`` reports existing files.
      2. Generate and cache training and validation features and labels.
      3. Reload the cached arrays from disk.
      4. Build, compile and fit the common top model on them.
      5. Save the trained top model and its training history as JSON.

    Args:
        name: Experiment identifier used in every output file name.
        model: Pre-trained network passed on to the data-generation
            functions to compute the features.

    Raises:
        Exception: If artefacts for ``name`` already exist on disk.
    """
    if is_already_done(name):
        raise Exception('el experimento parece que ya se ha realizado')

    print('generating training data')
    generate_train_data(name, model)

    print('generating validation data')
    generate_validation_data(name, model)

    # Loading by path lets numpy open the files in binary mode and close
    # them, instead of leaving text-mode handles open.
    print('loading training data')
    train_features = np.load(train_features_path.format(name))
    train_labels = np.load(train_labels_path.format(name))

    print('loading validation data')
    validation_features = np.load(validation_features_path.format(name))
    validation_labels = np.load(validation_labels_path.format(name))

    print('shapes: ')
    for array in (train_features, train_labels,
                  validation_features, validation_labels):
        print('\t {}'.format(array.shape))

    print('training top model')
    # The first axis indexes samples, so the per-sample shape is shape[1:].
    top_model = common_top_model(train_features.shape[1:])
    top_model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    # NOTE(review): `nb_epoch` looks like the older Keras spelling of this
    # argument (newer releases appear to call it `epochs`) -- confirm against
    # the installed Keras version before renaming.
    history = top_model.fit(train_features,
                            train_labels,
                            batch_size=batch_size,
                            nb_epoch=epochs,
                            validation_data=(validation_features, validation_labels))

    print('saving top model')
    top_model.save(top_model_path.format(name))

    print(history)
    print(history.history)
    print('saving history')

    # json rejects numpy scalar types such as float32, so every recorded
    # value is converted to a plain Python float before dumping.
    serializable_history = dict(
        (metric, [float(value) for value in values])
        for metric, values in history.history.items())
    with open(history_path.format(name), 'w') as history_file:
        json.dump(serializable_history, history_file)

## Experimentos "naive" con distintas redes preentrenadas
### VGG16

In [7]:
def VGG16_exp1():
    """Run the baseline experiment on top of ImageNet-weighted VGG16.

    The network is created without its top layers and fed images of shape
    (width, height, 3); results are stored under the name 'VGG16_exp1'.
    """
    image_input = Input(shape=(width, height, 3))
    base_network = applications.VGG16(weights='imagenet',
                                      include_top=False,
                                      input_tensor=image_input)
    run_experiment('VGG16_exp1', base_network)

# Commented out because the experiment is meant to be run only once.
# VGG16_exp1()

### VGG19

In [58]:
def VGG19_exp1():
    """Run the baseline experiment on top of ImageNet-weighted VGG19.

    The network is created without its top layers and fed images of shape
    (width, height, 3); results are stored under the name 'VGG19_exp1'.
    """
    image_input = Input(shape=(width, height, 3))
    base_network = applications.VGG19(weights='imagenet',
                                      include_top=False,
                                      input_tensor=image_input)
    run_experiment('VGG19_exp1', base_network)

# Meant to be run only once: this call is live, and run_experiment raises
# when output files for this experiment already exist.
VGG19_exp1()

### InceptionV3

In [77]:
def InceptionV3_exp1():
    """Run the baseline experiment on top of ImageNet-weighted InceptionV3.

    The network is created without its top layers and fed images of shape
    (width, height, 3); results are stored under the name 'InceptionV3_exp1'.
    """
    image_input = Input(shape=(width, height, 3))
    base_network = applications.InceptionV3(weights='imagenet',
                                            include_top=False,
                                            input_tensor=image_input)
    run_experiment('InceptionV3_exp1', base_network)

# Meant to be run only once: this call is live, and run_experiment raises
# when output files for this experiment already exist.
InceptionV3_exp1()

### ResNet50

In [78]:
def ResNet50_exp1():
    """Run the baseline experiment on top of ImageNet-weighted ResNet50.

    The network is created without its top layers and fed images of shape
    (width, height, 3); results are stored under the name 'ResNet50_exp1'.
    """
    image_input = Input(shape=(width, height, 3))
    base_network = applications.ResNet50(weights='imagenet',
                                         include_top=False,
                                         input_tensor=image_input)
    run_experiment('ResNet50_exp1', base_network)

# Meant to be run only once: this call is live, and run_experiment raises
# when output files for this experiment already exist.
ResNet50_exp1()

### Xception

In [79]:
def Xception_exp1():
    """Run the baseline experiment on top of ImageNet-weighted Xception.

    The network is created without its top layers and fed images of shape
    (width, height, 3); results are stored under the name 'Xception_exp1'.
    """
    image_input = Input(shape=(width, height, 3))
    base_network = applications.Xception(weights='imagenet',
                                         include_top=False,
                                         input_tensor=image_input)
    run_experiment('Xception_exp1', base_network)

# Meant to be run only once: this call is live, and run_experiment raises
# when output files for this experiment already exist.
Xception_exp1()

## Resultados de los experimentos "naive"

In [None]:
# plot_history('VGG16_exp1')
# plot_history('VGG19_exp1')
# plot_history('InceptionV3_exp1')
# plot_history('ResNet50_exp1')
# plot_history('Xception_exp1')