# Code du projet d'apprentissage de Maxime Calio et Jules Crafa

## Librairies et fonctions 

### Import des librairies

In [52]:
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn import preprocessing
import pandas as pd
import numpy as np


### Fonctions customisées

#### CNN

In [53]:
def create_cnn(x_train, y_train, nb_layers=1, drop=0):
    """Build an (uncompiled) 1D convolutional classifier.

    The network stacks ``nb_layers`` blocks of Conv1D -> MaxPooling1D
    (each optionally followed by Dropout), flattens the result and ends
    with a softmax Dense layer.

    Args:
        x_train: training inputs. Only ``x_train.shape[1:]`` is read, to size
            the input layer.
        y_train: training targets. Only ``y_train.shape[1]`` is read, to size
            the output layer (one unit per target column).
        nb_layers: number of convolution/pooling blocks. Values below 1 are
            treated as 1. Each block halves the sequence length, so the
            input must be long enough for the requested depth.
        drop: dropout rate applied after every pooling layer. ``0`` (the
            default) adds no Dropout layer at all.

    Returns:
        The assembled ``keras.models.Model``; the caller compiles it.
    """
    # 'same' padding keeps the sequence length unchanged through each convolution
    padding = 'same'

    # step applied when sliding each filter
    stride = 1

    # width of the convolution kernel used by every filter
    kernel_size = 15

    # number of filters per convolution layer
    filters = 5

    # activation used by the convolution layers
    activation = 'relu'

    # input layer shaped like a single time series
    input_layer = keras.layers.Input(x_train.shape[1:])

    current = input_layer
    for _ in range(max(1, nb_layers)):
        current = keras.layers.Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            strides=stride,
            padding=padding,
            activation=activation
        )(current)

        # down-sample the convolution output by a factor of 2
        current = keras.layers.MaxPooling1D(pool_size=2, strides=2,
                                            padding='valid')(current)

        # the `drop` argument was previously ignored; honour it here
        if drop > 0:
            current = keras.layers.Dropout(rate=drop)(current)

    # collapse (steps, filters) into a flat vector for the Dense layer
    flattened = keras.layers.Flatten()(current)

    # output layer: one unit per class, i.e. y_train.shape[1]
    output_layer = keras.layers.Dense(units=y_train.shape[1], activation='softmax')(flattened)
    model = keras.models.Model(inputs=input_layer, outputs=output_layer)

    return model

#### RNN

In [54]:
def create_simple_rnn(x_train, y_train,units, drop=0, rec_drop=0):
    """Build an (uncompiled) two-layer stateful SimpleRNN model.

    Args:
        x_train: training inputs; ``shape[1]`` and ``shape[2]`` size the input.
        y_train: training targets; ``shape[1]`` sizes the output layer.
        units: number of units in each recurrent layer.
        drop: input dropout rate of the recurrent layers.
        rec_drop: recurrent dropout rate of the recurrent layers.

    Returns:
        A ``keras.models.Model`` whose batch size is fixed to 1. The Dense
        output layer is created without an activation argument.
    """
    # settings shared by both recurrent layers
    rnn_kwargs = dict(units=units, dropout=drop, recurrent_dropout=rec_drop, stateful=True)

    # the batch dimension is fixed to 1 in the input specification
    inputs = keras.layers.Input(batch_shape=[1, x_train.shape[1], x_train.shape[2]])

    # first layer emits the whole sequence so the second one can consume it
    sequence = keras.layers.SimpleRNN(return_sequences=True, **rnn_kwargs)(inputs)
    final_output = keras.layers.SimpleRNN(**rnn_kwargs)(sequence)

    outputs = keras.layers.Dense(units=y_train.shape[1])(final_output)

    return keras.models.Model(inputs=inputs, outputs=outputs)

In [55]:
def create_complex_rnn(x_train, y_train,units, drop=0, rec_drop=0):
    """Build an (uncompiled) two-layer stateful LSTM classifier.

    Args:
        x_train: training inputs; ``shape[1]`` and ``shape[2]`` size the input.
        y_train: training targets; ``shape[1]`` sizes the softmax output.
        units: number of units in each LSTM layer.
        drop: input dropout rate of the LSTM layers.
        rec_drop: recurrent dropout rate of the LSTM layers.

    Returns:
        A ``keras.models.Model`` whose batch size is fixed to 1.
    """
    # settings shared by both LSTM layers
    lstm_kwargs = dict(units=units, dropout=drop, recurrent_dropout=rec_drop, stateful=True)

    # the batch dimension is fixed to 1 in the input specification
    inputs = keras.layers.Input(batch_shape=[1, x_train.shape[1], x_train.shape[2]])

    # first layer emits the whole sequence so the second one can consume it
    sequence = keras.layers.LSTM(return_sequences=True, **lstm_kwargs)(inputs)
    final_output = keras.layers.LSTM(**lstm_kwargs)(sequence)

    outputs = keras.layers.Dense(activation='softmax', units=y_train.shape[1])(final_output)

    return keras.models.Model(inputs=inputs, outputs=outputs)


In [72]:
def train_rnn(model, x_train, y_train, nb_epoch):
    """Fit ``model`` with a batch size of 1 and return what ``fit`` returns.

    Args:
        model: object exposing a Keras-style ``fit`` method.
        x_train: training inputs, forwarded to ``fit`` unchanged.
        y_train: training targets, forwarded to ``fit`` unchanged.
        nb_epoch: number of epochs to train for.

    Returns:
        The value returned by ``model.fit`` (previously discarded), so
        callers can inspect the training curves. Existing callers that
        ignore the return value are unaffected.
    """
    # batch_size=1 matches the batch size the RNN builders in this notebook fix to 1
    return model.fit(x_train, y_train, batch_size=1, epochs=nb_epoch, verbose=1)

#### Modeler

In [57]:
def display_model(model):
    """Print the textual summary of ``model`` and return its rendered diagram.

    Returns:
        Whatever ``tf.keras.utils.plot_model`` returns for the model, with
        layer shapes shown.
    """
    # textual overview first
    model.summary()

    # then the graphical one, returned so the notebook can display it
    diagram = tf.keras.utils.plot_model(model, show_shapes=True)
    return diagram


In [58]:
def compile_model(model, mode='lstm', learning_r=0.01):
    """Compile ``model`` in place with one of two preset configurations.

    Args:
        model: Keras model to compile.
        mode: ``'lstm'`` -> Adam optimizer with mean squared error loss;
            ``'cnn'`` -> SGD optimizer with categorical cross-entropy loss.
        learning_r: learning rate given to the optimizer.

    Raises:
        ValueError: if ``mode`` is not one of the supported presets.
            Previously an unknown mode silently left the model uncompiled.
    """
    if mode == 'lstm':
        optimizer_alg = tf.keras.optimizers.Adam(learning_rate=learning_r)
        loss = 'mean_squared_error'
    elif mode == 'cnn':
        optimizer_alg = tf.keras.optimizers.SGD(learning_rate=learning_r)
        loss = 'categorical_crossentropy'
    else:
        raise ValueError(
            "Unknown mode {!r}: expected 'lstm' or 'cnn'".format(mode)
        )

    model.compile(optimizer=optimizer_alg, loss=loss, metrics=["accuracy"])

### Nettoyage des données

In [59]:
def create_x_y(df):
    """Split a dataframe into scaled features and one-hot encoded labels.

    Column 0 of ``df`` is used as the label, every other column as a feature.

    Args:
        df: dataframe whose first column holds the class labels.

    Returns:
        A ``(x, y)`` tuple: the features after ``MaxAbsScaler.fit_transform``
        and the labels as a dense one-hot array.
    """
    raw_labels = df.iloc[:, 0].values
    features = df.iloc[:, 1:].values

    # labels -> consecutive integers -> one-hot rows (densified from the sparse result)
    int_labels = preprocessing.LabelEncoder().fit_transform(raw_labels)
    one_hot = preprocessing.OneHotEncoder().fit_transform(int_labels.reshape(-1, 1))
    one_hot = one_hot.toarray()

    # scale the features
    scaled = preprocessing.MaxAbsScaler().fit_transform(features)

    return scaled, one_hot


In [60]:
def get_train_test_cnn(df_train, df_test):
    """Prepare train/test arrays for the 1D CNN.

    Train and test frames are concatenated before encoding/scaling so that
    both parts go through the same fitted transformers, then split back.

    Args:
        df_train: training dataframe (label in column 0).
        df_test: test dataframe (label in column 0).

    Returns:
        ``(x_train, y_train, x_test, y_test)`` where the x arrays carry an
        extra trailing axis of size 1.
    """
    n_train = df_train.shape[0]

    # stack train on top of test; the first n_train rows are the train set
    stacked = pd.concat([df_train, df_test.reset_index(drop=True)], axis=0)

    features, labels = create_x_y(stacked)

    # the 1D CNN expects a trailing axis of size 1: add it
    features = features.reshape(features.shape[0], features.shape[1], 1)

    return features[:n_train], labels[:n_train], features[n_train:], labels[n_train:]


In [61]:
def get_train_test_rnn(df_train, df_test):
    """Prepare train/test arrays for the recurrent models.

    Train and test frames are concatenated before encoding/scaling so that
    both parts go through the same fitted transformers, then split back.

    Args:
        df_train: training dataframe (label in column 0).
        df_test: test dataframe (label in column 0).

    Returns:
        ``(x_train, y_train, x_test, y_test)`` where every series in the x
        arrays is a sequence of steps holding one value each, i.e. the x
        arrays have a trailing axis of size 1.
    """
    # number of leading rows that belong to the training set
    lg_train = df_train.shape[0]

    df_test = df_test.reset_index(drop=True)
    train_test_df = pd.concat([df_train, df_test], axis=0)

    # encode/scale on the merged frame
    x_fitted, y_fitted = create_x_y(train_test_df)

    # Each series becomes a column of single-value steps. One vectorised
    # reshape gives the same array as reshaping every row separately
    # after a round trip through Python lists.
    x_fitted = x_fitted.reshape(x_fitted.shape[0], -1, 1)

    x_train, y_train = x_fitted[:lg_train], y_fitted[:lg_train]
    x_test, y_test = x_fitted[lg_train:], y_fitted[lg_train:]

    return x_train, y_train, x_test, y_test

## Données

### Import des données

In [62]:
# Download the ECG200 train/test splits: tab-separated files read without a header row,
# so columns are numbered from 0. create_x_y later uses column 0 as the label.
train_df = pd.read_csv('https://maxime-devanne.com/datasets/ECG200/ECG200_TRAIN.tsv', sep='\t', header=None)
test_df = pd.read_csv('https://maxime-devanne.com/datasets/ECG200/ECG200_TEST.tsv', sep='\t', header=None)

### Traitement des données

#### Jeux de données CNN

In [63]:
# Build the arrays consumed by the CNN models (inputs reshaped with a trailing axis of size 1).
x_train_cnn, y_train, x_test_cnn, y_test = get_train_test_cnn(
                                                        train_df,
                                                        test_df
                                                        )

#### Jeux de données RNN

In [64]:
# Build the arrays consumed by the RNN models.
# NOTE: this rebinds y_train and y_test, which the CNN dataset cell also assigns.
x_train_rnn, y_train, x_test_rnn, y_test = get_train_test_rnn(
                                                        train_df,
                                                        test_df
                                                        )

## CNN Models

### Fonction d'obtention des CNN suivant une liste de paramètres

In [65]:
def get_cnn_models(nb_layers, params_cnn):
    """Train and evaluate one CNN per hyper-parameter set.

    Each model is trained in two consecutive rounds of 50 epochs on the
    notebook-level ``x_train_cnn`` / ``y_train`` arrays and evaluated on
    ``x_test_cnn`` / ``y_test`` after each round, so results are reported
    for 50 and 100 cumulative epochs.

    Args:
        nb_layers: number of convolution blocks requested from ``create_cnn``.
        params_cnn: iterable of dicts holding at least the keys ``'drop'``
            and ``'learning_rate'``.

    Returns:
        A list with one dict per parameter set: a copy of the input dict,
        extended with keys ``'50'`` and ``'100'`` mapping to
        ``{'loss': ..., 'acc': ...}`` measured on the test set.
    """
    epochs_per_round = 50

    res = []
    for param in params_cnn:

        local_res = param.copy()

        # forward the requested depth (it used to be hard-coded to 1,
        # which made every run a single-layer run)
        model = create_cnn(x_train_cnn, y_train, nb_layers=nb_layers, drop=param['drop'])
        compile_model(model, mode='cnn', learning_r=param['learning_rate'])

        for round_idx in range(2):
            # training continues from the previous round's weights, so the
            # result key is the cumulative epoch count
            total_epochs = epochs_per_round * (round_idx + 1)
            model.fit(x_train_cnn, y_train, batch_size=25, epochs=epochs_per_round, verbose=0)

            loss, acc = model.evaluate(x_test_cnn, y_test, batch_size=25, verbose=False)

            local_res[str(total_epochs)] = {'loss': loss, 'acc': acc}

        res.append(local_res)

    return res

### Paramétrage des réseaux CNN

In [66]:
# Hyper-parameter grid for the CNN runs: every (dropout, learning rate) pair,
# each expressed in tenths. With the current ranges the grid holds one entry.
params_cnn = [
    {'drop': drop_tenths / 10, 'learning_rate': lr_tenths / 10}
    for drop_tenths in range(1, 2)
    for lr_tenths in range(2, 3, 2)
]
        

### Résultats

#### Modèles 1 couche (long)

In [67]:
# Run the CNN grid with nb_layers=1 and print the raw list of result dicts.
layer_1_res = get_cnn_models(1, params_cnn)
print(layer_1_res)

[{'drop': 0.1, 'learning_rate': 0.2, '50': {'loss': 0.41189631819725037, 'acc': 0.8399999737739563}, '100': {'loss': 0.475250780582428, 'acc': 0.8199999928474426}}]


#### Modèles 2 couches (long)

In [68]:
# Run the same CNN grid, this time requesting nb_layers=2 (results are kept, not printed).
layer_2_res = get_cnn_models(2, params_cnn)

## RNN Models

### Fonction d'obtention des RNN suivant les hyper-paramètres

In [73]:
def get_rnn_models(typed, params_rnn):
    """Train and evaluate one recurrent model per hyper-parameter set.

    Each model is trained in two consecutive rounds of 50 epochs on the
    notebook-level ``x_train_rnn`` / ``y_train`` arrays and evaluated on
    ``x_test_rnn`` / ``y_test`` after each round, so results are reported
    for 50 and 100 cumulative epochs.

    Args:
        typed: ``'simple'`` to build models with ``create_simple_rnn``,
            ``'complex'`` to build them with ``create_complex_rnn``.
        params_rnn: iterable of dicts holding at least the keys ``'units'``,
            ``'drop'``, ``'rec_drop'`` and ``'learning_rate'``.

    Returns:
        A list with one dict per parameter set: a copy of the input dict,
        extended with keys ``'50'`` and ``'100'`` mapping to
        ``{'loss': ..., 'acc': ...}`` measured on the test set.

    Raises:
        ValueError: if ``typed`` is neither ``'simple'`` nor ``'complex'``.
            Previously this left ``model`` unbound and failed later with an
            unrelated error.
    """
    # fail fast, before any expensive training starts
    if typed not in ('simple', 'complex'):
        raise ValueError(
            "Unknown RNN type {!r}: expected 'simple' or 'complex'".format(typed)
        )

    epochs_per_round = 50

    res = []
    for param in params_rnn:

        local_res = param.copy()

        if typed == 'simple':
            model = create_simple_rnn(x_train_rnn, y_train, param['units'], drop=param['drop'], rec_drop=param['rec_drop'])
        else:
            model = create_complex_rnn(x_train_rnn, y_train, param['units'], drop=param['drop'], rec_drop=param['rec_drop'])

        compile_model(model, mode='lstm', learning_r=param['learning_rate'])

        for round_idx in range(2):
            # training continues from the previous round's weights, so the
            # result key is the cumulative epoch count
            total_epochs = epochs_per_round * (round_idx + 1)

            train_rnn(model, x_train_rnn, y_train, epochs_per_round)
            loss, acc = model.evaluate(x_test_rnn, y_test, batch_size=1, verbose=1)

            local_res[str(total_epochs)] = {'loss': loss, 'acc': acc}

        res.append(local_res)

    return res

### Paramétrages des hyper-paramètres RNN

In [74]:
# Hyper-parameter grid for the RNN runs: cartesian product of dropout,
# recurrent dropout and learning rate (each expressed in tenths) with the
# number of recurrent units, iterated in that nesting order.
params_rnn = [
    {
        'drop': drop_tenths / 10,
        'rec_drop': rec_drop_tenths / 10,
        'learning_rate': lr_tenths / 10,
        'units': n_units
    }
    for drop_tenths in range(1, 2)
    for rec_drop_tenths in range(1, 3)
    for lr_tenths in range(2, 5, 2)
    for n_units in range(10, 26, 5)
]

### Résultats

#### RNN simples (long)

In [75]:
# Train and evaluate the SimpleRNN variant for every entry of params_rnn, then print the raw results.
# Slow: each entry is trained with batch size 1 for two rounds of 50 epochs.
simple_rnn_res = get_rnn_models('simple', params_rnn)
print(simple_rnn_res)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

KeyboardInterrupt: 

#### RNN complexes (très long)

In [None]:
# Train and evaluate the LSTM variant for every entry of params_rnn (results are kept, not printed).
complex_rnn_res = get_rnn_models('complex', params_rnn)