In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [3]:
def baseline_model(dense_size, input_dim):
    """Build and compile a regression network with one hidden layer.

    Args:
        dense_size: number of units in the hidden ReLU layer.
        input_dim: number of input features fed to the hidden layer.

    Returns:
        A compiled Keras ``Sequential`` model (mean squared error loss,
        Adam optimizer). The model summary is printed as a side effect.
    """
    hidden_layer = Dense(dense_size, input_dim=input_dim, kernel_initializer='normal', activation='relu')
    # Single output unit with no activation specified: the raw regression value.
    output_layer = Dense(1, kernel_initializer='normal')

    model = Sequential([hidden_layer, output_layer])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.summary()
    return model

In [4]:
# Load the data and normalise the sensor columns.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df = pd.read_pickle('data/data.pkl')
sensor_columns = ['h2s', 'pressure', 'temperature', 'humidity', 'so2', 'h2s_ref']
data = df[sensor_columns]
# Non-numeric entries become NaN, then each column is centred and scaled by
# its own mean / standard deviation (computed over all rows of the frame).
numeric_data = data.apply(pd.to_numeric, errors="coerce")
df[sensor_columns] = numeric_data.apply(lambda col: (col - np.mean(col)) / (np.std(col)))
print(len(df))
# Keep only the rows that contain no missing value in any column.
complete_rows = pd.notnull(df).all(axis=1)
df = df[complete_rows]
print(len(df))
df.head()

19445
8676


Unnamed: 0,date,h2s,pressure,temperature,humidity,n_points,so2,h2s_ref,captor
4,2016-05-23 09:15:00,1.673433,0.047316,0.996046,-1.610853,15,-0.259294,-0.70924,1303
9,2016-05-23 10:30:00,1.530588,0.20213,0.996046,-1.677224,15,-0.259294,-0.70924,1303
20,2016-05-23 13:15:00,1.637722,0.356944,0.996046,-1.809965,15,-0.259294,-0.70924,1303
25,2016-05-23 14:30:00,1.655577,0.356944,0.996046,-1.809965,15,-0.259294,-0.561424,1303
31,2016-05-23 16:00:00,1.566299,0.356944,0.996046,-1.876336,15,-0.259294,-0.70924,1303


In [6]:
def split_dataframe(dataframe, percent):
    """Cut ``dataframe`` into a leading and a trailing part.

    Args:
        dataframe: sliceable sequence of rows (e.g. a pandas DataFrame).
        percent: fraction of the rows that goes into the first part.

    Returns:
        A ``(head, tail)`` tuple: the first ``floor(percent * len)`` rows
        and the remaining rows, in their original order.
    """
    cut = int(np.floor(percent * len(dataframe)))
    head = dataframe[:cut]
    tail = dataframe[cut:]
    return head, tail

def dataframe_to_xy(df):
    """Convert a frame into a ``(features, target)`` pair of numpy arrays.

    Args:
        df: frame holding the columns 'h2s', 'pressure', 'temperature',
            'humidity', 'so2' and 'h2s_ref'.

    Returns:
        ``(X, y)`` where ``X`` is the 2-D array of the five feature columns
        (in the order listed above) and ``y`` is the 1-D array of 'h2s_ref'.
    """
    feature_names = ['h2s', 'pressure', 'temperature', 'humidity', 'so2']
    features = np.array(df[feature_names])
    target = np.array(df['h2s_ref'])
    return features, target

# First half of the rows for training; the remaining half is cut in two
# again, giving the validation set and then the test set.
df_train, df_remaining = split_dataframe(df, 0.5)
df_valid, df_test = split_dataframe(df_remaining, 0.5)

# Turn each subset into its (features, target) numpy arrays.
(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = (
    dataframe_to_xy(subset) for subset in (df_train, df_valid, df_test)
)

In [7]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline
# NOTE(review): this import sits mid-notebook; consider moving it to the
# imports cell at the top so dependencies are visible in one place.
import mpld3
# Switch on mpld3's notebook integration (presumably makes figures
# interactive in the browser; confirm against the mpld3 docs).
mpld3.enable_notebook()

In [10]:
# Train one baseline model per hidden-layer size 2**1 ... 2**p and record,
# for each size, the training history, the test loss and the fraction of
# test predictions that fall within each tolerance of the reference value.
p = 10
input_dim = 5  # must match the number of feature columns in X_train
models_info = {}
tolerances = np.linspace(0, 1, 10)

for i in range(p):
    dense_size = 2**(i+1)
    info_dict = {}

    # The callback must exist before fit() is called (it used to be commented
    # out, which raised NameError on a fresh kernel). A new instance is built
    # for every model so no stopping state is carried between runs.
    early_stopping = EarlyStopping(monitor='val_loss', verbose=1, mode='auto', patience=10)

    model = baseline_model(dense_size, input_dim)
    info_dict['history'] = model.fit(X_train, y_train, batch_size=5, epochs=100, validation_data=(X_valid, y_valid), callbacks=[early_stopping], verbose=1)

    info_dict['score'] = model.evaluate(X_test, y_test, batch_size=5)

    y_pred = model.predict(X_test)
    # The absolute error does not depend on the tolerance: compute it once.
    abs_error = np.abs(y_pred.flatten() - y_test.flatten())
    acc = [np.sum(abs_error <= tol) / len(y_test) for tol in tolerances]
    info_dict['accuracies'] = acc

    # Key by this model's hidden-layer size. The previous key, 2**(p+1), was
    # the same for every iteration, so each model overwrote the one before.
    models_info[dense_size] = info_dict

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 3         
Total params: 15
Trainable params: 15
Non-trainable params: 0
_________________________________________________________________
Train on 4338 samples, validate on 2169 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/

Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 00036: early stopping
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 16)                96        
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 17        
Total params: 113
Trainable params: 113
Non-trainable params: 0
_________________________________________________________________
Train on 4338 samples, validate on 2169 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/

Epoch 00015: early stopping
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             (None, 64)                384       
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 65        
Total params: 449
Trainable params: 449
Non-trainable params: 0
_________________________________________________________________
Train on 4338 samples, validate on 2169 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 00015: early stopping
Layer (type)                 Output Shape              Param #   
dense_19 (Dense)             (None, 128)               768       
_________________________________________________________________
dense_20 (Dense)             (None, 1)                 129       
Total params: 897
Trainable params: 897

Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 00017: early stopping
Layer (type)                 Output Shape              Param #   
dense_25 (Dense)             (None, 1024)              6144      
_________________________________________________________________
dense_26 (Dense)             (None, 1)                 1025      
Total params: 7,169
Trainable params: 7,169
Non-trainable params: 0
_________________________________________________________________
Train on 4338 samples, validate on 2169 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 00011: early stopping