In [1]:
import numpy as np
import pandas as pd

In [10]:
# Load the sensor dataset (15-minute, normalized data judging by the file name).
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
df = pd.read_pickle('data/pickles/carpols_15min_normalized.pkl')
# Report how many rows were loaded, then preview the first ones.
print('{} échantillons'.format(len(df)))
df.head()

19039 échantillons


Unnamed: 0,date,humidity,temperature,pressure,t_grad,so2_ref,h2s_ref,h2s,captor_id
0,2016-05-23 08:15:00,-1.622487,1.003809,0.053035,,0.0,0.0,7.6,1303
1,2016-05-23 08:30:00,-1.622487,1.003809,0.053035,,0.0,0.0,14.1,1303
2,2016-05-23 08:45:00,-1.622487,1.003809,0.053035,,0.0,0.0,14.9,1303
3,2016-05-23 09:00:00,-1.622487,1.003809,0.053035,,0.0,0.0,15.0,1303
4,2016-05-23 09:15:00,-1.622487,1.003809,0.053035,,0.0,0.0,14.9,1303


In [11]:
def split_dataframe(dataframe, percent):
    """Split ``dataframe`` into two consecutive, non-overlapping parts.

    Parameters
    ----------
    dataframe : object supporting ``len`` and slicing (e.g. a DataFrame)
        Rows to split; their order is preserved.
    percent : float
        Fraction of the rows, between 0 and 1 inclusive, that goes into the
        first part. The resulting row count is rounded down.

    Returns
    -------
    tuple
        ``(first, second)``: ``first`` holds the first
        ``floor(percent * len(dataframe))`` rows, ``second`` holds the rest.

    Raises
    ------
    ValueError
        If ``percent`` is outside ``[0, 1]``.
    """
    # A negative fraction would silently slice from the end, and a fraction
    # above 1 would silently leave the second part empty: reject both.
    if not 0 <= percent <= 1:
        raise ValueError(
            'percent must be between 0 and 1, got {!r}'.format(percent))
    nb_rows = int(np.floor(percent * len(dataframe)))
    return dataframe[:nb_rows], dataframe[nb_rows:]

def dataframe_to_xy_with_ref(df):
    """Extract ``(X, y)`` arrays from ``df``, keeping ``h2s_ref`` as a feature.

    ``X`` is a 2-D array built from the columns ``h2s_ref``, ``pressure``,
    ``temperature``, ``humidity``, ``t_grad`` and ``h2s`` (in that order);
    ``y`` is the 1-D array of the ``so2_ref`` column.
    """
    feature_columns = [
        'h2s_ref', 'pressure', 'temperature', 'humidity', 't_grad', 'h2s',
    ]
    features = np.array(df[feature_columns])
    target = np.array(df['so2_ref'])
    return features, target

def dataframe_to_xy_without_ref(df):
    """Extract ``(X, y)`` arrays from ``df``, leaving ``h2s_ref`` out.

    ``X`` is a 2-D array built from the columns ``pressure``, ``temperature``,
    ``humidity``, ``t_grad`` and ``h2s`` (in that order); ``y`` is the 1-D
    array of the ``so2_ref`` column.
    """
    feature_columns = ['pressure', 'temperature', 'humidity', 't_grad', 'h2s']
    features = np.array(df[feature_columns])
    target = np.array(df['so2_ref'])
    return features, target

# Fixed seed so the shuffle below -- and therefore the train/valid/test
# partition derived from it -- is the same on every run.
SHUFFLE_SEED = 42

# Keep the previous index as a regular 'index' column.
df = df.reset_index()
# Discard every row that has a missing value in any column.
df = df.dropna()
# Shuffle the rows by reindexing on a seeded permutation of the index labels.
df = df.reindex(np.random.RandomState(SHUFFLE_SEED).permutation(df.index))
print('{} échantillons'.format(str(len(df))))

14936 échantillons


In [12]:
# First half of the shuffled rows is held out, second half is the training set.
df_holdout, df_train = split_dataframe(df, 0.5)
# The held-out half is split again: first part validation, second part test.
df_valid, df_test = split_dataframe(df_holdout, 0.5)

In [13]:
df_train.head()

Unnamed: 0,index,date,humidity,temperature,pressure,t_grad,so2_ref,h2s_ref,h2s,captor_id
1956,1956,2016-06-12 20:30:00,0.575613,-0.386005,-1.491604,-0.102774,0.0,0.0,8.7,1303
12807,12807,2016-09-01 08:00:00,0.575613,0.135175,1.134282,-0.362858,3.4,0.3,1.8,1304
15855,15855,2016-10-03 19:15:00,-0.023869,-0.907186,1.597674,2.237978,0.0,0.3,0.0,1304
9669,9669,2016-07-29 20:00:00,-0.090478,0.135175,-0.564821,-0.102774,0.4,0.3,2.3,1304
15622,15622,2016-10-01 05:45:00,0.84205,-1.080913,-1.491604,0.157309,6.4,1.4,6.5,1304


## Learning

In [18]:
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping

def baseline_model(dense_size, input_dim, loss_function, optimizer):
    """Build and compile a network with a single hidden layer.

    Parameters
    ----------
    dense_size : number of units in the hidden ReLU layer.
    input_dim : number of input features.
    loss_function : loss passed to ``compile``.
    optimizer : optimizer passed to ``compile``.

    Returns
    -------
    The compiled ``Sequential`` model. Its summary is printed as a side
    effect before returning.
    """
    # Hidden layer first, then a one-unit output layer with no activation given.
    hidden_layer = Dense(dense_size, input_dim=input_dim,
                         kernel_initializer='normal', activation='relu')
    output_layer = Dense(1, kernel_initializer='normal')

    network = Sequential()
    for layer in (hidden_layer, output_layer):
        network.add(layer)

    network.compile(loss=loss_function, optimizer=optimizer)
    network.summary()
    return network

Using TensorFlow backend.


In [19]:
# Turn each partition into its (features, target) pair, h2s_ref included.
(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = (
    dataframe_to_xy_with_ref(partition)
    for partition in (df_train, df_valid, df_test)
)

In [20]:
# 32 hidden units; the input width is taken from the training feature matrix.
model = baseline_model(
    dense_size=32,
    input_dim=X_train.shape[1],
    loss_function='mse',
    optimizer='adamax',
)
# Stop training once the validation loss has gone 10 epochs without improving.
early_stopping = EarlyStopping(
    monitor='val_loss', patience=10, mode='auto', verbose=1)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=1000,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                224       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 257
Trainable params: 257
Non-trainable params: 0
_________________________________________________________________
Train on 7468 samples, validate on 3734 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/10

KeyboardInterrupt: 