In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import os
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras import regularizers


In [2]:
sns.set_style("whitegrid")

In [3]:
dir_gdl = '../data/processed/2016-2019_3std_preprocessed.csv'
df = pd.read_csv(dir_gdl)
df_data = df[df['PARAM']=='PM10'].fillna(-1)
df_data.drop(columns=['Unnamed: 0'], inplace=True)
df_data = df_data[df_data.CEN != -1] #Elimina valores negativos en la columna de salida
print(df_data.shape)
df_data.head(5)

(27504, 13)


Unnamed: 0,FECHA,HORA,PARAM,AGU,ATM,CEN,LDO,LPIN,MIR,OBL,SFE,TLA,VAL
4,2016-01-01 00:00:00,00:00,PM10,49.92,146.95,86.12,174.04,-1.0,69.75,197.67,115.54,143.4,17.08
13,2016-01-01 01:00:00,01:00,PM10,52.8,-1.0,46.49,115.27,-1.0,68.99,138.09,84.24,100.46,29.15
22,2016-01-01 02:00:00,02:00,PM10,52.71,113.44,63.93,99.0,-1.0,117.7,98.79,135.39,82.05,30.89
31,2016-01-01 03:00:00,03:00,PM10,51.24,73.3,60.75,83.65,-1.0,160.3,97.94,117.6,114.74,38.74
40,2016-01-01 04:00:00,04:00,PM10,58.84,52.55,108.09,49.7,-1.0,180.89,134.39,164.68,118.83,51.48


In [4]:
X,Y = df_data[['AGU','ATM','LDO','LPIN','MIR','OBL','SFE','TLA','VAL']].to_numpy(), df_data["CEN"].to_numpy()   #separate data into input and output features

Y=np.reshape(Y, (-1,1))

X_std = (X - np.nanmin(np.where(X>=0, X, np.nan),axis=0)) / (X.max(axis=0) - np.nanmin(np.where(X>=0, X, np.nan),axis=0))
xscale = X_std * (1 - 0) + 0
xscale[X==-1]=-1

scaler_y = MinMaxScaler()
scaler_y.fit(Y)
yscale=scaler_y.transform(Y)

X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size = 0.2) #split

## Model creation

#### Creating layers

In [5]:
def neuron_layers(nx,nh,ny,hl,act,r):
    
    tf.keras.regularizers.l1(l1=r)
    
    model = Sequential()
    
    for i in range(1, 3+hl):
        
        if i == 1:
            model.add(Dense(nx, input_dim=9, kernel_initializer='normal', activation=act,kernel_regularizer='l1'))
            
        elif i == (2+hl):
            model.add(Dense(ny, activation='linear'))
            
        else:
            model.add(Dense(nh, activation=act))
            
    return model

### Iterating over models

In [6]:
epochs_ls = [50, 100, 200] #50 y 100, 250
nh_ls = [5,10,25,50,75,100] #minimo 5, maximo 25 de una en una (8-18)
hl_ls = [1,2,3,4,5,6,7,8,9,10] #maximo 3 capas
reg_ls = [1,0.75,0.5,0.1,0.05,0.01,0.005]
activation_ls = ['relu'] #relu

models = {}

i = 1

for e in epochs_ls:
        for nh in nh_ls:
            for hl in hl_ls:
                for r in reg_ls:
                    for a in activation_ls:
                    
                        model = neuron_layers(10,nh,1,hl,a,r)

                        model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])

                        history = model.fit(X_train, Y_train, epochs=e, batch_size=50,  verbose=0, validation_split=0.2)

                        #statistics for train
                        y_hat= model.predict(X_train)
                        acc_train = r2_score(Y_train, y_hat)
                        mse_train = mean_squared_error(Y_train, y_hat)
                        mae_train = mean_absolute_error(Y_train, y_hat)
                        rmse_train = mean_squared_error(Y_train, y_hat, squared=False)

                        res_train = Y_train - y_hat
                        std_res_train = round(res_train.std(),3)

                        num_train = ((y_hat - Y_train)**2).sum()
                        den_train = ((abs(y_hat - Y_train.mean()) + 
                                abs(Y_train - Y_train.mean()))**2).sum()
                        ia_train = round(1 - (num_train / den_train),3)

                        #accuracy for test
                        y_hat = model.predict(X_test)
                        acc_test = r2_score(Y_test, y_hat)
                        mse_test = mean_squared_error(Y_test, y_hat)
                        mae_test = mean_absolute_error(Y_test, y_hat)
                        rmse_test = mean_squared_error(Y_test, y_hat, squared=False)

                        res_test = Y_test - y_hat
                        std_res_test = round(res_test.std(),3)

                        num_test = ((y_hat - Y_test)**2).sum()
                        den_test = ((abs(y_hat - Y_test.mean()) + 
                                abs(Y_test - Y_test.mean()))**2).sum()
                        ia_test = round(1 - (num_test / den_test),3)

                        models['model'+str(i)] = [e, nh, hl, a, r, 
                                                               acc_train, mse_train, mae_train, rmse_train, std_res_train, ia_train,
                                                               acc_test, mse_test, mae_test, rmse_test, std_res_test, ia_test]

                        print ('\n*For model',str(i),'settings are:','-epochs:',str(e),'-hidden neurons:',str(nh),'-hidden layers:',str(hl),'-activation:',a,
                               '-regularization cost:',r,
                               '\nAccuracy for training is:', str(acc_train),'Accuracy for test is:',str(acc_test))
                        
                        i += 1


*For model 1 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 1 
Accuracy for training is: 0.7698158612396616 Accuracy for test is: 0.7568455518875187

*For model 2 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.75 
Accuracy for training is: -0.5038172635179039 Accuracy for test is: -0.5251577267281813

*For model 3 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.5 
Accuracy for training is: 0.7527281772050061 Accuracy for test is: 0.7376032906200056

*For model 4 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.1 
Accuracy for training is: 0.7151567282279812 Accuracy for test is: 0.7055572837757875

*For model 5 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.05 
Accuracy for training is: 0.775486555986095 

In [37]:
df_models = pd.DataFrame(models)
df_models.head(20)

Unnamed: 0,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,...,model1251,model1252,model1253,model1254,model1255,model1256,model1257,model1258,model1259,model1260
0,50,50,50,50,50,50,50,50,50,50,...,200,200,200,200,200,200,200,200,200,200
1,5,5,5,5,5,5,5,5,5,5,...,100,100,100,100,100,100,100,100,100,100
2,1,1,1,1,1,1,1,2,2,2,...,9,9,9,10,10,10,10,10,10,10
3,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu,...,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu
4,1,0.75,0.5,0.1,0.05,0.01,0.005,1,0.75,0.5,...,0.05,0.01,0.005,1,0.75,0.5,0.1,0.05,0.01,0.005
5,0.769816,-0.503817,0.752728,0.715157,0.775487,0.768434,0.774627,0.77487,-0.503562,0.760738,...,0.871866,0.876707,0.842827,0.881065,0.853101,0.867192,0.870167,0.854923,0.879892,0.882754
6,153.146995,1000.525475,164.515838,189.513019,149.374147,154.066418,149.946281,149.784519,1000.355852,159.186961,...,85.25089,82.029876,104.570892,79.130535,97.735554,88.360531,86.381222,96.523138,79.910403,78.006346
7,8.308663,20.455751,8.458616,9.137971,8.138845,8.151289,7.951833,8.244184,20.453105,8.435771,...,6.120738,6.141205,6.960505,5.864301,6.46343,6.307355,6.198451,6.541824,6.035041,5.974908
8,12.375257,31.631084,12.826373,13.766373,12.221872,12.412349,12.245255,12.238649,31.628403,12.616932,...,9.233141,9.057035,10.225991,8.895535,9.886129,9.400028,9.29415,9.824619,8.939262,8.83212
9,12.317,25.794,12.806,13.761,12.222,12.412,12.183,12.176,25.794,12.617,...,9.226,8.885,10.029,8.895,9.599,9.364,9.285,9.824,8.935,8.832


In [38]:
df_models.to_csv('../output/data/nn_RawModels_v1.csv')

In [39]:
df_models = df_models.transpose()
df_models = df_models.rename(columns={0:'epochs',1:'hidden_neurons',2:'hidden_layers',3:'activation',
                        4:'regularization',5:'acc_train',6:'mse_train',7:'mae_train',
                        8:'rmse_train',9:'std_res_train',10:'ia_train',11:'acc_test',
                         12:'mse_test',13:'mae_test',14:'rmse_test',15:'std_res_test',16:'ia_test'})
print(df_models.shape)
df_models.head(5)

(1260, 17)


Unnamed: 0,epochs,hidden_neurons,hidden_layers,activation,regularization,acc_train,mse_train,mae_train,rmse_train,std_res_train,ia_train,acc_test,mse_test,mae_test,rmse_test,std_res_test,ia_test
model1,50,5,1,relu,1.0,0.769816,153.146995,8.308663,12.375257,12.317,0.933,0.756846,155.684865,8.299926,12.477374,12.437,0.929
model2,50,5,1,relu,0.75,-0.503817,1000.525475,20.455751,31.631084,25.794,0.398,-0.525158,976.515039,20.416087,31.249241,25.304,0.401
model3,50,5,1,relu,0.5,0.752728,164.515838,8.458616,12.826373,12.806,0.923,0.737603,168.005137,8.512054,12.96168,12.927,0.919
model4,50,5,1,relu,0.1,0.715157,189.513019,9.137971,13.766373,13.761,0.911,0.705557,188.523282,9.123379,13.730378,13.722,0.909
model5,50,5,1,relu,0.05,0.775487,149.374147,8.138845,12.221872,12.222,0.933,0.758,154.945734,8.184086,12.44772,12.444,0.929


In [40]:
cols = ['acc_train','mse_train','mae_train','rmse_train',
       'std_res_train','ia_train','acc_test','mse_test','mae_test',
       'rmse_test','std_res_test','ia_test']
for c in cols:
    df_models[c] = pd.to_numeric(df_models[c])
    df_models[c] = round(df_models[c],3)

In [41]:
df_models.to_csv('../output/data/nn_models_v1.csv')

## Filtering best models

In [82]:
df_best_models = pd.DataFrame(columns=['model','variable','result'])

min_cols = ['mse_train','mae_train','rmse_train',
       'mse_test','mae_test','rmse_test']
max_cols = ['acc_train','ia_train',
            'acc_test','ia_test']

i = 0
for c in min_cols:
    val = df_models[c].min()
    df_tmp = df_models.loc[df_models[c]==val].copy()
    df_tmp[['epochs']] = df_tmp[['epochs']]/200
    df_tmp[['hidden_neurons']] = df_tmp[['hidden_neurons']]/100
    df_tmp[['hidden_layers']] = df_tmp[['hidden_layers']]/10
    df_tmp[['regularization']] = df_tmp[['regularization']]/1
    model = df_tmp[['epochs','hidden_neurons',
                    'hidden_layers','regularization']].sum(axis=1).idxmin()
    df_best_models.loc[i] = [model, c, val]
    i += 1
    
for c in max_cols:
    val = df_models[c].max()
    df_tmp = df_models.loc[df_models[c]==val].copy()
    df_tmp[['epochs']] = df_tmp[['epochs']]/200
    df_tmp[['hidden_neurons']] = df_tmp[['hidden_neurons']]/100
    df_tmp[['hidden_layers']] = df_tmp[['hidden_layers']]/10
    df_tmp[['regularization']] = df_tmp[['regularization']]/1
    model = df_tmp[['epochs','hidden_neurons',
                    'hidden_layers','regularization']].sum(axis=1).idxmin()
    df_best_models.loc[i] = [model, c, val]
    i += 1

In [83]:
df_best_models

Unnamed: 0,model,variable,result
0,model1209,mse_train,76.078
1,model1254,mae_train,5.864
2,model1209,rmse_train,8.722
3,model732,mse_test,130.45
4,model737,mae_test,7.472
5,model732,rmse_test,11.421
6,model1209,acc_train,0.886
7,model1230,ia_train,0.97
8,model732,acc_test,0.796
9,model788,ia_test,0.944


In [84]:
df_best_models.to_csv('../output/data/RNA/BestModels_Notebook6b.csv')

## Analyzing models

In [81]:
model_var = ['epochs','hidden_neurons',
            'hidden_layers','regularization']

for m in model_var:
    df_models.groupby([m]).agg(['mean','std']).to_csv(f'../output/data/RNA/{m}_ModelAnalysis_Notebook6b.csv')

In [79]:
df_models.groupby(['epochs']).agg(['mean','std'])

Unnamed: 0_level_0,hidden_neurons,hidden_neurons,hidden_layers,hidden_layers,regularization,regularization,acc_train,acc_train,mse_train,mse_train,...,mse_test,mse_test,mae_test,mae_test,rmse_test,rmse_test,std_res_test,std_res_test,ia_test,ia_test
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,...,mean,std,mean,std,mean,std,mean,std,mean,std
epochs,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
50,44.166667,34.61201,5.5,2.875707,0.345,0.376889,0.769952,0.17883,153.06376,118.965629,...,163.744714,113.805352,8.211683,1.735393,12.521298,2.641624,12.30401,1.843284,0.923243,0.073054
100,44.166667,34.61201,5.5,2.875707,0.345,0.376889,0.796783,0.108476,135.209443,72.177703,...,153.929086,65.213602,8.028779,1.246567,12.281976,1.757637,12.207279,1.733325,0.920905,0.105124
200,44.166667,34.61201,5.5,2.875707,0.345,0.376889,0.815948,0.103057,122.451767,68.563434,...,155.139198,58.90367,8.050943,1.186469,12.354119,1.587603,12.304467,1.5903,0.920536,0.110536
