In [12]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import os
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

plt.style.use('seaborn-bright')

In [4]:
dir_gdl = '../data/processed/2016-2019_3std_preprocessed.csv'
df = pd.read_csv(dir_gdl)
df_data = df[df['PARAM']=='PM10'].fillna(-1)
df_data.drop(columns=['Unnamed: 0'], inplace=True)
df_data = df_data[df_data.CEN != -1] #Elimina valores negativos en la columna de salida
df_data

Unnamed: 0,FECHA,HORA,PARAM,AGU,ATM,CEN,LDO,LPIN,MIR,OBL,SFE,TLA,VAL
4,2016-01-01 00:00:00,00:00,PM10,49.92,146.95,86.12,174.04,-1.0,69.75,197.67,115.54,143.40,17.08
13,2016-01-01 01:00:00,01:00,PM10,52.80,-1.00,46.49,115.27,-1.0,68.99,138.09,84.24,100.46,29.15
22,2016-01-01 02:00:00,02:00,PM10,52.71,113.44,63.93,99.00,-1.0,117.70,98.79,135.39,82.05,30.89
31,2016-01-01 03:00:00,03:00,PM10,51.24,73.30,60.75,83.65,-1.0,160.30,97.94,117.60,114.74,38.74
40,2016-01-01 04:00:00,04:00,PM10,58.84,52.55,108.09,49.70,-1.0,180.89,134.39,164.68,118.83,51.48
...,...,...,...,...,...,...,...,...,...,...,...,...,...
315535,2019-12-31 19:00:00,19:00,PM10,-1.00,18.10,22.27,84.00,46.2,-1.00,-1.00,-1.00,12.68,16.20
315544,2019-12-31 20:00:00,20:00,PM10,-1.00,-1.00,27.51,84.40,57.4,-1.00,-1.00,-1.00,50.31,14.00
315553,2019-12-31 21:00:00,21:00,PM10,-1.00,-1.00,28.60,75.30,151.5,-1.00,-1.00,-1.00,6.86,22.90
315562,2019-12-31 22:00:00,22:00,PM10,-1.00,-1.00,50.43,125.60,174.2,-1.00,-1.00,-1.00,113.16,32.10


In [5]:
X,Y = df_data[['AGU','ATM','LDO','LPIN','MIR','OBL','SFE','TLA','VAL']].to_numpy(), df_data["CEN"].to_numpy()   #separate data into input and output features

Y=np.reshape(Y, (-1,1))

X_std = (X - np.nanmin(np.where(X>=0, X, np.nan),axis=0)) / (X.max(axis=0) - np.nanmin(np.where(X>=0, X, np.nan),axis=0))
xscale = X_std * (1 - 0) + 0
xscale[X==-1]=-1

scaler_y = MinMaxScaler()
scaler_y.fit(Y)
yscale=scaler_y.transform(Y)

X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size = 0.2) #split

## Testing code configurations

#### Creating layers

In [6]:
def neuron_layers(nx,nh,ny,hl,act,r):
    
    tf.keras.regularizers.l1(l1=r)
    
    model = Sequential()
    
    for i in range(1, 3+hl):
        
        if i == 1:
            model.add(Dense(nx, input_dim=9, kernel_initializer='normal', activation=act,kernel_regularizer='l1'))
            
        elif i == (2+hl):
            model.add(Dense(ny, activation='linear'))
            
        else:
            model.add(Dense(nh, activation=act))
            
    return model

In [318]:
epochs_ls = [50, 100, 200] #50 y 100, 250
nh_ls = [5,10,25,50,75,100] #minimo 5, maximo 25 de una en una (8-18)
hl_ls = [1,2,3,4,5,6,7,8,9,10] #maximo 3 capas
reg_ls = [1,0.75,0.5,0.1,0.05,0.01,0.005]
activation_ls = ['relu'] #relu

models = {}

i = 1

for e in epochs_ls:
        for nh in nh_ls:
            for hl in hl_ls:
                for r in reg_ls:
                    for a in activation_ls:
                    
                        model = neuron_layers(10,nh,1,hl,a,r)

                        model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])

                        history = model.fit(X_train, Y_train, epochs=e, batch_size=50,  verbose=0, validation_split=0.2)

                        #accuracy for train
                        y_hat= model.predict(X_train)
                        acc_train = r2_score(Y_train, y_hat)

                        #accuracy for test
                        y_hat = model.predict(X_test)
                        acc_test = r2_score(Y_test, y_hat)

                        models['model'+str(i)] = [e, nh, hl, a, acc_train, acc_test]

                        print ('*For model',str(i),'settings are:','-epochs:',str(e),'-hidden neurons:',str(nh),'-hidden layers:',str(hl),'-activation:',a,
                               '-regularization cost:',r,
                               '\nAccuracy for training is:', str(acc_train),'Accuracy for test is:',str(acc_test))

                        i += 1 

*For model 1 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 1 
Accuracy for training is: 0.767177493139766 Accuracy for test is: 0.7729807985170473
*For model 2 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.75 
Accuracy for training is: 0.7731823564405294 Accuracy for test is: 0.7863405567465785
*For model 3 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.5 
Accuracy for training is: 0.7780088849659437 Accuracy for test is: 0.7873429586392116
*For model 4 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.1 
Accuracy for training is: 0.7639709965842975 Accuracy for test is: 0.7615581295732288
*For model 5 settings are: -epochs: 50 -hidden neurons: 5 -hidden layers: 1 -activation: relu -regularization cost: 0.05 
Accuracy for training is: 0.7443761739638564 Accurac

In [322]:
df_models = pd.DataFrame(models)
df_models.head(1)

Unnamed: 0,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,...,model1251,model1252,model1253,model1254,model1255,model1256,model1257,model1258,model1259,model1260
0,50,50,50,50,50,50,50,50,50,50,...,200,200,200,200,200,200,200,200,200,200


In [323]:
df_models.to_csv('../data/nn_models_vRegL1.csv')

## Testing data entries

## Previous model

In [37]:
nn = pd.read_csv('../data/nn_models_vRegL1.csv')
nn.loc[nn['Unnamed: 0']==0, ['Unnamed: 0']]='epochs'
nn.loc[nn['Unnamed: 0']==1, ['Unnamed: 0']]='hidden_neurons'
nn.loc[nn['Unnamed: 0']==2, ['Unnamed: 0']]='hidden_layers'
nn.loc[nn['Unnamed: 0']==3, ['Unnamed: 0']]='activation'
nn.loc[nn['Unnamed: 0']==4, ['Unnamed: 0']]='r2_train'
nn.loc[nn['Unnamed: 0']==5, ['Unnamed: 0']]='r2_test'
nn

Unnamed: 0.1,Unnamed: 0,model1,model2,model3,model4,model5,model6,model7,model8,model9,...,model1251,model1252,model1253,model1254,model1255,model1256,model1257,model1258,model1259,model1260
0,epochs,50,50,50,50,50,50,50,50,50,...,200,200,200,200,200,200,200,200,200,200
1,hidden_neurons,5,5,5,5,5,5,5,5,5,...,100,100,100,100,100,100,100,100,100,100
2,hidden_layers,1,1,1,1,1,1,1,2,2,...,9,9,9,10,10,10,10,10,10,10
3,activation,relu,relu,relu,relu,relu,relu,relu,relu,relu,...,relu,relu,relu,relu,relu,relu,relu,relu,relu,relu
4,r2_train,0.767177493139766,0.7731823564405294,0.7780088849659437,0.7639709965842975,0.7443761739638564,0.7485955263466167,0.7447819158669,0.7743928212944475,0.7605766571428063,...,0.8743118756089514,0.8379421373019184,0.8817088906088432,0.8623310446962773,0.8813360701690305,0.8386252635530339,0.879685679252549,0.8665445848303253,0.8740642065917972,0.881184186776162
5,r2_test,0.7729807985170473,0.7863405567465785,0.7873429586392116,0.7615581295732288,0.7477935499990529,0.7549564420063439,0.7505409061706845,0.7777078330199059,0.7660937078405978,...,0.7521476346579116,0.7835933074432411,0.7780620619541616,0.760399305220298,0.764375972529673,0.7503914483105985,0.7658356805633514,0.7920441935959,0.7658092798421967,0.7659200814292269


In [38]:
nn.set_index('Unnamed: 0', inplace=True)

In [39]:
nn_80 = list(nn.iloc[5,1:].astype('float')>=0.8)
indices = [i for i, x in enumerate(nn_80) if x == True]

In [None]:
models = {}
reg_ls = [1,0.75,0.5,0.1,0.05,0.01,0.005]

for i in indices:
    for r in reg_ls:
        
        nh = nn.iloc[:,i+1]['hidden_neurons']
        hl = nn.iloc[:,i+1]['hidden_layers']
        e = nn.iloc[:,i+1]['epochs']
        a = nn.iloc[:,i+1]['activation']

        model = neuron_layers(10,nh,1,hl,a,r)

        model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])

        history = model.fit(X_train, Y_train, epochs=e, batch_size=50,  verbose=0, validation_split=0.2)

        #statistics for train
        y_hat= model.predict(X_train)
        acc_train = r2_score(Y_train, y_hat)
        mse_train = mean_square_error(Y_train, y_hat)
        mae_train = mean_absolute_error(Y_train, y_hat)
        rmse_train = mean_square_error(Y_train, y_hat, squared=False)    

        #accuracy for test
        y_hat = model.predict(X_test)
        acc_test = r2_score(Y_test, y_hat)
        mse_test = mean_square_error(Y_test, y_hat)
        mae_test = mean_absolute_error(Y_test, y_hat)
        rmse_test = mean_square_error(Y_test, y_hat, squared=False)

        models['model'+str(i)] = [e, nh, hl, a, r, acc_train, mse_train, mae_train, rmse_train, acc_test, mse_test, mae_test, rmse_test]

        print ('*For model',str(i),'settings are:','-epochs:',str(e),'-hidden neurons:',str(nh),'-hidden layers:',str(hl),'-activation:',a,
               '-regularization cost:',r,
               '\nAccuracy for training is:', str(acc_train),'Accuracy for test is:',str(acc_test),
              '\nMSE for training is:', str(mse_train),'MSE for test is:',str(mse_test),
               '\nMAE for training is:', str(mae_train),'MAE for test is:',str(mae_test),
               '\nRMSE for training is:', str(rmse_train),'RMSE for test is:',str(rmse_test))

In [40]:
for i in indices:
    print (nn.iloc[:,i+1])

Unnamed: 0
epochs                            50
hidden_neurons                    50
hidden_layers                      2
activation                      relu
r2_train          0.8060124360359334
r2_test           0.8034749512331564
Name: model221, dtype: object
Unnamed: 0
epochs                            50
hidden_neurons                    50
hidden_layers                      3
activation                      relu
r2_train          0.8091423666149207
r2_test           0.8041590250378898
Name: model227, dtype: object
Unnamed: 0
epochs                            50
hidden_neurons                    50
hidden_layers                      3
activation                      relu
r2_train           0.817840144583328
r2_test           0.8020445261640035
Name: model228, dtype: object
Unnamed: 0
epochs                            50
hidden_neurons                    50
hidden_layers                      4
activation                      relu
r2_train          0.8175716168845519
r2_test        

In [36]:
nn_80

220