In [1]:
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import regularizers
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor


In [2]:
# Apply seaborn's "whitegrid" style to the figures drawn after this cell.
sns.set_style(style="whitegrid")

In [3]:
# Read the pre-processed measurements file.
dir_gdl = '../data/processed/2016-2019_3std_preprocessed.csv'
df = pd.read_csv(dir_gdl)

# Keep only the PM10 rows, encode missing values with the sentinel -1 and
# drop the 'Unnamed: 0' column (presumably a saved row index -- TODO confirm).
pm10_rows = df.loc[df['PARAM'] == 'PM10']
df_data = pm10_rows.fillna(-1).drop(columns=['Unnamed: 0'])

# Remove the rows whose output column (CEN) holds the -1 sentinel.
df_data = df_data.loc[df_data['CEN'] != -1]
print(df_data.shape)
df_data.head(5)

(27504, 13)


Unnamed: 0,FECHA,HORA,PARAM,AGU,ATM,CEN,LDO,LPIN,MIR,OBL,SFE,TLA,VAL
4,2016-01-01 00:00:00,00:00,PM10,49.92,146.95,86.12,174.04,-1.0,69.75,197.67,115.54,143.4,17.08
13,2016-01-01 01:00:00,01:00,PM10,52.8,-1.0,46.49,115.27,-1.0,68.99,138.09,84.24,100.46,29.15
22,2016-01-01 02:00:00,02:00,PM10,52.71,113.44,63.93,99.0,-1.0,117.7,98.79,135.39,82.05,30.89
31,2016-01-01 03:00:00,03:00,PM10,51.24,73.3,60.75,83.65,-1.0,160.3,97.94,117.6,114.74,38.74
40,2016-01-01 04:00:00,04:00,PM10,58.84,52.55,108.09,49.7,-1.0,180.89,134.39,164.68,118.83,51.48


In [3]:
# Separate the data into input features (nine columns) and the output feature (CEN).
X = df_data[['AGU','ATM','LDO','LPIN','MIR','OBL','SFE','TLA','VAL']].to_numpy()
Y = df_data["CEN"].to_numpy().reshape(-1, 1)

# Min-max scale every input column to [0, 1]. The column minimum is taken over
# the valid (>= 0) readings only, so the -1 missing-value sentinel does not
# become the minimum of the column.
X_min = np.nanmin(np.where(X >= 0, X, np.nan), axis=0)
X_std = (X - X_min) / (X.max(axis=0) - X_min)

# Keep -1 as the missing-value marker in the scaled matrix.
xscale = np.where(X == -1, -1.0, X_std)

# Scale the target to [0, 1]; scaler_y.inverse_transform() maps predictions
# back to the original units.
scaler_y = MinMaxScaler()
yscale = scaler_y.fit_transform(Y)

# Split the *scaled* arrays. Previously the raw X and Y were passed here, which
# silently discarded the scaling computed above. A fixed random_state makes the
# split reproducible across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(
    xscale, yscale, test_size=0.2, random_state=42
)

## Model creation

### Creating layers

In [4]:
def neuron_layers(nx, nh, ny, hl, act, r, input_dim=9):
    """Build an (uncompiled) fully connected Sequential model.

    The network is: one first Dense layer with ``nx`` units, ``hl`` hidden
    Dense layers with ``nh`` units each, and a linear output layer with
    ``ny`` units.

    Parameters
    ----------
    nx : int
        Number of units of the first Dense layer.
    nh : int
        Number of units of every hidden Dense layer.
    ny : int
        Number of units of the linear output layer.
    hl : int
        Number of hidden Dense layers placed between the first and the
        output layer; must be >= 0.
    act : str
        Activation used by the first and the hidden layers.
    r : float
        L1 regularization factor applied to the kernel of the first layer.
    input_dim : int, optional
        Number of input features (default 9, the value previously hard-coded).

    Returns
    -------
    Sequential
        The assembled model; the caller is responsible for compiling it.

    Raises
    ------
    ValueError
        If ``hl`` is negative.
    """
    if hl < 0:
        raise ValueError("hl (number of hidden layers) must be >= 0")

    model = Sequential()

    # Pass the regularizer *object* built from r. Previously the object was
    # created and thrown away while the layer received the string 'l1', which
    # always uses the default factor and therefore ignored r.
    model.add(Dense(nx, input_dim=input_dim, kernel_initializer='normal',
                    activation=act,
                    kernel_regularizer=tf.keras.regularizers.l1(r)))

    for _ in range(hl):
        model.add(Dense(nh, activation=act))

    model.add(Dense(ny, activation='linear'))

    return model

### Iterating over models

In [None]:
# Hyper-parameter grid: 3 * 6 * 10 * 7 * 1 = 1260 configurations are trained.
epochs_ls = [50, 100, 200]  # earlier note: 50 and 100, 250
nh_ls = [5,10,25,50,75,100]  # earlier note: minimum 5, maximum 25, one at a time (8-18)
hl_ls = [1,2,3,4,5,6,7,8,9,10]  # earlier note: at most 3 layers
reg_ls = [1,0.75,0.5,0.1,0.05,0.01,0.005]
activation_ls = ['relu']


def _split_stats(y_true, y_pred):
    """Return [r2, mse, mae, rmse, residual std, index of agreement] for one data split.

    The index of agreement is 1 - sum((p - o)^2) / sum((|p - mean(o)| + |o - mean(o)|)^2),
    where o are the observed values and p the predictions.
    """
    mse = mean_squared_error(y_true, y_pred)
    residuals = y_true - y_pred
    y_mean = y_true.mean()
    num = ((y_pred - y_true)**2).sum()
    den = ((abs(y_pred - y_mean) + abs(y_true - y_mean))**2).sum()
    return [
        r2_score(y_true, y_pred),
        mse,
        mean_absolute_error(y_true, y_pred),
        # sqrt of the MSE instead of mean_squared_error(..., squared=False):
        # the `squared` argument is deprecated/removed in recent scikit-learn.
        np.sqrt(mse),
        round(residuals.std(), 3),
        round(1 - (num / den), 3),
    ]


# Maps a model label to the 17 values
# [epochs, hidden neurons, hidden layers, activation, regularization factor,
#  acc/mse/mae/rmse/std_res/ia for train, acc/mse/mae/rmse/std_res/ia for test].
models = {}

# Same iteration order as nesting the loops epochs > neurons > layers > reg > activation.
grid = itertools.product(epochs_ls, nh_ls, hl_ls, reg_ls, activation_ls)

# The counter now advances with every configuration; before, it was never
# incremented, so all models shared the label 'model2-<r>' and overwrote each other.
for i, (e, nh, hl, r, a) in enumerate(grid, start=1):

    model = neuron_layers(10,nh,1,hl,a,r)

    model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])

    history = model.fit(X_train, Y_train, epochs=e, batch_size=50,  verbose=0, validation_split=0.2)

    # statistics for train
    y_hat = model.predict(X_train)
    (acc_train, mse_train, mae_train,
     rmse_train, std_res_train, ia_train) = _split_stats(Y_train, y_hat)

    # statistics for test
    y_hat = model.predict(X_test)
    (acc_test, mse_test, mae_test,
     rmse_test, std_res_test, ia_test) = _split_stats(Y_test, y_hat)

    models['model'+str(i)+'-'+str(r)] = [e, nh, hl, a, r,
                                         acc_train, mse_train, mae_train, rmse_train, std_res_train, ia_train,
                                         acc_test, mse_test, mae_test, rmse_test, std_res_test, ia_test]

    print ('\n*For model',str(i),'settings are:','-epochs:',str(e),'-hidden neurons:',str(nh),'-hidden layers:',str(hl),'-activation:',a,
           '-regularization cost:',r,
           '\nAccuracy for training is:', str(acc_train),'Accuracy for test is:',str(acc_test))

In [None]:
# One column per entry of `models` (keyed by model label); peek at the first row.
df_models = pd.DataFrame(data=models)
df_models.head(n=1)

In [None]:
# Names for the 17 values stored per model, in the order they were appended
# to each `models` entry. The metric names match the columns used by the
# filtering cells; the first five names are chosen here.
result_columns = ['epochs', 'hidden_neurons', 'hidden_layers', 'activation', 'reg',
                  'acc_train', 'mse_train', 'mae_train', 'rmse_train', 'std_res_train', 'ia_train',
                  'acc_test', 'mse_test', 'mae_test', 'rmse_test', 'std_res_test', 'ia_test']

# Build the table with one row per model directly from `models`. The previous
# set_index('Unnamed: 0') raised a KeyError because no such column exists in a
# frame built from `models`, and a plain transpose would leave every column
# with object dtype. Rebuilding from `models` also makes this cell re-runnable.
df_models = pd.DataFrame.from_dict(models, orient='index', columns=result_columns)
print(df_models.shape)
df_models.head(5)

In [None]:
# Persist the results table. The output directory is created first so that a
# missing folder does not discard the results of the (long) grid search.
models_csv = '../output/data/nn_models_v1.csv'
os.makedirs(os.path.dirname(models_csv), exist_ok=True)
df_models.to_csv(models_csv)

## Filtering best models

In [None]:
# Best model label per training metric. True means "higher is better".
# All six results are collected into one Series because a notebook cell only
# displays its last expression; the first five lookups used to be discarded.
train_goals = {
    'acc_train': True,
    'mse_train': False,
    'mae_train': False,
    'rmse_train': False,
    'std_res_train': False,
    'ia_train': True,
}

best_train = {}
for metric, maximize in train_goals.items():
    # to_numeric guards against columns stored with object dtype.
    scores = pd.to_numeric(df_models[metric])
    best_train[metric] = scores.idxmax() if maximize else scores.idxmin()

pd.Series(best_train, name='best_model')

In [None]:
# Best model label per test metric. True means "higher is better".
# All six results are collected into one Series because a notebook cell only
# displays its last expression; the first five lookups used to be discarded.
test_goals = {
    'acc_test': True,
    'mse_test': False,
    'mae_test': False,
    'rmse_test': False,
    'std_res_test': False,
    'ia_test': True,
}

best_test = {}
for metric, maximize in test_goals.items():
    # to_numeric guards against columns stored with object dtype.
    scores = pd.to_numeric(df_models[metric])
    best_test[metric] = scores.idxmax() if maximize else scores.idxmin()

pd.Series(best_test, name='best_model')