# Model prediction with Keras Neural Network

### Import libraries

In [1]:
import numpy as np
from numpy.random import seed
# Seed NumPy's global random number generator with a fixed value
seed(1)
from fs import methods as m
import geopandas as gpd
import pandas as pd
import tensorflow
# Seed TensorFlow's global random number generator with a fixed value
tensorflow.random.set_seed(1)
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.models import Sequential
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score



### Configuration
It is possible to set these parameters before running the model:

- **NUMBER_OF_COVARIATES**: The number *n* of features with the highest Borda Count score that are taken as input for the model;
- **TARGET**: It represents the target variable to be predicted by the model;

In [2]:
# Number of leading feature columns kept as inputs for training
NUMBER_OF_COVARIATES = 12

# Name of the target variable (column) chosen for prediction
TARGET = 'nh3_st'

### Results
The final results are stored in a DataFrame whose columns report the errors and the accuracy reached by the model when it is validated against:
- values interpolated from ARPA sensors (which are also used for training);
- values of the CAMS model;

### Data Import

In [3]:
# Train and evaluate one Keras regression network per (configuration, period)
# pair with 5-fold cross-validation, then export the averaged scores.
#
# For every fold two sets of scores are computed on the held-out samples:
#   * "sensor": network predictions vs. the TARGET values;
#   * "cams":   CAMS model values vs. the same TARGET values.

# Configurations of the model: the prefix selects the grid (0_1 / 0_01), the
# suffix selects whether rows with clim_zone <= 3 are removed ("nomountains")
params = ['0_1_mountains', '0_1_nomountains', '0_01_mountains', '0_01_nomountains']
# Periods (suffix of the geopackage file names)
geopackages = ['0324_0331_2021','0418_0425_2021', '0717_0724_2021','0903_0910_2021','1007_1014_2021']

# Column holding the CAMS model values; it is removed from the features and
# used only as a baseline for comparison
cams_column = TARGET[:-2] + 'cams'
metric_columns = ['MAE_sensor', 'MSE_sensor', 'R2_sensor', 'MAE_cams', 'MSE_cams', 'R2_cams']

# With these nested loops a training for each period and configuration is performed
for par in params:
    # Results of the current configuration: one row per period
    results = pd.DataFrame(columns=metric_columns)

    for grid in geopackages:
        # Data acquisition
        if par[0:3] == '0_1':
            data = gpd.read_file('assets/grids_0_1/grid_0_1_'+ grid+'.gpkg')
            data = m.process_data(data, 10, TARGET)
        else:
            data = gpd.read_file('assets/grids_0_01/grid_0_01_'+ grid+'.gpkg')
            data = m.process_data(data, 30, TARGET)

        if par.endswith('nomountains'):
            data = data[data['clim_zone'] > 3]
        data.pop('clim_zone')
        data = data[~data[TARGET].isnull()]
        data = data.dropna(axis=1).dropna(axis=0)
        labels = pd.read_csv('assets/fs_results/'+TARGET + par + '_features_model.csv')['Features'].tolist()

        # Store dataset in X and Y variables. Selecting `labels` creates an
        # all-NaN column for any label missing from `data`; dropna(axis=1)
        # removes those columns again.
        features = pd.DataFrame(data=data, columns=labels).dropna(axis=1)
        cams_model = features.pop(cams_column).to_numpy()
        X = features.iloc[:, :NUMBER_OF_COVARIATES].to_numpy()
        Y = pd.DataFrame(data=data, columns=[TARGET]).values.ravel()

        # Per-fold (MAE, MSE, R2) tuples
        sensor_scores = []
        cams_scores = []
        print('---------'+ grid +'---------')

        # K-Fold is applied
        skf = KFold(n_splits=5, shuffle=True)
        for fold, (train_index, test_index) in enumerate(skf.split(X), start=1):
            print("Iteration n°:  ", fold)

            # A fresh network is built for every fold
            model = Sequential()
            model.add(Dense(NUMBER_OF_COVARIATES, input_dim=NUMBER_OF_COVARIATES, kernel_initializer='normal', activation='relu'))
            model.add(Dense(2*NUMBER_OF_COVARIATES+1, activation='relu'))
            model.add(Dense(1, activation='linear'))
            model.compile(loss='mse', optimizer='adam', metrics=['mse','mae'])
            model.summary()

            X_train, X_val = X[train_index], X[test_index]
            y_train = np.reshape(Y[train_index], (-1, 1))
            y_val = np.reshape(Y[test_index], (-1, 1))
            cams_model_validation = cams_model[test_index]

            # The scalers are fitted on the training fold ONLY and then reused
            # for the validation fold. Re-fitting them on the validation data
            # would leak held-out information and, for the target, would
            # inverse-transform the predictions with a different min/max than
            # the one the network was trained against.
            scaler_x = MinMaxScaler().fit(X_train)
            scaler_y = MinMaxScaler().fit(y_train)
            xtrain_scale = scaler_x.transform(X_train)
            xval_scale = scaler_x.transform(X_val)
            ytrain_scale = scaler_y.transform(y_train)

            # Training
            model.fit(xtrain_scale, ytrain_scale, epochs=32, batch_size=1, verbose=0)

            # Validation with the held-out fold: predictions are mapped back
            # to the original target units before scoring
            predictions = scaler_y.inverse_transform(model.predict(xval_scale))
            sensor_scores.append((
                mean_absolute_error(y_val, predictions),
                mean_squared_error(y_val, predictions),
                r2_score(y_val, predictions),
            ))

            # Validation with CAMS model values
            cams_scores.append((
                mean_absolute_error(y_val, cams_model_validation),
                mean_squared_error(y_val, cams_model_validation),
                r2_score(y_val, cams_model_validation),
            ))

        # At the end of the K-Fold the results obtained are averaged
        avg_mae1, avg_mse1, avg_r21 = np.mean(sensor_scores, axis=0)
        avg_mae2, avg_mse2, avg_r22 = np.mean(cams_scores, axis=0)

        print('---------VALIDATION (ARPA)  ---------')
        print('Mean Absolute Error: ',avg_mae1)
        print('Mean Squared Error: ',avg_mse1)
        print('R2 score: ',avg_r21)
        print('---------VALIDATION (CAMS) ---------')
        print('Mean Absolute Error: ',avg_mae2)
        print('Mean Squared Error: ',avg_mse2)
        print('R2 score: ',avg_r22)

        # One row per period, labelled with the period name
        results.loc[grid] = [round(avg_mae1, 3), round(avg_mse1, 3), round(avg_r21, 3), round(avg_mae2, 3), round(avg_mse2, 3), round(avg_r22, 3)]

    # Results are exported (transposed: metrics as rows, periods as columns)
    new = results.T
    new.to_excel('assets/test/keras'+TARGET+par+'.xlsx')
    new.to_csv('assets/test/keras'+TARGET+par+'.csv')




---------0324_0331_2021---------
Iteration n°:   1


2022-08-31 16:49:44.034082: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-31 16:49:44.043380: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 12)                156       
_________________________________________________________________
dense_1 (Dense)              (None, 25)                325       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 26        
Total params: 507
Trainable params: 507
Non-trainable params: 0
_________________________________________________________________
Iteration n°:   2
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 12)                156       
_________________________________________________________________
dense_4 (Dense)              (None, 25)                325       
__________________