In [3]:

import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from keras import backend as K
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt

def generate_report(y_actual, y_pred):
    """Print and return regression error metrics for one set of predictions.

    Parameters
    ----------
    y_actual : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values for the same samples, in the same order. May be 1-D
        or a single-column 2-D array; both inputs are flattened before the
        mean residual is computed.

    Returns
    -------
    tuple
        ``(mse, rmse, r2, error)``. ``mse``, ``rmse`` and ``r2`` are rounded
        to three decimals; ``error`` is the unrounded mean signed residual
        ``mean(y_actual - y_pred)``.
    """
    mse_raw = mean_squared_error(y_actual, y_pred)
    mse = round(mse_raw, 3)
    rmse = round(sqrt(mse_raw), 3)
    r2 = round(r2_score(y_actual, y_pred), 3)
    # The residual must be taken against the y_actual argument. The previous
    # version read the notebook-level y_train here, so the "test" report
    # compared test predictions with training targets.
    # Flattening both sides keeps the subtraction element-wise even when
    # y_pred is an (n, 1) column and y_actual is a flat (n,) vector.
    error = np.mean(np.ravel(y_actual) - np.ravel(y_pred))
    print('mse',mse)
    print('RMSE', rmse)
    print('R2', r2)
    print('error', error)
    return mse,rmse,r2,error

def generate_loss_plot(history, filename=None):
    """Plot the per-epoch training and validation loss curves.

    Parameters
    ----------
    history : object
        Object exposing a ``history`` mapping with ``'loss'`` and
        ``'val_loss'`` entries (as returned by ``model.fit`` in this file).
    filename : str or None, optional
        When given, the figure is also saved to this path before it is shown.
    """
    # Training curve first, then validation, so the legend order matches.
    for curve_key in ('loss', 'val_loss'):
        plt.plot(history.history[curve_key])

    plt.title('loss curve')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'test'], loc='upper left')

    if filename is not None:
        plt.savefig(filename)
    plt.show()

def generate_hist_plot(y_actual, y_pred, filename=None, value_range=(-75000, 75000)):
    """Plot a histogram of the residuals ``y_actual - y_pred``.

    Parameters
    ----------
    y_actual : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values for the same samples, in the same order. May be 1-D
        or a single-column 2-D array.
    filename : str or None, optional
        When given, the figure is also saved to this path before it is shown.
    value_range : tuple of (float, float), optional
        Lower and upper residual bounds passed to ``plt.hist`` as ``range``.
        Defaults to the bounds that were previously hard-coded.
    """
    # Pair the two inputs by position. This replaces the earlier DataFrame
    # round-trip (copying the index across, then looking up column 0), which
    # failed for inputs whose column was not literally named 0 and built an
    # unused concatenated frame along the way.
    residuals = np.ravel(y_actual) - np.ravel(y_pred)
    plt.hist(residuals, bins='auto', range=value_range)
    if filename is not None:
        plt.savefig(filename)
    plt.show()


def get_data(path='pluto6_fullstd.csv', target='assesstot', drop_columns=('assessland',),
             test_size=0.2, random_state=0):
    """Load the dataset and split it into train and test arrays.

    Parameters
    ----------
    path : str, optional
        CSV file to read.
    target : str, optional
        Name of the column used as the regression target.
    drop_columns : iterable of str, optional
        Columns removed from the data before the features are selected.
    test_size : float, optional
        Fraction of rows held out for the test split.
    random_state : int, optional
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, predictors)`` where the first
        four are NumPy arrays and ``predictors`` is the Index of feature
        column names, in the same order as the columns of ``x_train``.
    """
    df = pd.read_csv(path)
    # Build the feature frame with a single non-mutating drop. The previous
    # version called drop(inplace=True) on a frame derived from df, which is
    # the chained-mutation pattern pandas warns about.
    features = df.drop(list(drop_columns) + [target], axis=1)
    predictors = features.columns
    x_train, x_test, y_train, y_test = train_test_split(
        features.values, df[target].values,
        test_size=test_size, random_state=random_state)
    return x_train, x_test, y_train, y_test, predictors

# 3) Adam combines the good properties of Adadelta and RMSprop and hence tends to do better on most problems.
def fit_model(model, x_train, x_test, y_train, y_test, optimizer, epochs):
    """Compile and train ``model``, plot its loss curves, and return it.

    The model is compiled with MSE as both the loss and the tracked metric,
    then fitted silently (``verbose=0``) with ``(x_test, y_test)`` supplied as
    the validation data. The resulting history is passed to
    ``generate_loss_plot`` without a filename, so the plot is shown but not
    saved.

    Returns
    -------
    The same ``model`` object that was passed in, after fitting.
    """
    model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])
    training_history = model.fit(
        x_train,
        y_train,
        validation_data=(x_test, y_test),
        epochs=epochs,
        verbose=0,
    )
    generate_loss_plot(training_history)
    return model

def plot_comparation(y_test, y_test_pred, filename=None):
    """Overlay the real and predicted target values on one line plot.

    Parameters
    ----------
    y_test : array-like
        Real target values, drawn in blue.
    y_test_pred : array-like
        Predicted values, drawn in red.
    filename : str or None, optional
        When given, the figure is also saved to this path before it is shown.
        Defaults to ``None`` so the signature matches the other plot helpers
        in this file.
    """
    fig, ax = plt.subplots()
    ax.plot(y_test, color = 'blue')
    ax.plot(y_test_pred, color = 'red')
    ax.legend(['Real', 'Predicted'])
    if filename is not None:
        fig.savefig(filename)
    plt.show()

def predict(model, x_train, y_train, x_test, y_test, filename=None):
    """Predict on both splits, then print metrics and show residual histograms.

    Both error reports are printed first (training, then test), followed by
    both histograms (training, then test). Only the test-split metrics are
    returned. ``filename`` is accepted but not used by this function.

    Returns
    -------
    tuple
        ``(y_train_pred, y_test_pred, mse, rmse, r2, error)`` where the four
        metrics come from the test-split report.
    """
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)

    # (report heading, histogram heading, actual values, predictions)
    splits = (
        ('ERROR Training', 'Histogram Training', y_train, y_train_pred),
        ('ERROR Test', 'Histogram Test', y_test, y_test_pred),
    )

    reports = []
    for report_heading, _, actual, predicted in splits:
        print(report_heading)
        reports.append(generate_report(actual, predicted))

    for _, hist_heading, actual, predicted in splits:
        print(hist_heading)
        generate_hist_plot(actual, predicted)

    # The last report belongs to the test split.
    mse, rmse, r2, error = reports[-1]
    return y_train_pred, y_test_pred, mse, rmse, r2, error
    
def run_model(hidden_nodes, x_train, x_test, y_train, y_test, optimizer, epochs):
    """Build, train and evaluate a three-layer dense regression network.

    The network is: a linear Dense layer with one unit per input feature, a
    ReLU Dense layer with ``hidden_nodes`` units, and a single linear output
    unit.

    Parameters
    ----------
    hidden_nodes : int
        Number of units in the hidden ReLU layer.
    x_train, x_test : 2-D array-like
        Feature matrices; the number of columns sets the first layer's width.
    y_train, y_test : array-like
        Target values for the corresponding feature matrices.
    optimizer : str or optimizer instance
        Forwarded to ``model.compile`` through ``fit_model``.
    epochs : int
        Number of training epochs.

    Returns
    -------
    tuple
        ``(y_train_pred, y_test_pred, mse, rmse, r2, error)`` as returned by
        ``predict``.
    """
    # Take the first layer's width from the data rather than from a
    # notebook-level `input_nodes` variable, so this function no longer
    # depends on a later cell having been run first.
    n_features = np.shape(x_train)[1]
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(n_features, tf.keras.activations.linear))
    # int() guards against a non-builtin integer type (for example a value
    # pulled out of a DataFrame row) reaching the layer constructor.
    model.add(tf.keras.layers.Dense(int(hidden_nodes), tf.keras.activations.relu))
    model.add(tf.keras.layers.Dense(1, tf.keras.activations.linear))
    model = fit_model(model, x_train, x_test, y_train, y_test, optimizer, epochs)
    y_train_pred, y_test_pred, mse,rmse,r2,error = predict(model, x_train, y_train, x_test, y_test, filename=None)
    plot_comparation(y_test, y_test_pred, filename=None)
    return y_train_pred, y_test_pred, mse,rmse,r2,error



In [4]:
# Load the data once and derive the sizes that the model grid is built from.
x_train, x_test, y_train, y_test, predictors = get_data()
input_nodes = len(predictors)
epochs = 20

# Candidate hidden-layer widths: one based on the feature count, then four
# based on the training-set size divided by 30*k for k = 2, 4, 6, 8.
n_train_rows = len(x_train)
hidden_nodes = [int((input_nodes+1)*(2/3))] + [int(n_train_rows/(30*k)) for k in (2, 4, 6, 8)]
optimizers = ['rmsprop', 'adam', 'sgd','adagrad','adadelta']

# Full grid: every width paired with every optimizer, widths varying slowest.
h = [width for width in hidden_nodes for _ in optimizers]
o = [name for _ in hidden_nodes for name in optimizers]

df_models = pd.DataFrame()
df_models['hidden_nodes'] = h
df_models['optimizer']  = o
df_models

Unnamed: 0,hidden_nodes,optimizer
0,739,rmsprop
1,739,adam
2,739,sgd
3,739,adagrad
4,739,adadelta
5,10357,rmsprop
6,10357,adam
7,10357,sgd
8,10357,adagrad
9,10357,adadelta


In [None]:
# Train every (hidden_nodes, optimizer) configuration in df_models and collect
# the test-split metrics, one entry per row, in row order.
mse_list = []
rmse_list = []
r2_list = []
error_list = []
for index, row in df_models.iterrows():
    print('Model:', index, 'hidden_nodes:', row['hidden_nodes'], 'optimizer:', row['optimizer'])
    try:
        y_train_pred, y_test_pred, mse,rmse,r2,error = run_model(row['hidden_nodes'], x_train, x_test, y_train, y_test, row['optimizer'], epochs)
    except Exception as ex:
        # Best-effort sweep: one failing configuration must not stop the rest.
        # NaN placeholders keep the metric lists aligned with df_models.
        mse = rmse = r2 = error = float('nan')
        # Print the exception itself; previously the cause was discarded and
        # a failed run could not be diagnosed from the output.
        print('Error while computing model: ', index, repr(ex))
    mse_list.append(mse)
    rmse_list.append(rmse)
    r2_list.append(r2)
    error_list.append(error)

df_models['mse'] = mse_list
df_models['rmse'] = rmse_list
df_models['r2'] = r2_list
df_models['error'] = error_list
df_models

Model: 0 hidden_nodes: 739 optimizer: rmsprop


In [None]:
# Report the best configuration under each metric: highest r2 and lowest rmse.
# Rows whose metric is NaN (runs that failed) are ignored. If every run failed
# there is nothing to select, so say so instead of raising.
if df_models['r2'].notna().any():
    best_r2_idx = df_models['r2'].idxmax(axis=0, skipna=True)
    # Label fixed: this is the row with the maximum r2, not the minimum.
    print('Max r2:', best_r2_idx)
    print(df_models.loc[best_r2_idx])
else:
    best_r2_idx = None
    print('Max r2: no model produced a valid r2')

if df_models['rmse'].notna().any():
    best_rmse_idx = df_models['rmse'].idxmin(axis=0, skipna=True)
    print('Min rmse:', best_rmse_idx)
    print(df_models.loc[best_rmse_idx])
else:
    best_rmse_idx = None
    print('Min rmse: no model produced a valid rmse')

# Original variable names kept as aliases in case later cells reference them.
min_r2 = best_r2_idx
max_rmse = best_rmse_idx