# Using Neural Networks

In [3]:
import tensorflow
import xgboost
import sklearn

print('The xgboost version is {}.'.format(xgboost.__version__))
print('The scikit-learn version is {}.'.format(sklearn.__version__))
print('The tensorflow version is {}.'.format(tensorflow.__version__))


# The nltk version is 3.0.0.
# The scikit-learn version is 0.15.2.

The xgboost version is 1.2.1.
The scikit-learn version is 0.23.2.
The tensorflow version is 2.3.1.


## Define evaluation matrices

In [4]:
%%time
#==============================================================================
# Defining the function to vaiidate the model with the test data and
# get the results from regression evaluation metrices in sklearn
#==============================================================================
pred = []
accuracy = []


def test_data_regression(y_pred, y_test,  model='NN', comment1= 'NA', comment2 = '', comment3 = ''):
    pred = []
    accuracy = []

    #==============================================================================
    # Compute performance
    #==============================================================================

    from sklearn.metrics import mean_absolute_error
    mae = mean_absolute_error(y_test, y_pred)
    print(mae, ' mean_absolute_error')
    accuracy.append(mae)

    from sklearn.metrics import mean_squared_error
    mse = mean_squared_error(y_test, y_pred, squared=True)
    print(mse, ' mean_squared_error')
    accuracy.append(mse)

    from sklearn.metrics import mean_squared_error
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(rmse, ' root_mean_squared_error')
    accuracy.append(rmse)

    from sklearn.metrics import r2_score
    r2 = r2_score(y_test, y_pred)
    print(r2, ' r2_score')
    accuracy.append(r2)

    # Print accuracy computed from predictions on the test set
    print(accuracy)

    #==============================================================================
    # Append Results
    #==============================================================================
    results = []
    import datetime
    datetime = datetime.datetime.now()
    results.append((model, 'MAE = {}'.format(mae), 'MSE = {}'.format(mse),
                    'RMSE = {}'.format(rmse), 'R2 = {}'.format(r2),
                    'List = {}'.format(accuracy), datetime, '', comment1, comment2, comment3))

    pd.DataFrame(np.asarray(results)).to_csv('results.csv',
                                             mode='a',
                                             header=None)
    pred.extend(y_pred)

Wall time: 0 ns


## Import data

In [5]:
import pandas as pd

ml_df = pd.read_csv("ml_df.csv")
ml_df

Unnamed: 0,blue_median,green_median,red_median,red_edge_median,ndvi_median,nir_median,mtci_values,evi_values,age,grain_yield
0,0.034152,0.085196,0.045748,0.174878,0.883373,0.734195,4.331436,0.982070,48,431.698672
1,0.036060,0.084995,0.045884,0.168578,0.876906,0.692295,4.268514,0.952200,48,409.089032
2,0.036943,0.089985,0.045336,0.180775,0.888153,0.759447,4.272559,1.017603,48,372.860721
3,0.030649,0.072090,0.036053,0.151221,0.906785,0.739433,5.107437,1.018870,48,528.219355
4,0.037586,0.085404,0.050601,0.167592,0.869756,0.713398,4.665330,0.954982,48,462.505958
...,...,...,...,...,...,...,...,...,...,...
9265,0.029813,0.115001,0.043041,0.235598,0.896805,0.778433,3.080369,1.906517,51,631.004782
9266,0.028875,0.116390,0.041759,0.235039,0.903283,0.792686,2.926750,1.949479,51,641.486148
9267,0.028778,0.119711,0.039235,0.255132,0.916507,0.884904,2.912663,2.181455,51,562.676888
9268,0.026632,0.106301,0.036410,0.221450,0.926392,0.858537,3.239740,2.120530,51,547.551879


In [6]:
def get_data():
    #get train data
    train_data_path ='ml_df.csv'
    data = pd.read_csv(train_data_path)
    
    return data

def get_input_target_data():
  #reading train data
  data = get_data()

  target_all = data.grain_yield
  data.drop(['grain_yield'],axis = 1 , inplace = True)


  return data, target_all

#Load train and test data into pandas DataFrames
data, target_all = get_input_target_data()


In [7]:
target_all.shape

(9270,)

## Test train split


In [8]:
%%time

from sklearn.model_selection import train_test_split

#==============================================================================
# Create separate train/test splits from Main data
#==============================================================================
train, test, target, val = train_test_split(data,
                                            target_all,
                                            test_size=0.3,
                                            shuffle = True,
                                            random_state=55)

Wall time: 2.99 ms


In [9]:
%%time
from sklearn.preprocessing import StandardScaler
#==============================================================================
# Scale features using StandardScaler class in scikit-learn
#==============================================================================

# Initialise standard scaler and compute mean and STD from training data
sc = StandardScaler()
sc.fit(train)

# Transform (standardise) both X_train and X_test with mean and STD from
# training data
train = sc.transform(train)
test = sc.transform(test)

Wall time: 6.98 ms


## Import libraries

In [10]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error 
from matplotlib import pyplot as plt
import seaborn as sb
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings 
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
from xgboost import XGBRegressor

## Make the Deep Neural Network

In [11]:
%%time
NN_model = Sequential()

Wall time: 22.7 ms


## The Input Layer

In [12]:
%%time
NN_model.add(Dense(128, kernel_initializer='normal',input_dim = train.shape[1], activation='relu'))

Wall time: 16 ms


## The Hidden Layers

In [13]:
%%time
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(256, kernel_initializer='normal',activation='relu'))

Wall time: 44.5 ms


## The Output Layer

In [14]:
%%time
NN_model.add(Dense(1, kernel_initializer='normal',activation='linear'))

Wall time: 5.19 ms


## Compile the network

In [15]:
%%time
NN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
NN_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               1280      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_4 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_5 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_6 (Dense)              (None, 256)               6

## Define a checkpoint callback :

In [16]:
%%time
checkpoint_name = 'nn_checkpoint/Weights-{epoch:03d}--{val_loss:.5f}.hdf5' 
checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 1, save_best_only = True, mode ='auto')
callbacks_list = [checkpoint]

Wall time: 0 ns


## Third : Train the model :

In [18]:
%%time
NN_model.fit(train, target, epochs=500, batch_size=32, validation_split = 0.2, callbacks=callbacks_list)

Epoch 1/500
Epoch 00001: val_loss improved from inf to 100.54611, saving model to nn_checkpoint\Weights-001--100.54611.hdf5
Epoch 2/500
Epoch 00002: val_loss improved from 100.54611 to 47.41439, saving model to nn_checkpoint\Weights-002--47.41439.hdf5
Epoch 3/500
Epoch 00003: val_loss did not improve from 47.41439
Epoch 4/500
Epoch 00004: val_loss improved from 47.41439 to 44.65670, saving model to nn_checkpoint\Weights-004--44.65670.hdf5
Epoch 5/500
Epoch 00005: val_loss did not improve from 44.65670
Epoch 6/500
Epoch 00006: val_loss did not improve from 44.65670
Epoch 7/500
Epoch 00007: val_loss did not improve from 44.65670
Epoch 8/500
Epoch 00008: val_loss improved from 44.65670 to 44.36031, saving model to nn_checkpoint\Weights-008--44.36031.hdf5
Epoch 9/500
Epoch 00009: val_loss improved from 44.36031 to 44.35781, saving model to nn_checkpoint\Weights-009--44.35781.hdf5
Epoch 10/500
Epoch 00010: val_loss improved from 44.35781 to 44.19086, saving model to nn_checkpoint\Weights-01

Epoch 00024: val_loss did not improve from 40.69478
Epoch 25/500
Epoch 00025: val_loss did not improve from 40.69478
Epoch 26/500
Epoch 00026: val_loss did not improve from 40.69478
Epoch 27/500
Epoch 00027: val_loss did not improve from 40.69478
Epoch 28/500
Epoch 00028: val_loss did not improve from 40.69478
Epoch 29/500
Epoch 00029: val_loss improved from 40.69478 to 39.00885, saving model to nn_checkpoint\Weights-029--39.00885.hdf5
Epoch 30/500
Epoch 00030: val_loss did not improve from 39.00885
Epoch 31/500
Epoch 00031: val_loss did not improve from 39.00885
Epoch 32/500
Epoch 00032: val_loss did not improve from 39.00885
Epoch 33/500
Epoch 00033: val_loss did not improve from 39.00885
Epoch 34/500
Epoch 00034: val_loss improved from 39.00885 to 37.86706, saving model to nn_checkpoint\Weights-034--37.86706.hdf5
Epoch 35/500
Epoch 00035: val_loss did not improve from 37.86706
Epoch 36/500
Epoch 00036: val_loss did not improve from 37.86706
Epoch 37/500
Epoch 00037: val_loss did not

Epoch 48/500
Epoch 00048: val_loss did not improve from 37.76737
Epoch 49/500
Epoch 00049: val_loss did not improve from 37.76737
Epoch 50/500
Epoch 00050: val_loss did not improve from 37.76737
Epoch 51/500
Epoch 00051: val_loss did not improve from 37.76737
Epoch 52/500
Epoch 00052: val_loss did not improve from 37.76737
Epoch 53/500
Epoch 00053: val_loss did not improve from 37.76737
Epoch 54/500
Epoch 00054: val_loss did not improve from 37.76737
Epoch 55/500
Epoch 00055: val_loss did not improve from 37.76737
Epoch 56/500
Epoch 00056: val_loss did not improve from 37.76737
Epoch 57/500
Epoch 00057: val_loss did not improve from 37.76737
Epoch 58/500
Epoch 00058: val_loss did not improve from 37.76737
Epoch 59/500
Epoch 00059: val_loss did not improve from 37.76737
Epoch 60/500
Epoch 00060: val_loss did not improve from 37.76737
Epoch 61/500
Epoch 00061: val_loss did not improve from 37.76737
Epoch 62/500
Epoch 00062: val_loss did not improve from 37.76737
Epoch 63/500
Epoch 00063:

Epoch 00073: val_loss did not improve from 37.76737
Epoch 74/500
Epoch 00074: val_loss did not improve from 37.76737
Epoch 75/500
Epoch 00075: val_loss did not improve from 37.76737
Epoch 76/500
Epoch 00076: val_loss did not improve from 37.76737
Epoch 77/500
Epoch 00077: val_loss improved from 37.76737 to 37.52567, saving model to nn_checkpoint\Weights-077--37.52567.hdf5
Epoch 78/500
Epoch 00078: val_loss did not improve from 37.52567
Epoch 79/500
Epoch 00079: val_loss did not improve from 37.52567
Epoch 80/500
Epoch 00080: val_loss did not improve from 37.52567
Epoch 81/500
Epoch 00081: val_loss did not improve from 37.52567
Epoch 82/500
Epoch 00082: val_loss did not improve from 37.52567
Epoch 83/500
Epoch 00083: val_loss did not improve from 37.52567
Epoch 84/500
Epoch 00084: val_loss did not improve from 37.52567
Epoch 85/500
Epoch 00085: val_loss did not improve from 37.52567
Epoch 86/500
Epoch 00086: val_loss did not improve from 37.52567
Epoch 87/500
Epoch 00087: val_loss did n

Epoch 99/500
Epoch 00099: val_loss did not improve from 37.52567
Epoch 100/500
Epoch 00100: val_loss did not improve from 37.52567
Epoch 101/500
Epoch 00101: val_loss did not improve from 37.52567
Epoch 102/500
Epoch 00102: val_loss did not improve from 37.52567
Epoch 103/500
Epoch 00103: val_loss did not improve from 37.52567
Epoch 104/500
Epoch 00104: val_loss did not improve from 37.52567
Epoch 105/500
Epoch 00105: val_loss did not improve from 37.52567
Epoch 106/500
Epoch 00106: val_loss did not improve from 37.52567
Epoch 107/500
Epoch 00107: val_loss did not improve from 37.52567
Epoch 108/500
Epoch 00108: val_loss did not improve from 37.52567
Epoch 109/500
Epoch 00109: val_loss improved from 37.52567 to 37.43390, saving model to nn_checkpoint\Weights-109--37.43390.hdf5
Epoch 110/500
Epoch 00110: val_loss did not improve from 37.43390
Epoch 111/500
Epoch 00111: val_loss did not improve from 37.43390
Epoch 112/500
Epoch 00112: val_loss did not improve from 37.43390
Epoch 113/500


Epoch 00124: val_loss did not improve from 37.41634
Epoch 125/500
Epoch 00125: val_loss did not improve from 37.41634
Epoch 126/500
Epoch 00126: val_loss did not improve from 37.41634
Epoch 127/500
Epoch 00127: val_loss did not improve from 37.41634
Epoch 128/500
Epoch 00128: val_loss did not improve from 37.41634
Epoch 129/500
Epoch 00129: val_loss did not improve from 37.41634
Epoch 130/500
Epoch 00130: val_loss did not improve from 37.41634
Epoch 131/500
Epoch 00131: val_loss did not improve from 37.41634
Epoch 132/500
Epoch 00132: val_loss did not improve from 37.41634
Epoch 133/500
Epoch 00133: val_loss did not improve from 37.41634
Epoch 134/500
Epoch 00134: val_loss did not improve from 37.41634
Epoch 135/500
Epoch 00135: val_loss did not improve from 37.41634
Epoch 136/500
Epoch 00136: val_loss did not improve from 37.41634
Epoch 137/500
Epoch 00137: val_loss did not improve from 37.41634
Epoch 138/500
Epoch 00138: val_loss did not improve from 37.41634
Epoch 139/500
Epoch 0013

Epoch 150/500
Epoch 00150: val_loss did not improve from 37.37964
Epoch 151/500
Epoch 00151: val_loss did not improve from 37.37964
Epoch 152/500
Epoch 00152: val_loss did not improve from 37.37964
Epoch 153/500
Epoch 00153: val_loss did not improve from 37.37964
Epoch 154/500
Epoch 00154: val_loss did not improve from 37.37964
Epoch 155/500
Epoch 00155: val_loss did not improve from 37.37964
Epoch 156/500
Epoch 00156: val_loss did not improve from 37.37964
Epoch 157/500
Epoch 00157: val_loss did not improve from 37.37964
Epoch 158/500
Epoch 00158: val_loss did not improve from 37.37964
Epoch 159/500
Epoch 00159: val_loss did not improve from 37.37964
Epoch 160/500
Epoch 00160: val_loss did not improve from 37.37964
Epoch 161/500
Epoch 00161: val_loss did not improve from 37.37964
Epoch 162/500
Epoch 00162: val_loss did not improve from 37.37964
Epoch 163/500
Epoch 00163: val_loss did not improve from 37.37964
Epoch 164/500
Epoch 00164: val_loss did not improve from 37.37964
Epoch 165/

Epoch 175/500
Epoch 00175: val_loss did not improve from 37.37964
Epoch 176/500
Epoch 00176: val_loss did not improve from 37.37964
Epoch 177/500
Epoch 00177: val_loss did not improve from 37.37964
Epoch 178/500
Epoch 00178: val_loss did not improve from 37.37964
Epoch 179/500
Epoch 00179: val_loss did not improve from 37.37964
Epoch 180/500
Epoch 00180: val_loss did not improve from 37.37964
Epoch 181/500
Epoch 00181: val_loss did not improve from 37.37964
Epoch 182/500
Epoch 00182: val_loss did not improve from 37.37964
Epoch 183/500
Epoch 00183: val_loss did not improve from 37.37964
Epoch 184/500
Epoch 00184: val_loss did not improve from 37.37964
Epoch 185/500
Epoch 00185: val_loss did not improve from 37.37964
Epoch 186/500
Epoch 00186: val_loss did not improve from 37.37964
Epoch 187/500
Epoch 00187: val_loss did not improve from 37.37964
Epoch 188/500
Epoch 00188: val_loss did not improve from 37.37964
Epoch 189/500
Epoch 00189: val_loss did not improve from 37.37964
Epoch 190/

Epoch 201/500
Epoch 00201: val_loss did not improve from 37.37964
Epoch 202/500
Epoch 00202: val_loss did not improve from 37.37964
Epoch 203/500
Epoch 00203: val_loss did not improve from 37.37964
Epoch 204/500
Epoch 00204: val_loss did not improve from 37.37964
Epoch 205/500
Epoch 00205: val_loss did not improve from 37.37964
Epoch 206/500
Epoch 00206: val_loss did not improve from 37.37964
Epoch 207/500
Epoch 00207: val_loss did not improve from 37.37964
Epoch 208/500
Epoch 00208: val_loss did not improve from 37.37964
Epoch 209/500
Epoch 00209: val_loss did not improve from 37.37964
Epoch 210/500
Epoch 00210: val_loss did not improve from 37.37964
Epoch 211/500
Epoch 00211: val_loss did not improve from 37.37964
Epoch 212/500
Epoch 00212: val_loss did not improve from 37.37964
Epoch 213/500
Epoch 00213: val_loss did not improve from 37.37964
Epoch 214/500
Epoch 00214: val_loss did not improve from 37.37964
Epoch 215/500
Epoch 00215: val_loss did not improve from 37.37964
Epoch 216/

Epoch 227/500
Epoch 00227: val_loss did not improve from 37.37964
Epoch 228/500
Epoch 00228: val_loss did not improve from 37.37964
Epoch 229/500
Epoch 00229: val_loss did not improve from 37.37964
Epoch 230/500
Epoch 00230: val_loss did not improve from 37.37964
Epoch 231/500
Epoch 00231: val_loss did not improve from 37.37964
Epoch 232/500
Epoch 00232: val_loss did not improve from 37.37964
Epoch 233/500
Epoch 00233: val_loss improved from 37.37964 to 37.36064, saving model to nn_checkpoint\Weights-233--37.36064.hdf5
Epoch 234/500
Epoch 00234: val_loss did not improve from 37.36064
Epoch 235/500
Epoch 00235: val_loss did not improve from 37.36064
Epoch 236/500
Epoch 00236: val_loss did not improve from 37.36064
Epoch 237/500
Epoch 00237: val_loss did not improve from 37.36064
Epoch 238/500
Epoch 00238: val_loss did not improve from 37.36064
Epoch 239/500
Epoch 00239: val_loss did not improve from 37.36064
Epoch 240/500
Epoch 00240: val_loss did not improve from 37.36064
Epoch 241/500

Epoch 00252: val_loss did not improve from 37.36064
Epoch 253/500
Epoch 00253: val_loss did not improve from 37.36064
Epoch 254/500
Epoch 00254: val_loss did not improve from 37.36064
Epoch 255/500
Epoch 00255: val_loss did not improve from 37.36064
Epoch 256/500
Epoch 00256: val_loss did not improve from 37.36064
Epoch 257/500
Epoch 00257: val_loss did not improve from 37.36064
Epoch 258/500
Epoch 00258: val_loss did not improve from 37.36064
Epoch 259/500
Epoch 00259: val_loss did not improve from 37.36064
Epoch 260/500
Epoch 00260: val_loss did not improve from 37.36064
Epoch 261/500
Epoch 00261: val_loss did not improve from 37.36064
Epoch 262/500
Epoch 00262: val_loss did not improve from 37.36064
Epoch 263/500
Epoch 00263: val_loss did not improve from 37.36064
Epoch 264/500
Epoch 00264: val_loss did not improve from 37.36064
Epoch 265/500
Epoch 00265: val_loss did not improve from 37.36064
Epoch 266/500
Epoch 00266: val_loss did not improve from 37.36064
Epoch 267/500
Epoch 0026

Epoch 278/500
Epoch 00278: val_loss did not improve from 37.36064
Epoch 279/500
Epoch 00279: val_loss did not improve from 37.36064
Epoch 280/500
Epoch 00280: val_loss did not improve from 37.36064
Epoch 281/500
Epoch 00281: val_loss did not improve from 37.36064
Epoch 282/500
Epoch 00282: val_loss did not improve from 37.36064
Epoch 283/500
Epoch 00283: val_loss did not improve from 37.36064
Epoch 284/500
Epoch 00284: val_loss did not improve from 37.36064
Epoch 285/500
Epoch 00285: val_loss did not improve from 37.36064
Epoch 286/500
Epoch 00286: val_loss did not improve from 37.36064
Epoch 287/500
Epoch 00287: val_loss did not improve from 37.36064
Epoch 288/500
Epoch 00288: val_loss did not improve from 37.36064
Epoch 289/500
Epoch 00289: val_loss did not improve from 37.36064
Epoch 290/500
Epoch 00290: val_loss did not improve from 37.36064
Epoch 291/500
Epoch 00291: val_loss did not improve from 37.36064
Epoch 292/500
Epoch 00292: val_loss did not improve from 37.36064
Epoch 293/

Epoch 304/500
Epoch 00304: val_loss did not improve from 37.36064
Epoch 305/500
Epoch 00305: val_loss did not improve from 37.36064
Epoch 306/500
Epoch 00306: val_loss did not improve from 37.36064
Epoch 307/500
Epoch 00307: val_loss did not improve from 37.36064
Epoch 308/500
Epoch 00308: val_loss did not improve from 37.36064
Epoch 309/500
Epoch 00309: val_loss did not improve from 37.36064
Epoch 310/500
Epoch 00310: val_loss did not improve from 37.36064
Epoch 311/500
Epoch 00311: val_loss did not improve from 37.36064
Epoch 312/500
Epoch 00312: val_loss did not improve from 37.36064
Epoch 313/500
Epoch 00313: val_loss did not improve from 37.36064
Epoch 314/500
Epoch 00314: val_loss did not improve from 37.36064
Epoch 315/500
Epoch 00315: val_loss did not improve from 37.36064
Epoch 316/500
Epoch 00316: val_loss did not improve from 37.36064
Epoch 317/500
Epoch 00317: val_loss did not improve from 37.36064
Epoch 318/500
Epoch 00318: val_loss did not improve from 37.36064
Epoch 319/

Epoch 330/500
Epoch 00330: val_loss did not improve from 37.36064
Epoch 331/500
Epoch 00331: val_loss did not improve from 37.36064
Epoch 332/500
Epoch 00332: val_loss did not improve from 37.36064
Epoch 333/500
Epoch 00333: val_loss did not improve from 37.36064
Epoch 334/500
Epoch 00334: val_loss did not improve from 37.36064
Epoch 335/500
Epoch 00335: val_loss did not improve from 37.36064
Epoch 336/500
Epoch 00336: val_loss did not improve from 37.36064
Epoch 337/500
Epoch 00337: val_loss did not improve from 37.36064
Epoch 338/500
Epoch 00338: val_loss did not improve from 37.36064
Epoch 339/500
Epoch 00339: val_loss did not improve from 37.36064
Epoch 340/500
Epoch 00340: val_loss did not improve from 37.36064
Epoch 341/500
Epoch 00341: val_loss did not improve from 37.36064
Epoch 342/500
Epoch 00342: val_loss did not improve from 37.36064
Epoch 343/500
Epoch 00343: val_loss did not improve from 37.36064
Epoch 344/500
Epoch 00344: val_loss did not improve from 37.36064
Epoch 345/

Epoch 356/500
Epoch 00356: val_loss did not improve from 37.36064
Epoch 357/500
Epoch 00357: val_loss did not improve from 37.36064
Epoch 358/500
Epoch 00358: val_loss did not improve from 37.36064
Epoch 359/500
Epoch 00359: val_loss did not improve from 37.36064
Epoch 360/500
Epoch 00360: val_loss did not improve from 37.36064
Epoch 361/500
Epoch 00361: val_loss did not improve from 37.36064
Epoch 362/500
Epoch 00362: val_loss did not improve from 37.36064
Epoch 363/500
Epoch 00363: val_loss did not improve from 37.36064
Epoch 364/500
Epoch 00364: val_loss did not improve from 37.36064
Epoch 365/500
Epoch 00365: val_loss did not improve from 37.36064
Epoch 366/500
Epoch 00366: val_loss did not improve from 37.36064
Epoch 367/500
Epoch 00367: val_loss did not improve from 37.36064
Epoch 368/500
Epoch 00368: val_loss did not improve from 37.36064
Epoch 369/500
Epoch 00369: val_loss did not improve from 37.36064
Epoch 370/500
Epoch 00370: val_loss did not improve from 37.36064
Epoch 371/

Epoch 382/500
Epoch 00382: val_loss did not improve from 37.36064
Epoch 383/500
Epoch 00383: val_loss did not improve from 37.36064
Epoch 384/500
Epoch 00384: val_loss did not improve from 37.36064
Epoch 385/500
Epoch 00385: val_loss did not improve from 37.36064
Epoch 386/500
Epoch 00386: val_loss did not improve from 37.36064
Epoch 387/500
Epoch 00387: val_loss did not improve from 37.36064
Epoch 388/500
Epoch 00388: val_loss did not improve from 37.36064
Epoch 389/500
Epoch 00389: val_loss did not improve from 37.36064
Epoch 390/500
Epoch 00390: val_loss did not improve from 37.36064
Epoch 391/500
Epoch 00391: val_loss did not improve from 37.36064
Epoch 392/500
Epoch 00392: val_loss did not improve from 37.36064
Epoch 393/500
Epoch 00393: val_loss did not improve from 37.36064
Epoch 394/500
Epoch 00394: val_loss did not improve from 37.36064
Epoch 395/500
Epoch 00395: val_loss did not improve from 37.36064
Epoch 396/500
Epoch 00396: val_loss did not improve from 37.36064
Epoch 397/

Epoch 408/500
Epoch 00408: val_loss did not improve from 37.36064
Epoch 409/500
Epoch 00409: val_loss did not improve from 37.36064
Epoch 410/500
Epoch 00410: val_loss did not improve from 37.36064
Epoch 411/500
Epoch 00411: val_loss did not improve from 37.36064
Epoch 412/500
Epoch 00412: val_loss did not improve from 37.36064
Epoch 413/500
Epoch 00413: val_loss did not improve from 37.36064
Epoch 414/500
Epoch 00414: val_loss did not improve from 37.36064
Epoch 415/500
Epoch 00415: val_loss did not improve from 37.36064
Epoch 416/500
Epoch 00416: val_loss did not improve from 37.36064
Epoch 417/500
Epoch 00417: val_loss did not improve from 37.36064
Epoch 418/500
Epoch 00418: val_loss did not improve from 37.36064
Epoch 419/500
Epoch 00419: val_loss did not improve from 37.36064
Epoch 420/500
Epoch 00420: val_loss did not improve from 37.36064
Epoch 421/500
Epoch 00421: val_loss did not improve from 37.36064
Epoch 422/500
Epoch 00422: val_loss did not improve from 37.36064
Epoch 423/

Epoch 434/500
Epoch 00434: val_loss did not improve from 37.36064
Epoch 435/500
Epoch 00435: val_loss did not improve from 37.36064
Epoch 436/500
Epoch 00436: val_loss did not improve from 37.36064
Epoch 437/500
Epoch 00437: val_loss did not improve from 37.36064
Epoch 438/500
Epoch 00438: val_loss did not improve from 37.36064
Epoch 439/500
Epoch 00439: val_loss did not improve from 37.36064
Epoch 440/500
Epoch 00440: val_loss did not improve from 37.36064
Epoch 441/500
Epoch 00441: val_loss did not improve from 37.36064
Epoch 442/500
Epoch 00442: val_loss did not improve from 37.36064
Epoch 443/500
Epoch 00443: val_loss did not improve from 37.36064
Epoch 444/500
Epoch 00444: val_loss did not improve from 37.36064
Epoch 445/500
Epoch 00445: val_loss did not improve from 37.36064
Epoch 446/500
Epoch 00446: val_loss did not improve from 37.36064
Epoch 447/500
Epoch 00447: val_loss did not improve from 37.36064
Epoch 448/500
Epoch 00448: val_loss did not improve from 37.36064
Epoch 449/

Epoch 460/500
Epoch 00460: val_loss did not improve from 37.36064
Epoch 461/500
Epoch 00461: val_loss did not improve from 37.36064
Epoch 462/500
Epoch 00462: val_loss did not improve from 37.36064
Epoch 463/500
Epoch 00463: val_loss did not improve from 37.36064
Epoch 464/500
Epoch 00464: val_loss did not improve from 37.36064
Epoch 465/500
Epoch 00465: val_loss did not improve from 37.36064
Epoch 466/500
Epoch 00466: val_loss did not improve from 37.36064
Epoch 467/500
Epoch 00467: val_loss did not improve from 37.36064
Epoch 468/500
Epoch 00468: val_loss did not improve from 37.36064
Epoch 469/500
Epoch 00469: val_loss did not improve from 37.36064
Epoch 470/500
Epoch 00470: val_loss did not improve from 37.36064
Epoch 471/500
Epoch 00471: val_loss did not improve from 37.36064
Epoch 472/500
Epoch 00472: val_loss did not improve from 37.36064
Epoch 473/500
Epoch 00473: val_loss did not improve from 37.36064
Epoch 474/500
Epoch 00474: val_loss did not improve from 37.36064
Epoch 475/

Epoch 486/500
Epoch 00486: val_loss did not improve from 37.36064
Epoch 487/500
Epoch 00487: val_loss did not improve from 37.36064
Epoch 488/500
Epoch 00488: val_loss did not improve from 37.36064
Epoch 489/500
Epoch 00489: val_loss did not improve from 37.36064
Epoch 490/500
Epoch 00490: val_loss did not improve from 37.36064
Epoch 491/500
Epoch 00491: val_loss did not improve from 37.36064
Epoch 492/500
Epoch 00492: val_loss did not improve from 37.36064
Epoch 493/500
Epoch 00493: val_loss did not improve from 37.36064
Epoch 494/500
Epoch 00494: val_loss did not improve from 37.36064
Epoch 495/500
Epoch 00495: val_loss did not improve from 37.36064
Epoch 496/500
Epoch 00496: val_loss did not improve from 37.36064
Epoch 497/500
Epoch 00497: val_loss did not improve from 37.36064
Epoch 498/500
Epoch 00498: val_loss did not improve from 37.36064
Epoch 499/500
Epoch 00499: val_loss did not improve from 37.36064
Epoch 500/500
Epoch 00500: val_loss did not improve from 37.36064
Wall time:

<tensorflow.python.keras.callbacks.History at 0x1d798a55c40>

In [19]:
%%time
# Load wights file of the best model :
wights_file = 'nn_checkpoint/Weights-233--37.36064.hdf5' # choose the best checkpoint 
NN_model.load_weights(wights_file) # load it
NN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])

Wall time: 25.7 ms


We see that the validation loss of the best model is 18738.19

 ## Fourth : Test the model

In [20]:
%%time
predictions = NN_model.predict(test)

Wall time: 232 ms


In [21]:
%%time
diff=[]
for i in range(len(predictions)):
    diff_temp = predictions[i] -val.values[i]
    diff.append(diff_temp)
#     print(diff_temp)
print(min(diff), max(diff))


std = np.std(diff)
print(std)

[-190.10223] [363.51886]
50.60558
Wall time: 20 ms


In [23]:
%%time
test_data_regression(predictions, val, '', 'Shuffled test train split', 'Standardized data')


38.270097912041734  mean_absolute_error
2570.249690178348  mean_squared_error
50.69763002526201  root_mean_squared_error
0.833658735255884  r2_score
[38.270097912041734, 2570.249690178348, 50.69763002526201, 0.833658735255884]
Wall time: 3.97 ms


In [52]:
%%time
test_data_regression(predictions, val, '', 'Shuffled test train split')


40.268938468954225  mean_absolute_error
2992.4329133723945  mean_squared_error
54.70313440171773  root_mean_squared_error
0.8063359068286691  r2_score
[40.268938468954225, 2992.4329133723945, 54.70313440171773, 0.8063359068286691]
Wall time: 6.39 ms


Not bad at all, with some more preprocessing, and more training, we can do better.

# Using sklearn regressors

In [24]:
%%time
from sklearn.model_selection import train_test_split

# train_X, val_X, train_y, val_y = train_test_split(data, target_all, test_size = 0.25, shuffle= True,random_state = 14)


train_X = train
val_X = test
train_y = target
val_y = val

Wall time: 0 ns


In [25]:
%%time

from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor()
model.fit(train_X,train_y)

Wall time: 2.85 s


RandomForestRegressor()

In [26]:
%%time
from sklearn.metrics import mean_absolute_error

predicted_prices = model.predict(val_X)
MAE = mean_absolute_error(val_y , predicted_prices)
print('Random forest validation MAE = ', MAE)

Random forest validation MAE =  38.536555504345785
Wall time: 54.3 ms


In [29]:
%%time
test_data_regression(predicted_prices, val_y, model, 'Random Forest', 'age included', 'StandardScaler, shuffle')


38.536555504345785  mean_absolute_error
2744.0147602402244  mean_squared_error
52.383344301793336  root_mean_squared_error
0.8224130179106415  r2_score
[38.536555504345785, 2744.0147602402244, 52.383344301793336, 0.8224130179106415]
Wall time: 4.99 ms


In [16]:
%%time
test_data_regression(predicted_prices, val_y, model, 'Random Forest', 'age included', 'shuffle')


37.031414405305114  mean_absolute_error
2454.5334610906866  mean_squared_error
49.543248390579784  root_mean_squared_error
0.8387331101268987  r2_score
[37.031414405305114, 2454.5334610906866, 49.543248390579784, 0.8387331101268987]
Wall time: 7.38 ms


In [19]:
# import sklearn
# sorted(sklearn.metrics.SCORERS.keys())

## Fine tuning using grid search

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV


# Define the pipeline
pipe_line = make_pipeline(RandomForestRegressor())

# Define ranges of parameter values:
param_range  = list(range(1,200))                   # For n_components
param_range2 = list(range(10, 100, 10))          # For max_iter
param_range3 = list(range(1, 25,3))                   # For max_depth
param_range4 = [x/10 for x in list(range(0, 10))]   # For learning_rate
param_range5  = list(range(5,20))                   # For n_components


# estimator.get_params().keys()
# pipe_line.get_params().keys()

gs = GridSearchCV(estimator=pipe_line, 
                  param_grid=[{'randomforestregressor__n_estimators': param_range2,
                              'randomforestregressor__max_depth': param_range3,
                               'randomforestregressor__min_samples_split': param_range3}], 
                  scoring='neg_root_mean_squared_error', 
                  cv=3,
                  n_jobs=-1)

gs = gs.fit(X, y)
print(gs.best_score_)
print(gs.best_params_)


# Inspect AUC of parameter grid combinations
for r, _ in enumerate(grid.cv_results_['mean_test_score']):
    print("%0.3f +/- %0.2f %r"
          % (grid.cv_results_['mean_test_score'][r], 
             grid.cv_results_['std_test_score'][r] / 2.0, 
             grid.cv_results_['params'][r]))

# Using XGBOOST

In [30]:
%%time
from xgboost import XGBRegressor
XGBModel = XGBRegressor()
XGBModel.fit(train_X,train_y , verbose=False)

Wall time: 339 ms


XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=0, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [31]:
%%time
XGBpredictions = XGBModel.predict(val_X)
MAE = mean_absolute_error(val_y , XGBpredictions)
print('XGBoost validation MAE = ',MAE)

XGBoost validation MAE =  40.40873643636777
Wall time: 8.35 ms


In [32]:
%%time

test_data_regression(XGBpredictions, val_y, XGBModel, 'XGBpredictions', 'age included', 'StandardScaler shuffle')


40.40873643636777  mean_absolute_error
3018.9793536110706  mean_squared_error
54.94523959007796  root_mean_squared_error
0.8046178759071483  r2_score
[40.40873643636777, 3018.9793536110706, 54.94523959007796, 0.8046178759071483]
Wall time: 5.98 ms


In [25]:
%%time

test_data_regression(XGBpredictions, val_y, XGBModel, 'XGBpredictions', 'age included')


38.84113664369768  mean_absolute_error
2717.9945151945826  mean_squared_error
52.13438898840747  root_mean_squared_error
0.8214232850739762  r2_score
[38.84113664369768, 2717.9945151945826, 52.13438898840747, 0.8214232850739762]
Wall time: 11.3 ms


# Using Catboost

In [85]:
%%time
from catboost import CatBoostRegressor
CatBoostModel = CatBoostRegressor()


CatBoostModel.fit(train_X,train_y , verbose=False)

Wall time: 4.9 s


<catboost.core.CatBoostRegressor at 0x1d829999340>

In [86]:
%%time
CatBoostpredictions = CatBoostModel.predict(val_X)
MAE = mean_absolute_error(val_y , CatBoostpredictions)
print('XGBoost validation MAE = ',MAE)

XGBoost validation MAE =  37.273405610893555
Wall time: 6.95 ms


In [87]:
%%time

test_data_regression(CatBoostpredictions, val_y, CatBoostModel, 'CatBoostpredictions', 'age included')


37.273405610893555  mean_absolute_error
2443.5808884591224  mean_squared_error
49.432589335974725  root_mean_squared_error
0.8394527121826054  r2_score
[37.273405610893555, 2443.5808884591224, 49.432589335974725, 0.8394527121826054]
Wall time: 7.33 ms


In [82]:
from sklearn.model_selection import GridSearchCV

    
parameters = {'depth': [4,5,6,7,8,9, 10],
              'learning_rate' : [0.01,0.02,0.03],
              'iterations': [10, 20,30,40,50,60,70,80,90, 100]}


In [83]:
CatBoostModel = CatBoostRegressor()

Grid_CBR = GridSearchCV(estimator=CatBoostModel, scoring = 'r2', param_grid = parameters, cv = 2, n_jobs=-1)
Grid_CBR.fit(data, target_all)

0:	learn: 121.0700720	total: 3.04ms	remaining: 240ms
1:	learn: 119.1324819	total: 5.42ms	remaining: 211ms
2:	learn: 117.1401434	total: 7.96ms	remaining: 204ms
3:	learn: 115.3778416	total: 10.3ms	remaining: 196ms
4:	learn: 113.6808977	total: 12.5ms	remaining: 188ms
5:	learn: 112.0952054	total: 15ms	remaining: 185ms
6:	learn: 110.3419524	total: 17.4ms	remaining: 182ms
7:	learn: 108.7709126	total: 19.9ms	remaining: 179ms
8:	learn: 107.1308329	total: 22.5ms	remaining: 178ms
9:	learn: 105.5486467	total: 25ms	remaining: 175ms
10:	learn: 104.0190206	total: 29.1ms	remaining: 183ms
11:	learn: 102.6462254	total: 31.8ms	remaining: 180ms
12:	learn: 101.3956343	total: 34.6ms	remaining: 178ms
13:	learn: 100.2519722	total: 37.3ms	remaining: 176ms
14:	learn: 99.0404708	total: 40ms	remaining: 173ms
15:	learn: 97.7398832	total: 42.7ms	remaining: 171ms
16:	learn: 96.5827274	total: 45.2ms	remaining: 168ms
17:	learn: 95.4439522	total: 47.8ms	remaining: 165ms
18:	learn: 94.3977064	total: 50.2ms	remaining: 1

GridSearchCV(cv=2,
             estimator=<catboost.core.CatBoostRegressor object at 0x000001D8299CE5E0>,
             n_jobs=-1,
             param_grid={'depth': [4, 5, 6, 7, 8, 9, 10],
                         'iterations': [10, 20, 30, 40, 50, 60, 70, 80, 90,
                                        100],
                         'learning_rate': [0.01, 0.02, 0.03]},
             scoring='r2')

In [84]:
print(" Results from Grid Search " )
print("\n The best estimator across ALL searched params:\n",Grid_CBR.best_estimator_)
print("\n The best score across ALL searched params:\n",Grid_CBR.best_score_)
print("\n The best parameters across ALL searched params:\n",Grid_CBR.best_params_)


 Results from Grid Search 

 The best estimator across ALL searched params:
 <catboost.core.CatBoostRegressor object at 0x000001D829999310>

 The best score across ALL searched params:
 -1.0738438391806138

 The best parameters across ALL searched params:
 {'depth': 4, 'iterations': 80, 'learning_rate': 0.03}


In [None]:
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import time

pipe_svc = PLSRegression()

param_grid = {'max_iter':[1,2, 3, 4, 5, 6, 7, 8, 9],
              'scale' : [True]
             }

gs = GridSearchCV(estimator  = pipe_svc,
                  param_grid = param_grid,
                  scoring    = 'r2',
                  cv         = 3,
                  n_jobs=-1)

gs.fit(data,target_all)
print(gs.best_score_)
print(gs.best_params_)

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import time

pipe_svc = SVR()

param_grid = {'C':[1,10,100,1000],'gamma':[1,0.1,0.001,0.0001], 'kernel':['linear','rbf']}

gs = GridSearchCV(estimator  = pipe_svc,
                  param_grid = param_grid,
                  scoring    = 'r2',
                  cv         = 3,
                  n_jobs=-1)

gs.fit(data,target_all)
print(gs.best_score_)
print(gs.best_params_)