# Import libraries

In [1]:
import numpy as np
import pandas as pd
from statistics import mean, stdev
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm

# Data preprocessing

### Read data from file

In [2]:
#Read the file
file_name = 'Harvesting data'
data = pd.read_excel(file_name + '.xlsx', header=0)

#Print number of rows and colums read
print("{0} rows and {1} columns".format(len(data.index), len(data.columns)))
print("")

152 rows and 7 columns



### Initialization

In [3]:
#Defining X and Y
X = data.drop(columns = ['Water_volume'], axis = 1)
Y = data.Water_volume

#Using Built in train test split function in sklearn
bins = np.linspace(Y.min(), Y.max() + 0.1, 5)
y_binned = np.digitize(Y, bins)


params = {'activation' : ('logistic', 'tanh', 'relu'),
          'hidden_layer_sizes' : [6, 9, 12],
          'solver' : ['lbfgs'],
          'max_iter' : [2000]}

mlp = MLPRegressor()
    
gs = GridSearchCV(estimator = mlp, param_grid = params, scoring = 'r2', cv = 5)

# Grid-search optimization

In [4]:
for i in tqdm(range(50)):
    data_train, data_test = train_test_split(data, test_size = 0.2,
                                                stratify = y_binned, random_state = i)
    
    #Hacking a scaling but keeping columns names since min_max_scaler does not return a dataframe
    minval = data_train.min()
    minmax = data_train.max() - data_train.min()
    data_train_scaled = (data_train - minval) / minmax
    data_test_scaled = (data_test - minval) / minmax
    
    #Define X and Y
    X_train = data_train_scaled.drop(columns = ['Water_volume'], axis=1)
    Y_train = data_train_scaled.Water_volume
    X_test = data_test_scaled.drop(columns = ['Water_volume'], axis=1)
    Y_test = data_test_scaled.Water_volume
    
    
    # fitting the model for grid search
    grid_result = gs.fit(X_train, Y_train)
    
    print('Best score = {:.4f} using {}'.format(gs.best_score_,
                                            gs.best_params_))

  2%|▏         | 1/50 [00:02<01:41,  2.07s/it]

Best score = 0.8714 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


  4%|▍         | 2/50 [00:04<01:54,  2.39s/it]

Best score = 0.8473 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


  6%|▌         | 3/50 [00:06<01:44,  2.23s/it]

Best score = 0.8875 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


  8%|▊         | 4/50 [00:08<01:43,  2.24s/it]

Best score = 0.8717 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 10%|█         | 5/50 [00:11<01:38,  2.19s/it]

Best score = 0.8890 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 12%|█▏        | 6/50 [00:13<01:35,  2.17s/it]

Best score = 0.8779 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 14%|█▍        | 7/50 [00:15<01:35,  2.22s/it]

Best score = 0.8546 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 16%|█▌        | 8/50 [00:17<01:36,  2.29s/it]

Best score = 0.8246 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 18%|█▊        | 9/50 [00:20<01:41,  2.47s/it]

Best score = 0.8196 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 20%|██        | 10/50 [00:23<01:37,  2.44s/it]

Best score = 0.8925 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 22%|██▏       | 11/50 [00:25<01:32,  2.37s/it]

Best score = 0.8759 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 24%|██▍       | 12/50 [00:27<01:31,  2.40s/it]

Best score = 0.8646 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 26%|██▌       | 13/50 [00:30<01:36,  2.60s/it]

Best score = 0.8562 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 28%|██▊       | 14/50 [00:33<01:31,  2.55s/it]

Best score = 0.8683 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 30%|███       | 15/50 [00:35<01:24,  2.43s/it]

Best score = 0.8567 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 32%|███▏      | 16/50 [00:37<01:17,  2.29s/it]

Best score = 0.8603 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 34%|███▍      | 17/50 [00:39<01:15,  2.30s/it]

Best score = 0.8779 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 36%|███▌      | 18/50 [00:41<01:08,  2.14s/it]

Best score = 0.8806 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 38%|███▊      | 19/50 [00:43<01:06,  2.16s/it]

Best score = 0.8833 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 40%|████      | 20/50 [00:46<01:10,  2.35s/it]

Best score = 0.8405 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 42%|████▏     | 21/50 [00:49<01:10,  2.43s/it]

Best score = 0.8571 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 44%|████▍     | 22/50 [00:51<01:04,  2.29s/it]

Best score = 0.8628 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 46%|████▌     | 23/50 [00:53<01:01,  2.27s/it]

Best score = 0.8697 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 48%|████▊     | 24/50 [00:55<01:01,  2.35s/it]

Best score = 0.8651 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 50%|█████     | 25/50 [00:58<00:57,  2.32s/it]

Best score = 0.8481 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 52%|█████▏    | 26/50 [01:01<00:59,  2.49s/it]

Best score = 0.8553 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 54%|█████▍    | 27/50 [01:03<01:00,  2.61s/it]

Best score = 0.8497 using {'activation': 'relu', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 56%|█████▌    | 28/50 [01:06<00:55,  2.54s/it]

Best score = 0.8503 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 58%|█████▊    | 29/50 [01:08<00:53,  2.57s/it]

Best score = 0.8719 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 60%|██████    | 30/50 [01:11<00:49,  2.47s/it]

Best score = 0.8457 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 62%|██████▏   | 31/50 [01:13<00:47,  2.51s/it]

Best score = 0.8854 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 64%|██████▍   | 32/50 [01:16<00:45,  2.50s/it]

Best score = 0.8246 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 66%|██████▌   | 33/50 [01:18<00:41,  2.46s/it]

Best score = 0.8629 using {'activation': 'relu', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 68%|██████▊   | 34/50 [01:20<00:38,  2.39s/it]

Best score = 0.8247 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 70%|███████   | 35/50 [01:23<00:34,  2.33s/it]

Best score = 0.8569 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 72%|███████▏  | 36/50 [01:25<00:33,  2.40s/it]

Best score = 0.8593 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 74%|███████▍  | 37/50 [01:27<00:30,  2.33s/it]

Best score = 0.8882 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 76%|███████▌  | 38/50 [01:30<00:28,  2.36s/it]

Best score = 0.8656 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 78%|███████▊  | 39/50 [01:32<00:26,  2.41s/it]

Best score = 0.8359 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 80%|████████  | 40/50 [01:35<00:23,  2.36s/it]

Best score = 0.8824 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 82%|████████▏ | 41/50 [01:37<00:20,  2.31s/it]

Best score = 0.8591 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 84%|████████▍ | 42/50 [01:39<00:18,  2.35s/it]

Best score = 0.8493 using {'activation': 'relu', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 86%|████████▌ | 43/50 [01:42<00:16,  2.37s/it]

Best score = 0.8639 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 88%|████████▊ | 44/50 [01:44<00:13,  2.31s/it]

Best score = 0.8567 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 90%|█████████ | 45/50 [01:46<00:11,  2.25s/it]

Best score = 0.8677 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 92%|█████████▏| 46/50 [01:48<00:09,  2.31s/it]

Best score = 0.8258 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 94%|█████████▍| 47/50 [01:51<00:07,  2.36s/it]

Best score = 0.8663 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 96%|█████████▌| 48/50 [01:53<00:04,  2.38s/it]

Best score = 0.8399 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 98%|█████████▊| 49/50 [01:55<00:02,  2.24s/it]

Best score = 0.8620 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


100%|██████████| 50/50 [01:58<00:00,  2.37s/it]

Best score = 0.8810 using {'activation': 'relu', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}



