# Import libraries

In [1]:
import numpy as np
import pandas as pd
from statistics import mean, stdev
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm
from collections import defaultdict

# Data preprocessing

### Read data from file

In [2]:
#Read the file
file_name = 'Harvesting data'
data = pd.read_excel(file_name + '.xlsx', header=0)

#Print number of rows and colums read
print("{0} rows and {1} columns".format(len(data.index), len(data.columns)))
print("")

152 rows and 7 columns



### Initialization

In [3]:
#Defining X and Y
X = data.drop(columns = ['Water_volume'], axis = 1)
Y = data.Water_volume

#Using Built in train test split function in sklearn
bins = np.linspace(Y.min(), Y.max() + 0.1, 5)
y_binned = np.digitize(Y, bins)


params = {'activation' : ('logistic', 'tanh', 'relu'),
          'hidden_layer_sizes' : [6, 9, 12],
          'solver' : ['lbfgs'],
          'max_iter' : [2000]}

mlp = MLPRegressor()
    
gs = GridSearchCV(estimator = mlp, param_grid = params, scoring = 'r2', cv = 5)

# Grid-search optimization

In [4]:
results_counter = defaultdict(lambda: defaultdict(int))
optimization_results = []

for i in tqdm(range(50)):
    data_train, data_test = train_test_split(data, test_size = 0.2,
                                                stratify = y_binned, random_state = i)
    
    #Hacking a scaling but keeping columns names since min_max_scaler does not return a dataframe
    minval = data_train.min()
    minmax = data_train.max() - data_train.min()
    data_train_scaled = (data_train - minval) / minmax
    data_test_scaled = (data_test - minval) / minmax
    
    #Define X and Y
    X_train = data_train_scaled.drop(columns = ['Water_volume'], axis=1)
    Y_train = data_train_scaled.Water_volume
    X_test = data_test_scaled.drop(columns = ['Water_volume'], axis=1)
    Y_test = data_test_scaled.Water_volume
    
    
    # fitting the model for grid search
    grid_result = gs.fit(X_train, Y_train)

    optimization_results.append(gs.best_params_)
    
    print('Best score = {:.4f} using {}'.format(gs.best_score_,
                                            gs.best_params_))

  0%|          | 0/50 [00:00<?, ?it/s]

  2%|▏         | 1/50 [00:02<01:59,  2.44s/it]

Best score = 0.8581 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


  4%|▍         | 2/50 [00:04<02:00,  2.51s/it]

Best score = 0.8517 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


  6%|▌         | 3/50 [00:07<01:49,  2.33s/it]

Best score = 0.8874 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


  8%|▊         | 4/50 [00:09<01:47,  2.34s/it]

Best score = 0.8725 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 10%|█         | 5/50 [00:11<01:48,  2.41s/it]

Best score = 0.8779 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 12%|█▏        | 6/50 [00:13<01:39,  2.26s/it]

Best score = 0.8767 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 14%|█▍        | 7/50 [00:16<01:42,  2.39s/it]

Best score = 0.8453 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 16%|█▌        | 8/50 [00:18<01:37,  2.32s/it]

Best score = 0.8143 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 18%|█▊        | 9/50 [00:21<01:43,  2.54s/it]

Best score = 0.7954 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 20%|██        | 10/50 [00:24<01:41,  2.54s/it]

Best score = 0.8777 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 22%|██▏       | 11/50 [00:26<01:35,  2.44s/it]

Best score = 0.8757 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 24%|██▍       | 12/50 [00:29<01:34,  2.48s/it]

Best score = 0.8743 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 26%|██▌       | 13/50 [00:31<01:33,  2.52s/it]

Best score = 0.8711 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 28%|██▊       | 14/50 [00:34<01:31,  2.55s/it]

Best score = 0.8628 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 30%|███       | 15/50 [00:36<01:27,  2.50s/it]

Best score = 0.8515 using {'activation': 'relu', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 32%|███▏      | 16/50 [00:38<01:22,  2.42s/it]

Best score = 0.8665 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 34%|███▍      | 17/50 [00:41<01:19,  2.41s/it]

Best score = 0.8844 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 36%|███▌      | 18/50 [00:43<01:14,  2.33s/it]

Best score = 0.8665 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 38%|███▊      | 19/50 [00:45<01:11,  2.31s/it]

Best score = 0.8840 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 40%|████      | 20/50 [00:48<01:12,  2.42s/it]

Best score = 0.8347 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 42%|████▏     | 21/50 [00:51<01:11,  2.46s/it]

Best score = 0.8469 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 44%|████▍     | 22/50 [00:53<01:06,  2.38s/it]

Best score = 0.8788 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 46%|████▌     | 23/50 [00:55<01:04,  2.39s/it]

Best score = 0.8764 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 48%|████▊     | 24/50 [00:58<01:04,  2.49s/it]

Best score = 0.8571 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 50%|█████     | 25/50 [01:00<01:02,  2.48s/it]

Best score = 0.8421 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 52%|█████▏    | 26/50 [01:03<01:01,  2.58s/it]

Best score = 0.8705 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 54%|█████▍    | 27/50 [01:06<01:00,  2.62s/it]

Best score = 0.8437 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 56%|█████▌    | 28/50 [01:08<00:57,  2.61s/it]

Best score = 0.8684 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 58%|█████▊    | 29/50 [01:11<00:54,  2.59s/it]

Best score = 0.8784 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 60%|██████    | 30/50 [01:14<00:51,  2.58s/it]

Best score = 0.8311 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 62%|██████▏   | 31/50 [01:17<00:52,  2.74s/it]

Best score = 0.8826 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 64%|██████▍   | 32/50 [01:21<00:56,  3.13s/it]

Best score = 0.8493 using {'activation': 'relu', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 66%|██████▌   | 33/50 [01:23<00:50,  2.98s/it]

Best score = 0.8673 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 68%|██████▊   | 34/50 [01:26<00:45,  2.84s/it]

Best score = 0.8426 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 70%|███████   | 35/50 [01:28<00:40,  2.73s/it]

Best score = 0.8722 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 72%|███████▏  | 36/50 [01:31<00:36,  2.61s/it]

Best score = 0.8539 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 74%|███████▍  | 37/50 [01:33<00:34,  2.68s/it]

Best score = 0.8721 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 76%|███████▌  | 38/50 [01:36<00:31,  2.58s/it]

Best score = 0.8816 using {'activation': 'relu', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 78%|███████▊  | 39/50 [01:39<00:28,  2.62s/it]

Best score = 0.8383 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 80%|████████  | 40/50 [01:41<00:24,  2.46s/it]

Best score = 0.8834 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 82%|████████▏ | 41/50 [01:43<00:22,  2.46s/it]

Best score = 0.8689 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 84%|████████▍ | 42/50 [01:46<00:20,  2.59s/it]

Best score = 0.8586 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 86%|████████▌ | 43/50 [01:49<00:18,  2.70s/it]

Best score = 0.8573 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 88%|████████▊ | 44/50 [01:52<00:16,  2.68s/it]

Best score = 0.8325 using {'activation': 'tanh', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 90%|█████████ | 45/50 [01:54<00:12,  2.57s/it]

Best score = 0.8642 using {'activation': 'logistic', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}


 92%|█████████▏| 46/50 [01:56<00:10,  2.56s/it]

Best score = 0.8259 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 94%|█████████▍| 47/50 [01:59<00:07,  2.56s/it]

Best score = 0.8619 using {'activation': 'logistic', 'hidden_layer_sizes': 6, 'max_iter': 2000, 'solver': 'lbfgs'}


 96%|█████████▌| 48/50 [02:02<00:05,  2.56s/it]

Best score = 0.8371 using {'activation': 'logistic', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


 98%|█████████▊| 49/50 [02:04<00:02,  2.52s/it]

Best score = 0.8743 using {'activation': 'relu', 'hidden_layer_sizes': 9, 'max_iter': 2000, 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
100%|██████████| 50/50 [02:07<00:00,  2.54s/it]

Best score = 0.8684 using {'activation': 'relu', 'hidden_layer_sizes': 12, 'max_iter': 2000, 'solver': 'lbfgs'}





In [5]:
# Update the counts for each result
for result in optimization_results:
    for param, value in result.items():
        results_counter[param][value] += 1

# Print the results counter
for param, counts in results_counter.items():
    print(f"Parameter: {param}")
    for value, count in counts.items():
        print(f"  Value: {value}, Count: {count}")

Parameter: activation
  Value: logistic, Count: 39
  Value: relu, Count: 10
  Value: tanh, Count: 1
Parameter: hidden_layer_sizes
  Value: 6, Count: 26
  Value: 12, Count: 11
  Value: 9, Count: 13
Parameter: max_iter
  Value: 2000, Count: 50
Parameter: solver
  Value: lbfgs, Count: 50
