# Import libraries

In [1]:
import numpy as np
import pandas as pd
from statistics import mean, stdev
from sklearn.svm import SVR
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm

# Data preprocessing

### Read data from file

In [2]:
# Load the workbook into a DataFrame; header=0 takes the column names from the first row
file_name = 'Harvesting data'
data = pd.read_excel(file_name + '.xlsx', header=0)

# Report how many rows and columns were read (data.shape is (n_rows, n_columns))
print("{0} rows and {1} columns".format(*data.shape))
print("")

152 rows and 7 columns



### Initialization

In [3]:
# Features are every column except the target; the target is Water_volume
X = data.drop(columns=['Water_volume'])
Y = data['Water_volume']

# Discretise the continuous target into 4 equal-width bins so that it can be
# passed as `stratify` labels to train_test_split. The upper edge is pushed
# 0.1 above the maximum so the largest target value still lands in the last bin.
bins = np.linspace(start=Y.min(), stop=Y.max() + 0.1, num=5)
y_binned = np.digitize(Y, bins=bins)

# Hyper-parameter grid explored for the support vector regressor
params = dict(
    kernel=('sigmoid', 'rbf'),
    epsilon=[0.01, 0.05, 0.1, 0.2, 0.5, 1],
    C=[0.05, 0.1, 0.5, 1],
)

# Exhaustive search over the grid, scored by R^2 with 5-fold cross-validation
svr = SVR()
gs = GridSearchCV(svr, params, scoring='r2', cv=5)

# Grid-search optimization

In [4]:
for i in tqdm(range(50)):
    # Repeat the search on 50 different stratified 80/20 splits; the loop index
    # is used as the random seed so each split is reproducible.
    data_train, data_test = train_test_split(data, test_size = 0.2,
                                                stratify = y_binned, random_state = i)

    # Min-max scale by hand so the results stay DataFrames with their column
    # names. The statistics come from the training split only and are reused
    # unchanged on the test split.
    minval = data_train.min()
    minmax = data_train.max() - minval
    # A column that is constant within the training split has a zero range;
    # dividing by it would produce NaN/inf and make the SVR fit fail. Use a
    # divisor of 1 instead, so such a column simply scales to 0.
    minmax = minmax.replace(0, 1)
    data_train_scaled = (data_train - minval) / minmax
    data_test_scaled = (data_test - minval) / minmax

    # Separate features and target (note: the target is scaled as well)
    X_train = data_train_scaled.drop(columns = ['Water_volume'])
    Y_train = data_train_scaled.Water_volume
    X_test = data_test_scaled.drop(columns = ['Water_volume'])
    Y_test = data_test_scaled.Water_volume

    # Run the grid search on the training split only; the test split is not
    # used in this loop.
    # NOTE(review): scaling happens before the search's internal
    # cross-validation, so the CV folds share scaling statistics.
    grid_result = gs.fit(X_train, Y_train)

    print('Best score = {:.4f} using {}'.format(gs.best_score_,
                                            gs.best_params_))

  2%|▏         | 1/50 [00:00<00:32,  1.51it/s]

Best score = 0.8288 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


  4%|▍         | 2/50 [00:01<00:32,  1.48it/s]

Best score = 0.8574 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


  6%|▌         | 3/50 [00:02<00:32,  1.43it/s]

Best score = 0.8707 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


  8%|▊         | 4/50 [00:02<00:31,  1.46it/s]

Best score = 0.8526 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 10%|█         | 5/50 [00:03<00:30,  1.48it/s]

Best score = 0.8615 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 12%|█▏        | 6/50 [00:04<00:29,  1.50it/s]

Best score = 0.8505 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 14%|█▍        | 7/50 [00:04<00:28,  1.53it/s]

Best score = 0.8448 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 16%|█▌        | 8/50 [00:05<00:27,  1.53it/s]

Best score = 0.8207 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 18%|█▊        | 9/50 [00:06<00:28,  1.46it/s]

Best score = 0.8139 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 20%|██        | 10/50 [00:06<00:27,  1.46it/s]

Best score = 0.8556 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 22%|██▏       | 11/50 [00:07<00:26,  1.49it/s]

Best score = 0.8416 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 24%|██▍       | 12/50 [00:08<00:25,  1.49it/s]

Best score = 0.8434 using {'C': 0.5, 'epsilon': 0.05, 'kernel': 'rbf'}


 26%|██▌       | 13/50 [00:08<00:25,  1.47it/s]

Best score = 0.8576 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 28%|██▊       | 14/50 [00:09<00:25,  1.42it/s]

Best score = 0.8587 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 30%|███       | 15/50 [00:10<00:25,  1.35it/s]

Best score = 0.8360 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 32%|███▏      | 16/50 [00:11<00:25,  1.33it/s]

Best score = 0.8448 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 34%|███▍      | 17/50 [00:11<00:25,  1.32it/s]

Best score = 0.8351 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 36%|███▌      | 18/50 [00:12<00:24,  1.30it/s]

Best score = 0.8363 using {'C': 0.5, 'epsilon': 0.05, 'kernel': 'rbf'}


 38%|███▊      | 19/50 [00:13<00:22,  1.37it/s]

Best score = 0.8596 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 40%|████      | 20/50 [00:14<00:22,  1.35it/s]

Best score = 0.8281 using {'C': 0.5, 'epsilon': 0.05, 'kernel': 'rbf'}


 42%|████▏     | 21/50 [00:14<00:21,  1.36it/s]

Best score = 0.8577 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 44%|████▍     | 22/50 [00:16<00:31,  1.11s/it]

Best score = 0.8537 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 46%|████▌     | 23/50 [00:19<00:40,  1.50s/it]

Best score = 0.8523 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 48%|████▊     | 24/50 [00:21<00:41,  1.60s/it]

Best score = 0.8650 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 50%|█████     | 25/50 [00:22<00:41,  1.65s/it]

Best score = 0.8274 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 52%|█████▏    | 26/50 [00:24<00:41,  1.72s/it]

Best score = 0.8490 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 54%|█████▍    | 27/50 [00:26<00:41,  1.81s/it]

Best score = 0.8465 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 56%|█████▌    | 28/50 [00:29<00:44,  2.04s/it]

Best score = 0.8464 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 58%|█████▊    | 29/50 [00:31<00:41,  1.98s/it]

Best score = 0.8543 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 60%|██████    | 30/50 [00:33<00:39,  1.96s/it]

Best score = 0.8531 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 62%|██████▏   | 31/50 [00:34<00:36,  1.93s/it]

Best score = 0.8656 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 64%|██████▍   | 32/50 [00:37<00:37,  2.08s/it]

Best score = 0.8290 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 66%|██████▌   | 33/50 [00:39<00:35,  2.09s/it]

Best score = 0.8472 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 68%|██████▊   | 34/50 [00:41<00:35,  2.20s/it]

Best score = 0.8287 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 70%|███████   | 35/50 [00:44<00:32,  2.20s/it]

Best score = 0.8635 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 72%|███████▏  | 36/50 [00:46<00:29,  2.12s/it]

Best score = 0.8581 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 74%|███████▍  | 37/50 [00:48<00:30,  2.34s/it]

Best score = 0.8365 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 76%|███████▌  | 38/50 [00:50<00:26,  2.24s/it]

Best score = 0.8747 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 78%|███████▊  | 39/50 [00:53<00:24,  2.20s/it]

Best score = 0.8434 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 80%|████████  | 40/50 [00:54<00:21,  2.13s/it]

Best score = 0.8463 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 82%|████████▏ | 41/50 [00:56<00:18,  2.06s/it]

Best score = 0.8597 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 84%|████████▍ | 42/50 [00:59<00:18,  2.29s/it]

Best score = 0.8722 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 86%|████████▌ | 43/50 [01:01<00:15,  2.24s/it]

Best score = 0.8321 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 88%|████████▊ | 44/50 [01:03<00:12,  2.14s/it]

Best score = 0.8452 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 90%|█████████ | 45/50 [01:06<00:11,  2.31s/it]

Best score = 0.8504 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


 92%|█████████▏| 46/50 [01:09<00:09,  2.43s/it]

Best score = 0.8471 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 94%|█████████▍| 47/50 [01:10<00:06,  2.24s/it]

Best score = 0.8579 using {'C': 0.5, 'epsilon': 0.01, 'kernel': 'rbf'}


 96%|█████████▌| 48/50 [01:13<00:04,  2.44s/it]

Best score = 0.8606 using {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


 98%|█████████▊| 49/50 [01:15<00:02,  2.34s/it]

Best score = 0.8379 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}


100%|██████████| 50/50 [01:17<00:00,  1.56s/it]

Best score = 0.8498 using {'C': 1, 'epsilon': 0.05, 'kernel': 'rbf'}



