In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn import ensemble
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from matplotlib import pyplot as plt

In [2]:
def find_nearest(array,value):
    """Return the element of ``array`` that is closest to ``value``.

    Parameters
    ----------
    array : array-like
        Candidate values. Lists, tuples and ndarrays are accepted; input with
        more than one dimension is flattened before searching.
    value : scalar
        Target value to match.

    Returns
    -------
    numpy scalar
        The element with the smallest absolute difference to ``value``. On a
        tie the first such element (in flattened order) is returned.

    Raises
    ------
    ValueError
        If ``array`` contains no elements.
    """
    # asarray lets plain Python sequences through; ravel keeps the flat index
    # produced by argmin consistent with the array that is indexed below.
    candidates = np.asarray(array).ravel()
    if candidates.size == 0:
        raise ValueError("find_nearest() requires a non-empty array")

    idx = np.abs(candidates - value).argmin()

    return candidates[idx]

In [3]:
def refine_data(arr1,arr2,arr3):
    """Snap each value of ``arr3`` to the nearest entry in the matching row of ``arr1``.

    Parameters
    ----------
    arr1 : sequence of array-like
        One row of candidate values per sample.
    arr2 : any
        Accepted for call compatibility; it is not used by this function.
    arr3 : sequence of scalars
        One target value per sample, aligned with the rows of ``arr1``.

    Returns
    -------
    numpy.ndarray
        For every sample, the candidate from its ``arr1`` row closest to the
        corresponding ``arr3`` value, rounded to 3 decimals.

    Raises
    ------
    ValueError
        If ``arr1`` and ``arr3`` do not contain the same number of samples.
    """
    # Fail early instead of silently truncating or raising IndexError mid-loop.
    if len(arr1) != len(arr3):
        raise ValueError(
            "arr1 and arr3 must have the same number of samples "
            "(got %d and %d)" % (len(arr1), len(arr3))
        )

    refined_data = [
        # np.asarray lets list rows through; find_nearest does array arithmetic.
        np.round(find_nearest(np.asarray(row), target), 3)
        for row, target in zip(arr1, arr3)
    ]

    return np.array(refined_data)

In [4]:
def get_metrics(arr1,arr2):
    """Score ``arr2`` against ``arr1`` and return ``(mse, r2)``.

    Both arguments are forwarded unchanged, in this order, to
    ``mean_squared_error`` and ``r2_score``; the two results are returned
    as a tuple with the mean squared error first.
    """
    return mean_squared_error(arr1,arr2), r2_score(arr1,arr2)

In [None]:
# Feature and target tables: comma-separated, first row used as the header.
X = pd.read_csv('X_data_t3', sep=',')
y = pd.read_csv('y_data', sep=',')

# The three remaining files have no header row. Columns containing any missing
# value are dropped before the table is converted to a NumPy array.
time, abundance, baseline = (
    np.array(pd.read_csv(path, sep=',', header=None).dropna(axis='columns'))
    for path in ('time', 'abundance', 'baseline')
)

In [None]:
# Shuffle all five containers in one call so they receive the same row
# permutation. The fixed seed keeps the row order, and therefore every
# downstream split and metric, reproducible across kernel restarts.
X,y,time,abundance,baseline = shuffle(X,y,time,abundance,baseline,random_state=42)

In [None]:
# Feature matrices for the left and right regressors, one column list each.
left_columns = ['maxRT_t', 'x_start_t', 'diff_start']
right_columns = ['maxRT_t', 'x_end_t', 'diff_end']

Xleft = np.array(X[left_columns])
Xright = np.array(X[right_columns])

# Alternative feature sets (currently disabled):
#   left : ['maxRT_t','maxRT_ab','maxRT_baseline','x_start_t','x_start_ab']
#   right: ['maxRT_t','maxRT_ab','maxRT_baseline','x_end_t','x_end_ab']
#
#   left : ['maxRT_t','maxRT_ab','x_start_t','x_start_ab','diff_start']
#   right: ['maxRT_t','maxRT_ab','x_end_t','x_end_ab','diff_end']

In [None]:
# Regression targets for the left and right models, as 1-D NumPy arrays.
yleft, yright = (np.array(y[column]) for column in ('y_left_t', 'y_right_t'))

In [None]:
# Rescale every feature column to the [0, 1] range, with an independent scaler
# per side so each can later be reused on new data.
# NOTE(review): the scalers are fitted on the full data set, before the
# train/test split below; confirm that this is acceptable for the evaluation.
scaler_left = MinMaxScaler(feature_range=(0, 1)).fit(Xleft)
Xleft = scaler_left.transform(Xleft)

scaler_right = MinMaxScaler(feature_range=(0, 1)).fit(Xright)
Xright = scaler_right.transform(Xright)

In [None]:
# Inspect the min-max scaled right-hand feature matrix.
Xright

array([[0.50684932, 1.        , 0.94237288],
       [0.7260274 , 1.        , 0.91525424],
       [0.50684932, 1.        , 0.94237288],
       ...,
       [0.54794521, 1.        , 0.93728814],
       [0.45205479, 1.        , 0.94915254],
       [0.04109589, 0.01180438, 0.00677966]])

In [None]:
# Hold out 5 % of the samples. Both calls use the same seed and the same number
# of rows, so the left and right splits select the same samples.
test_size = 0.05
random_state = 42
split_options = dict(test_size=test_size, random_state=random_state)

(Xleft_train, Xleft_test,
 yleft_train, yleft_test,
 tleft_train, tleft_test,
 aleft_train, aleft_test) = train_test_split(Xleft, yleft, time, abundance, **split_options)

(Xright_train, Xright_test,
 yright_train, yright_test,
 tright_train, tright_test,
 aright_train, aright_test) = train_test_split(Xright, yright, time, abundance, **split_options)

In [None]:
# Hyper-parameter grid: every (n_estimators, max_depth) pair is fitted once for
# the left model and once for the right model.
# NOTE(review): 1-2 million boosting stages with a depth cap of 100k-200k is
# extremely expensive to fit; confirm these magnitudes are intended.
n_estimators = np.linspace(1000000,2000000,2,dtype=int)
max_depth = np.linspace(100000,200000,2,dtype=int)

# Result containers: one inner list per n_estimators value, filled in
# max_depth order, so after conversion they are indexed [i][j].
mse_left,r2_left = [[] for i in range(len(n_estimators))], [[] for i in range(len(n_estimators))]
mse_right,r2_right = [[] for i in range(len(n_estimators))], [[] for i in range(len(n_estimators))]
yleft_, yright_ = [[] for i in range(len(n_estimators))], [[] for i in range(len(n_estimators))]

for i in range(len(n_estimators)):
    for j in range(len(max_depth)):

        # 'loss' is intentionally left at its default (squared error): the old
        # alias 'ls' was removed in scikit-learn 1.2 and now raises an error.
        # Grid values are cast to plain ints, and the regressors share the
        # notebook's seed so repeated runs give the same models.
        params = {'n_estimators': int(n_estimators[i]), 'max_depth': int(max_depth[j]),
                  'min_samples_split': 5, 'learning_rate': 0.01,
                  'random_state': random_state}

        clf_left = ensemble.GradientBoostingRegressor(**params)
        clf_right = ensemble.GradientBoostingRegressor(**params)

        clf_left.fit(Xleft_train,yleft_train)
        clf_right.fit(Xright_train,yright_train)

        yleft_predict = clf_left.predict(Xleft_test).round(3)
        yright_predict = clf_right.predict(Xright_test).round(3)

        # Snap each prediction to the nearest value in that sample's time row.
        yleft_refined = refine_data(tleft_test,aleft_test,yleft_predict)
        yright_refined = refine_data(tright_test,aright_test,yright_predict)

        # NOTE(review): the metrics score the raw predictions, not the refined
        # ones stored below; confirm which of the two should be evaluated.
        mse_1,r2_1 = get_metrics(yleft_test,yleft_predict)
        mse_2,r2_2 = get_metrics(yright_test,yright_predict)

        mse_left[i].append(mse_1)
        r2_left[i].append(r2_1)

        mse_right[i].append(mse_2)
        r2_right[i].append(r2_2)

        yleft_[i].append(yleft_refined)
        yright_[i].append(yright_refined)

# Convert the nested lists to arrays for convenient [i][j] / [i, j] indexing.
yleft_ = np.array(yleft_)
yright_ = np.array(yright_)

mse_left = np.array(mse_left)
mse_right = np.array(mse_right)

r2_left = np.array(r2_left)
r2_right = np.array(r2_right)

In [None]:
# R^2 of the left model on the test split, indexed [n_estimators][max_depth].
r2_left

In [None]:
# R^2 of the right model on the test split, indexed [n_estimators][max_depth].
r2_right

In [None]:
# Mean squared error of the left model on the test split, indexed [n_estimators][max_depth].
mse_left

In [None]:
# Mean squared error of the right model on the test split, indexed [n_estimators][max_depth].
mse_right

In [None]:
# Refined left predictions for the last grid point (n_estimators[1], max_depth[1]).
yleft_[1][1]

In [None]:
# Held-out left targets from the test split, for comparison with the predictions.
yleft_test