# V02 Max Model Experiments
##### Andrew Lefebvre and Adam Cabral

### Here we test multiple different regression methods and compare their results to find the best to use.

In [2]:
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV

In [3]:
test_df = pd.read_csv("./data/rawData/sbj_1_I.csv")
df = pd.read_csv("./data/concatedData/combined_csv.csv")
df.drop(['Cadence', 'Power'], axis=1, inplace=True)
test_df.drop(['Cadence', 'Power'], axis=1, inplace=True)

In [4]:
df.head()

Unnamed: 0,time,Oxygen,HR,RF
0,2,742.4,73.2,16.8
1,3,737.5,73.5,16.0
2,4,729.261905,73.642857,15.404762
3,5,718.9375,73.6875,14.9375
4,6,707.222222,73.666667,14.555556


In [5]:
df.describe()

Unnamed: 0,time,Oxygen,HR,RF
count,194331.0,194331.0,194331.0,194331.0
mean,1209.727774,2313.769284,127.814212,29.158271
std,764.841905,1272.320962,35.887111,9.604156
min,0.0,105.273333,45.394286,5.927273
25%,529.0,1096.75,101.0,22.3
50%,1168.0,2282.083333,129.9,28.2
75%,1843.0,3307.95,157.55,35.05
max,3133.0,5988.45,204.6,172.3


In [6]:
train_feat_df = df.drop(['Oxygen'], axis=1)
train_targ_df = df['Oxygen']

test_feat_df = test_df.drop(['Oxygen'], axis=1)
test_targ_df = test_df['Oxygen']

## Models to be tested:
##### 1. KNN Regression


In [None]:
from sklearn.neighbors import KNeighborsRegressor

KNN_Parameters = {'n_neighbors': [i for i in range(1,100)]}
neigh_regr = GridSearchCV(KNeighborsRegressor(), KNN_Parameters, verbose = 1, n_jobs = 4)
neigh_regr.fit(train_feat_df, train_targ_df)


In [None]:
test_pred_targ = neigh_regr.predict(test_feat_df)
print("Predicted Max Oxygen: ", test_pred_targ.max(), "Actual Max Oxygen: ", test_targ_df.max())

In [None]:
r2_score(test_pred_targ,test_targ_df)

##### 2. Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
lin_regr = LinearRegression(n_jobs = 4)
lin_regr.fit(train_feat_df, train_targ_df)

In [None]:
LinearR_pred_targ = lin_regr.predict(test_feat_df)
print("Predicted Max Oxygen: ", LinearR_pred_targ.max(), "Actual Max Oxygen: ", test_targ_df.max())

In [None]:
r2_score(LinearR_pred_targ,test_targ_df)

##### 3. Support Vector Machine

In [None]:
from sklearn import svm
# svm_regr = svm.SVR()
svm_parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
 
svm_regr = GridSearchCV(svm.SVR(), svm_parameters, verbose = 3, n_jobs = 4)
svm_regr.fit(train_feat_df, train_targ_df)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


In [None]:
svm_pred = smv_regr.predict(test_feat_df)
print("Predicted Max Oxygen: ", svm_pred.max(), "Actual Max Oxygen: ", test_targ_df.max())

In [None]:
r2_score(svm_pred,test_targ_df)

##### 4. MLP Regressor

In [7]:
from sklearn.neural_network import MLPRegressor

In [9]:
mlp_regr = MLPRegressor(random_state=1, max_iter=300, solver='sgd', verbose=True).fit(train_feat_df, train_targ_df)

Iteration 1, loss = 363788952421113387989618571150767423488.00000000
Iteration 2, loss = 410564087682727804557295254415427502080.00000000
Iteration 3, loss = 410560094856526213342280693155614425088.00000000
Iteration 4, loss = 410556102069156301705516457873568169984.00000000
Iteration 5, loss = 410552109320615878468954497053909581824.00000000
Iteration 6, loss = 410548116610906983694915410383370977280.00000000
Iteration 7, loss = 410544123940026217279531531717398495232.00000000
Iteration 8, loss = 410540131307977734905307786343780188160.00000000
Iteration 9, loss = 410536138714757003100420619403110907904.00000000
Iteration 10, loss = 410532146160365986369326904667799552000.00000000
Iteration 11, loss = 410528153644802342418251138793820127232.00000000
Iteration 12, loss = 410524161168067506846604114153317597184.00000000
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.


In [10]:
mlp_pred = mlp_regr.predict(test_feat_df)
print("Predicted Max Oxygen: ", mlp_pred.max(), "Actual Max Oxygen: ", test_targ_df.max())

Predicted Max Oxygen:  2324.72923456801 Actual Max Oxygen:  4575.6


In [11]:
r2_score(mlp_pred,test_targ_df)

-9.067865749129566e+30