In [1]:
import numpy as np
import pandas as pd
from scipy.stats import zscore
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor

In [2]:
df = pd.read_csv(r'C:\Users\Cameron\OneDrive\Documents\DCU_Data\MWBW full dataset 2377 participants_anonymised (1).csv')

drop_df_names = ['new_calc_bmi','VO2max','age','puntkick_total','handpass_total','groundstrike_total','balance_catch_total','run_total','skip_total','gallop_total','slide_total','hop_total','hj_total','vj_total','mnm_total','catch_total','throw_total','roll_total','kick_total','2hs_total','1hs_total','bounce_total','balance_total']
feature_names = ['age','puntkick_total','handpass_total','groundstrike_total','balance_catch_total','run_total','skip_total','gallop_total','slide_total','hop_total','hj_total','vj_total','mnm_total','catch_total','throw_total','roll_total','kick_total','2hs_total','1hs_total','bounce_total','balance_total']

drop_df = df[drop_df_names].copy()
drop_df = drop_df.dropna()
drop_df = drop_df.reset_index(drop=True)

y_var = drop_df['VO2max'].copy()
y_var = y_var.astype('float')

data_set = drop_df[feature_names].copy()
data_set = data_set.astype('float')

a = df['new_calc_bmi'].copy()
a = a.astype('float')
b = a[(a >= 10) & (a <= 40)].copy()
b = b.reset_index(drop=True)

x_var = pd.concat([b,data_set], axis=1)
x_var = x_var.astype('float')
x_var = x_var.dropna()

In [3]:
x_var = zscore(x_var, nan_policy='omit')
y_var = zscore(y_var, nan_policy='omit')

In [4]:
feature_names_fix = ["new_calc_bmi"]
new_feature_names = feature_names_fix + feature_names

In [5]:
X_t, X_remain, y_t, y_remain = train_test_split(x_var, y_var, test_size=0.3)
X_test, X_val, y_test, y_val = train_test_split(X_remain, y_remain, test_size=0.5)

X_train = np.concatenate((X_t, X_val))
y_train = np.concatenate((y_t, y_val))

X_train = pd.DataFrame(X_train, columns = new_feature_names)
y_train = pd.DataFrame(y_train, columns = ['VO2max'] )

model = LinearRegression().fit(X_train,y_train)

In [6]:
y_test = y_test.reset_index(drop=True)

y_hat = model.predict(X_test)

y_hat_data = pd.DataFrame(y_hat, columns = ['predicted_VO2max'])

linear_result = pd.concat([y_hat_data, y_test], axis=1)

mean_absolute = mean_absolute_error(y_test, y_hat_data)

print('Linear Regression\n'
      '-------------------------')

print(f'The mean absolute error is: {mean_absolute}')

r2 = r2_score(y_test, y_hat_data)

print(f'The R2 score for the model is: {r2}')

linear_result

Linear Regression
-------------------------
The mean absolute error is: 0.7355442561122202
The R2 score for the model is: 0.2264981888492561


Unnamed: 0,predicted_VO2max,VO2max
0,0.888241,1.352447
1,0.159510,0.362718
2,0.462527,-0.548919
3,0.463412,-0.319736
4,0.013365,-0.447060
...,...,...
132,-0.002135,0.768456
133,-0.529641,-0.739055
134,0.049353,-0.941075
135,0.301708,0.695457


In [7]:
numpy_y_train = y_train.to_numpy(dtype=float, copy=True)

numpy_y_train = np.ravel(numpy_y_train)

model = SVR(kernel='rbf').fit(X_train, numpy_y_train)

y_hat = model.predict(X_test)

y_hat_data = pd.DataFrame(y_hat, columns = ['predicted_VO2max'])

support_vector_regression = pd.concat([y_hat_data, y_test], axis=1)

mean_absolute = mean_absolute_error(y_test, y_hat_data)

print('Support Vector Regression\n'
      '-------------------------')

print(f'The mean absolute error is: {mean_absolute}')

r2 = r2_score(y_test, y_hat_data)

print(f'The R2 score for the model is: {r2}')

support_vector_regression

Support Vector Regression
-------------------------
The mean absolute error is: 0.7472551122370724
The R2 score for the model is: 0.22240895055576937


Unnamed: 0,predicted_VO2max,VO2max
0,0.544133,1.352447
1,0.509986,0.362718
2,0.249219,-0.548919
3,0.598020,-0.319736
4,-0.139587,-0.447060
...,...,...
132,-0.019498,0.768456
133,-1.097252,-0.739055
134,-0.214003,-0.941075
135,0.207306,0.695457


In [8]:
numpy_y_train = y_train.to_numpy(dtype=float, copy=True)

numpy_y_train = np.ravel(numpy_y_train)

model = GradientBoostingRegressor().fit(X_train, numpy_y_train)

y_hat_data = pd.DataFrame(y_hat, columns = ['predicted_VO2max'])

gradient_regressor_results = pd.concat([y_hat_data, y_test], axis=1)

mean_absolute = mean_absolute_error(y_test, y_hat_data)

print('Gradient Boosting Regressor\n'
      '---------------------------')

print(f'The mean absolute error is: {mean_absolute}')

r2 = r2_score(y_test, y_hat_data)

print(f'The R2 score for the model is: {r2}')

gradient_regressor_results

Gradient Boosting Regressor
---------------------------
The mean absolute error is: 0.7472551122370724
The R2 score for the model is: 0.22240895055576937


Unnamed: 0,predicted_VO2max,VO2max
0,0.544133,1.352447
1,0.509986,0.362718
2,0.249219,-0.548919
3,0.598020,-0.319736
4,-0.139587,-0.447060
...,...,...
132,-0.019498,0.768456
133,-1.097252,-0.739055
134,-0.214003,-0.941075
135,0.207306,0.695457
