In [None]:
import json
from itertools import combinations
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.base import clone
from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso, Ridge
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (classification_report, matthews_corrcoef,
                            confusion_matrix, ConfusionMatrixDisplay,
                            mean_squared_error, explained_variance_score)
from sklearn.model_selection import KFold, LeaveOneOut, train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Read the raw table from "parkinsons_updrs.data" (parsed as CSV; the path is
# relative to the notebook's working directory)
data = pd.read_csv("parkinsons_updrs.data")

In [None]:
# Print a summary of the full table: column names, non-null counts, dtypes and memory use
data.info()

In [None]:
# Show the column labels of the full table
data.columns

In [None]:
# The target is motor_UPDRS; the features are every column except the two UPDRS scores
y = data["motor_UPDRS"]
X = data.drop(["total_UPDRS", "motor_UPDRS"], axis="columns")

In [None]:
# Hold out 361 rows as the independent validation set (X_IND / y_IND);
# the fixed random_state makes the split reproducible
X_train, X_IND, y_train, y_IND = train_test_split(
    X,
    y,
    test_size=361,
    random_state=361,
)

In [None]:
# Put the training features and their target side by side in a single table
data_train = pd.concat([X_train, y_train], axis="columns")

In [None]:
# Preview the first five rows of the joined training table
data_train.head()

In [None]:
# motor_UPDRS against test_time for subject 20, drawn as markers only (no connecting line)
(
    data_train
    .loc[data_train["subject#"] == 20]
    .plot(x="test_time", y="motor_UPDRS", linestyle="", marker="o")
)

In [None]:
# Show the column labels of the training table (the feature columns plus motor_UPDRS)
data_train.columns

In [None]:
# List the distinct values that occur in the sex column of the training table
data_train["sex"].unique()

In [None]:
# Compare the distribution of motor_UPDRS between the values of sex
sns.boxplot(x="sex", y="motor_UPDRS", data=data_train)

In [None]:
# Drop the subject# and test_time columns from the training features.
# X_train is overwritten with its own result, so without errors="ignore" a
# second execution of this cell would raise KeyError (columns already gone).
X_train = X_train.drop(columns=["subject#", "test_time"], errors="ignore")

In [None]:
def get_model_statistics(model, X, y, n_splits=16):
    """Estimate regression quality of ``model`` with unshuffled K-fold cross-validation.

    Every fold trains a fresh copy of ``model``, so the estimator passed in by
    the caller is left untouched (it is NOT fitted when this function returns)
    and no learned state, e.g. from ``warm_start``, can carry over between folds.

    Parameters
    ----------
    model : object with ``fit(X, y)`` and ``predict(X)``
        scikit-learn estimators are copied parameter-wise by ``clone``;
        other objects are deep-copied instead (``safe=False``).
    X : pandas.DataFrame
        Feature table; rows are selected by position with ``.iloc``.
    y : pandas.Series
        Target values, aligned row by row with ``X``.
    n_splits : int, default 16
        Number of consecutive folds passed to ``KFold``.

    Returns
    -------
    tuple
        ``(pearson, rmse, rve, max_error)``: the per-fold means of the Pearson
        correlation, the root mean squared error and the explained variance
        score, followed by the largest absolute error seen in any fold.
    """
    kf = KFold(n_splits=n_splits)

    # Per-fold statistics
    pearson = []
    rmse = []
    rve = []
    max_err = []

    for train_index, test_index in kf.split(X):
        # Fold-local names, deliberately distinct from the notebook's X_train / y_train
        X_fit, X_eval = X.iloc[train_index], X.iloc[test_index]
        y_fit, y_eval = y.iloc[train_index], y.iloc[test_index]

        # Fit an independent copy so the caller's estimator is never mutated
        fold_model = clone(model, safe=False).fit(X_fit, y_fit)
        preds = fold_model.predict(X_eval)

        pearson.append(np.corrcoef(y_eval, preds)[0, 1])
        rmse.append(np.sqrt(mean_squared_error(y_eval, preds)))
        rve.append(explained_variance_score(y_eval, preds))
        max_err.append(np.abs(y_eval - preds).max())

    # Average the fold scores; the maximum error is the worst case over all folds
    return (
        np.mean(pearson),
        np.mean(rmse),
        np.mean(rve),
        np.max(max_err),
    )

In [None]:
# Cross-validated scores for a default Ridge regressor; the result is the tuple
# (mean Pearson r, mean RMSE, mean explained variance, max absolute error)
get_model_statistics(model=Ridge(), X=X_train, y=y_train)

In [None]:
# Peek at the training-row positions of the first of the 16 folds
train, test = next(iter(KFold(n_splits=16).split(X_train)))
print(train)