In [1]:
import matplotlib.pyplot as plt
from pycaret.datasets import get_data
from pycaret.regression import *
from pandas import DataFrame
from itertools import combinations
from math import comb

## An issue with time to run
- Running the loop ${14 \choose 1} = 14$ times takes ~35 minutes, even when comparing only 3 models instead of all of them
  - ${35 \over 14} = 2.5$ minutes per run
- So running it ${14 \choose 3} = 364$ times would take about ${364 \times 2.5} = 910$ minutes
- Comparing all models in every run could take up to ${364 \times 15} \approx 5500$ minutes (a single run over all models generally takes about 15 minutes)

In [2]:
# Load the feature table and derive the numeric columns used for modelling.
df = get_data('data/initial_features')

# Split 'experimental_condition' on '-' into a weight part and a pace part.
# Assumes every value contains exactly one '-' (e.g. "Condition <w>-<p>") — TODO confirm.
df[['weight', 'pace']] = df['experimental_condition'].str.split('-', expand=True)

# regex=False: the prefix is a literal string, so do not rely on the
# version-dependent default of `str.replace`.
df['weight'] = df['weight'].str.replace('Condition ', '', regex=False).astype(float)
df['pace'] = df['pace'].astype(int)

# Forward-fill missing ratings, then cast to int. `.ffill()` replaces the
# deprecated `fillna(method='ffill')`.
# NOTE(review): the fill runs over the whole frame in row order, so a missing
# value at the start of one subject's rows would inherit the previous
# subject's rating — confirm this is intended (a per-subject groupby fill
# would avoid it). A NaN in the very first row would make `astype(int)` fail.
df['rpe'] = df['rpe'].ffill().astype(int)


# Number of subjects held out per run; every COMBINATIONS-sized subset of the
# unique subject ids becomes one hold-out set.
COMBINATIONS = 3
combinations_list = list(combinations(df['subject'].unique(), COMBINATIONS))

# With 14 subjects this yields comb(14, 3) = 364 hold-out sets.

In [3]:
# Empty results table: one column per leaderboard metric, plus the subject
# combination that each row belongs to.
RESULT_COLUMNS = [
    "Model",
    "MAE",
    "MSE",
    "RMSE",
    "R2",
    "RMSLE",
    "MAPE",
    "TT (Sec)",
    "Training Subjects",
]
models_df = DataFrame({column: [] for column in RESULT_COLUMNS})

In [26]:
# Subject-wise hold-out evaluation: for every subject combination in
# `combinations_list`, fit the candidate models on the remaining subjects and
# record the leaderboard returned by PyCaret's `pull()`.
METRIC_COLUMNS = ["Model", "MAE", "MSE", "RMSE", "R2", "RMSLE", "MAPE", "TT (Sec)"]

# Collect plain dict rows and build the frame once after the loop.
# `DataFrame.append` copied the whole frame on every call (quadratic) and was
# removed in pandas 2.0.
records = []
for sub in combinations_list:
    # Rows of the subjects in `sub` are held out; all other rows are used for training.
    train = df[~df['subject'].isin(sub)]
    test = df[df['subject'].isin(sub)]

    reg = setup(data=train, target='rpe')
    best = compare_models(sort='MAE', include=['rf', 'gbr', 'dt'])
    # Named `leaderboard` rather than `all` so the builtin `all()` is not shadowed.
    leaderboard = pull()

    # Stored in separate df?
    # pred = predict_model(best, data=test)

    for row in leaderboard[METRIC_COLUMNS].to_dict('records'):
        # NOTE(review): despite the column name, `sub` holds the subjects that
        # were *excluded* from training (the held-out set). The key is kept
        # unchanged so existing consumers of models.csv keep working.
        row["Training Subjects"] = sub
        records.append(row)

# Rebuild the table from scratch (re-running the cell no longer duplicates
# rows), reusing the column layout of the `models_df` defined earlier.
models_df = DataFrame(records, columns=models_df.columns)
models_df.to_csv('outputs/models.csv', index=False)

# Last expression of the cell, so the results table is actually displayed.
models_df

# -= FOR MY COMPUTER =-
# Takes ~35 minutes to run 14 times (14 choose 1), getting only 3 models instead of all
#   --> 2.5 minutes per run
# So, to run 364 times (14 choose 3) would take 364*2.5=~910 minutes
# If I want to include all models, it could take up to ~5500 minutes (364 runs * ~15 minutes; running all models once generally takes about 15 minutes)

Unnamed: 0,Description,Value
0,Session id,1291
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2585, 61)"
4,Transformed data shape,"(2585, 64)"
5,Transformed train set shape,"(1809, 64)"
6,Transformed test set shape,"(776, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,7289
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2699, 61)"
4,Transformed data shape,"(2699, 64)"
5,Transformed train set shape,"(1889, 64)"
6,Transformed test set shape,"(810, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,7289
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2699, 61)"
4,Transformed data shape,"(2699, 64)"
5,Transformed train set shape,"(1889, 64)"
6,Transformed test set shape,"(810, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,7210
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2707, 61)"
4,Transformed data shape,"(2707, 64)"
5,Transformed train set shape,"(1894, 64)"
6,Transformed test set shape,"(813, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,7210
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2707, 61)"
4,Transformed data shape,"(2707, 64)"
5,Transformed train set shape,"(1894, 64)"
6,Transformed test set shape,"(813, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,8899
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2696, 61)"
4,Transformed data shape,"(2696, 64)"
5,Transformed train set shape,"(1887, 64)"
6,Transformed test set shape,"(809, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,8899
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2696, 61)"
4,Transformed data shape,"(2696, 64)"
5,Transformed train set shape,"(1887, 64)"
6,Transformed test set shape,"(809, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,1133
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2815, 61)"
4,Transformed data shape,"(2815, 64)"
5,Transformed train set shape,"(1970, 64)"
6,Transformed test set shape,"(845, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,4395
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2797, 61)"
4,Transformed data shape,"(2797, 64)"
5,Transformed train set shape,"(1957, 64)"
6,Transformed test set shape,"(840, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,2489
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2613, 61)"
4,Transformed data shape,"(2613, 64)"
5,Transformed train set shape,"(1829, 64)"
6,Transformed test set shape,"(784, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,2160
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2634, 61)"
4,Transformed data shape,"(2634, 64)"
5,Transformed train set shape,"(1843, 64)"
6,Transformed test set shape,"(791, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,8880
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2667, 61)"
4,Transformed data shape,"(2667, 64)"
5,Transformed train set shape,"(1866, 64)"
6,Transformed test set shape,"(801, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,3832
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2738, 61)"
4,Transformed data shape,"(2738, 64)"
5,Transformed train set shape,"(1916, 64)"
6,Transformed test set shape,"(822, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,3670
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2613, 61)"
4,Transformed data shape,"(2613, 64)"
5,Transformed train set shape,"(1829, 64)"
6,Transformed test set shape,"(784, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,7260
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2774, 61)"
4,Transformed data shape,"(2774, 64)"
5,Transformed train set shape,"(1941, 64)"
6,Transformed test set shape,"(833, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Unnamed: 0,Description,Value
0,Session id,7260
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2774, 61)"
4,Transformed data shape,"(2774, 64)"
5,Transformed train set shape,"(1941, 64)"
6,Transformed test set shape,"(833, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Unnamed: 0,Description,Value
0,Session id,6971
1,Target,rpe
2,Target type,Regression
3,Original data shape,"(2682, 61)"
4,Transformed data shape,"(2682, 64)"
5,Transformed train set shape,"(1877, 64)"
6,Transformed test set shape,"(805, 64)"
7,Numeric features,59
8,Categorical features,1
9,Preprocess,True


Processing:   0%|          | 0/17 [00:00<?, ?it/s]

Processing:   0%|          | 0/17 [00:00<?, ?it/s]