In [584]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import torch
%matplotlib inline

In [585]:
combined_df = pd.read_csv('Mens_Crossfit_data_cleaned.csv')

In [586]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5445 entries, 0 to 5444
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Userid          5445 non-null   int64  
 1   Name            5445 non-null   object 
 2   Age             5445 non-null   int64  
 3   Height_inches   5445 non-null   float64
 4   Weight_lbs      5445 non-null   float64
 5   Back Squat      5445 non-null   float64
 6   Clean_and_Jerk  5445 non-null   float64
 7   Snatch          5445 non-null   float64
 8   Deadlift        5445 non-null   float64
 9   Fight Gone Bad  5445 non-null   float64
 10  17.1_time       5445 non-null   float64
 11  17.1_reps       5445 non-null   int64  
 12  17.2_score      5445 non-null   float64
 13  17.3_time       5445 non-null   float64
 14  17.3_reps       5445 non-null   int64  
 15  17.4_score      5445 non-null   float64
 16  17.5_score      5445 non-null   int64  
dtypes: float64(11), int64(5), object(

In [587]:
combined_df.columns

Index(['Userid', 'Name', 'Age', 'Height_inches', 'Weight_lbs', 'Back Squat',
       'Clean_and_Jerk', 'Snatch', 'Deadlift', 'Fight Gone Bad', '17.1_time',
       '17.1_reps', '17.2_score', '17.3_time', '17.3_reps', '17.4_score',
       '17.5_score'],
      dtype='object')

In [588]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_absolute_error

#Let's create our training and testing data

#Get rid of the names and userid as these will not help when modeling!
combined_df.drop(['Userid','Name'], axis = 1, inplace = True)
combined_df

Unnamed: 0,Age,Height_inches,Weight_lbs,Back Squat,Clean_and_Jerk,Snatch,Deadlift,Fight Gone Bad,17.1_time,17.1_reps,17.2_score,17.3_time,17.3_reps,17.4_score,17.5_score
0,34,77.0,231.0,335.0,265.0,210.0,415.0,393.0,1200.0,218,92.0,1440.0,56,178.0,920
1,28,72.0,240.0,420.0,295.0,225.0,455.0,286.0,1200.0,221,78.0,1440.0,88,173.0,968
2,37,74.0,198.0,297.0,231.0,169.0,352.0,398.0,878.0,225,128.0,1440.0,67,186.0,794
3,42,65.0,145.0,330.0,250.0,198.0,375.0,314.0,1200.0,178,89.0,1440.0,80,176.0,947
4,36,70.0,200.0,400.0,285.0,215.0,485.0,438.0,842.0,225,182.0,1440.0,104,219.0,563
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5440,48,72.0,207.0,220.0,176.0,132.0,220.0,230.0,1200.0,186,78.0,1440.0,31,160.0,1624
5441,31,68.0,163.0,282.0,187.0,132.0,359.0,282.0,860.0,225,123.0,1440.0,80,201.0,536
5442,36,74.0,212.0,405.0,310.0,250.0,465.0,419.0,804.0,225,167.0,1440.0,130,217.0,565
5443,28,67.0,155.0,315.0,220.0,165.0,405.0,305.0,995.0,225,136.0,1440.0,80,197.0,688


In [589]:
#Let's predict our 17.5 score 

X = combined_df.drop('17.5_score', axis = 1)
y = combined_df['17.5_score']


### FEATURE SCALING HERE!

In [590]:
#scale the X features using sklearn

from sklearn.preprocessing import StandardScaler

# Assuming 'combined_df' is your DataFrame and '17.5_score' is the target column
features = combined_df.drop('17.5_score', axis=1)
target = combined_df['17.5_score']

scaler = StandardScaler()
X = scaler.fit_transform(X)


In [591]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2, random_state = 42)

model = RandomForestRegressor(n_estimators = 100, random_state = 42)

model.fit(X_train,y_train)

In [592]:
y_preds = model.predict(X_test)

# view the predictions
pd.DataFrame({'actual': y_test/60, 'preds': y_preds/60}).head(10)

Unnamed: 0,actual,preds
1468,9.45,11.738833
4766,12.25,13.107167
410,9.1,9.567
3478,11.883333,11.711833
1957,11.95,11.006667
907,12.65,13.327333
1231,14.466667,12.189333
4811,10.483333,11.7835
2977,15.666667,18.038333
530,13.766667,13.239


In [593]:
r2_score(y_test,y_preds)

0.7057817856732677

In [594]:
#Calculate MAE
mae = mean_absolute_error(y_test,y_preds)
mae

113.30373737373738

### PyTorch Approach

In [595]:
!pip install torchmetrics



In [596]:
import torch
from torch import nn
import matplotlib.pyplot as plt
X_torch = torch.from_numpy(X).type(torch.float32)
#X_torch = torch.from_numpy(X.values).type(torch.float32)
y_torch = torch.from_numpy(y.values).type(torch.float32)

In [597]:
X_train, X_test, y_train, y_test = train_test_split(X_torch, y_torch, test_size=0.33, random_state=42)

In [598]:
X_train.dtype,X_test.dtype,y_train.dtype,y_test.dtype

(torch.float32, torch.float32, torch.float32, torch.float32)

In [599]:
#Create model and instantiate

class WOD_model(nn.Module):
    def __init__(self,input_features, output_features, hidden_units):
        super().__init__()
        
        #Create the layers
        self.model = nn.Sequential(
        nn.Linear(in_features = input_features, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        #nn.Dropout(p=0.2),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = output_features),
        )
        
    def forward(self,x):
        return self.model(x)
        
#Instantiate model
model_V0 = WOD_model(input_features = combined_df.drop('17.5_score', axis = 1).shape[1], output_features = 1, hidden_units = 16)

In [600]:
model_V0.state_dict

<bound method Module.state_dict of WOD_model(
  (model): Sequential(
    (0): Linear(in_features=14, out_features=16, bias=True)
    (1): GELU(approximate=none)
    (2): Linear(in_features=16, out_features=16, bias=True)
    (3): GELU(approximate=none)
    (4): Linear(in_features=16, out_features=16, bias=True)
    (5): GELU(approximate=none)
    (6): Linear(in_features=16, out_features=16, bias=True)
    (7): GELU(approximate=none)
    (8): Linear(in_features=16, out_features=16, bias=True)
    (9): GELU(approximate=none)
    (10): Linear(in_features=16, out_features=16, bias=True)
    (11): GELU(approximate=none)
    (12): Linear(in_features=16, out_features=1, bias=True)
  )
)>

In [601]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(params = model_V0.parameters(), lr = 0.03)

In [602]:
#Create training and testing loop

torch.manual_seed(42)

epochs = 1000

for epoch in range(epochs):
    model_V0.train()
    
    y_pred = model_V0(X_train)
    
    loss = loss_fn(y_pred.squeeze(),y_train)
    #print(f'Ypred shape:{y_pred.shape}||y_train shape{y_train.shape}')
    
    optimizer.zero_grad()
    
    loss.backward()
    
    optimizer.step()
    
    ### Testing loop
    
    model_V0.eval()
    
    with torch.inference_mode():
        test_pred = model_V0(X_test)
        test_loss = loss_fn(test_pred,y_test)
    if epoch%100 == 0:
            print(f"Epoch: {epoch}| Training Loss: {loss} | Test Loss: {test_loss}")

Epoch: 0| Training Loss: 896.1043090820312 | Test Loss: 893.8197021484375


  return F.l1_loss(input, target, reduction=self.reduction)


Epoch: 100| Training Loss: 107.65876007080078 | Test Loss: 294.8154296875
Epoch: 200| Training Loss: 105.74720764160156 | Test Loss: 297.2554931640625
Epoch: 300| Training Loss: 104.34107971191406 | Test Loss: 294.71893310546875
Epoch: 400| Training Loss: 103.07572937011719 | Test Loss: 294.3329772949219
Epoch: 500| Training Loss: 101.62520599365234 | Test Loss: 301.1510009765625
Epoch: 600| Training Loss: 99.005859375 | Test Loss: 297.81982421875
Epoch: 700| Training Loss: 100.19567108154297 | Test Loss: 311.33673095703125
Epoch: 800| Training Loss: 97.1650619506836 | Test Loss: 296.72943115234375
Epoch: 900| Training Loss: 100.62789916992188 | Test Loss: 304.8073425292969


In [603]:
from torchmetrics.regression import MeanAbsoluteError

mae_fn = MeanAbsoluteError()

mae_fn(y_pred.squeeze(),y_train)

tensor(94.2270, grad_fn=<SqueezeBackward0>)

In [604]:
y_pred.squeeze().detach

<function Tensor.detach>

In [605]:
pred = y_pred.squeeze().detach().numpy()
true_vals = y_train.detach().numpy()

In [606]:
r2_score(true_vals,pred)

0.7517985257778224

In [607]:
pd.DataFrame({'actual': true_vals, 'preds': pred}).head(10)

Unnamed: 0,actual,preds
0,1203.0,1315.053467
1,834.0,772.705933
2,626.0,753.686829
3,666.0,644.155151
4,738.0,673.749268
5,428.0,479.737427
6,1142.0,1064.13916
7,442.0,491.987335
8,815.0,796.46228
9,935.0,825.244324


In [608]:
#Take weights of neural network and use it to predict the output of all the 17.5 scores!

model_V0.eval()

with torch.inference_mode():
    _17_5_score_predictions = model_V0(X_torch)
    
_17_5_score_predictions = _17_5_score_predictions.squeeze().detach().numpy()
combined_df['17.5_score_preds'] = _17_5_score_predictions

In [609]:
combined_df

Unnamed: 0,Age,Height_inches,Weight_lbs,Back Squat,Clean_and_Jerk,Snatch,Deadlift,Fight Gone Bad,17.1_time,17.1_reps,17.2_score,17.3_time,17.3_reps,17.4_score,17.5_score,17.5_score_preds
0,34,77.0,231.0,335.0,265.0,210.0,415.0,393.0,1200.0,218,92.0,1440.0,56,178.0,920,1032.866699
1,28,72.0,240.0,420.0,295.0,225.0,455.0,286.0,1200.0,221,78.0,1440.0,88,173.0,968,963.541260
2,37,74.0,198.0,297.0,231.0,169.0,352.0,398.0,878.0,225,128.0,1440.0,67,186.0,794,786.723633
3,42,65.0,145.0,330.0,250.0,198.0,375.0,314.0,1200.0,178,89.0,1440.0,80,176.0,947,959.874146
4,36,70.0,200.0,400.0,285.0,215.0,485.0,438.0,842.0,225,182.0,1440.0,104,219.0,563,579.916443
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5440,48,72.0,207.0,220.0,176.0,132.0,220.0,230.0,1200.0,186,78.0,1440.0,31,160.0,1624,1520.761597
5441,31,68.0,163.0,282.0,187.0,132.0,359.0,282.0,860.0,225,123.0,1440.0,80,201.0,536,731.314453
5442,36,74.0,212.0,405.0,310.0,250.0,465.0,419.0,804.0,225,167.0,1440.0,130,217.0,565,560.563416
5443,28,67.0,155.0,315.0,220.0,165.0,405.0,305.0,995.0,225,136.0,1440.0,80,197.0,688,715.445496
