# Purpose
This notebook serves as an example of how the modeling with representations function ought to be implemented.

In [46]:
#Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import torch
%matplotlib inline

In [47]:
data = pd.read_csv('2017_mens_data_melted.csv')
data.sample(10, random_state=42)

Unnamed: 0,Userid,Name,Age,Height_inches,Weight_lbs,Back Squat,Clean_and_Jerk,Snatch,Deadlift,Fight Gone Bad,Workout,Score
14379,239451,Nick Poulin,30,68.0,185.0,385.0,305.0,225.0,485.0,420.0,17.5,641.0
13282,116663,Alexander Mercieca,38,67.0,230.0,560.0,355.0,285.0,675.0,358.0,17.5,718.0
15361,436473,Han SooJung,27,71.0,200.0,385.0,270.0,225.0,385.0,387.0,17.5,571.0
1683,66841,Brock Harling,31,68.0,195.0,435.0,285.0,230.0,450.0,330.0,17.2,119.0
10038,464993,Daniel Coia,38,69.0,172.0,360.0,260.0,185.0,415.0,297.0,17.4,190.0
4257,349434,Chase Comrie,27,73.0,175.0,340.0,260.0,210.0,355.0,316.0,17.2,129.0
1720,70896,Adam Downes,36,70.0,176.0,286.0,198.0,154.0,374.0,282.0,17.2,86.0
357,7209,David Neyens,33,71.0,195.0,395.0,305.0,235.0,475.0,422.0,17.2,156.0
10911,1787,James Bannon,35,74.0,212.0,375.0,290.0,205.0,500.0,299.0,17.5,950.0
7685,108233,Christopher W Taylor,45,70.0,180.0,280.0,220.0,175.0,360.0,346.0,17.4,186.0


## Convert workout to representation 

In [96]:
# convert workout to representation HERE
def convert_workout_to_representation(df):
    """
    This is a placeholder function which just keeps the last character of the workout string, 
    ofcourse, we would want to develop this further by adding the movements, and embeddings
    """
    workout_col = df['Workout'].astype(str)
    df['workout_num'] = workout_col.apply(lambda x: int(x[-1]))
    df = df.drop('Workout', axis=1)
    return df

data_transformed = convert_workout_to_representation(data)
data_transformed.head()

Unnamed: 0,Userid,Name,Age,Height_inches,Weight_lbs,Back Squat,Clean_and_Jerk,Snatch,Deadlift,Fight Gone Bad,Score,workout_num
0,86,Justin Bergh,34,77.0,231.0,335.0,265.0,210.0,415.0,393.0,92.0,2
1,1620,Everette Sweeting,28,72.0,240.0,420.0,295.0,225.0,455.0,286.0,78.0,2
2,1624,Troy Gordon,37,74.0,198.0,297.0,231.0,169.0,352.0,398.0,128.0,2
3,1633,Adam Eidson,42,65.0,145.0,330.0,250.0,198.0,375.0,314.0,89.0,2
4,1636,Bart Boyd,36,70.0,200.0,400.0,285.0,215.0,485.0,438.0,182.0,2


## Run Model
(this has been mainly copied over from Neel's notebook, but we can proceed with any modeling pipeline)

In [105]:
X = data_transformed.drop(['Score','Name','Userid'], axis = 1)
y = data_transformed[['Score']]


np.random.seed(42)
train_idx = np.random.choice(X.index, round(len(X) * 0.8), replace=False)
test_idx = X.index[~X.index.isin(train_idx)]

In [110]:
#scale the X features using sklearn

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_absolute_error

# Assuming 'combined_df' is your DataFrame and '17.5_score' is the target column
features = data.drop('Score', axis=1)
target = data['Score']

scaler = StandardScaler()
X = scaler.fit_transform(X)


In [111]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt



In [112]:
X_torch = torch.from_numpy(X).type(torch.float32)
#X_torch = torch.from_numpy(X.values).type(torch.float32)
y_torch = torch.from_numpy(y.values).type(torch.float32)
X_train, X_test, y_train, y_test = X_torch[train_idx], X_torch[test_idx], y_torch[train_idx], y_torch[test_idx]





In [113]:
#Create model and instantiate

class WOD_model(nn.Module):
    def __init__(self,input_features, output_features, hidden_units):
        super().__init__()
        
        #Create the layers
        self.model = nn.Sequential(
        nn.Linear(in_features = input_features, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        #nn.Dropout(p=0.2),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = hidden_units),
        nn.GELU(),
        nn.Linear(in_features = hidden_units, out_features = output_features),
        )
        
    def forward(self,x):
        return self.model(x)
        
#Instantiate model
model_V0 = WOD_model(input_features = X.shape[1], output_features = 1, hidden_units = 16)

In [114]:
model_V0.state_dict
loss_fn = nn.L1Loss()
optimizer = torch.optim.Adam(params = model_V0.parameters(), lr = 0.03)


In [115]:
# add data to DataLoader
dataset = TensorDataset(X_train, y_train)
batch_size = 32  # You can adjust this batch size as per your requirement
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

torch.manual_seed(42)

# Training loop
num_epochs = 1  # You can adjust the number of epochs as per your requirement
for epoch in range(num_epochs):
    total_loss = 0
    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model_V0(batch_X)
        
        # Calculate the loss
        loss = loss_fn(outputs, batch_y)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    # Print average loss for the epoch
    test_outputs = model_V0(X_test)
    test_loss = loss_fn(test_outputs, y_test)
    print(f"Epoch {epoch+1}, Average Train Loss: {total_loss/len(train_loader)}, Test Loss: {test_loss.item()}")

    
    

Epoch 1, Average Train Loss: 96.01246963445016, Test Loss: 72.05290222167969


In [116]:
from torchmetrics.regression import MeanAbsoluteError

mae_fn = MeanAbsoluteError()

mae_fn(test_outputs,y_test)

tensor(72.0529, grad_fn=<SqueezeBackward0>)

In [118]:
actual = test_outputs.detach().numpy().flatten()
preds = y_test.detach().numpy().flatten()
pd.DataFrame({'workout': data.loc[test_idx]['Workout'],'actual': actual, 'preds': preds}).sample(10)

Unnamed: 0,workout,actual,preds
6150,17.4,163.600922,184.0
15255,17.5,801.113525,915.0
1798,17.2,120.350525,178.0
14416,17.5,676.884766,605.0
7939,17.4,159.905121,187.0
7980,17.4,161.812653,198.0
1750,17.2,98.187531,107.0
14878,17.5,1041.109009,1693.0
7083,17.4,181.788055,180.0
2371,17.2,111.257507,159.0
