## Import Data

In [1]:
# helper.py
from helper import *

# pick the compute device: the GPU when CUDA is available, otherwise the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [2]:
# read the cleaned feature table from disk
df = pd.read_csv('./data/clean/features.csv')

# discard every aggregated column, i.e. any column whose name contains 'mean' or 'std'
agg_cols = [name for name in df.columns if any(tag in name for tag in ('mean', 'std'))]
df = df.drop(columns=agg_cols)

# sanity check: (rows, columns) of the frame and the number of distinct players
df.shape, len(set(df['player']))

((1099, 66), 237)

# Format Data

In [5]:
# columns that identify a row or hold the label; every other column is a model feature
non_feat_cols = ['player', 'team_name', 'year', 'target']
feat_cols = [c for c in df.columns if c not in non_feat_cols]

# split df into train/val by player (not by row) to avoid leakage:
# no player contributes rows to both splits
unique_players = df['player'].unique()
train_players, val_players = train_test_split(unique_players, test_size=0.2, random_state=SEED)
df_train = df[df['player'].isin(train_players)].copy()
df_val = df[df['player'].isin(val_players)].copy()

# scale features: fit on the training rows only, then apply the same transform to val
scaler = StandardScaler()
df_train[feat_cols] = scaler.fit_transform(df_train[feat_cols])
df_val[feat_cols] = scaler.transform(df_val[feat_cols])

# recombine so create_sequences sees scaled data
df_scaled = pd.concat([df_train, df_val], ignore_index=True)

# build sequences; `players` is iterated below as one player label per sequence
# (kept under its original name; the unique-player array now lives in `unique_players`)
X_pad, y, lengths, mask, players = create_sequences(df_scaled)

# split train/val: constant-time set lookups and explicit boolean masks instead of
# scanning the player arrays once per sequence
train_player_set, val_player_set = set(train_players), set(val_players)
train_mask = torch.tensor([p in train_player_set for p in players], dtype=torch.bool)
val_mask = torch.tensor([p in val_player_set for p in players], dtype=torch.bool)
idx = torch.arange(len(y))
train_idx, val_idx = idx[train_mask], idx[val_mask]

# fail loudly if any sequence was silently dropped from (or duplicated across) the splits
assert len(train_idx) + len(val_idx) == len(y), 'train/val split does not cover every sequence'

X_train, X_val = X_pad[train_idx], X_pad[val_idx]
len_train, len_val = lengths[train_idx], lengths[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

# show shapes
print(f'X_train: {X_train.shape}, y_train: {y_train.shape}, len_train: {len_train.shape}')
print(f'X_val: {X_val.shape}, y_val: {y_val.shape}, len_val: {len_val.shape}')

X_train: torch.Size([921, 18, 62]), y_train: torch.Size([921]), len_train: torch.Size([921])
X_val: torch.Size([178, 18, 62]), y_val: torch.Size([178]), len_val: torch.Size([178])


# Train RNN

In [10]:
# wrap the padded sequences, their lengths and the targets into datasets
train_ds, val_ds = SeqDataset(X_train, len_train, y_train), SeqDataset(X_val, len_val, y_val)

# batch both splits; only the training loader shuffles
train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=32)

# model: input width is the last dimension of X_pad, hidden width is twice that
n_features = X_pad.shape[2]
model = RNNRegressor(in_dim=n_features, hidden_dim=2 * n_features)
model = model.to(device)

# optimiser and loss
opt = torch.optim.AdamW(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()

# run the training loop (defined in helper.py)
train_rnn(model, opt, criterion, train_dl, val_dl, train_ds, device)

Epoch 0 | Train MSE: 4267.791 | Val RMSE: 58.822, R²: -14.303
Epoch 10 | Train MSE: 994.716 | Val RMSE: 28.055, R²: -2.481
Epoch 20 | Train MSE: 303.410 | Val RMSE: 17.063, R²: -0.288
Epoch 30 | Train MSE: 225.602 | Val RMSE: 16.173, R²: -0.157
Epoch 40 | Train MSE: 216.789 | Val RMSE: 16.137, R²: -0.152
Epoch 50 | Train MSE: 213.149 | Val RMSE: 15.999, R²: -0.132
Epoch 60 | Train MSE: 210.608 | Val RMSE: 15.947, R²: -0.125
Epoch 70 | Train MSE: 208.560 | Val RMSE: 15.913, R²: -0.120
Epoch 80 | Train MSE: 207.085 | Val RMSE: 15.929, R²: -0.122
Epoch 90 | Train MSE: 203.041 | Val RMSE: 15.811, R²: -0.106
Epoch 100 | Train MSE: 166.837 | Val RMSE: 14.985, R²: 0.007
Epoch 110 | Train MSE: 141.330 | Val RMSE: 15.151, R²: -0.015
Epoch 120 | Train MSE: 125.794 | Val RMSE: 15.532, R²: -0.067
Epoch 130 | Train MSE: 113.405 | Val RMSE: 15.801, R²: -0.104
Epoch 140 | Train MSE: 100.038 | Val RMSE: 16.141, R²: -0.152
Epoch 150 | Train MSE: 85.349 | Val RMSE: 16.720, R²: -0.236
Epoch 160 | Train M

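- The best validation RMSE logged here (about 14.99 at epoch 100) is noticeably worse than the [XGBoost model](./preds.ipynb), which achieved a validation RMSE of 12.22. After epoch 100 the RNN also overfits: train MSE keeps falling while validation RMSE rises again.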