# Predictions for 2025 based on the most successful model

In [5]:
import utils
import pandas as pd
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [6]:
# Read the full dataset from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='full_data.csv')

In [7]:
# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [None]:
# load model
model_path = "models/model_3.pth"
# 14 is presumably the input feature count (the prediction cell feeds 14 feature
# columns) -- confirm against utils.RegressionModel.
model = utils.RegressionModel(14).to(device)
# map_location remaps the stored tensors onto the current device, so a checkpoint
# saved on another device (e.g. CUDA) still loads when running on CPU/MPS.
state_dict = torch.load(model_path, map_location=device)
# This notebook only runs inference: eval mode makes layers such as dropout or
# batch norm (if the model has any) behave deterministically.
model.eval()
# Kept as the last expression so the cell still displays the key-matching report.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [9]:
# generate predictions from model
# Only the 2024 season rows are used as inputs for the 2025 predictions.
data_2024 = data.loc[data['Season'] == 2024]

feature_columns = ['Age', 'wRC+', 'EV', 'LA', 'Barrel%', 'HardHit%', 
                   'BABIP', 'LD%', 'GB%', 'FB%', 'IFFB%', 'Pull%', 'Cent%', 'Oppo%']

# scale the features
# NOTE(review): the scaler is fit on the 2024 rows only. If the model was trained on
# features standardized with different statistics, the scaler fitted at training time
# should be reused here instead -- confirm against the training notebook.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data_2024[feature_columns])

# Run every 2024 row through the model as a single batch.
# - eval() + no_grad(): inference only, no dropout noise and no autograd bookkeeping.
# - .to(device): the input must live on the same device as the model, otherwise the
#   forward pass fails whenever device is "cuda" or "mps".
model.eval()
pred_inputs = torch.tensor(scaled_features, dtype=torch.float32).to(device)
with torch.no_grad():
    pred_outputs = model(pred_inputs)

# Flatten to one value per row and widen to float64 on the CPU, matching the Python
# floats that per-row `.item()` calls would have produced.
pred_values = pred_outputs.reshape(-1).cpu().numpy().astype('float64')
assert len(pred_values) == len(data_2024), "expected exactly one prediction per 2024 row"

# Build the result table in one shot instead of growing it row by row with pd.concat.
preds_2025 = pd.DataFrame({
    'Name': data_2024['Name'].to_numpy(),
    '2024 Plate Appearances': data_2024['PA'].to_numpy(),
    '2024 wRC+': data_2024['wRC+'].to_numpy(),
    '2025 Predicted wRC+': pred_values,
})

In [10]:
# Write the predictions table to disk as both CSV and Excel, without the row index.
preds_2025.to_csv(path_or_buf='preds_2025.csv', index=False)
preds_2025.to_excel(excel_writer='preds_2025.xlsx', index=False)