In [None]:
from typing import Optional

import pandas as pd
import torch

from model import RegressionRunner


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run configuration: the league/season whose CSV exports are loaded, and the
# numeric fielding positions to model, in processing order (9 down to 2).
season = 2016
league = 'BZ'
positions = list(range(9, 1, -1))

In [3]:
# Model input columns keyed by numeric fielding position (9=RF, 8=CF, 7=LF,
# 6=SS, 5=3B, 4=2B, 3=1B, 2=C). Every position shares Age/WT/SPE; outfield
# positions add the OF* delta columns, infield positions the IF* delta
# columns, and catcher the C* delta columns. The last entry of each list is
# the column named after the position itself.
feature_values = {
    9: ['Age', 'WT', 'SPE', 'OFRngDelta', 'OFArmDelta', 'OFERRDelta', 'RF'],
    8: ['Age', 'WT', 'SPE', 'OFRngDelta', 'OFArmDelta', 'OFERRDelta', 'CF'],
    # Position 7 is left field, so it uses the 'LF' column; it previously
    # listed 'RF', duplicating position 9's column.
    # NOTE(review): assumes the player-data CSV has an 'LF' column — confirm.
    7: ['Age', 'WT', 'SPE', 'OFRngDelta', 'OFArmDelta', 'OFERRDelta', 'LF'],
    6: ['Age', 'WT', 'SPE', 'IFRngDelta', 'IFArmDelta', 'IFTDPDelta', 'IFErrDelta', 'SS'],
    5: ['Age', 'WT', 'SPE', 'IFRngDelta', 'IFArmDelta', 'IFTDPDelta', 'IFErrDelta', '3B'],
    4: ['Age', 'WT', 'SPE', 'IFRngDelta', 'IFArmDelta', 'IFTDPDelta', 'IFErrDelta', '2B'],
    3: ['Age', 'WT', 'SPE', 'IFRngDelta', 'IFArmDelta', 'IFTDPDelta', 'IFErrDelta', '1B'],
    2: ['Age', 'WT', 'SPE', 'CBLKDelta', 'CARMDelta', 'CFRMDelta', 'C'],
}

# Regression target column(s) keyed by numeric fielding position (9 down to 2).
# Every position currently predicts the same single column; each key gets its
# own list object.
targets = {pos: ['runsPAdjSeason'] for pos in range(9, 1, -1)}

In [4]:
def convert_fielding_position(pos: str) -> Optional[int]:
    """Translate a position abbreviation into its numeric position code.

    Args:
        pos: Position abbreviation, one of 'P', 'C', '1B', '2B', '3B', 'SS',
            'LF', 'CF', 'RF' or 'DH'.

    Returns:
        The numeric code ('P'=1, 'C'=2, '1B'=3, '2B'=4, '3B'=5, 'SS'=6,
        'LF'=7, 'CF'=8, 'RF'=9, 'DH'=0), or ``None`` when ``pos`` is not one
        of the known abbreviations.
    """
    position_codes = {
        'P': 1,
        'C': 2,
        '1B': 3,
        '2B': 4,
        '3B': 5,
        'SS': 6,
        'LF': 7,
        'CF': 8,
        'RF': 9,
        'DH': 0,
    }
    try:
        return position_codes.get(pos)
    except TypeError:
        # Unhashable input (e.g. a list) can never match an abbreviation;
        # return None for it, as for any other unknown value.
        return None

In [5]:
# Read the player-data and fielding CSV exports for the configured league/season.
player_data = pd.read_csv(f"./files/{league}/{season}/output/{league}-{season}-player-data.csv")
fielding = pd.read_csv(f"./files/{league}/{season}/output/{league}-{season}-fielding.csv")


def _parse_weight(raw_weight):
    """Return the first three characters of ``raw_weight`` parsed as an int."""
    return int(raw_weight[:3])


player_data['WT'] = player_data['WT'].apply(_parse_weight)

# POS conversion through convert_fielding_position is currently disabled:
#fielding['POS'] = fielding['POS'].apply(convert_fielding_position)

# Swap '-' placeholder cells for 0 in both frames. The option opts in to
# pandas' future behaviour of not silently downcasting after replace.
with pd.option_context('future.no_silent_downcasting', True):
    fielding = fielding.replace('-', 0)
    player_data = player_data.replace('-', 0)



In [None]:
# Pick the torch device once: the GPU when CUDA is usable, otherwise the CPU,
# and report which one was chosen.
cuda_ready = torch.cuda.is_available()
device = torch.device('cuda' if cuda_ready else 'cpu')
print('CUDA is available! Using GPU.' if cuda_ready else 'CUDA is not available. Using CPU.')

In [None]:
# Join fielding rows to player data on the shared 'ID' column, then train one
# regression model per fielding position and print its losses and feature
# importances.

# Rows are kept only when 'IPClean' strictly exceeds this cutoff.
IP_CLEAN_CUTOFF = 299
# Upper bound on training epochs passed to the runner and echoed in the log line.
MAX_EPOCHS = 50000

master_data = fielding.merge(player_data, on='ID')
master_data = master_data[master_data['IPClean'] > IP_CLEAN_CUTOFF]

# NOTE(review): the `device` chosen in the CUDA cell is not passed to
# RegressionRunner here — confirm the runner selects its own device.
# NOTE(review): the recorded run shows a training loss near 1e-5 next to a
# test MSE in the hundreds — confirm both are reported on the same scale.
for position in positions:

    print(f"\r\nTraining model for position {position}")

    # Keep only this position's rows and the columns the model consumes.
    model_columns = feature_values[position] + targets[position]
    filtered_master_data_rows = master_data[master_data['POS'] == position]
    if filtered_master_data_rows.empty:
        # Nothing to fit; skip rather than hand the runner an empty frame.
        print(f"No rows for position {position}; skipping.")
        continue

    # The subset holds exactly the feature and target columns, so one cast
    # replaces the former per-column astype calls (each of which copied the
    # whole frame).
    fielding_training_data_all = filtered_master_data_rows[model_columns].astype('float')

    fielding_runner = RegressionRunner(feature_values[position])
    fielding_runner.load_data(fielding_training_data_all, targets[position][0])

    epoch_count, loss = fielding_runner.train(MAX_EPOCHS)
    print(f"Epoch [{epoch_count}/{MAX_EPOCHS}], Loss: {loss:.7f}")

    test_loss = fielding_runner.evaluate()
    print(f"Test Mean Squared Error: {test_loss:.7f}")

    feature_importance = fielding_runner.feature_importance(feature_values[position])
    print(feature_importance)



Training model for position 9
Epoch [1786/50000], Loss: 0.0000098
Epoch [1785/50000], Loss: 0.0000098
Test Mean Squared Error: 608.8502197
   mean_abs_shap  stdev_abs_shap        name
3       8.167488        7.047666  OFRngDelta
6       7.225529        5.803659          RF
1       5.616851        5.302406          WT
2       5.591594        4.761744         SPE
0       5.229318        4.580750         Age
4       4.829389        6.054887  OFArmDelta
5       4.288818        4.006687  OFERRDelta

Training model for position 8
Epoch [1755/50000], Loss: 0.0000098
Epoch [1754/50000], Loss: 0.0000098
Test Mean Squared Error: 214.8876343
   mean_abs_shap  stdev_abs_shap        name
2       6.179523        5.366213         SPE
6       3.985461        3.994831          CF
0       3.497755        3.544687         Age
4       3.375534        3.018150  OFArmDelta
1       2.995937        2.354654          WT
5       2.568849        2.247817  OFERRDelta
3       2.540349        3.467644  OFRngDelta
