In [1]:
import pandas as pd
import torch
from model import RegressionRunner


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# League code and season year of the export to analyse; both are interpolated
# into the CSV paths read later in the notebook.
league, season = 'BZ', 2016

In [3]:
# Input columns fed to the regression model (order matters: it is the order in
# which the columns are selected and reported in the importance table).
feature_values = [
    'Age',
    'WT',
    'SPE',
    'SR',
    'STE',
    'RUN',
    'BABIP',
    'GAP',
    'K\'s',
    'BFH',
    'BBT',
    'GBT',
    'FBT',
]

# Column(s) the model is trained to predict; only the first entry is used as the target.
targets = ['BSR600']

In [4]:
def convert_bbt(bbt: str) -> int:
    """Translate a BBT category label into its integer code.

    Mapping: 'Groundball' -> 0, 'Line Drive' -> 1, 'Normal' -> 2, 'Flyball' -> 3.
    Any label not listed above yields None.
    """
    codes_by_label = {
        'Groundball': 0,
        'Line Drive': 1,
        'Normal': 2,
        'Flyball': 3,
    }
    return codes_by_label.get(bbt)
    
def convert_gbt(gbt: str) -> int:
    """Translate a GBT category label into its integer code.

    Mapping: 'Spray' -> 0, 'Normal' -> 1, 'Pull' -> 2, 'Ex. Pull' -> 3.
    Any label not listed above yields None.
    """
    codes_by_label = {
        'Spray': 0,
        'Normal': 1,
        'Pull': 2,
        'Ex. Pull': 3,
    }
    return codes_by_label.get(gbt)
    
def convert_fbt(fbt: str) -> int:
    """Translate an FBT category label into its integer code.

    Mapping: 'Spray' -> 0, 'Normal' -> 1, 'Pull' -> 2.
    Any label not listed above yields None.
    """
    codes_by_label = {
        'Spray': 0,
        'Normal': 1,
        'Pull': 2,
    }
    return codes_by_label.get(fbt)

In [5]:
# Load the hitting stats and the player attribute table for the configured
# league and season.
hitting = pd.read_csv(f"./files/{league}/{season}/output/{league}-{season}-hitting.csv")
player_data = pd.read_csv(f"./files/{league}/{season}/output/{league}-{season}-player-data.csv")

# Turn the textual player attributes into numbers:
#   WT  -> integer parsed from the first three characters of the string
#   BBT / GBT / FBT -> integer category codes (unrecognised labels become None)
column_converters = {
    'WT': lambda raw_weight: int(raw_weight[:3]),
    'BBT': convert_bbt,
    'GBT': convert_gbt,
    'FBT': convert_fbt,
}
for column_name, converter in column_converters.items():
    player_data[column_name] = player_data[column_name].apply(converter)

# Treat the '-' placeholder as 0 in both tables. The option opts in to the
# future pandas behaviour so replace() does not silently downcast dtypes.
with pd.option_context('future.no_silent_downcasting', True):
    hitting = hitting.replace('-', 0)
    player_data = player_data.replace('-', 0)



In [6]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU,
# and report the choice.
cuda_ready = torch.cuda.is_available()
device = torch.device('cuda' if cuda_ready else 'cpu')
print('CUDA is available! Using GPU.' if cuda_ready else 'CUDA is not available. Using CPU.')

CUDA is available! Using GPU.


In [7]:
# Tunable settings for this cell, named once instead of being repeated inline.
PA_THRESHOLD = 99    # keep only rows whose 'PA' value is strictly greater than this
MAX_EPOCHS = 50000   # epoch budget handed to train() and echoed in the summary line

# Combine hitting stats with player attributes (inner join on the shared 'ID'
# column) and drop rows at or below the PA threshold.
master_data = hitting.merge(player_data, on='ID')
master_data = master_data[master_data['PA'] > PA_THRESHOLD]

print("\r\nTraining model for BSR")

# Keep only the model inputs plus the target, and cast every one of those
# columns to float in a single astype() call (one copy instead of one copy per
# column).
model_columns = feature_values + targets
filtered_data = master_data[model_columns].astype(
    {column_name: 'float' for column_name in model_columns}
)

# NOTE: the variable keeps its historical name `fielding_runner` so any other
# cell that refers to it keeps working; here it trains the BSR model.
fielding_runner = RegressionRunner(feature_values)
fielding_runner.load_data(filtered_data, targets[0])

# train() returns the epoch count it reports and the corresponding loss.
epoch_count, loss = fielding_runner.train(MAX_EPOCHS)
print(f"Epoch [{epoch_count}/{MAX_EPOCHS}], Loss: {loss:.7f}")

test_loss = fielding_runner.evaluate()
print(f"Test Mean Squared Error: {test_loss:.7f}")

# Per-feature importance table as produced by the runner.
feature_importance = fielding_runner.feature_importance(feature_values)
print(feature_importance)



Training model for BSR
Epoch [2000/50000], Loss: 0.0029662
Epoch [4000/50000], Loss: 0.0013627
Epoch [6000/50000], Loss: 0.0006320
Epoch [8000/50000], Loss: 0.0002935
Epoch [10000/50000], Loss: 0.0106598
Epoch [12000/50000], Loss: 0.0000666
Epoch [14000/50000], Loss: 0.0001038
Epoch [16000/50000], Loss: 0.0050068
Epoch [16845/50000], Loss: 0.0000099
Epoch [16844/50000], Loss: 0.0000099
Test Mean Squared Error: 17.8942986
    mean_abs_shap  stdev_abs_shap   name
3        1.481759        1.711276     SR
4        1.270876        1.351517    STE
2        1.015392        1.229790    SPE
5        0.919515        1.063753    RUN
9        0.825524        1.674642    BFH
1        0.722756        0.846392     WT
6        0.654008        0.714157  BABIP
10       0.633877        0.860772    BBT
0        0.591892        0.888410    Age
12       0.587185        0.689642    FBT
7        0.586639        0.638196    GAP
8        0.583021        0.698160    K's
11       0.567049        0.581298    GBT
