In [30]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_profiling as pp
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
import pickle

I pulled the last ten bronze tournaments worth of batting data. This pool of players will serve as the base for the scaling that will take place. Each of the five main batting ratings (babip, gap, power, eye, and strikeouts) will be scaled to the average and standard deviation of this pool of players.

In [9]:
# Load the pool of batters seen in the last ten bronze tournaments, keyed by card_id.
# The export writes missing values as the literal text "\N" (see the
# batting_ratings_overall_hp column), so map that marker to NaN instead of
# leaving it as a string inside an otherwise numeric column.
df = pd.read_csv('pt_bz_batter_scaling.csv', index_col='card_id', na_values=['\\N'])
df

Unnamed: 0_level_0,card_title,batting_ratings_overall_contact,batting_ratings_overall_gap,batting_ratings_overall_power,batting_ratings_overall_eye,batting_ratings_overall_strikeouts,batting_ratings_overall_babip,batting_ratings_overall_hp,batting_ratings_vsr_contact,batting_ratings_vsr_gap,...,batting_ratings_vsr_strikeouts,batting_ratings_vsr_babip,batting_ratings_vsl_contact,batting_ratings_vsl_gap,batting_ratings_vsl_power,batting_ratings_vsl_eye,batting_ratings_vsl_strikeouts,batting_ratings_vsl_babip,batting_ratings_misc_bunt,batting_ratings_misc_bunt_for_hit
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22991,BUILD-A-LEGEND LF Tim Raines New York Yankees ...,146,160,67,138,142,149,\N,144,168,...,142,146,148,136,69,134,146,135,9,116
21335,Snapshot LF Alan Wiggins SD 1983,128,110,11,130,154,120,\N,128,112,...,156,119,126,104,11,110,146,119,114,176
21555,Snapshot LF George Case WSA 1942,154,114,67,91,156,144,\N,154,118,...,156,143,150,106,57,99,154,136,27,166
26163,MLB 2021 Live 2B David Fletcher LAA 2021,132,138,45,91,162,115,\N,130,136,...,160,114,138,142,49,93,166,118,158,87
21297,Snapshot SS Jose Vizcaino NYM 1995,140,120,23,71,124,151,\N,138,126,...,124,149,144,102,23,69,120,145,97,99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25919,MLB 2021 Live 1B Jose Martinez NYM 2021,128,116,118,106,95,144,\N,114,110,...,91,129,166,134,142,122,108,177,124,65
26299,MLB 2021 Live SS Amed Rosario CLE 2021,124,140,93,43,112,131,\N,118,138,...,108,127,142,148,99,47,122,134,126,104
21555,Snapshot LF George Case WSA 1942,154,114,67,91,156,144,\N,154,118,...,156,143,150,106,57,99,154,136,27,166
26163,MLB 2021 Live 2B David Fletcher LAA 2021,132,138,45,91,162,115,\N,130,136,...,160,114,138,142,49,93,166,118,158,87


There's a total of 4,206 players in this set. The most commonly used players are David Fletcher, Pete Rose, and Tim Raines. Jeff Mathis is only seen once in these ten tournaments. Pour one out for the legend.

In [10]:
# (rows, columns) of the scaling pool loaded above.
df.shape

(4206, 22)

In [11]:
# How many rows each card contributes to the pool, most frequent first.
df['card_title'].value_counts()

MLB 2021 Live 2B David Fletcher LAA 2021              183
BUILD-A-LEGEND 3B Pete Rose Cincinnati Reds 1978      174
BUILD-A-LEGEND LF Tim Raines New York Yankees 1997    140
Snapshot LF George Case WSA 1942                      136
Snapshot CF Joe Birmingham CLE 1911                   103
                                                     ... 
MLB 2021 Live C Jeff Mathis ATL 2021                    1
MLB 2021 Live 1B Travis Shaw MIL 2021                   1
MLB 2021 Live C Luis Campusano SD 2021                  1
Snapshot CF Bobby Del Greco KCA 1961                    1
One Hit Wonder 3B Shane Halter DET 2001                 1
Name: card_title, Length: 327, dtype: int64

There's a lot going on in this main cell, but you only have to focus on the first two lines and the last one (ignoring the one that just prints the df for observation).

The first line instantiates our standard scaler that will be used throughout this process.

The second line fits (this is the only really important part) and transforms the five batting ratings to the normal scale, subtracting the mean for the value, and dividing it by the standard deviation of the dataset.

The next couple of lines just create a data frame and add some labels and the index to it.

The last line converts the scaled ratings to the 1-200 scale by multiplying it by 33.667 (the standard deviation used by OOTP in this process) and adding 100 (the mean) back to the value.

The results are then printed out. You can see Jose Vizcaino has average gap power in this pool of players, while Amed Rosario has average home run power. 

If I had no interest in observing the data, I would only call the fit method on the standard scaler and skip the rest.

In [18]:
# The five vs-RHP ratings that are standardised against the tournament pool.
rating_cols = [
    'batting_ratings_vsr_babip',
    'batting_ratings_vsr_gap',
    'batting_ratings_vsr_power',
    'batting_ratings_vsr_eye',
    'batting_ratings_vsr_strikeouts',
]

# Fitting is the important part: the fitted `scaler` is reused on the roster later.
scaler = StandardScaler()
pool_z = scaler.fit_transform(df[rating_cols])

# Put the z-scores back on the 1-200 rating scale (SD 33.667, mean 100)
# and attach the card titles so the table is readable.
scaled = pd.DataFrame(pool_z * 33.667 + 100, index=df.index, columns=rating_cols)
scaled['card_title'] = df['card_title']

scaled

Unnamed: 0_level_0,batting_ratings_vsr_babip,batting_ratings_vsr_gap,batting_ratings_vsr_power,batting_ratings_vsr_eye,batting_ratings_vsr_strikeouts,card_title
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
22991,138.492852,169.261851,83.782123,148.389441,110.601876,BUILD-A-LEGEND LF Tim Raines New York Yankees ...
21335,106.010888,80.310600,36.959945,143.896618,123.586357,Snapshot LF Alan Wiggins SD 1983
21555,134.883745,89.841092,87.126565,88.859541,123.586357,Snapshot LF George Case WSA 1942
26163,99.995710,118.432565,65.387696,93.352364,127.296209,MLB 2021 Live 2B David Fletcher LAA 2021
21297,142.101959,102.548413,46.993269,70.888251,93.907543,Snapshot SS Jose Vizcaino NYM 1995
...,...,...,...,...,...,...
25919,118.041245,77.133770,119.734868,102.338009,63.301266,MLB 2021 Live 1B Jose Martinez NYM 2021
26299,115.635174,121.609395,103.848771,39.438492,79.068136,MLB 2021 Live SS Amed Rosario CLE 2021
21555,134.883745,89.841092,87.126565,88.859541,123.586357,Snapshot LF George Case WSA 1942
26163,99.995710,118.432565,65.387696,93.352364,127.296209,MLB 2021 Live 2B David Fletcher LAA 2021


With the scaling process completed, I pulled all cards from the DB that are eligible for bronze tournaments, ignoring pitchers, along with the ratings used in the model. There are some placeholders here for gb/fb hitter types that aren't included in the database but that the models themselves use. I think these are superfluous, but I have just thrown them in in the past and haven't gotten rid of them, even though their effect is very, very minor.

In [22]:
# Bronze-eligible batters plus the model inputs, keyed by card_id.
roster_csv = 'pt_bz_batter_roster.csv'
roster = pd.read_csv(roster_csv, index_col='card_id')
roster

Unnamed: 0_level_0,card_title,batting_ratings_vsr_babip,batting_ratings_vsr_gap,batting_ratings_vsr_power,batting_ratings_vsr_eye,batting_ratings_vsr_strikeouts,running_ratings_speed,batting_ratings_misc_gb_hitter_type_0,batting_ratings_misc_gb_hitter_type_1,batting_ratings_misc_gb_hitter_type_2,batting_ratings_misc_gb_hitter_type_3,batting_ratings_misc_fb_hitter_type_0,batting_ratings_misc_fb_hitter_type_1,batting_ratings_misc_fb_hitter_type_2,bats_1,bats_2,bats_3
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
19836,Unsung Heroes CF Billy Zitzmann CIN 1927,100,108,13,102,99,130,0,0,0,0,0,0,0,1,0,0
19837,Unsung Heroes 3B Eddie Zimmerman BRO 1911,41,79,118,102,134,168,0,0,0,0,0,0,0,1,0,0
19839,Unsung Heroes LF Joe Zdeb KC 1977,107,75,51,85,108,120,0,0,0,0,0,0,0,1,0,0
19846,Unsung Heroes RF Ab Wright CLE 1935,64,124,79,81,85,87,0,0,0,0,0,0,0,1,0,0
19847,Unsung Heroes SS Chuck Wortman CHC 1916,50,67,126,91,114,146,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26841,MLB 2021 Live 2B Taylor Walls TB 2021,94,114,75,102,89,178,0,0,0,0,0,0,0,0,0,1
26843,MLB 2021 Live C Sebastian Rivero KC 2021,57,93,47,49,71,45,0,0,0,0,0,0,0,1,0,0
26845,MLB 2021 Live LF Troy Stokes Jr. PIT 2021,71,99,114,112,55,138,0,0,0,0,0,0,0,1,0,0
26846,MLB 2021 Live SS Nick Gordon MIN 2021,112,124,53,77,97,128,0,0,0,0,0,0,0,0,1,0


This next code block resembles the one from the scaling process before. Instead of calling the fit_transform method, only the transform method is called. This will scale the ratings in this pool to what's actually used. 

In [26]:
# Standardise the roster with the scaler fitted on the tournament pool
# (transform only, no refit), keeping card_id as the index.
rating_cols = [
    'batting_ratings_vsr_babip',
    'batting_ratings_vsr_gap',
    'batting_ratings_vsr_power',
    'batting_ratings_vsr_eye',
    'batting_ratings_vsr_strikeouts',
]

rst_scaled = pd.DataFrame(
    scaler.transform(roster[rating_cols]),
    index=roster.index,
    columns=rating_cols,
)

rst_scaled

Unnamed: 0_level_0,batting_ratings_vsr_babip,batting_ratings_vsr_gap,batting_ratings_vsr_power,batting_ratings_vsr_eye,batting_ratings_vsr_strikeouts
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
19836,-0.500395,-0.773549,-1.822789,0.169532,-0.869666
19837,-2.608664,-2.141774,0.784856,0.169532,0.094519
19839,-0.250261,-2.330495,-0.879070,-0.397626,-0.621732
19846,-1.786796,-0.018666,-0.183698,-0.531075,-1.255339
19847,-2.287064,-2.707936,0.983534,-0.197453,-0.456444
...,...,...,...,...,...
26841,-0.714795,-0.490467,-0.283037,0.169532,-1.145147
26843,-2.036930,-1.481251,-0.978409,-1.598666,-1.641013
26845,-1.536663,-1.198170,0.685517,0.503154,-2.081783
26846,-0.071594,-0.018666,-0.829400,-0.664524,-0.924762


This block just converts the scaled ratings back to the 1-200 scale and adds in the card title. Eddie Zimmerman, for example, would have an average strikeout rating if he were used in bronze tournaments.

In [27]:
# Put the roster's standardised ratings on the 1-200 scale (SD 33.667, mean 100).
# The values are recomputed from `roster` with the already-fitted `scaler`
# instead of rescaling the existing columns in place, so re-running this cell
# gives the same numbers rather than applying "* 33.667 + 100" a second time.
rating_cols = [
    'batting_ratings_vsr_babip',
    'batting_ratings_vsr_gap',
    'batting_ratings_vsr_power',
    'batting_ratings_vsr_eye',
    'batting_ratings_vsr_strikeouts',
]
rst_scaled[rating_cols] = scaler.transform(roster[rating_cols]) * 33.667 + 100

rst_scaled['card_title'] = roster['card_title']

rst_scaled

Unnamed: 0_level_0,batting_ratings_vsr_babip,batting_ratings_vsr_gap,batting_ratings_vsr_power,batting_ratings_vsr_eye,batting_ratings_vsr_strikeouts,card_title
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19836,83.153210,73.956940,38.632165,105.707626,70.720970,Unsung Heroes CF Billy Zitzmann CIN 1927
19837,12.174104,27.892899,126.423750,105.707626,103.182173,Unsung Heroes 3B Eddie Zimmerman BRO 1911
19839,91.574460,21.539238,70.404358,86.613130,79.068136,Unsung Heroes LF Joe Zdeb KC 1977
19846,39.843925,99.371583,93.815447,82.120307,57.736488,Unsung Heroes RF Ab Wright CLE 1935
19847,23.001425,8.831917,133.112633,93.352364,84.632914,Unsung Heroes SS Chuck Wortman CHC 1916
...,...,...,...,...,...,...
26841,75.934996,83.487431,90.471006,105.707626,61.446340,MLB 2021 Live 2B Taylor Walls TB 2021
26843,31.422675,50.130712,67.059917,46.177726,44.752007,MLB 2021 Live C Sebastian Rivero KC 2021
26845,48.265175,59.661203,123.079309,116.939682,29.912600,MLB 2021 Live LF Troy Stokes Jr. PIT 2021
26846,97.589638,99.371583,72.076579,77.627484,68.866044,MLB 2021 Live SS Nick Gordon MIN 2021


Just adding in the other columns that were not scaled. Again, the gb/fb hitter types are all zero, but the models need them.

In [29]:
# Model inputs that are passed through unscaled: speed, the gb/fb hitter-type
# placeholders, and the bats_* indicator columns.
cols = [
    'running_ratings_speed',
    'batting_ratings_misc_gb_hitter_type_0',
    'batting_ratings_misc_gb_hitter_type_1',
    'batting_ratings_misc_gb_hitter_type_2',
    'batting_ratings_misc_gb_hitter_type_3',
    'batting_ratings_misc_fb_hitter_type_0',
    'batting_ratings_misc_fb_hitter_type_1',
    'batting_ratings_misc_fb_hitter_type_2',
    'bats_1',
    'bats_2',
    'bats_3',
]

# Copy them over from the roster in one step; columns are appended in list order.
rst_scaled = rst_scaled.assign(**{name: roster[name] for name in cols})

rst_scaled

Unnamed: 0_level_0,batting_ratings_vsr_babip,batting_ratings_vsr_gap,batting_ratings_vsr_power,batting_ratings_vsr_eye,batting_ratings_vsr_strikeouts,card_title,running_ratings_speed,batting_ratings_misc_gb_hitter_type_0,batting_ratings_misc_gb_hitter_type_1,batting_ratings_misc_gb_hitter_type_2,batting_ratings_misc_gb_hitter_type_3,batting_ratings_misc_fb_hitter_type_0,batting_ratings_misc_fb_hitter_type_1,batting_ratings_misc_fb_hitter_type_2,bats_1,bats_2,bats_3
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
19836,83.153210,73.956940,38.632165,105.707626,70.720970,Unsung Heroes CF Billy Zitzmann CIN 1927,130,0,0,0,0,0,0,0,1,0,0
19837,12.174104,27.892899,126.423750,105.707626,103.182173,Unsung Heroes 3B Eddie Zimmerman BRO 1911,168,0,0,0,0,0,0,0,1,0,0
19839,91.574460,21.539238,70.404358,86.613130,79.068136,Unsung Heroes LF Joe Zdeb KC 1977,120,0,0,0,0,0,0,0,1,0,0
19846,39.843925,99.371583,93.815447,82.120307,57.736488,Unsung Heroes RF Ab Wright CLE 1935,87,0,0,0,0,0,0,0,1,0,0
19847,23.001425,8.831917,133.112633,93.352364,84.632914,Unsung Heroes SS Chuck Wortman CHC 1916,146,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26841,75.934996,83.487431,90.471006,105.707626,61.446340,MLB 2021 Live 2B Taylor Walls TB 2021,178,0,0,0,0,0,0,0,0,0,1
26843,31.422675,50.130712,67.059917,46.177726,44.752007,MLB 2021 Live C Sebastian Rivero KC 2021,45,0,0,0,0,0,0,0,1,0,0
26845,48.265175,59.661203,123.079309,116.939682,29.912600,MLB 2021 Live LF Troy Stokes Jr. PIT 2021,138,0,0,0,0,0,0,0,1,0,0
26846,97.589638,99.371583,72.076579,77.627484,68.866044,MLB 2021 Live SS Nick Gordon MIN 2021,128,0,0,0,0,0,0,0,0,1,0


These next three blocks all go together. I'm just loading the high and low walk rate models built from the base game. High/Low designation is set at 100 based on the rating that has the highest correlation with the stat being modeled. Because I'm lazy, I throw all the variables I'm using at every model and just let Ridge minimize the ones that aren't meaningful.

The results of the walk rate model are shown in that third block.

In [34]:
# NOTE(review): absolute, machine-specific path; consider making it configurable.
directory = 'd:/ootp/batting/models/'
low_bb_file = directory + 'bb_vsr_low_rate'
high_bb_file = directory + 'bb_vsr_high_rate'

# Unpickling runs arbitrary code from the file, so only load model files you
# produced yourself. Context managers make sure the handles are closed.
with open(low_bb_file, 'rb') as fh:
    low_bb = pickle.load(fh)
with open(high_bb_file, 'rb') as fh:
    high_bb = pickle.load(fh)

# Feature columns passed to every model, in this order.
variables = ['batting_ratings_vsr_babip', 'batting_ratings_vsr_gap',
       'batting_ratings_vsr_power', 'batting_ratings_vsr_eye',
       'batting_ratings_vsr_strikeouts', 'running_ratings_speed',
       'batting_ratings_misc_gb_hitter_type_0',
       'batting_ratings_misc_gb_hitter_type_1',
       'batting_ratings_misc_gb_hitter_type_2',
       'batting_ratings_misc_gb_hitter_type_3',
       'batting_ratings_misc_fb_hitter_type_0',
       'batting_ratings_misc_fb_hitter_type_1',
       'batting_ratings_misc_fb_hitter_type_2', 'bats_1', 'bats_2', 'bats_3']

In [35]:
# Walk rate is predicted by the "low" model when the scaled eye rating is
# below 100 and by the "high" model otherwise.
rating = 'batting_ratings_vsr_eye'
eye_low = rst_scaled[rating] < 100
eye_high = rst_scaled[rating] >= 100

# Initialise as float: the predictions are floats, and writing them into an
# integer column triggers pandas' incompatible-dtype warning.
rst_scaled['bb_pred'] = 0.0

# Skip an empty partition; calling predict with zero rows raises.
if eye_low.any():
    rst_scaled.loc[eye_low, 'bb_pred'] = low_bb.predict(rst_scaled.loc[eye_low, variables])
if eye_high.any():
    rst_scaled.loc[eye_high, 'bb_pred'] = high_bb.predict(rst_scaled.loc[eye_high, variables])

In [37]:
# Predicted walk rate per card, highest first.
bb_ranked = rst_scaled.sort_values(by='bb_pred', ascending=False)
bb_ranked[['card_title', 'bb_pred']]

Unnamed: 0_level_0,card_title,bb_pred
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1
22691,Snapshot 1B Lu Blue DET 1926,0.161931
22718,Snapshot LF Bob Bescher CIN 1913,0.155175
26235,MLB 2021 Live 1B Carlos Santana KC 2021,0.150900
26425,MLB 2021 Live CF Aaron Hicks NYY 2021,0.147596
20705,Snapshot LF Spike Shannon NYG 1907,0.147148
...,...,...
26628,MLB 2021 Live 2B Hanser Alberto KC 2021,0.025413
21850,Snapshot RF Mark Quinn KC 2001,0.025173
22758,Snapshot C John Bateman HOU 1968,0.023684
22091,Snapshot RF Tom Long STL 1916,0.023684


Rather than repeat that process for the other five model types, these next couple of code blocks set up the loops to do them automatically.

In [39]:
# Remove the one-off walk-rate column before the generic loop recomputes it.
# errors='ignore' keeps the cell re-runnable once the column is already gone.
rst_scaled = rst_scaled.drop(columns='bb_pred', errors='ignore')

def batting_models(directory="d:/ootp/batting/models/"):
    """Load every pickled batting-rate model into a nested dict.

    Files are expected to be named ``{target}_{hand}_{tier}_rate`` inside
    ``directory``, for each target in bb/k/hr/s/d/t, each hand in vsr/vsl and
    each tier in low/high.

    Parameters
    ----------
    directory : str, optional
        Folder holding the pickled models. Defaults to the original
        hard-coded location, so existing zero-argument calls are unchanged.

    Returns
    -------
    dict
        ``models[hand]["{target}_{tier}"]`` -> unpickled model object.

    Raises
    ------
    OSError
        If one of the expected model files cannot be opened.

    Notes
    -----
    Unpickling executes arbitrary code from the file; only point this at
    model files you trust.
    """
    import os  # local import so the function is self-contained

    targets = ["bb", "k", "hr", "s", "d", "t"]
    tiers = ["low", "high"]
    hands = ["vsr", "vsl"]

    models = {}
    for hand in hands:
        models[hand] = {}
        for target in targets:
            for tier in tiers:
                path = os.path.join(
                    directory, "{}_{}_{}_rate".format(target, hand, tier)
                )
                # Context manager closes the handle even if unpickling fails.
                with open(path, "rb") as fh:
                    models[hand]["{}_{}".format(target, tier)] = pickle.load(fh)

    return models


# Load the low/high models for every target and both hands.
models = batting_models()
# Rate stats to predict; the same abbreviations key the model dict and name the output columns.
targets = ["bb", "k", "hr", "s", "d", "t"]
# Rating that decides low vs. high model for each target. Positions line up
# with `targets`, so keep the two lists in the same order. The last entry is
# already a full column name and is handled by a special case for 't' in the loop.
ratings = ["eye", "strikeouts", "power", "babip", "gap", "running_ratings_speed"]

In [41]:
# Predict each rate stat, choosing the "low" or "high" model per row from the
# rating most associated with that stat (threshold 100 on the scaled rating).
for hand in models.keys():
    if hand == 'vsl':
        # `variables` only lists the vsr rating columns, so the vs-LHP models are skipped.
        continue

    for target in targets:
        rate = target + '_' + hand
        if target != 't':
            rating = 'batting_ratings_{}_{}'.format(hand, ratings[targets.index(target)])
        else:
            rating = 'running_ratings_speed'

        # Start from an all-NaN float column so it always exists, even when a
        # partition is empty.
        rst_scaled[rate] = np.nan

        # Compute each mask once and index with it directly. Going through
        # `.index` labels would misalign if card_ids were ever duplicated.
        below = rst_scaled[rating] < 100
        at_or_above = rst_scaled[rating] >= 100

        for tier, rows in (('low', below), ('high', at_or_above)):
            if not rows.any():
                # predict() raises when given zero rows.
                continue
            model = models[hand]['{}_{}'.format(target, tier)]
            rst_scaled.loc[rows, rate] = model.predict(rst_scaled.loc[rows, variables])

This block is just multiplying the percentages by 600 PA. Singles, Doubles, and Triples were modeled on the basis that Three True Outcomes are determined as a pool first, and then BIP events are determined. 

In [63]:
# Turn the per-PA rates into a 600-PA stat line.
pa = 600

# Three true outcomes are taken straight from the plate-appearance rates.
rst_scaled['bb'] = np.round(rst_scaled['bb_vsr'] * pa, 0)
rst_scaled['k'] = np.round(rst_scaled['k_vsr'] * pa, 0)
rst_scaled['hr'] = np.round(rst_scaled['hr_vsr'] * pa, 0)
rst_scaled['tto'] = rst_scaled['bb'] + rst_scaled['k'] + rst_scaled['hr']

# Singles, doubles and triples were modeled per ball in play, i.e. on the
# plate appearances left after the TTO pool is removed.
balls_in_play = pa - rst_scaled['tto']
rst_scaled['s'] = np.round(balls_in_play * rst_scaled['s_vsr'], 0)
rst_scaled['d'] = np.round(balls_in_play * rst_scaled['d_vsr'], 0)
rst_scaled['t'] = np.round(balls_in_play * rst_scaled['t_vsr'], 0)

# Shared pieces of the slash line; at-bats here are PA minus walks.
hits = rst_scaled['s'] + rst_scaled['d'] + rst_scaled['t'] + rst_scaled['hr']
at_bats = pa - rst_scaled['bb']
total_bases = (
    rst_scaled['s']
    + rst_scaled['d'] * 2
    + rst_scaled['t'] * 3
    + rst_scaled['hr'] * 4
)

rst_scaled['avg'] = hits / at_bats
rst_scaled['obp'] = (hits + rst_scaled['bb']) / pa
rst_scaled['slg'] = total_bases / at_bats

# Linear-weight wOBA with the per-event coefficients used in this notebook.
rst_scaled['woba'] = (
    rst_scaled['bb'] * .714
    + rst_scaled['s'] * .918
    + rst_scaled['d'] * 1.314
    + rst_scaled['t'] * 1.669
    + rst_scaled['hr'] * 2.166
) / pa

The woba coefficients used in the last block come from tournament data since the live reset. The top 50 batters vs RHP based on this process are shown below.

In [65]:
# Fifty best projected wOBA values, highest first.
woba_ranked = rst_scaled.sort_values(by='woba', ascending=False)
woba_ranked[['card_title', 'woba']].head(n=50)

Unnamed: 0_level_0,card_title,woba
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1
25925,MLB 2021 Live 1B Dominic Smith NYM 2021,0.362222
22991,BUILD-A-LEGEND LF Tim Raines New York Yankees ...,0.360583
23395,BUILD-A-LEGEND RF Harold Baines Chicago White ...,0.358522
22968,BUILD-A-LEGEND 3B Pete Rose Cincinnati Reds 1978,0.354037
26191,MLB 2021 Live 2B Luis Arraez MIN 2021,0.353512
21555,Snapshot LF George Case WSA 1942,0.342717
24954,One Hit Wonder 1B Chicken Hawks PHI 1925,0.342572
25894,MLB 2021 Live 1B Alec Bohm PHI 2021,0.340232
25653,MLB 2021 Live 1B Pavin Smith ARI 2021,0.340093
25974,MLB 2021 Live LF Corey Dickerson MIA 2021,0.340043


In [66]:
# Write the full projection table, including the card_id index, to CSV.
rst_scaled.to_csv('bz_batters_scaled.csv')