# M01. Impute Inputs
- Normalizes model inputs
- Normalizes Steamer projections
- Uses Steamer projections to impute model inputs

In [1]:
%run "U1. Imports.ipynb"
%run "U2. Utilities.ipynb"
%run "U3. Classes.ipynb"

# Root folder of the local MLB database files.
# NOTE(review): absolute, machine-specific path - anyone else running this notebook must edit it.
baseball_path = r'C:\Users\james\Documents\MLB\Database'

# The SQLite file sits directly inside baseball_path, so build its location from that root.
db_path = baseball_path + r'\MLBDB.db'
# SQLAlchemy engine passed as `con` to every pd.read_sql_query call in this notebook.
engine = create_engine('sqlite:///' + db_path)

In [2]:
%run "A03. Steamer.ipynb"

### Batters

##### Dataset

In [3]:
# Rank each batter's rows within a game by atBatIndex (latest first) and keep only the
# latest one (rn = 1), provided its pa_b_long value lies between 100 and 300 inclusive.
# The query has no placeholders, so a plain (non-f) string is used.
sql_query = '''
WITH ranked_data AS (
  SELECT *,
         ROW_NUMBER() OVER (PARTITION BY gamePk, batter ORDER BY atBatIndex DESC) AS rn
  FROM "Dataset"
)
SELECT *
FROM ranked_data
WHERE pa_b_long >= 100 AND pa_b_long <= 300 AND rn = 1
'''

hitters_df = pd.read_sql_query(sql=sql_query, con=engine)

In [4]:
# hitters_df = pa_share(hitters_df)

In [5]:
# Fit a StandardScaler on the batter input columns, then overwrite those columns with
# their standardized values.
# NOTE: this mutates hitters_df in place, so re-running this cell without reloading
# hitters_df would fit (and save) a scaler on already-standardized data.
batter_stats_scaler = StandardScaler().fit(hitters_df[batter_inputs])
hitters_df[batter_inputs] = batter_stats_scaler.transform(hitters_df[batter_inputs])

# Persist the fitted scaler next to the other model artifacts
batter_scaler_file = os.path.join(model_path, "batter_stats_scaler_20231027.pkl")
with open(batter_scaler_file, "wb") as handle:
    pickle.dump(batter_stats_scaler, handle)

##### Steamer

In [6]:
# Load every Steamer hitter projection row with at least 40 projected PA.
# The query has no placeholders, so a plain (non-f) string is used.
sql_query = '''
  SELECT *
  FROM "Steamer Hitters"
  WHERE "PA" >= 40
'''

steamer_hitters_df = pd.read_sql_query(sql=sql_query, con=engine)

In [7]:
# Clean the raw Steamer hitter rows.
# NOTE(review): clean_steamer_hitters is not defined in this notebook - presumably it comes
# from "A03. Steamer.ipynb" (run above). Later cells read the 'mlbamid', 'steamerid', 'date'
# and batter_stats_fg columns from its result.
steamer_hitters_df2 = clean_steamer_hitters(steamer_hitters_df)

In [8]:
# Fit a separate StandardScaler on the Steamer hitter projections and overwrite those
# columns with their standardized values.
batter_stats_fg_scaler = StandardScaler().fit(steamer_hitters_df2[batter_stats_fg])
steamer_hitters_df2[batter_stats_fg] = batter_stats_fg_scaler.transform(steamer_hitters_df2[batter_stats_fg])

# Report the fitted mean and scale (standard deviation) of each projected stat
fitted_batter_fg_params = zip(batter_stats_fg, batter_stats_fg_scaler.mean_, batter_stats_fg_scaler.scale_)
for feature, mean, std_dev in fitted_batter_fg_params:
    print(f"{feature}: Mean={mean}, Standard Deviation={std_dev}")

# Persist the fitted scaler next to the other model artifacts
batter_fg_scaler_file = os.path.join(model_path, "batter_stats_fg_scaler_20231027.pkl")
with open(batter_fg_scaler_file, "wb") as handle:
    pickle.dump(batter_stats_fg_scaler, handle)

b1_rate: Mean=0.14560680719848218, Standard Deviation=0.02101432527684469
b2_rate: Mean=0.04433930835555082, Standard Deviation=0.005882444424108711
b3_rate: Mean=0.004242885478589471, Standard Deviation=0.002659578223036206
hr_rate: Mean=0.02954431229904, Standard Deviation=0.01214527563444318
bb_rate: Mean=0.0831329717618007, Standard Deviation=0.024649980286147732
hbp_rate: Mean=0.010111658728719164, Standard Deviation=0.00457293236270856
so_rate: Mean=0.21552653853839623, Standard Deviation=0.053391589354464745
woba: Mean=0.3159756808799684, Standard Deviation=0.026467956799010314
slg: Mean=0.4089152701148167, Standard Deviation=0.04933823761529574
obp: Mean=0.3185978956899952, Standard Deviation=0.025322359384553246


##### Merge

In [9]:
# Tag every row of hitters_df with the Steamer projection date to merge on: the most
# recent projection date that is on or before the row's game date.
steamer_dates = list(steamer_hitters_df2['date'].unique())


def find_steamer_date(date, dates=None):
    """Return the largest value in `dates` that is <= `date`, or None if there is none.

    `dates` defaults to the notebook-level `steamer_dates` list, so existing
    one-argument calls keep working unchanged.
    """
    candidates = steamer_dates if dates is None else dates
    return max((d for d in candidates if d <= date), default=None)


# Many rows share the same game date, so resolve each distinct date once instead of
# rescanning steamer_dates for every single row.
steamer_date_by_game_date = {
    game_date: find_steamer_date(game_date, steamer_dates)
    for game_date in hitters_df["date"].drop_duplicates()
}

# dict.get yields None for a value with no entry (e.g. a missing date), which is also what
# find_steamer_date returns when no projection date compares <= the game date.
hitters_df["steamer_date"] = hitters_df["date"].apply(steamer_date_by_game_date.get)

In [10]:
# Steamer columns to carry into the merge: ids / join keys plus the projected stats
batter_stats_fg_plus = ['mlbamid', 'steamerid', 'date'] + batter_stats_fg

# Attach to each batter-game row the Steamer projection for that batter as of steamer_date.
# The inner join discards rows that have no matching projection.
hitters_merged_df = pd.merge(
    hitters_df,
    steamer_hitters_df2[batter_stats_fg_plus],
    left_on=['batter', 'steamer_date'],
    right_on=['mlbamid', 'date'],
    how='inner',
)

# Only keep one observation per player per game
# Consider only keeping one observation per player per week/Steamer weekly projection
hitters_merged_df = hitters_merged_df.drop_duplicates(['gamePk', 'batter'], keep='last')

# Only keep rows that are complete in both the imputation targets (batter_inputs) and the
# Steamer features (batter_stats_fg). The pitcher merge filters both sides as well; without
# the feature filter a missing projection would feed NaN into model training.
hitters_merged_df = hitters_merged_df.dropna(subset=list(batter_inputs) + list(batter_stats_fg))

##### Impute

In [11]:
# Imputation features: the Steamer projections plus the hand columns b_L and p_L
batter_stats_fg_imp = batter_stats_fg + ['b_L', 'p_L']

# Model inputs (Steamer-based features) and outputs (the batter input columns)
features = hitters_merged_df[batter_stats_fg_imp]
target = hitters_merged_df[batter_inputs]

# Feed-forward network: three 30-unit ReLU hidden layers followed by one linear output
# unit per target column
batter_layer_stack = [
    keras.layers.Dense(30, activation='relu', input_shape=(len(batter_stats_fg_imp),)),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(len(batter_inputs)),
]
batter_imputations_model = keras.Sequential(batter_layer_stack)

# Mean-squared-error regression, optimized with Adam
batter_imputations_model.compile(optimizer='adam', loss='mean_squared_error')

# Train for 10 passes over the data in mini-batches of 25 rows
batter_imputations_model.fit(x=features, y=target, batch_size=25, epochs=10)

# Persist the trained model.
# NOTE(review): whether a Keras model survives a pickle round-trip depends on the installed
# Keras/TensorFlow version; model.save() is the documented route. Left as pickle because
# whatever loads this .pkl is outside this notebook - confirm before changing.
batter_model_file = os.path.join(model_path, "batter_imputations_model_20231210.pkl")
with open(batter_model_file, "wb") as handle:
    pickle.dump(batter_imputations_model, handle)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# Use the trained model to make predictions
# hitters_merged_df[batter_inputs] = batter_imputations_model.predict(hitters_merged_df[batter_stats_fg_imp])

In [13]:
# Summary statistics of the batter input columns for the merged rows
hitters_merged_df.loc[:, batter_inputs].describe()

Unnamed: 0,b1_b,b2_b,b3_b,hr_b,bb_b,hbp_b,so_b,fo_b,go_b,lo_b,po_b,estimated_woba_using_speedangle_b,to_left_b,to_middle_b,to_right_b,hard_hit_b,barrel_b,iso_b,slg_b,obp_b,woba_b,totalDistance_b,launchSpeed_b,b1_b_long,b2_b_long,b3_b_long,hr_b_long,bb_b_long,hbp_b_long,so_b_long,fo_b_long,go_b_long,lo_b_long,po_b_long,estimated_woba_using_speedangle_b_long,to_left_b_long,to_middle_b_long,to_right_b_long,hard_hit_b_long,barrel_b_long,iso_b_long,slg_b_long,obp_b_long,woba_b_long,totalDistance_b_long,launchSpeed_b_long
count,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0,310711.0
mean,0.020312,0.018635,0.000574,0.024509,0.01024,-0.000734,-0.037948,0.013189,0.00274,0.004601,-0.00122,0.046985,0.009891,0.015635,0.004461,0.043438,0.030427,0.028794,0.037768,0.038677,0.042021,0.026513,0.033896,0.026219,0.028652,0.004311,0.033994,0.019326,0.002962,-0.03968,0.020859,-0.004601,0.008604,-0.000353,0.062232,0.011675,0.020382,0.002894,0.053485,0.037958,0.040646,-0.001902,0.060318,0.064163,0.038509,0.036631
std,1.002434,1.004055,0.997414,1.008102,1.002338,0.997072,0.979668,0.998141,1.000224,1.002368,1.000437,0.991917,1.001255,0.997731,1.00464,0.989798,1.006219,1.004221,0.998234,0.987912,0.990574,0.983841,0.99106,0.996842,0.992476,0.995019,0.997402,1.002251,0.98884,0.975497,0.985972,0.995617,0.997965,0.995981,0.969594,1.000176,0.992768,1.00325,0.975194,0.999739,0.990567,0.980681,0.958311,0.958986,0.976515,0.991614
min,-2.554899,-1.472767,-0.392557,-1.104615,-1.681716,-0.650781,-2.55833,-2.325858,-3.069959,-1.656526,-1.357603,-3.37894,-2.465852,-3.374716,-2.467148,-3.085095,-1.249199,-1.588595,-2.753916,-3.529497,-3.241559,-5.923257,-4.602418,-3.227818,-3.056802,-0.790304,-1.813835,-2.484943,-1.073749,-3.334703,-3.248408,-3.387857,-3.123122,-2.168346,-3.763509,-2.863332,-4.201375,-2.632602,-3.747417,-1.796799,-2.687501,-1.629845,-4.299676,-4.051912,-5.475495,-3.771441
25%,-0.736281,-0.791864,-0.392557,-0.569994,-0.845133,-0.650781,-0.728683,-0.801976,-0.628113,-0.534964,-0.78875,-0.648113,-0.70632,-0.627147,-0.813828,-0.598384,-0.72321,-0.731945,-0.677758,-0.648878,-0.651065,-0.597927,-0.622128,-0.67161,-0.662284,-0.790304,-0.705229,-0.699828,-0.735294,-0.740304,-0.652366,-0.68267,-0.650227,-0.742042,-0.599356,-0.797652,-0.635763,-0.800491,-0.598981,-0.695936,-0.665475,-0.56072,-0.590748,-0.586882,-0.590575,-0.613902
50%,-0.047741,-0.130835,-0.392557,-0.273698,-0.095304,-0.650781,-0.093022,-0.040034,-0.08548,0.025816,-0.219898,-0.016738,0.047765,-0.077633,-0.105263,-0.101042,-0.197222,-0.121793,-0.063157,0.005411,-0.01622,0.098194,0.036481,-0.00604,-0.02309,-0.269589,-0.059572,-0.090118,-0.205915,-0.07183,-0.015156,-0.072295,-0.12032,-0.023607,0.0354,-0.018775,0.007202,-0.086731,0.067172,-0.017988,-0.030575,-0.24571,0.026891,0.023353,0.067011,0.036179
75%,0.66915,0.588552,-0.392557,0.557218,0.655223,0.560366,0.601845,0.721907,0.728468,0.586597,0.348955,0.664741,0.80185,0.746638,0.603303,0.644972,0.854756,0.624562,0.637436,0.688884,0.670724,0.689898,0.69509,0.689654,0.652992,0.405458,0.665435,0.615746,0.383714,0.599916,0.622055,0.646911,0.586221,0.589175,0.692158,0.760101,0.654627,0.777294,0.70267,0.74436,0.679942,0.183889,0.677477,0.673699,0.677626,0.68626
max,5.166924,5.898309,15.788805,9.343356,7.893588,11.460688,4.696036,5.293555,5.069528,6.194404,6.037481,6.640207,4.823636,5.417506,5.090885,4.623709,6.640633,8.071549,7.167019,5.035028,5.629337,3.613608,4.245852,5.930685,5.618585,10.67273,6.331733,6.487123,11.787553,6.007598,4.603257,4.410648,5.456311,4.837552,4.469427,4.208332,5.663658,3.483726,3.788063,5.191388,5.238936,10.907341,5.160044,4.924573,3.58979,3.843799


### Pitchers

##### Dataset

In [14]:
# Select dataset
# Rank each pitcher's rows within a game by atBatIndex (latest first) and keep only the
# latest one (rn = 1), provided its pa_p_long value lies between 50 and 300 inclusive.
# The query has no placeholders, so a plain (non-f) string is used.
sql_query = '''
WITH ranked_data AS (
  SELECT *,
         ROW_NUMBER() OVER (PARTITION BY gamePk, pitcher ORDER BY atBatIndex DESC) AS rn
  FROM "Dataset"
)
SELECT *
FROM ranked_data
WHERE pa_p_long >= 50 AND pa_p_long <= 300 AND rn = 1
'''

pitchers_df = pd.read_sql_query(sql=sql_query, con=engine)

In [15]:
# pitchers_df = pa_share(pitchers_df)

In [16]:
# Fit a StandardScaler on the pitcher input columns, then overwrite those columns with
# their standardized values.
# NOTE: this mutates pitchers_df in place, so re-running this cell without reloading
# pitchers_df would fit (and save) a scaler on already-standardized data.
pitcher_stats_scaler = StandardScaler().fit(pitchers_df[pitcher_inputs])
pitchers_df[pitcher_inputs] = pitcher_stats_scaler.transform(pitchers_df[pitcher_inputs])

# Persist the fitted scaler next to the other model artifacts
pitcher_scaler_file = os.path.join(model_path, "pitcher_stats_scaler_20231027.pkl")
with open(pitcher_scaler_file, "wb") as handle:
    pickle.dump(pitcher_stats_scaler, handle)

##### Steamer

In [17]:
# Load the Steamer pitcher projection rows that pass the "PA" >= 40 filter.
# NOTE(review): SQLite falls back to treating a double-quoted name as a string literal when
# no column of that name exists, which would make this filter always true. Confirm that
# "Steamer Pitchers" really has a PA column.
# The query has no placeholders, so a plain (non-f) string is used.
sql_query = '''
  SELECT *
  FROM "Steamer Pitchers"
  WHERE "PA" >= 40
'''

steamer_pitchers_df = pd.read_sql_query(sql=sql_query, con=engine)

In [18]:
# Clean the raw Steamer pitcher rows.
# NOTE(review): clean_steamer_pitchers is not defined in this notebook - presumably it comes
# from "A03. Steamer.ipynb" (run above). Later cells read the 'mlbamid', 'steamerid', 'date',
# pitcher_stats_fg and pitcher_stats_fg2 columns from its result.
steamer_pitchers_df2 = clean_steamer_pitchers(steamer_pitchers_df)

In [19]:
# Drop Steamer pitcher rows that are missing any of the pitcher_stats_fg columns.
# Reassigning (rather than inplace=True) leaves the object returned by
# clean_steamer_pitchers untouched and gives later column assignments a fresh frame.
steamer_pitchers_df2 = steamer_pitchers_df2.dropna(subset=pitcher_stats_fg)

In [20]:
# Preview the first five cleaned Steamer pitcher rows
steamer_pitchers_df2.head(n=5)

Unnamed: 0,date,firstname,lastname,mlbamid,steamerid,H9,HR9,K9,BB9,GBrate,FBrate,LDrate,SIERA,reliability,IP_start
0,20140617,Craig,Kimbrel,518886.0,6655,5.589724,0.4786,13.6858,2.7747,0.455,0.335,0.210571,1.92,0.58064,0.0
1,20140617,Aroldis,Chapman,547973.0,10233,5.004519,0.6532,15.0427,3.4555,0.41,0.388,0.202224,1.85,0.55961,0.0
2,20140617,Koji,Uehara,493157.0,9227,6.925841,0.9029,10.8841,1.5271,0.355,0.433,0.21286,2.34,0.55498,0.0
3,20140617,Kenley,Jansen,445276.0,3096,6.112634,0.7299,12.4814,2.7269,0.391,0.409,0.199716,2.27,0.59125,0.0
4,20140617,Greg,Holland,518813.0,7196,6.376248,0.5989,11.9605,2.8349,0.437,0.358,0.205506,2.41,0.58857,0.0


In [21]:
# Fit a separate StandardScaler on the Steamer pitcher projections and overwrite those
# columns with their standardized values.
pitcher_stats_fg_scaler = StandardScaler().fit(steamer_pitchers_df2[pitcher_stats_fg])
steamer_pitchers_df2[pitcher_stats_fg] = pitcher_stats_fg_scaler.transform(steamer_pitchers_df2[pitcher_stats_fg])

# Report the fitted mean and scale (standard deviation) of each projected stat
fitted_pitcher_fg_params = zip(pitcher_stats_fg, pitcher_stats_fg_scaler.mean_, pitcher_stats_fg_scaler.scale_)
for feature, mean, std_dev in fitted_pitcher_fg_params:
    print(f"{feature}: Mean={mean}, Standard Deviation={std_dev}")

# Persist the fitted scaler next to the other model artifacts
pitcher_fg_scaler_file = os.path.join(model_path, "pitcher_stats_fg_scaler_20231027.pkl")
with open(pitcher_fg_scaler_file, "wb") as handle:
    pickle.dump(pitcher_stats_fg_scaler, handle)

H9: Mean=9.53007780379424, Standard Deviation=0.8384434226772676
HR9: Mean=1.3459117962282532, Standard Deviation=0.25895465843158266
K9: Mean=6.582873102164268, Standard Deviation=1.5810154251855848
BB9: Mean=4.96198949327622, Standard Deviation=1.7885069322073819
GBrate: Mean=0.4348809790797142, Standard Deviation=0.03576685285883674
FBrate: Mean=0.3600437092186263, Standard Deviation=0.03595874009647045
LDrate: Mean=0.205069528956993, Standard Deviation=0.003925116647791453
SIERA: Mean=4.970680824764909, Standard Deviation=0.9931263329927901


##### Merge

In [22]:
# Tag every row of pitchers_df with the Steamer projection date to merge on: the most
# recent projection date that is on or before the row's game date.
steamer_dates = list(steamer_pitchers_df2['date'].unique())


def find_steamer_date(date, dates=None):
    """Return the largest value in `dates` that is <= `date`, or None if there is none.

    `dates` defaults to the notebook-level `steamer_dates` list, so existing
    one-argument calls keep working unchanged.
    """
    candidates = steamer_dates if dates is None else dates
    return max((d for d in candidates if d <= date), default=None)


# Many rows share the same game date, so resolve each distinct date once instead of
# rescanning steamer_dates for every single row.
steamer_date_by_game_date = {
    game_date: find_steamer_date(game_date, steamer_dates)
    for game_date in pitchers_df["date"].drop_duplicates()
}

# dict.get yields None for a value with no entry (e.g. a missing date), which is also what
# find_steamer_date returns when no projection date compares <= the game date.
pitchers_df["steamer_date"] = pitchers_df["date"].apply(steamer_date_by_game_date.get)

In [23]:
# Steamer columns to carry into the merge: ids / join keys plus the pitcher_stats_fg2 stats.
# NOTE(review): the scaler and the imputation model use pitcher_stats_fg while this cell uses
# pitcher_stats_fg2 - confirm pitcher_stats_fg2 contains every pitcher_stats_fg column.
pitcher_stats_fg_plus = ['mlbamid', 'steamerid', 'date'] + pitcher_stats_fg2

# Attach to each pitcher-game row the Steamer projection for that pitcher as of steamer_date.
# The inner join discards rows that have no matching projection.
pitchers_merged_df = pd.merge(
    pitchers_df,
    steamer_pitchers_df2[pitcher_stats_fg_plus],
    left_on=['pitcher', 'steamer_date'],
    right_on=['mlbamid', 'date'],
    how='inner',
)

# Only keep one observation per player per game
# Consider only keeping one observation per player per week/Steamer weekly projection
pitchers_merged_df = pitchers_merged_df.drop_duplicates(['gamePk', 'pitcher'], keep='last')

# Only keep rows complete in both the pitcher inputs and the Steamer stats
pitchers_merged_df = pitchers_merged_df.dropna(subset=pitcher_inputs)
pitchers_merged_df = pitchers_merged_df.dropna(subset=pitcher_stats_fg2)

##### Impute

In [24]:
# Imputation features: the Steamer projections plus the hand columns b_L and p_L
pitcher_stats_fg_imp = pitcher_stats_fg + ['b_L', 'p_L']

# Model inputs (Steamer-based features) and outputs (the pitcher input columns)
features = pitchers_merged_df[pitcher_stats_fg_imp]
target = pitchers_merged_df[pitcher_inputs]

# Feed-forward network: three 30-unit ReLU hidden layers followed by one linear output
# unit per target column
pitcher_layer_stack = [
    keras.layers.Dense(30, activation='relu', input_shape=(len(pitcher_stats_fg_imp),)),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(len(pitcher_inputs)),
]
pitcher_imputations_model = keras.Sequential(pitcher_layer_stack)

# Mean-squared-error regression, optimized with Adam
pitcher_imputations_model.compile(optimizer='adam', loss='mean_squared_error')

# Train for 10 passes over the data in mini-batches of 35 rows
pitcher_imputations_model.fit(x=features, y=target, batch_size=35, epochs=10)

# Persist the trained model.
# NOTE(review): whether a Keras model survives a pickle round-trip depends on the installed
# Keras/TensorFlow version; model.save() is the documented route. Left as pickle because
# whatever loads this .pkl is outside this notebook - confirm before changing.
pitcher_model_file = os.path.join(model_path, "pitcher_imputations_model_20231210.pkl")
with open(pitcher_model_file, "wb") as handle:
    pickle.dump(pitcher_imputations_model, handle)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [25]:
# Summary statistics of the pitcher input columns for the merged rows
pitchers_merged_df.loc[:, pitcher_inputs].describe()

Unnamed: 0,b1_p,b2_p,b3_p,hr_p,bb_p,hbp_p,so_p,fo_p,go_p,lo_p,po_p,estimated_woba_using_speedangle_p,to_left_p,to_middle_p,to_right_p,hard_hit_p,barrel_p,iso_p,slg_p,obp_p,woba_p,maxSpeed_p,maxSpin_p,b1_p_long,b2_p_long,b3_p_long,hr_p_long,bb_p_long,hbp_p_long,so_p_long,fo_p_long,go_p_long,lo_p_long,po_p_long,estimated_woba_using_speedangle_p_long,to_left_p_long,to_middle_p_long,to_right_p_long,hard_hit_p_long,barrel_p_long,iso_p_long,slg_p_long,obp_p_long,woba_p_long,maxSpeed_p_long,maxSpin_p_long
count,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0
mean,-0.0006,0.000439,-0.000666,0.000345,0.001022,0.000666,-7.5e-05,0.000614,-0.000791,-0.000214,0.000133,0.000833,0.001085,0.001269,-0.000776,0.001129,0.000951,0.000295,0.000174,0.000578,0.000453,0.000158,0.000962,-0.000945,0.000312,-0.001175,0.001456,0.001598,0.000938,0.000682,0.000306,-0.001391,-0.000789,0.000138,0.001265,0.000579,0.000676,-0.001322,0.001565,0.002435,0.001088,-0.001627,0.001126,0.001144,0.000628,0.001328
std,0.999869,0.999969,0.998467,1.000381,1.000018,1.000447,0.99998,1.000095,0.999724,1.000198,0.999817,1.000386,1.000547,0.999687,0.999847,1.00015,1.000562,1.000308,1.000235,0.999756,0.999968,0.999838,1.000074,0.999352,0.999669,0.996226,0.999827,0.999738,0.999839,0.999838,0.999486,0.999318,0.999656,0.999285,1.000029,1.000312,0.999721,0.9998,0.999824,0.999796,0.999932,0.999623,0.999108,0.999354,0.999797,0.999959
min,-2.624412,-1.424374,-0.394147,-1.139733,-1.817757,-0.677572,-2.83931,-2.167843,-2.701669,-1.649105,-1.369442,-3.426896,-2.526544,-3.345341,-2.536972,-3.165391,-1.376099,-1.604796,-2.744308,-3.998604,-3.653756,-5.213623,-3.302127,-4.295097,-2.526817,-0.720087,-1.930334,-2.628545,-1.095956,-3.525424,-3.204459,-3.066239,-2.974225,-2.082371,-5.020548,-3.060258,-5.324005,-3.343079,-4.087612,-2.192399,-2.659537,-0.96826,-4.151203,-4.126124,-5.173512,-2.86041
25%,-0.733315,-0.739246,-0.394147,-1.139733,-0.618762,-0.677572,-0.706639,-0.676494,-0.758934,-0.465317,-0.784375,-0.698389,-0.655767,-0.810778,-0.778438,-0.714691,-0.736246,-0.725344,-0.716751,-0.690261,-0.700418,-0.575551,-0.714432,-0.678385,-0.672277,-0.720087,-0.693895,-0.700784,-0.754304,-0.688994,-0.70241,-0.714435,-0.677186,-0.735605,-0.644744,-0.769463,-0.61878,-0.742874,-0.669442,-0.719041,-0.685627,-0.558727,-0.652198,-0.65421,-0.579655,-0.776967
50%,-0.026801,-0.067684,-0.394147,-0.294016,-0.12613,-0.677572,-0.061682,0.069181,-0.030408,0.126577,-0.199309,-0.05075,-0.12126,0.034076,-0.02478,-0.170091,-0.096392,-0.129629,-0.08341,-0.021397,-0.041093,0.025062,-0.146139,-0.010679,-0.069648,-0.262916,-0.083874,-0.095911,-0.096,-0.058096,-0.05373,-0.084055,-0.029303,-0.137043,0.00122,-0.100483,-0.00903,-0.181735,-0.013586,-0.053926,-0.057927,-0.398696,-0.013811,-0.015423,0.023917,-0.119525
75%,0.676311,0.632819,-0.394147,0.524854,0.666728,0.56711,0.622979,0.814855,0.698117,0.718471,0.385757,0.640362,0.680501,0.597312,0.728877,0.646809,0.543462,0.598489,0.619307,0.670122,0.647862,0.659044,0.590139,0.632414,0.595928,0.404554,0.590777,0.596196,0.530956,0.607213,0.663443,0.601697,0.559681,0.61116,0.642068,0.728254,0.630018,0.695538,0.642271,0.658899,0.608985,0.115108,0.626973,0.621095,0.661021,0.670639
max,4.941279,6.556646,12.732234,6.979318,6.447949,8.035199,5.165819,7.153087,5.069272,6.045516,5.651355,5.351837,5.223815,4.821583,4.999602,4.731308,6.302145,6.367717,5.862079,4.666186,5.126839,32.85861,3.592438,6.488512,7.289901,20.824772,7.734408,5.976965,8.95262,5.496037,5.712697,4.328607,7.627493,8.480494,4.972244,4.879217,5.828636,5.623146,6.121195,8.290314,6.872013,9.510724,6.161358,5.702289,32.717421,2.565801


In [26]:
# Use the trained model to predict every pitcher input from the Steamer-based features.
# NOTE: the predictions overwrite the pitcher_inputs columns of pitchers_merged_df in
# place, so the values those columns held before this cell are no longer in that frame.
imputed_pitcher_inputs = pitcher_imputations_model.predict(pitchers_merged_df[pitcher_stats_fg_imp])
pitchers_merged_df[pitcher_inputs] = imputed_pitcher_inputs



In [27]:
# Summary statistics of the pitcher input columns as they currently stand in pitchers_merged_df
pitchers_merged_df.loc[:, pitcher_inputs].describe()

Unnamed: 0,b1_p,b2_p,b3_p,hr_p,bb_p,hbp_p,so_p,fo_p,go_p,lo_p,po_p,estimated_woba_using_speedangle_p,to_left_p,to_middle_p,to_right_p,hard_hit_p,barrel_p,iso_p,slg_p,obp_p,woba_p,maxSpeed_p,maxSpin_p,b1_p_long,b2_p_long,b3_p_long,hr_p_long,bb_p_long,hbp_p_long,so_p_long,fo_p_long,go_p_long,lo_p_long,po_p_long,estimated_woba_using_speedangle_p_long,to_left_p_long,to_middle_p_long,to_right_p_long,hard_hit_p_long,barrel_p_long,iso_p_long,slg_p_long,obp_p_long,woba_p_long,maxSpeed_p_long,maxSpin_p_long
count,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0,155048.0
mean,0.03586,0.008529,-0.025021,0.00093,0.051829,0.044572,-0.063906,0.018506,-0.000407,-0.011951,-0.0368,0.021337,0.041118,0.054185,-0.039524,0.043884,-0.004166,-0.002886,0.021972,0.068052,0.045206,0.075469,0.033334,-0.006248,-0.039485,-0.011099,-0.043604,0.042391,0.022002,0.002979,-0.001235,0.002906,-0.01572,0.002207,-0.03555,0.024768,0.026234,-0.044283,0.005216,-0.049248,-0.053663,0.072333,0.00012,-0.026792,0.072592,0.018149
std,0.364234,0.221786,0.119741,0.315875,0.524707,0.186364,0.664518,0.478328,0.640238,0.192085,0.408308,0.482873,0.585744,0.396226,0.595832,0.42302,0.334672,0.357,0.364962,0.36948,0.357749,0.648509,0.273007,0.570061,0.36058,0.195063,0.51757,0.741647,0.299193,0.827061,0.701858,0.82663,0.341632,0.618505,0.685495,0.755706,0.609348,0.740372,0.588612,0.547714,0.561956,0.342828,0.535835,0.521241,0.63009,0.276227
min,-1.436712,-0.885077,-0.663767,-1.037752,-1.61266,-0.664911,-2.154918,-1.973202,-1.880349,-0.91858,-1.520226,-2.178057,-1.343432,-1.781607,-1.269648,-1.756073,-1.09576,-1.250533,-1.66077,-1.413571,-1.324323,-2.69407,-1.252401,-2.674753,-1.710942,-1.316193,-1.777155,-2.403952,-1.067821,-2.677472,-2.790245,-2.446989,-1.672596,-2.181637,-3.357185,-1.702333,-2.744978,-1.499692,-2.370927,-2.03875,-2.134199,-1.290584,-2.232985,-2.546501,-2.723546,-1.243378
25%,-0.197138,-0.135882,-0.106279,-0.213315,-0.327428,-0.093811,-0.511828,-0.298488,-0.472762,-0.123296,-0.306852,-0.28454,-0.527565,-0.191214,-0.51044,-0.215514,-0.228135,-0.238486,-0.221106,-0.181563,-0.202154,-0.37717,-0.133862,-0.361644,-0.263539,-0.141604,-0.388252,-0.493099,-0.202823,-0.551783,-0.471274,-0.603389,-0.212002,-0.409565,-0.470434,-0.704794,-0.343304,-0.632215,-0.358526,-0.407902,-0.415244,-0.152258,-0.354177,-0.370032,-0.371261,-0.142604
50%,0.051556,0.01106,-0.035403,0.003738,-0.00203,0.034119,-0.099074,0.022973,-0.037415,-0.002817,-0.057971,0.016219,0.093797,0.066756,-0.29401,0.047428,0.01197,0.003463,0.020997,0.068579,0.047629,0.117997,0.041337,0.028298,-0.026667,-0.030258,-0.028895,-0.020464,-0.008462,-0.039177,-0.002148,-0.040364,0.001872,-0.039788,-0.027418,0.072042,0.051823,-0.394585,0.023736,-0.017169,-0.027404,0.038087,0.007019,-0.009355,0.119133,0.035974
75%,0.27055,0.161188,0.056602,0.206653,0.382011,0.178991,0.33631,0.342177,0.401456,0.099289,0.208569,0.339656,0.527777,0.312271,0.486985,0.310696,0.230082,0.232002,0.263302,0.326467,0.296892,0.516951,0.206546,0.358346,0.208989,0.124018,0.311534,0.520196,0.2453,0.499468,0.470168,0.516359,0.187827,0.365343,0.426913,0.649319,0.424449,0.631168,0.389246,0.336788,0.328001,0.260423,0.370475,0.341652,0.506874,0.198574
max,1.513479,1.050343,0.404613,1.720608,2.630939,0.747061,3.070073,1.896532,2.587136,1.089454,1.975732,2.459259,1.884027,1.766645,1.874434,2.329014,1.683153,1.837452,1.999322,1.70747,1.86757,2.818081,1.875975,2.080805,1.849486,0.793419,2.611746,4.038701,1.08533,3.901657,2.798094,3.235003,1.805052,3.289819,3.246257,2.416044,2.4106,2.325231,3.153528,2.67493,2.795516,1.932903,2.364669,2.494293,2.675323,1.938057


In [None]:
# What if we only train these on small sample guys?