# M01. Impute Inputs
- Normalizes model inputs
- Normalizes Steamer projections
- Uses Steamer projections to impute model inputs

In [1]:
%run "U1. Imports.ipynb"
%run "U2. Utilities.ipynb"
%run "U3. Classes.ipynb"

baseball_path = r'C:\Users\james\Documents\MLB\Database'

db_path = r'C:\Users\james\Documents\MLB\Database\MLBDB.db'
engine = create_engine(f'sqlite:///{db_path}')

In [2]:
%run "A03. Steamer.ipynb"

### Batters

##### Dataset

In [3]:
# Keep one row per (gamePk, batter): the row with the highest atBatIndex,
# i.e. the last row recorded for that batter in that game.
# NOTE(review): the query applies no minimum-PA filter; rows are kept regardless
# of how many plate appearances the player has.
sql_query = '''
WITH ranked_data AS (
  SELECT *,
         ROW_NUMBER() OVER (PARTITION BY gamePk, batter ORDER BY atBatIndex DESC) AS rn
  FROM "Dataset"
)
SELECT *
FROM ranked_data
WHERE rn = 1
'''

hitters_df = pd.read_sql_query(sql_query, con=engine)

In [5]:
hitters_df.tail()

Unnamed: 0,date,gamePk,atBatIndex,batterName,pitcherName,batter,pitcher,batSide,pitchHand,eventsModel,pa_b,ab_b,pa_p,ab_p,pa_b_long,ab_b_long,pa_p_long,ab_p_long,b1_b,b2_b,b3_b,hr_b,bb_b,hbp_b,so_b,fo_b,go_b,lo_b,po_b,estimated_woba_using_speedangle_b,to_left_b,to_middle_b,to_right_b,hard_hit_b,barrel_b,iso_b,slg_b,obp_b,woba_b,totalDistance_b,launchSpeed_b,b1_b_long,b2_b_long,b3_b_long,hr_b_long,bb_b_long,hbp_b_long,so_b_long,fo_b_long,go_b_long,lo_b_long,po_b_long,estimated_woba_using_speedangle_b_long,to_left_b_long,to_middle_b_long,to_right_b_long,hard_hit_b_long,barrel_b_long,iso_b_long,slg_b_long,obp_b_long,woba_b_long,totalDistance_b_long,launchSpeed_b_long,b1_p,b2_p,b3_p,hr_p,bb_p,hbp_p,so_p,fo_p,go_p,lo_p,po_p,estimated_woba_using_speedangle_p,to_left_p,to_middle_p,to_right_p,hard_hit_p,barrel_p,iso_p,slg_p,obp_p,woba_p,maxSpeed_p,maxSpin_p,b1_p_long,b2_p_long,b3_p_long,hr_p_long,bb_p_long,hbp_p_long,so_p_long,fo_p_long,go_p_long,lo_p_long,po_p_long,estimated_woba_using_speedangle_p_long,to_left_p_long,to_middle_p_long,to_right_p_long,hard_hit_p_long,barrel_p_long,iso_p_long,slg_p_long,obp_p_long,woba_p_long,maxSpeed_p_long,maxSpin_p_long,venue_1,venue_2,venue_3,venue_4,venue_5,venue_7,venue_10,venue_12,venue_13,venue_14,venue_15,venue_16,venue_17,venue_19,venue_22,venue_31,venue_32,venue_680,venue_2392,venue_2394,venue_2395,venue_2535,venue_2536,venue_2602,venue_2680,venue_2681,venue_2701,venue_2735,venue_2756,venue_2889,venue_3289,venue_3309,venue_3312,venue_3313,venue_4169,venue_4705,venue_5010,venue_5325,venue_5365,venue_5381,venue_5445,year_2015,year_2016,year_2017,year_2018,year_2019,year_2020,year_2021,year_2022,year_2023,p_L,b_L,x_vect,y_vect,temperature,onFirst,onSecond,onThird,inning,top,score_diff,imp_b,imp_p,outs,halfInning,awayScore,homeScore,rbi,b1,b2,b3,hr,bb,hbp,so,fo,go,lo,po,rn
443540,20231003,748585,38,Edouard Julien,Erik Swanson,666397,657024,L,R,go,50.0,40.0,49.0,44.0,299.0,241.0,296.0,277.0,0.117181,0.015385,0.0,0.054749,0.196588,0.0,0.307226,0.08,0.14,0.06,0.02,0.23508,0.06,0.24,0.2,0.2,0.06,0.179631,0.009174,0.383902,0.372096,452.0,106.2,0.143236,0.040598,0.003876,0.03769,0.190134,0.003333,0.304771,0.06,0.163333,0.046667,0.006667,0.2455,0.12,0.18,0.2,0.246667,0.073333,0.161419,0.001605,0.418867,0.395818,452.0,108.2,0.184824,0.018868,0.0,0.035556,0.10382,0.0,0.247626,0.14,0.18,0.04,0.04,0.24506,0.22,0.16,0.26,0.26,0.04,0.125537,0.008291,0.343068,0.332618,97.4,2460.0,0.132063,0.046857,0.003333,0.021742,0.065355,0.0,0.297474,0.15,0.18,0.04,0.05,0.21917,0.166667,0.206667,0.25,0.216667,0.043333,0.118749,0.001165,0.269351,0.271447,98.5,2989.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,-15.0,0.0,84,0,0,0,5,0,3.0,0,0,1,bottom,0,3,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,1,0,0,1
443541,20231003,748585,62,Royce Lewis,Jordan Hicks,668904,663855,R,R,go,50.0,41.0,50.0,47.0,217.0,199.0,298.0,263.0,0.101541,0.036036,0.0,0.108471,0.177165,0.0,0.218661,0.12,0.12,0.04,0.06,0.39882,0.24,0.26,0.1,0.34,0.18,0.36145,0.014817,0.423213,0.48212,426.0,112.9,0.175962,0.030301,0.0,0.079454,0.072755,0.009174,0.21765,0.123853,0.16055,0.050459,0.073394,0.310876,0.284404,0.279817,0.133028,0.316514,0.09633,0.268665,0.002786,0.367647,0.415253,426.0,114.0,0.122553,0.049637,0.0,0.015504,0.064853,0.0,0.307571,0.04,0.3,0.06,0.04,0.17148,0.22,0.26,0.16,0.22,0.04,0.096149,0.006039,0.252547,0.247943,103.6,2711.0,0.14135,0.0274,0.003584,0.012839,0.107396,0.01,0.298362,0.06,0.283333,0.036667,0.023333,0.169393,0.19,0.273333,0.136667,0.166667,0.023333,0.073085,0.000982,0.302569,0.273301,104.9,2843.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-15.0,0.0,84,0,0,0,8,0,2.0,0,0,1,bottom,1,3,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,1,0,0,1
443542,20231003,748585,51,Matt Wallner,Chad Green,670242,643338,L,R,go,50.0,42.0,49.0,44.0,257.0,212.0,298.0,277.0,0.181343,0.072039,0.0,0.036567,0.138635,0.02,0.26996,0.06,0.14,0.04,0.04,0.27614,0.16,0.12,0.3,0.32,0.12,0.18174,0.011231,0.448584,0.437174,463.0,116.4,0.13023,0.042438,0.004524,0.053316,0.120961,0.054475,0.328212,0.07393,0.120623,0.019455,0.054475,0.25416,0.11284,0.143969,0.241245,0.268482,0.097276,0.211436,0.002085,0.405945,0.409158,463.0,116.4,0.118546,0.079735,0.008696,0.0,0.103498,0.0,0.28705,0.18,0.12,0.04,0.04,0.24266,0.1,0.32,0.18,0.34,0.02,0.097126,0.006911,0.310474,0.290874,98.5,2691.0,0.087792,0.048212,0.004033,0.035362,0.0681,0.0,0.345572,0.156667,0.12,0.05,0.07,0.23389,0.126667,0.24,0.206667,0.243333,0.06,0.162364,0.001219,0.243499,0.265185,98.8,2847.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,-15.0,0.0,84,0,0,0,6,0,2.0,0,0,3,bottom,1,3,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,1,0,0,1
443543,20231003,748585,61,Alejandro Kirk,Griffin Jax,672386,643377,R,R,go,50.0,44.0,50.0,47.0,298.0,265.0,297.0,279.0,0.121277,0.042397,0.0,0.035556,0.105063,0.02,0.059295,0.1,0.38,0.12,0.02,0.30402,0.2,0.44,0.18,0.38,0.04,0.149066,0.007916,0.324293,0.32135,394.0,108.3,0.161451,0.032807,0.0,0.017831,0.090566,0.023333,0.095901,0.136667,0.326667,0.086667,0.026667,0.268683,0.183333,0.37,0.236667,0.326667,0.043333,0.086299,0.001126,0.325988,0.30051,410.0,108.3,0.103235,0.055454,0.0,0.037736,0.021277,0.04,0.306307,0.06,0.3,0.02,0.06,0.23532,0.26,0.2,0.18,0.22,0.04,0.168661,0.007768,0.257701,0.28298,98.9,3327.0,0.164293,0.028786,0.0,0.018862,0.050441,0.01,0.245313,0.1,0.276667,0.06,0.036667,0.229253,0.216667,0.28,0.196667,0.253333,0.03,0.085371,0.001066,0.272381,0.262785,98.9,3327.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-15.0,0.0,84,0,1,0,8,1,-2.0,0,0,3,top,1,3,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,1,0,0,1
443544,20231003,748585,66,Ryan Jeffers,Jordan Hicks,680777,663855,R,R,bb,49.0,46.0,50.0,46.0,295.0,264.0,298.0,264.0,0.115793,0.018018,0.023256,0.036567,0.058353,0.0,0.224473,0.18,0.16,0.12,0.04,0.30098,0.26,0.26,0.18,0.28,0.08,0.174231,0.007997,0.251987,0.278405,465.0,110.2,0.151086,0.048068,0.007752,0.029301,0.076657,0.026667,0.273334,0.123333,0.16,0.053333,0.036667,0.238883,0.23,0.226667,0.16,0.256667,0.063333,0.151475,0.001468,0.339531,0.339557,465.0,117.4,0.144292,0.030769,0.0,0.015504,0.084655,0.0,0.307571,0.02,0.3,0.06,0.04,0.16352,0.22,0.26,0.14,0.24,0.04,0.077281,0.005823,0.27522,0.257034,103.6,2711.0,0.144973,0.0274,0.003584,0.012839,0.107428,0.006667,0.298362,0.06,0.286667,0.036667,0.02,0.1715,0.193333,0.273333,0.136667,0.173333,0.023333,0.073085,0.000992,0.302891,0.274127,104.9,2843.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-15.0,0.0,84,1,0,1,8,0,2.0,0,0,2,bottom,1,3,0,0.0,0.0,0.0,0.0,0.990099,0,0.0,0,0,0,0,1


In [13]:
# hitters_df = pa_share(hitters_df)

In [14]:
# Learn per-column mean/scale on the batter input columns, then replace those
# columns in hitters_df with their standardized values.
batter_stats_scaler = StandardScaler()
batter_stats_scaler.fit(hitters_df[batter_inputs])
hitters_df[batter_inputs] = batter_stats_scaler.transform(hitters_df[batter_inputs])

# Persist the fitted scaler under model_path so the same scaling can be reused later
with open(os.path.join(model_path, "batter_stats_scaler_20231027.pkl"), "wb") as file:
    file.write(pickle.dumps(batter_stats_scaler))

##### Steamer

In [15]:
# Load every row of the "Steamer Hitters" table (no filtering or de-duplication here)
sql_query = '''
  SELECT *
  FROM "Steamer Hitters"
'''

steamer_hitters_df = pd.read_sql_query(sql_query, con=engine)

In [16]:
# Clean the raw Steamer hitter projections with clean_steamer_hitters.
# NOTE(review): the helper is defined outside this notebook (presumably in
# "A03. Steamer.ipynb"); its exact transformations are not visible here.
steamer_hitters_df2 = clean_steamer_hitters(steamer_hitters_df)

In [17]:
# Learn per-column mean/scale on the Steamer hitter stats, then replace those
# columns in steamer_hitters_df2 with their standardized values.
batter_stats_fg_scaler = StandardScaler()
batter_stats_fg_scaler.fit(steamer_hitters_df2[batter_stats_fg])
steamer_hitters_df2[batter_stats_fg] = batter_stats_fg_scaler.transform(steamer_hitters_df2[batter_stats_fg])

# Report the fitted mean and scale (standard deviation) of every feature
fitted_params = zip(batter_stats_fg_scaler.mean_, batter_stats_fg_scaler.scale_)
for feature, (mean, std_dev) in zip(batter_stats_fg, fitted_params):
    print(f"{feature}: Mean={mean}, Standard Deviation={std_dev}")

# Persist the fitted scaler under model_path
with open(os.path.join(model_path, "batter_stats_fg_scaler_20231027.pkl"), "wb") as file:
    file.write(pickle.dumps(batter_stats_fg_scaler))

b1_rate: Mean=0.13296247565214536, Standard Deviation=0.019902178079786136
b2_rate: Mean=0.03137276993870695, Standard Deviation=0.010154460245274565
b3_rate: Mean=0.0031924832506096856, Standard Deviation=0.0017964860307704274
hr_rate: Mean=0.01711153781206499, Standard Deviation=0.009168360039807501
bb_rate: Mean=0.05672550200233217, Standard Deviation=0.022557026314489005
hbp_rate: Mean=0.00967180176502107, Standard Deviation=0.0027268910257308675
so_rate: Mean=0.2603879338587934, Standard Deviation=0.06238002245642874
woba: Mean=0.2439668470431368, Standard Deviation=0.04622896762373168
slg: Mean=0.2985866602316107, Standard Deviation=0.06881805883276533
obp: Mean=0.2524655949016026, Standard Deviation=0.04464417614932697


##### Merge

In [18]:
from bisect import bisect_right

# Distinct Steamer projection dates for hitters, sorted ascending so that the
# lookup below can binary-search them instead of scanning the whole list per row.
steamer_dates = sorted(steamer_hitters_df2['date'].unique())

def find_steamer_date(date):
    """Return the largest entry of steamer_dates that is <= date.

    Reads the module-level, ascending-sorted steamer_dates list at call time.
    Returns None when every Steamer date is later than `date` (or the list is empty).
    """
    # bisect_right gives the number of entries <= date; the one just before
    # that position is therefore the latest date not after `date`.
    pos = bisect_right(steamer_dates, date)
    return steamer_dates[pos - 1] if pos else None

# Tag every hitter row with the Steamer projection date to join against
hitters_df["steamer_date"] = hitters_df["date"].apply(find_steamer_date)

In [19]:
# Columns pulled from the Steamer frame: ids, projection date and the projected stats
batter_stats_fg_plus = ['mlbamid', 'steamerid', 'date'] + batter_stats_fg

# Inner-join each hitter row to the projection for the same player on its
# steamer_date, keep the last row per (gamePk, batter), and drop rows with
# any missing model input.
# Consider only keeping one observation per player per week/Steamer weekly projection
hitters_merged_df = (
    pd.merge(
        hitters_df,
        steamer_hitters_df2[batter_stats_fg_plus],
        left_on=['batter', 'steamer_date'],
        right_on=['mlbamid', 'date'],
        how='inner',
    )
    .drop_duplicates(['gamePk', 'batter'], keep='last')
    .dropna(subset=batter_inputs)
)

##### Impute

In [22]:
# Row count of a filter-then-slice subset: keep rows with pa_p < 40 first,
# then skip the first 10000 of the remaining rows.
len(hitters_merged_df.query('pa_p < 40').iloc[10000:])

41393

In [23]:
# Testing: train on the rows from position 10000 onward that have pa_p < 40.
# The subset lives in its own variable so hitters_merged_df is not overwritten;
# re-running this cell therefore yields the same subset instead of discarding
# another 10000 rows on every run.
# NOTE(review): this filters the batter frame on pa_p (presumably the pitcher's
# PA count) rather than pa_b - confirm that is intended.
hitters_train_df = hitters_merged_df.iloc[10000:].query('pa_p < 40')

# Add hands to use in imputation
batter_stats_fg_imp = batter_stats_fg + ['b_L', 'p_L']

# Separate the features (Steamer stats + handedness) and targets (model inputs)
features = hitters_train_df[batter_stats_fg_imp]
target = hitters_train_df[batter_inputs]

# Feed-forward network: three hidden ReLU layers of 30 units and a linear
# output layer with one unit per batter input being imputed
batter_imputations_model = keras.Sequential([
    keras.layers.Dense(30, activation='relu', input_shape=(len(batter_stats_fg_imp),)),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(len(batter_inputs))
    ])

# Compile the model
batter_imputations_model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
# NOTE(review): no random seed is set in this notebook, so results may differ between runs.
batter_imputations_model.fit(features, target, epochs=10, batch_size=25)

# Pickle
# NOTE(review): Keras documents model.save() as its native persistence format;
# pickle is kept here so existing consumers of the .pkl file keep working.
with open(os.path.join(model_path, "batter_imputations_model_20231210.pkl"), "wb") as file:
    pickle.dump(batter_imputations_model, file)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Use the trained model to make predictions
# hitters_merged_df[batter_inputs] = batter_imputations_model.predict(hitters_merged_df[batter_stats_fg_imp])

In [None]:
hitters_merged_df[batter_inputs].describe()

### Pitchers

##### Dataset

In [24]:
# Select dataset
# Keep one row per (gamePk, pitcher): the row with the highest atBatIndex,
# i.e. the last row recorded for that pitcher in that game.
# NOTE(review): the query applies no minimum-PA filter; rows are kept regardless
# of how many plate appearances the player has.
sql_query = '''
WITH ranked_data AS (
  SELECT *,
         ROW_NUMBER() OVER (PARTITION BY gamePk, pitcher ORDER BY atBatIndex DESC) AS rn
  FROM "Dataset"
)
SELECT *
FROM ranked_data
WHERE rn = 1
'''

pitchers_df = pd.read_sql_query(sql_query, con=engine)

In [25]:
# pitchers_df = pa_share(pitchers_df)

In [26]:
# Learn per-column mean/scale on the pitcher input columns, then replace those
# columns in pitchers_df with their standardized values.
pitcher_stats_scaler = StandardScaler()
pitcher_stats_scaler.fit(pitchers_df[pitcher_inputs])
pitchers_df[pitcher_inputs] = pitcher_stats_scaler.transform(pitchers_df[pitcher_inputs])

# Persist the fitted scaler under model_path so the same scaling can be reused later
with open(os.path.join(model_path, "pitcher_stats_scaler_20231027.pkl"), "wb") as file:
    file.write(pickle.dumps(pitcher_stats_scaler))

##### Steamer

In [27]:
# Load every row of the "Steamer Pitchers" table (no filtering or de-duplication here)
sql_query = '''
  SELECT *
  FROM "Steamer Pitchers"
'''

steamer_pitchers_df = pd.read_sql_query(sql_query, con=engine)

In [28]:
# Clean the raw Steamer pitcher projections with clean_steamer_pitchers.
# NOTE(review): the helper is defined outside this notebook (presumably in
# "A03. Steamer.ipynb"); its exact transformations are not visible here.
steamer_pitchers_df2 = clean_steamer_pitchers(steamer_pitchers_df)

In [29]:
# Drop, in place, every projection row that is missing any of the pitcher_stats_fg columns.
# NOTE(review): the hitters section has no equivalent dropna step - confirm the asymmetry is intended.
steamer_pitchers_df2.dropna(subset=pitcher_stats_fg, inplace=True)

In [30]:
steamer_pitchers_df2.head()

Unnamed: 0,date,firstname,lastname,mlbamid,steamerid,H9,HR9,K9,BB9,GBrate,FBrate,LDrate,SIERA,reliability,IP_start
0,20140617,Craig,Kimbrel,518886.0,6655,5.589724,0.4786,13.6858,2.7747,0.455,0.335,0.210571,1.92,0.58064,0.0
1,20140617,Aroldis,Chapman,547973.0,10233,5.004519,0.6532,15.0427,3.4555,0.41,0.388,0.202224,1.85,0.55961,0.0
2,20140617,Koji,Uehara,493157.0,9227,6.925841,0.9029,10.8841,1.5271,0.355,0.433,0.21286,2.34,0.55498,0.0
3,20140617,Kenley,Jansen,445276.0,3096,6.112634,0.7299,12.4814,2.7269,0.391,0.409,0.199716,2.27,0.59125,0.0
4,20140617,Greg,Holland,518813.0,7196,6.376248,0.5989,11.9605,2.8349,0.437,0.358,0.205506,2.41,0.58857,0.0


In [31]:
# Learn per-column mean/scale on the Steamer pitcher stats, then replace those
# columns in steamer_pitchers_df2 with their standardized values.
pitcher_stats_fg_scaler = StandardScaler()
pitcher_stats_fg_scaler.fit(steamer_pitchers_df2[pitcher_stats_fg])
steamer_pitchers_df2[pitcher_stats_fg] = pitcher_stats_fg_scaler.transform(steamer_pitchers_df2[pitcher_stats_fg])

# Report the fitted mean and scale (standard deviation) of every feature
fitted_params = zip(pitcher_stats_fg_scaler.mean_, pitcher_stats_fg_scaler.scale_)
for feature, (mean, std_dev) in zip(pitcher_stats_fg, fitted_params):
    print(f"{feature}: Mean={mean}, Standard Deviation={std_dev}")

# Persist the fitted scaler under model_path
with open(os.path.join(model_path, "pitcher_stats_fg_scaler_20231027.pkl"), "wb") as file:
    file.write(pickle.dumps(pitcher_stats_fg_scaler))

H9: Mean=9.53007780379424, Standard Deviation=0.8384434226772676
HR9: Mean=1.3459117962282532, Standard Deviation=0.25895465843158266
K9: Mean=6.582873102164268, Standard Deviation=1.5810154251855848
BB9: Mean=4.96198949327622, Standard Deviation=1.7885069322073819
GBrate: Mean=0.4348809790797142, Standard Deviation=0.03576685285883674
FBrate: Mean=0.3600437092186263, Standard Deviation=0.03595874009647045
LDrate: Mean=0.205069528956993, Standard Deviation=0.003925116647791453
SIERA: Mean=4.970680824764909, Standard Deviation=0.9931263329927901


##### Merge

In [32]:
from bisect import bisect_right

# Distinct Steamer projection dates for pitchers, sorted ascending so that the
# lookup below can binary-search them instead of scanning the whole list per row.
steamer_dates = sorted(steamer_pitchers_df2['date'].unique())

def find_steamer_date(date):
    """Return the largest entry of steamer_dates that is <= date.

    Reads the module-level, ascending-sorted steamer_dates list at call time.
    Returns None when every Steamer date is later than `date` (or the list is empty).
    """
    # bisect_right gives the number of entries <= date; the one just before
    # that position is therefore the latest date not after `date`.
    pos = bisect_right(steamer_dates, date)
    return steamer_dates[pos - 1] if pos else None

# Tag every pitcher row with the Steamer projection date to join against
pitchers_df["steamer_date"] = pitchers_df["date"].apply(find_steamer_date)

In [33]:
# Columns pulled from the Steamer frame: ids, projection date and the projected stats.
# NOTE(review): this uses pitcher_stats_fg2 while the scaler and the imputation
# features use pitcher_stats_fg - confirm the two lists are meant to differ.
pitcher_stats_fg_plus = ['mlbamid', 'steamerid', 'date'] + pitcher_stats_fg2

# Inner-join each pitcher row to the projection for the same player on its
# steamer_date, keep the last row per (gamePk, pitcher), and drop rows with
# any missing model input or missing Steamer stat.
# Consider only keeping one observation per player per week/Steamer weekly projection
pitchers_merged_df = (
    pd.merge(
        pitchers_df,
        steamer_pitchers_df2[pitcher_stats_fg_plus],
        left_on=['pitcher', 'steamer_date'],
        right_on=['mlbamid', 'date'],
        how='inner',
    )
    .drop_duplicates(['gamePk', 'pitcher'], keep='last')
    .dropna(subset=pitcher_inputs)
    .dropna(subset=pitcher_stats_fg2)
)

##### Impute

In [34]:
# The pitcher model is fit on all of pitchers_merged_df. This cell does not
# touch hitters_merged_df: re-slicing the hitters frame here had no effect on
# pitcher training and discarded hitter rows on every run.
# NOTE(review): if a testing subset like the one in the batter cell is wanted,
# build it from pitchers_merged_df in a separate variable.

# Add hands to use in imputation
pitcher_stats_fg_imp = pitcher_stats_fg + ['b_L', 'p_L']

# Separate the features (Steamer stats + handedness) and targets (model inputs)
features = pitchers_merged_df[pitcher_stats_fg_imp]
target = pitchers_merged_df[pitcher_inputs]

# Feed-forward network: three hidden ReLU layers of 30 units and a linear
# output layer with one unit per pitcher input being imputed
pitcher_imputations_model = keras.Sequential([
    keras.layers.Dense(30, activation='relu', input_shape=(len(pitcher_stats_fg_imp),)),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(30, activation='relu'),
    keras.layers.Dense(len(pitcher_inputs))
    ])

# Compile the model
pitcher_imputations_model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
# NOTE(review): no random seed is set in this notebook, so results may differ between runs.
pitcher_imputations_model.fit(features, target, epochs=10, batch_size=35)

# Pickle
# NOTE(review): Keras documents model.save() as its native persistence format;
# pickle is kept here so existing consumers of the .pkl file keep working.
with open(os.path.join(model_path, "pitcher_imputations_model_20231210.pkl"), "wb") as file:
    pickle.dump(pitcher_imputations_model, file)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [35]:
pitchers_merged_df[pitcher_inputs].describe()

Unnamed: 0,b1_p,b2_p,b3_p,hr_p,bb_p,hbp_p,so_p,fo_p,go_p,lo_p,po_p,estimated_woba_using_speedangle_p,to_left_p,to_middle_p,to_right_p,hard_hit_p,barrel_p,iso_p,slg_p,obp_p,woba_p,maxSpeed_p,maxSpin_p,b1_p_long,b2_p_long,b3_p_long,hr_p_long,bb_p_long,hbp_p_long,so_p_long,fo_p_long,go_p_long,lo_p_long,po_p_long,estimated_woba_using_speedangle_p_long,to_left_p_long,to_middle_p_long,to_right_p_long,hard_hit_p_long,barrel_p_long,iso_p_long,slg_p_long,obp_p_long,woba_p_long,maxSpeed_p_long,maxSpin_p_long
count,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0,179750.0
mean,-0.002748,-0.00212,-6.1e-05,-0.003518,-0.004075,-0.003288,0.007769,-0.002349,0.001152,3.4e-05,-0.000444,0.000692,0.000415,0.003572,0.001237,0.001977,-0.000987,-0.003933,-0.013311,-0.000433,-0.007097,0.018395,0.015013,-0.003349,-0.002902,-1.7e-05,-0.00411,-0.004934,-0.004187,0.009752,-0.003299,0.001264,-0.000106,-0.000648,0.000587,-0.000292,0.003504,0.001229,0.002033,-0.001134,-0.004706,-0.013652,-0.000564,-0.008755,0.01879,0.015551
std,0.971587,0.969531,0.99117,0.957149,0.939431,0.932726,0.986712,0.970642,0.985217,0.973715,0.978594,0.969691,0.980342,0.978885,0.982397,0.979784,0.966729,0.960673,0.86545,0.953557,0.965936,0.930067,0.96339,0.954717,0.949502,0.984781,0.929766,0.909316,0.89387,0.979933,0.954624,0.978429,0.95398,0.965509,0.953517,0.970085,0.965879,0.974022,0.968262,0.939542,0.936034,0.865978,0.926111,0.944499,0.929778,0.966834
min,-2.113626,-1.140167,-0.316907,-0.90513,-1.524692,-0.54734,-2.484256,-1.804991,-2.383475,-1.342363,-1.140384,-3.166498,-2.178002,-2.952878,-2.22579,-2.629003,-1.159184,-1.293535,-0.200407,-3.439189,-3.147967,-9.776242,-7.032769,-2.616373,-1.434837,-0.407678,-1.109549,-1.819283,-0.66401,-2.952928,-2.176957,-2.822814,-1.736453,-1.398326,-3.787907,-2.57098,-3.636285,-2.635474,-3.143991,-1.45263,-1.594843,-0.123514,-4.22878,-3.907307,-9.772408,-6.600603
25%,-0.607054,-0.606802,-0.316907,-0.90513,-0.569814,-0.54734,-0.630938,-0.571226,-0.66888,-0.858042,-0.651094,-0.579156,-0.762858,-0.614556,-0.66214,-0.548855,-0.605841,-0.649874,-0.108082,-0.587129,-0.609233,-0.091788,-0.456314,-0.472036,-0.451145,-0.407678,-0.477713,-0.545072,-0.66401,-0.570302,-0.547327,-0.604852,-0.43896,-0.534446,-0.439539,-0.619917,-0.438028,-0.602894,-0.513276,-0.52558,-0.481608,-0.103695,-0.451166,-0.460998,-0.0885,-0.526824
50%,-0.036379,-0.073026,-0.316907,-0.260639,-0.12496,-0.54734,-0.047967,0.045657,-0.025907,0.110599,-0.161805,-0.016291,-0.055285,0.086941,0.007996,-0.0866,-0.052499,-0.132072,-0.077037,-0.02532,-0.053107,0.103546,-0.038761,-0.024698,-0.066469,-0.271221,-0.078147,-0.093665,-0.216756,-0.033956,-0.053005,-0.060824,-0.053219,-0.088108,0.041916,-0.062058,0.046557,-0.116843,0.031923,-0.043996,-0.062721,-0.098508,-0.029079,-0.038527,0.105178,-0.041757
75%,0.555784,0.502042,-0.316907,0.418287,0.5452,0.440328,0.58234,0.662539,0.617066,0.59492,0.327484,0.574147,0.652287,0.554605,0.678131,0.606783,0.500844,0.484019,-0.039972,0.55175,0.532862,0.29888,0.496228,0.408118,0.34908,0.170823,0.346184,0.424074,0.281481,0.528638,0.446748,0.525053,0.381903,0.415823,0.500367,0.615756,0.517296,0.590142,0.527325,0.475633,0.373371,-0.069629,0.3978,0.386684,0.30905,0.543109
max,14.62606,30.861778,74.763333,37.68481,12.260635,32.374907,10.246069,13.617075,8.332741,22.873673,23.324081,22.066353,9.614868,8.738731,8.943138,8.927375,26.507941,33.61826,76.678399,11.273683,22.584125,10.230072,2.730135,18.064276,38.860701,93.747707,47.208548,14.777486,39.887086,12.213798,16.563797,9.731686,29.403378,28.837492,26.783194,11.541709,10.901248,10.620482,10.949128,34.702295,41.965516,76.310499,13.931159,28.188923,10.064313,2.081861


In [36]:
# Use the trained model to make predictions.
# This overwrites the pitcher_inputs columns of every row in pitchers_merged_df
# with the model's output for that row's Steamer stats and handedness flags.
# NOTE(review): the equivalent batter step is commented out - confirm that
# replacing observed values for all rows is intended.
pitchers_merged_df[pitcher_inputs] = pitcher_imputations_model.predict(pitchers_merged_df[pitcher_stats_fg_imp])

