# Explore and Model StatFox Matchup Data
`mlb_bet_notebooks/model_statfox_matchups.ipynb`
- Explore features
- Convert historical moneylines to break-even probabilities
- Model pre-computed features with RF and maybe PCA
- Compare model predictions to historical moneylines
    - Use break-even probabilities as alternative model and compare ROC
Jonathan Sims 2020-02-24

In [198]:
import math
import pandas as pd 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import numpy as np
from sklearn import decomposition
from sklearn.preprocessing import StandardScaler

In [265]:
file_in1 = '20200310.skr_statfox.2010-2018.0.7400.tsv.gz'
file_in2 = '20200310.skr_statfox.2010-2018.7400.10000.tsv.gz'
file_in3 = '20200310.skr_statfox.2010-2018.10000.12000.tsv.gz'
file_in4 = '20200310.skr_statfox.2010-2018.12000.16000.tsv.gz'
file_in5 = '20200310.skr_statfox.2010-2018.16000.20600.tsv.gz'
file_in6 = '20200310.skr_statfox.2010-2018.20600.tsv.gz''

SyntaxError: EOL while scanning string literal (<ipython-input-265-741c2e99deda>, line 6)

### Import features, moneylines, target

In [200]:
### Import chunks and append

df1 = pd.read_csv(file_in1, sep='\t', low_memory=False)
df2 = pd.read_csv(file_in2, sep='\t', low_memory=False)
df3 = pd.read_csv(file_in3, sep='\t', low_memory=False)
df4 = pd.read_csv(file_in4, sep='\t', low_memory=False)
df5 = pd.read_csv(file_in5, sep='\t', low_memory=False)
df6 = pd.read_csv(file_in6, sep='\t', low_memory=False)

df = pd.concat([df1, df2, df3, df4, df5, df6], ignore_index=False, sort=True)

In [201]:
### Clear useless dataframes

df1 = []
df2 = []
df3 = []
df4 = []
df5 = []

In [202]:
### Give proper name to the match index

df['matchidx'] = df['Unnamed: 0']
df = df.drop(['Unnamed: 0'], axis=1)

In [203]:
df.shape

(11765, 590)

In [204]:
df.tail()

Unnamed: 0,H_Bullpen_BB_AllGames,H_Bullpen_BB_HomeGames,H_Bullpen_BSV_AllGames,H_Bullpen_BSV_HomeGames,H_Bullpen_ERA_AllGames,H_Bullpen_ERA_HomeGames,H_Bullpen_ER_AllGames,H_Bullpen_ER_HomeGames,H_Bullpen_HR_AllGames,H_Bullpen_HR_HomeGames,...,V__Latest_Line,V__Latest_Total,V__Opening_Line,V__Opening_Total,sc_h,sc_v,tm_h,tm_v,win_h,matchidx
3991,112,55,7,4,3.8,3.62,120,67,31,16,...,180,"Ov 10,-110",200,"Ov 10,-110",13,2,BOSTON,MINNESOTA,1,20160721964963
3992,132,59,14,2,3.57,2.44,108,40,19,5,...,110,"Ov 10.5,-115",115,"Ov 10.5,+100",1,2,CHIWHITESOX,DETROIT,0,20160721966965
3993,81,35,7,3,3.51,2.36,111,40,37,18,...,110,"Ov 9,+105",-115,"Ov 9,-105",1,4,NYYANKEES,BALTIMORE,0,20160721962961
3994,111,55,11,5,4.22,3.53,156,69,40,18,...,105,"Ov 8,-120",125,"Ov 8,+100",3,7,OAKLAND,TAMPABAY,0,20160721968967
3995,108,60,11,7,4.93,6.19,169,116,40,28,...,165,"Ov 12,-105",155,"Ov 12,-120",7,3,COLORADO,ATLANTA,1,20160721960959


### Clean up OU Totals, OSB, and DP features
- This should really be in skr_statfox_matchups.ipynb 

In [205]:
### Split by a comma then drop Ov and Un text

ou_cols = [col for col in df.columns if '_Total' in col]

if ou_cols != []:
    for x in ou_cols:
        xO = x.replace('_Total', '_Tot')
        xU = x.replace('_Total', '_TotLn')
        
        df[[xO,xU]] = df[x].str.split(",", expand=True)    
        df[xO] = df[xO].str.replace("Ov ","")
        df[xO] = df[xO].str.replace("Un ","")
        df = df.drop(x, axis=1)
    
new_ou_cols = [col for col in df.columns if '_Tot' in col]    
df[new_ou_cols].head()

Unnamed: 0,H__Latest_Tot,H__Latest_TotLn,H__Opening_Tot,H__Opening_TotLn,V__Latest_Tot,V__Latest_TotLn,V__Opening_Tot,V__Opening_TotLn
0,8.5,-105,8.5,-105,8.5,-115,8.5,-115
1,9.0,-115,9.0,-115,9.0,-105,9.0,-105
2,7.0,-110,7.0,-110,7.0,-110,7.0,-110
3,7.0,105,7.0,105,7.0,-125,7.0,-125
4,9.0,-115,9.5,-120,9.0,-105,9.5,100


In [206]:
### Remove parantheses from numeric values

osb_cols = [col for col in df.columns if '_OSB_' in col]
dp_cols = [col for col in df.columns if '_DP_' in col]
paren_cols = osb_cols + dp_cols

if paren_cols != []:
    for x in paren_cols:
        df[x] = df[x].str.replace("(","")
        df[x] = df[x].str.replace(")","")
        
df[paren_cols].head()        

Unnamed: 0,H_HitField_TeamFielding_OSB_AllGames,H_HitField_TeamFielding_OSB_HomeGames,H_HitField_TeamFielding_OSB_LeftyStarters,H_HitField_TeamFielding_OSB_RightyStarters,V_HitField_TeamFielding_OSB_AllGames,V_HitField_TeamFielding_OSB_LeftyStarters,V_HitField_TeamFielding_OSB_RightyStarters,V_HitField_TeamFielding_OSB_RoadGames,H_HitField_TeamFielding_DP_AllGames,H_HitField_TeamFielding_DP_HomeGames,H_HitField_TeamFielding_DP_LeftyStarters,H_HitField_TeamFielding_DP_RightyStarters,V_HitField_TeamFielding_DP_AllGames,V_HitField_TeamFielding_DP_LeftyStarters,V_HitField_TeamFielding_DP_RightyStarters,V_HitField_TeamFielding_DP_RoadGames
0,129,64,,90,104,,70.0,37,182,94,,114,138,,77.0,67
1,124,66,,88,111,31.0,,58,157,77,,103,170,32.0,,82
2,108,47,,78,86,,61.0,45,160,98,,96,164,,112.0,88
3,86,40,,59,65,,46.0,26,155,78,,104,150,,98.0,68
4,97,50,,62,87,,58.0,48,168,97,,109,168,,112.0,83


In [207]:
### Remove percent signs

perc = [col for col in df.columns if '_Pct_' in col]

if perc != []:
    for x in perc:
        df[x] = df[x].str.replace("%","")
        df[x] = df[x].str.replace(" ","")

df[perc].head()

Unnamed: 0,H_Bullpen_Pct_AllGames,H_Bullpen_Pct_HomeGames,V_Bullpen_Pct_AllGames,V_Bullpen_Pct_RoadGames
0,76.1,73.5,73.8,79.3
1,66.7,68.0,58.1,53.6
2,60.7,57.7,65.6,58.1
3,76.0,77.3,63.6,60.0
4,78.9,82.8,62.5,68.4


### Dedupe on matchup index (YYYYMMDDHHHVVV)

In [208]:
df = df.drop_duplicates(['matchidx'])

In [209]:
df.shape

(11708, 594)

### Separate features and targets

In [210]:
lines = [col for col in df.columns if 'Latest_Line' in col]
teams = ['tm_h', 'tm_v']
scores = ['sc_h','sc_v']
feats = ['win_h']
# drop = pd.concat([lines, totals, scores, feats])
drop = lines+scores+feats
drop

['H__Latest_Line', 'V__Latest_Line', 'sc_h', 'sc_v', 'win_h']

In [211]:
df_feat = df.drop(drop, axis=1).reset_index(drop=True)
df_targ = pd.to_numeric(df['win_h'], errors='coerce').reset_index(drop=True)
df_openline = pd.to_numeric(df['H__Opening_Line'], errors='coerce').reset_index(drop=True)
df_lateline = pd.to_numeric(df['H__Latest_Line'], errors='coerce').reset_index(drop=True)

### Add year and month variable

In [212]:
def get_month(x):
    """Take match index YYYYMMDDHHHVVV and return the month of game
    """
    flr = math.floor(x/100000000)
    flrmod = flr%100
    return str(flrmod)

def get_year(x):
    """Take match index YYYYMMDDHHHVVV and return the year of game
    """
    flr = math.floor(x/10000000000)
    return str(flr)

In [213]:
df_feat['month'] = df_feat['matchidx'].apply(get_month)
df_feat['year'] = df_feat['matchidx'].apply(get_year)

In [214]:
df_feat.shape

(11708, 591)

In [215]:
df_feat[['matchidx','year','month']].head()

Unnamed: 0,matchidx,year,month
0,20100405924923,2010,4
1,20100405918917,2010,4
2,20100405922921,2010,4
3,20100405926925,2010,4
4,20100405920919,2010,4


### Final Clean
- Try to convert object to numeric
- If except: convert object to binary dummies

In [302]:
def preprocess_features(x):
    """Clean features to get everything numeric.
    1) Strip percent sign, space, and commas
    2) Convert objects to numeric if possible
    3) Else, convert objects to dummies
    """
#     output = pd.DataFrame()
    
    for col, col_data in x.iteritems():
        
        if col_data.dtype == object:
        
            try:
                col_data = pd.to_numeric(col_data)
                x = x.drop(col, axis=1)
                
            except:
                col_data = pd.get_dummies(col_data, prefix=col)
                x = x.drop(col, axis=1)
            
        x = pd.concat([x, col_data], axis=1)
        
    return x

In [262]:
df_feat_num = preprocess_features(df_feat)

In [303]:
df_feat_num = df_feat_num.fillna(value=0)

In [304]:
df_feat_num.head()

Unnamed: 0,H_Bullpen_BB_AllGames,H_Bullpen_BB_HomeGames,H_Bullpen_BSV_AllGames,H_Bullpen_BSV_HomeGames,H_Bullpen_ERA_AllGames,H_Bullpen_ERA_HomeGames,H_Bullpen_ER_AllGames,H_Bullpen_ER_HomeGames,H_Bullpen_HR_AllGames,H_Bullpen_HR_HomeGames,...,H__Latest_Tot,H__Latest_TotLn,H__Opening_Tot,H__Opening_TotLn,V__Latest_Tot,V__Latest_TotLn,V__Opening_Tot,V__Opening_TotLn,month,year
0,228,117,17,9,4.42,4.69,253,138,56,30,...,8.5,-105.0,8.5,-105.0,8.5,-115.0,8.5,-115.0,4,2010
1,190,107,18,8,4.06,4.41,212,126,49,25,...,9.0,-115.0,9.0,-115.0,9.0,-105.0,9.0,-105.0,4,2010
2,244,114,22,11,5.02,5.52,266,153,57,23,...,7.0,-110.0,7.0,-110.0,7.0,-110.0,7.0,-110.0,4,2010
3,194,81,12,5,3.56,2.87,221,90,42,13,...,7.0,105.0,7.0,105.0,7.0,-125.0,7.0,-125.0,4,2010
4,194,102,12,5,3.95,3.91,213,113,49,30,...,9.0,-115.0,9.5,-120.0,9.0,-105.0,9.5,100.0,4,2010


In [305]:
df_feat_num['H__Latest_TotLn']

0       -105.0
1       -115.0
2       -110.0
3        105.0
4       -115.0
5       -110.0
6       -115.0
7       -110.0
8       -115.0
9       -115.0
10       105.0
11      -110.0
12      -130.0
13      -110.0
14      -110.0
15      -120.0
16      -115.0
17      -120.0
18      -120.0
19      -115.0
20      -105.0
21      -120.0
22      -120.0
23      -120.0
24      -120.0
25      -120.0
26      -105.0
27      -105.0
28      -120.0
29      -110.0
         ...  
11678   -110.0
11679   -120.0
11680   -115.0
11681   -135.0
11682   -115.0
11683   -110.0
11684    100.0
11685   -115.0
11686   -110.0
11687   -105.0
11688   -105.0
11689   -105.0
11690   -115.0
11691   -130.0
11692   -115.0
11693   -115.0
11694   -105.0
11695    105.0
11696   -115.0
11697    115.0
11698    110.0
11699   -110.0
11700   -115.0
11701   -120.0
11702   -115.0
11703   -110.0
11704   -105.0
11705   -125.0
11706    100.0
11707   -115.0
Name: H__Latest_TotLn, Length: 11708, dtype: float64

In [306]:
[x for x in df_feat_num.columns if 'tm_' in x]

['tm_h_ARIZONA',
 'tm_h_ATLANTA',
 'tm_h_BALTIMORE',
 'tm_h_BOSTON',
 'tm_h_CHICAGOCUBS',
 'tm_h_CHIWHITESOX',
 'tm_h_CINCINNATI',
 'tm_h_CLEVELAND',
 'tm_h_COLORADO',
 'tm_h_DETROIT',
 'tm_h_FLORIDA',
 'tm_h_HOUSTON',
 'tm_h_KANSASCITY',
 'tm_h_LAANGELS',
 'tm_h_LADODGERS',
 'tm_h_MIAMI',
 'tm_h_MILWAUKEE',
 'tm_h_MINNESOTA',
 'tm_h_NYMETS',
 'tm_h_NYYANKEES',
 'tm_h_OAKLAND',
 'tm_h_PHILADELPHIA',
 'tm_h_PITTSBURGH',
 'tm_h_SANDIEGO',
 'tm_h_SANFRANCISCO',
 'tm_h_SEATTLE',
 'tm_h_STLOUIS',
 'tm_h_TAMPABAY',
 'tm_h_TEXAS',
 'tm_h_TORONTO',
 'tm_h_WASHINGTON',
 'tm_v_ARIZONA',
 'tm_v_ATLANTA',
 'tm_v_BALTIMORE',
 'tm_v_BOSTON',
 'tm_v_CHICAGOCUBS',
 'tm_v_CHIWHITESOX',
 'tm_v_CINCINNATI',
 'tm_v_CLEVELAND',
 'tm_v_COLORADO',
 'tm_v_DETROIT',
 'tm_v_FLORIDA',
 'tm_v_HOUSTON',
 'tm_v_KANSASCITY',
 'tm_v_LAANGELS',
 'tm_v_LADODGERS',
 'tm_v_MIAMI',
 'tm_v_MILWAUKEE',
 'tm_v_MINNESOTA',
 'tm_v_NYMETS',
 'tm_v_NYYANKEES',
 'tm_v_OAKLAND',
 'tm_v_PHILADELPHIA',
 'tm_v_PITTSBURGH',
 'tm_v_SAN

### Convert open and close moneylines to probability

In [307]:
def american_to_probability(x):
    """Turns American +/- odds into probability 0 to 1 exclusive
    NOTE: Returns 0 if missing since 0 and 1 are impossible from lines
    """
    if x < -99:
        num = abs(x)
        pr = num/(100+num)
        return pr

    elif x >= 100:
        num = x
        pr = 100/(100+num)
        return pr
    
    elif x == None:
        pr = 0

#     else:
#         print('Error: No sign found in betting line string')

### Calculate open and close moneyline ROC AUC

In [308]:
df_lateline.shape

(11708,)

In [309]:
df_lateline.head()

0   -130.0
1   -160.0
2   -110.0
3    120.0
4   -140.0
Name: H__Latest_Line, dtype: float64

In [310]:
df_lateline_prob = df_lateline.apply(american_to_probability)

df_lateline_prob = df_lateline_prob[lambda x: (x > 0) & (x < 1)]

In [311]:
df_targ_keeps = df_targ[df_lateline_prob.index]

roc_auc_score(df_targ_keeps, df_lateline_prob)

0.5921597934812142

In [312]:
df_targ.shape

(11708,)

In [313]:
df_targ_keeps.shape

(11693,)

In [314]:
df_lateline_prob.shape

(11693,)

In [315]:
df_openline_prob = df_openline.apply(american_to_probability)

keeps = df_openline_prob[lambda x: (0 <= x) & (x <= 1)]

df_openline_prob = df_openline_prob[keeps.index]

df_targ_keeps = df_targ[keeps.index]

roc_auc_score(df_targ_keeps, df_openline_prob)

0.5873601475140908

### df RF with AUC and no preprocessing

In [316]:
df_feat_num.shape

(11708, 1487)

In [317]:
nsplit = round(len(df_feat_num)*0.8)
nsplit

9366

In [318]:
df_feat_num_train = df_feat_num.iloc[:nsplit]
df_targ_train = df_targ.iloc[:nsplit]
df_feat_num_df = df_feat_num.iloc[nsplit:]
df_targ_df = df_targ.iloc[nsplit:]

In [320]:
clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
df_fit = clf.fit(df_feat_num_train, df_targ_train)
df_pred = df_fit.predict(df_feat_num_df)
roc_auc_score(df_targ_df, df_pred)

0.515078362675863

### Examine overall features

In [325]:
df_feat_num[df_feat_num.columns[10:30]].describe()

Unnamed: 0,H_Bullpen_H_AllGames,H_Bullpen_H_AllGames.1,H_Bullpen_H_HomeGames,H_Bullpen_H_HomeGames.1,H_Bullpen_IP_AllGames,H_Bullpen_IP_AllGames.1,H_Bullpen_IP_HomeGames,H_Bullpen_IP_HomeGames.1,H_Bullpen_L_AllGames,H_Bullpen_L_AllGames.1,...,H_Bullpen_W_HomeGames,H_Bullpen_W_HomeGames.1,H_HitField_TeamBatting_2B_AllGames,H_HitField_TeamBatting_2B_AllGames.1,H_HitField_TeamBatting_2B_HomeGames,H_HitField_TeamBatting_2B_HomeGames.1,H_HitField_TeamBatting_2B_LeftyStarters,H_HitField_TeamBatting_2B_LeftyStarters.1,H_HitField_TeamBatting_2B_RightyStarters,H_HitField_TeamBatting_2B_RightyStarters.1
count,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,...,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0,11708.0
mean,206.42834,206.42834,106.004527,106.004527,226.271541,226.271541,117.864956,117.864956,10.902887,10.902887,...,6.334131,6.334131,37.384438,37.384438,18.626836,18.626836,2.84959,2.84959,19.65152,19.65152
std,133.248325,133.248325,70.647088,70.647088,142.955889,142.955889,75.599792,75.599792,7.597416,7.597416,...,4.738799,4.738799,68.783904,68.783904,34.887889,34.887889,11.503622,11.503622,44.25821,44.25821
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,90.75,90.75,45.0,45.0,101.3,101.3,52.0,52.0,5.0,5.0,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,194.0,194.0,98.0,98.0,214.8,214.8,111.0,111.0,10.0,10.0,...,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,310.0,310.0,159.0,159.0,340.7,340.7,178.7,178.7,16.0,16.0,...,10.0,10.0,50.25,50.25,23.0,23.0,0.0,0.0,0.0,0.0
max,659.0,659.0,395.0,395.0,625.4,625.4,349.7,349.7,39.0,39.0,...,23.0,23.0,325.0,325.0,177.0,177.0,109.0,109.0,237.0,237.0


## df out PCA

### Standardize Features

In [326]:
def RunPCA(X,n):
    """Takes an input data set X and returns n principal components
    """
    # Create a scaler object
    sc = StandardScaler()
    
    # Fit the scaler to the features and transform
    X_std = sc.fit_transform(X)

    # Create a pca object with the 2 components as a parameter
    pca = decomposition.PCA(n_components=n)

    # Fit the PCA and transform the data
    X_std_pca = pca.fit_transform(X_std)
    
    return X_std_pca

In [327]:
df_feat_num_train = df_feat_num.iloc[:nsplit]
df_targ_train = df_targ.iloc[:nsplit]
df_feat_num_df = df_feat_num.iloc[nsplit:]
df_targ_df = df_targ.iloc[nsplit:]

### Practice with PCA

transform df with fit on train

In [328]:
X1 = df_feat_num_train
X2 = df_feat_num_df

# Create a scaler object
sc = StandardScaler()

# Fit the scaler to the features and transform
X1_std = sc.fit_transform(X1)
X2_std = sc.fit(X1).transform(X2)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)


### Tune n_components param

In [None]:
scores = dict()

for cnt in range(1,102,5):

    # Create a pca object with the 2 components as a parameter
    pca = decomposition.PCA(n_components=cnt)

    # Fit the PCA and transform the data
    X1_std_pca = pca.fit_transform(X1_std)
    X2_std_pca = pca.fit(X1_std).transform(X2_std)

    std_pca_train = X1_std_pca
    std_pca_df = X2_std_pca

    clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1, random_state=1)
    df_fit = clf.fit(std_pca_train, df_targ_train)
    df_pred = df_fit.predict(std_pca_df)
    score = roc_auc_score(df_targ_df, df_pred)
    
    # Append score to dict
    scores[cnt] = score
    
    # Print for OCD
    print(cnt,' ',score)

1   0.4881798845335764
6   0.49023587513225186
11   0.5088148400347059
16   0.5008815572225089
21   0.5032721588267381
26   0.5109920081419571


In [54]:
df_feat_num_desc = df_feat_num_train.describe().loc[['mean', 'std']]

In [55]:
df_feat_num_desc

Unnamed: 0.1,Unnamed: 0,H_Bullpen_BB_AllGames,H_Bullpen_BB_HomeGames,H_Bullpen_BSV_AllGames,H_Bullpen_BSV_HomeGames,H_Bullpen_ERA_AllGames,H_Bullpen_ERA_HomeGames,H_Bullpen_ER_AllGames,H_Bullpen_ER_HomeGames,H_Bullpen_HR_AllGames,...,tm_v_PHILADELPHIA,tm_v_PITTSBURGH,tm_v_SANDIEGO,tm_v_SANFRANCISCO,tm_v_SEATTLE,tm_v_STLOUIS,tm_v_TAMPABAY,tm_v_TEXAS,tm_v_TORONTO,tm_v_WASHINGTON
mean,20115710000000.0,65.045066,32.309246,6.18648,2.983683,3.775439,3.654254,67.433178,34.60878,16.561772,...,0.031857,0.033023,0.0338,0.032634,0.034577,0.033023,0.033411,0.03108,0.034965,0.034965
std,8656925000.0,46.171964,23.484619,4.564964,2.565982,1.268546,1.634877,49.550267,27.706424,12.417527,...,0.175653,0.17873,0.180748,0.177711,0.18274,0.17873,0.179742,0.173568,0.183727,0.183727


In [56]:
for x in df_feat_num_desc.columns:
    print(df_feat_num_desc[x])

mean    2.011571e+13
std     8.656925e+09
Name: Unnamed: 0, dtype: float64
mean    65.045066
std     46.171964
Name: H_Bullpen_BB_AllGames, dtype: float64
mean    32.309246
std     23.484619
Name: H_Bullpen_BB_HomeGames, dtype: float64
mean    6.186480
std     4.564964
Name: H_Bullpen_BSV_AllGames, dtype: float64
mean    2.983683
std     2.565982
Name: H_Bullpen_BSV_HomeGames, dtype: float64
mean    3.775439
std     1.268546
Name: H_Bullpen_ERA_AllGames, dtype: float64
mean    3.654254
std     1.634877
Name: H_Bullpen_ERA_HomeGames, dtype: float64
mean    67.433178
std     49.550267
Name: H_Bullpen_ER_AllGames, dtype: float64
mean    34.608780
std     27.706424
Name: H_Bullpen_ER_HomeGames, dtype: float64
mean    16.561772
std     12.417527
Name: H_Bullpen_HR_AllGames, dtype: float64
mean    8.475524
std     7.072441
Name: H_Bullpen_HR_HomeGames, dtype: float64
mean    150.372183
std     108.654996
Name: H_Bullpen_H_AllGames, dtype: float64
mean    76.515929
std     58.972913
Name: H_B

In [70]:
scores = dict()
for cnt in range(4):
    scores[cnt] = cnt*4

In [71]:
scores

{0: 0, 1: 4, 2: 8, 3: 12}