# Effect of Travel on Baseball Performance
***
**Team Members:** Brandon Zink, Cameron Connor, Abiel Fattore

In this project, we will look at the effect of travel in terms of distance in miles and number of time zones changed on the traveling teams performance.

In [58]:
import numpy as np 
import pandas as pd
import matplotlib.pylab as plt
%matplotlib inline
from math import sin, cos, sqrt, atan2, radians
import math
from scipy import stats

In [59]:
#Read in the data, store in appropriate data frames

dfStadiumsInfo = pd.read_csv('data/Stadium_Info.csv')

#Import and clean the game logs data
dfGameInfo = pd.read_csv('data/Game_Logs.csv', low_memory=False, header=None)
#Name all of the columns
dfGameInfo.columns = (['date','gameNumber','day','awayTeam','awayTeamLg','awayTeamGmNmbr','homeTeam','homeTeamLg','homeTeamGmNmbr','awayScore','homeScore','lengthInOuts','timeOfDay','completion','forfeit','protest','parkID','attendance','lengthOfGameInMinutes','awayLineScore','homeLineScore','awayAB','awayH','away2B','away3B','awayHR','awayRBI','awaySH','awaySF','awayHBP','awayBB','awayIBB','awaySO','awaySB','awayCS','awayGIDP','awayCatchInf','awayLOB','awayPitchersUsed','awayIndivER','awayER','awayWildPitch','awayBalk','awayPO','awayAssists','awayE','awayPassedBalls','awayDoubPlay','awayTripPlay','homeAB','homeH','home2B','home3B','homeHR','homeRBI','homeSH','homeSF','homeHBP','homeBB','homeIBB','homeSO','homeSB','homeCS','homeGIDP','homeCatchInf','homeLOB','homePitchersUsed','homeIndivER','homeER','homeWildPitch','homeBalk','homePO','homeAssists','homeE','homePassedBalls','homeDoubPlay','homeTripPlay','homePlateUmpID','homePlateUmpName','1BUmpID','1BUmpName','2BUmpID','2BUmpName','3BUmpID','3BUmpName','LFUmpID','LFUmpName','RFUmpID','RFUmpName','awayManagerID','awayManagerName','homeManagerID','homeManagerName','winPitcherID','winPitcherName','losePitcherID','losePitcherName','savePitcherID','savePitcherName','GWRBIHitterID','GWRBIHitterName','awaySPID','awaySPName','homeSPID','homeSPName','away1ID','away1Name','away1POS','away2ID','away2Name','away2POS','away3ID','away3Name','away3POS','away4ID','away4Name','away4POS','away5ID','away5Name','away5POS','away6ID','away6Name','away6POS','away7ID','away7Name','away7POS','away8ID','away8Name','away8POS','away9ID','away9Name','away9POS','home1ID','home1Name','home1POS','home2ID','home2Name','home2POS','home3ID','home3Name','home3POS','home4ID','home4Name','home4POS','home5ID','home5Name','home5POS','home6ID','home6Name','home6POS','home7ID','home7Name','home7POS','home8ID','home8Name','home8POS','home9ID','home9Name','home9POS','addInfo','infoAquisition'])
#Remove the last two digits of parkID since they are useless
dfGameInfo['parkID'] = dfGameInfo['parkID'].astype(str).str[:-2].astype(str)
#Get rid of games in Tokyo, Montreal, Puerto Rico, Disney World, Sydney, Fort Bragg
dfGameInfo = dfGameInfo[dfGameInfo['parkID'] != 'TOK']
dfGameInfo = dfGameInfo[dfGameInfo['parkID'] != 'MON']
dfGameInfo = dfGameInfo[dfGameInfo['parkID'] != 'SJU']
dfGameInfo = dfGameInfo[dfGameInfo['parkID'] != 'LBV']
dfGameInfo = dfGameInfo[dfGameInfo['parkID'] != 'SYD']
dfGameInfo = dfGameInfo[dfGameInfo['parkID'] != 'FTB']

dfGameInfo.head()

Unnamed: 0,date,gameNumber,day,awayTeam,awayTeamLg,awayTeamGmNmbr,homeTeam,homeTeamLg,homeTeamGmNmbr,awayScore,...,home7Name,home7POS,home8ID,home8Name,home8POS,home9ID,home9Name,home9POS,addInfo,infoAquisition
2,20000403,0,Mon,COL,NL,1,ATL,NL,1,0,...,Eddie Perez,2,weisw001,Walt Weiss,6,maddg002,Greg Maddux,1,,Y
3,20000403,0,Mon,MIL,NL,1,CIN,NL,1,3,...,Aaron Boone,5,tuckm001,Michael Tucker,7,harnp001,Pete Harnisch,1,,Y
4,20000403,0,Mon,SFN,NL,1,FLO,NL,1,4,...,Brant Brown,9,redmm001,Mike Redmond,2,ferna001,Alex Fernandez,1,,Y
6,20000403,0,Mon,SDN,NL,1,NYN,NL,3,1,...,Melvin Mora,8,ordor001,Rey Ordonez,6,leita001,Al Leiter,1,,Y
7,20000403,0,Mon,CHN,NL,3,SLN,NL,1,1,...,Eric Davis,9,mathm001,Mike Matheny,2,kiled001,Darryl Kile,1,,Y


In [60]:
#Team ID Lookup
#This is used across multiple functions to give each team a specific number that can be used in lookups (returns 0 through 29, else -1)

def team_ID_lookup(teamID):
    teamMatrix = ['ANA','ARI','ATL','BAL','BOS','CHN','CHA','CIN','CLE','COL','DET','FLO','HOU','KCA','LAN','MIL','MIN','WAS','NYN','NYA','OAK','PHI','PIT','SLN','SDN','SFN','SEA','TBA','TEX','TOR']
    for i in range (0,30):
        if(teamMatrix[i] == teamID):
            return i
    return -1

In [61]:
#Find Distance
#This function finds the distance between two sets of lat, long

def find_distance(latitude1, longitude1, latitude2, longitude2):
    # approximate radius of earth in mi
    R = 3959.0

    lat1 = radians(latitude1)
    lon1 = radians(longitude1)
    lat2 = radians(latitude2)
    lon2 = radians(longitude2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c
    return distance


In [62]:
#Get the time zone difference between two stadiums, returns a value between -3 and 3, where -3 means that the team traveled
#3 hours westward, and 3 means that the team traveled 3 hours eastward. Takes inputs as 3 letter code for teams. Returns
#-5 if either team is not found.

def find_time_zone_difference(awayTeam, homeTeam):
    #Convert the dataframe to an array to make it easier to loop through
    distance_array = dfStadiumsInfo.values
    
    homeTeamTZ = 1000
    awayTeamTZ = -1000
    
    #Find the time zones for the two teams
    for i in range(0,30):
        if(distance_array[i][1] == homeTeam):
            homeTeamTZ = distance_array[i][4]
        if(distance_array[i][1] == awayTeam):
            awayTeamTZ = distance_array[i][4]
    
    if(homeTeamTZ - awayTeamTZ > 3 or homeTeamTZ - awayTeamTZ < -3):
        return -5
    else:
        return homeTeamTZ - awayTeamTZ

In [63]:
#Gets the distance stored in the distance matrix, returns 0 if not yet updated

def get_distance_matrix(matrix, team1ID, team2ID):
    if(matrix[team1ID][team2ID] > 0):
        return matrix[team1ID][team2ID]
    elif (matrix[team2ID][team1ID] > 0):
        return matrix[team2ID][team1ID]
    else:
        return 0

In [64]:
#Creates/resets the distance matrix to all 0, returns the matrix

def start_distance_matrix():
    return np.zeros((30, 30), dtype=float)

#initialize the distance matrix
dist_matrix = start_distance_matrix()

In [65]:
#Updates a position in the matrix to the specified distance, returns the input matrix

def update_distance_matrix(matrix, team1ID, team2ID, distance):
    matrix[team1ID][team2ID] = distance
    matrix[team2ID][team1ID] = distance
    return matrix

In [66]:
#This gets the distance between two stadiums from the stored matrix, and if it's not there, it calculates it and stores it
#in there (dist_matrix)

def get_distance(awayTeam, homeTeam):
    global dist_matrix
    
    stadium_info_array = dfStadiumsInfo.values
    distance = 0
    #get the distance between the stadiums from the distance matrix if it has already been updated
    distance = get_distance_matrix(dist_matrix, team_ID_lookup(awayTeam), team_ID_lookup(homeTeam))
    
    #if it has not been updated yet, update it
    if(distance == 0):
        #loop through the stadium matrix to find the correct row, get the lat and long from that row, calculate the distance,
        #and then update the matrix to include that distance
        awayLat = awayLong = homeLat = homeLong = -1.0
        for i in range(0,30):
            
            if(stadium_info_array[i][1] == awayTeam):
                awayLat = stadium_info_array[i][2]
                awayLong = stadium_info_array[i][3]
                
            if(stadium_info_array[i][1] == homeTeam):
                homeLat = stadium_info_array[i][2]
                homeLong = stadium_info_array[i][3]
            
        distance = find_distance(awayLat, awayLong, homeLat, homeLong)
                
        dist_matrix = update_distance_matrix(dist_matrix, team_ID_lookup(awayTeam), team_ID_lookup(homeTeam), distance)
        
    return distance

In [67]:
def OBP_calc(H, BB, HBP, AB, SF):
    return ((H+BB+HBP)/(BB+HBP+AB+SF))

def SLG_calc(H, twoB, threeB, HR, AB):
    return (((H-(twoB+threeB+HR))+(twoB*2)+(threeB*3)+(HR*4))/AB)
    
def ISO_calc(SLG, BA):
    return SLG-BA
    
def FIP_calc(HR, HBP, BB, K, IP):
    return ((((HR*13)+(3*(HBP+BB))-(2*K))/IP)+3.2)

In [68]:
#This just checks the distance matrix to make sure it is updating correctly

def check_distance_matrix():
    for i in range(0,30):
        print(dist_matrix[i])

In [89]:
#This gets us the data that we need for the analysis
dfGameData = dfGameInfo[['date','awayTeam','homeTeam','awayScore','homeScore','lengthInOuts','attendance','awayAB','awayH',
                                  'away2B','away3B','awayHR','awayRBI','awaySH','awaySF','awayHBP','awayBB','awaySO',
                                 'awayER','homeAB','homeH','home2B','home3B','homeHR','homeRBI','homeSH','homeSF','homeHBP',
                                  'homeBB','homeSO','homeER']].copy()


#Here we add the columns that we need including distance, time zones, OBP, SLG, etc.
dfGameData['distance'] = -1
dfGameData['timeZoneChange'] = -1
dfGameData['away Batting R/9'] = -1.0
dfGameData['away Batting H/9'] = -1.0
dfGameData['away Batting BB/9'] = -1.0
dfGameData['away Batting K/9'] = -1.0
dfGameData['awayBA'] = -1.0
dfGameData['awayOBP'] = -1.0
dfGameData['awaySLG'] = -1.0
dfGameData['awayISO'] = -1.0
dfGameData['away Pitching H/9'] = -1.0
dfGameData['away Pitching BB/9'] = -1.0
dfGameData['away Pitching K/9'] = -1.0
dfGameData['awayERA'] = -1.0
dfGameData['awayFIP'] = -1.0


#Loop through an calculate the values 
#WARNING: Not pretty
for i, row in dfGameInfo.iterrows():
    
    #The number of innings the away team hit and pitched
    away_innings_hit = math.ceil(row['lengthInOuts']/6.0)
    away_innings_pitch = math.floor(row['lengthInOuts']/6.0)
    
    #distance
    distance_val = get_distance(str(row['awayTeam']), str(row['homeTeam']))
    dfGameData.set_value(i, 'distance', distance_val)
    
    #time zone change
    t_z = find_time_zone_difference(str(row['awayTeam']), str(row['homeTeam']))
    dfGameData.set_value(i, 'timeZoneChange', t_z)
    
    #away batting R/9
    bat_run_per_nine = row['awayScore']*(9.0/away_innings_hit)
    dfGameData.set_value(i, 'away Batting R/9', bat_run_per_nine)
    
    #away batting H/9
    bat_hit_per_nine = row['awayH']*(9.0/away_innings_hit)
    dfGameData.set_value(i, 'away Batting H/9', bat_hit_per_nine)
    
    #away batting BB/9
    bat_walk_per_nine = row['awayBB']*(9.0/away_innings_hit)
    dfGameData.set_value(i, 'away Batting BB/9', bat_walk_per_nine)
    
    #away batting K/9
    bat_k_per_nine = row['awaySO']*(9.0/away_innings_hit)
    dfGameData.set_value(i, 'away Batting K/9', bat_k_per_nine)
    
    #away BA
    BA = row['awayH']/row['awayAB']
    dfGameData.set_value(i, 'awayBA', BA)
    
    #away OBP
    OBP = OBP_calc(row['awayH'], row['awayBB'], row['awayHBP'], row['awayAB'], row['awaySF'])
    dfGameData.set_value(i, 'awayOBP', OBP)
    
    #away SLG
    SLG = SLG_calc(row['awayH'], row['away2B'], row['away3B'], row['awayHR'], row['awayAB'])
    dfGameData.set_value(i, 'awaySLG', SLG)
    
    #away ISO
    ISO = ISO_calc(SLG_calc(row['awayH'], row['away2B'], row['away3B'], row['awayHR'], row['awayAB']), row['awayH']/row['awayAB'])
    dfGameData.set_value(i, 'awayISO', ISO)
    
    #away pitching H/9
    pit_hit_per_nine = row['homeH']*(9.0/away_innings_pitch)
    dfGameData.set_value(i, 'away Pitching H/9', pit_hit_per_nine)
    
    #away pitching BB/9
    pit_walk_per_nine = row['homeBB']*(9.0/away_innings_pitch)
    dfGameData.set_value(i, 'away Pitching BB/9', pit_walk_per_nine)
    
    #away pitching K/9
    pit_k_per_nine = row['homeSO']*(9.0/away_innings_pitch)
    dfGameData.set_value(i, 'away Pitching K/9', pit_k_per_nine)
    
    #away ERA
    ERA = row['homeER']*(9.0/away_innings_pitch)
    dfGameData.set_value(i, 'awayERA', ERA)
    
    #away FIP
    FIP = FIP_calc(row['homeHR'], row['homeHBP'], row['homeBB'], row['homeSO'], away_innings_pitch)
    dfGameData.set_value(i, 'awayFIP', FIP)

In [70]:
#Create a dataframe for each distance section and time zone change
dfGameData = dfGameData.dropna()
dfGameData0_500 = dfGameData.loc[(dfGameData['distance'] < 500) & (dfGameData['distance'] >= 1)]
dfGameData500_1000 = dfGameData.loc[(dfGameData['distance'] < 1000) & (dfGameData['distance'] >= 500)]
dfGameData1000_2000 = dfGameData.loc[(dfGameData['distance'] < 2000) & (dfGameData['distance'] >= 1000)]
dfGameData2000 = dfGameData.loc[(dfGameData['distance'] < 10000) & (dfGameData['distance'] >= 2000)]
dfGameData3hourswest = dfGameData.loc[dfGameData['timeZoneChange'] == -3]
dfGameData2hourswest = dfGameData.loc[dfGameData['timeZoneChange'] == -2]
dfGameData1hourswest = dfGameData.loc[dfGameData['timeZoneChange'] == -1]
dfGameData0hours = dfGameData.loc[dfGameData['timeZoneChange'] == 0]
dfGameData1hourseast = dfGameData.loc[dfGameData['timeZoneChange'] == 1]
dfGameData2hourseast = dfGameData.loc[dfGameData['timeZoneChange'] == 2]
dfGameData3hourseast = dfGameData.loc[dfGameData['timeZoneChange'] == 3]

In [71]:
dfGameData3hourseast.mean()

date                 NaN
awayTeam             NaN
homeTeam             NaN
awayScore            NaN
homeScore            NaN
lengthInOuts         NaN
attendance           NaN
awayAB               NaN
awayH                NaN
away2B               NaN
away3B               NaN
awayHR               NaN
awayRBI              NaN
awaySH               NaN
awaySF               NaN
awayHBP              NaN
awayBB               NaN
awaySO               NaN
awayER               NaN
homeAB               NaN
homeH                NaN
home2B               NaN
home3B               NaN
homeHR               NaN
homeRBI              NaN
homeSH               NaN
homeSF               NaN
homeHBP              NaN
homeBB               NaN
homeSO               NaN
homeER               NaN
distance             NaN
timeZoneChange       NaN
away Batting R/9     NaN
away Batting H/9     NaN
away Batting BB/9    NaN
away Batting K/9     NaN
awayBA               NaN
awayOBP              NaN
awaySLG              NaN


In [72]:
print ("p values at < 500 miles traveled")
print ("--------------------------------")
print ("< 500 miles distance count: \t\t\t\t",len(dfGameData0_500.index))
print ("< 500 miles distance away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData0_500['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("< 500 miles distance away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData0_500['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("< 500 miles distance away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData0_500['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("< 500 miles distance away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData0_500['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("< 500 miles distance away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData0_500['awayBA'], dfGameData['awayBA'])[1]))
print ("< 500 miles distance away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData0_500['awayOBP'], dfGameData['awayOBP'])[1]))
print ("< 500 miles distance away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData0_500['awaySLG'], dfGameData['awaySLG'])[1]))
print ("< 500 miles distance away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData0_500['awayISO'], dfGameData['awayISO'])[1]))
print ("< 500 miles distance away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData0_500['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("< 500 miles distance away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData0_500['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("< 500 miles distance away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData0_500['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("< 500 miles distance away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData0_500['awayERA'], dfGameData['awayERA'])[1]))
print ("< 500 miles distance away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData0_500['awayFIP'], dfGameData['awayFIP'])[1]))

p values at < 500 miles traveled
--------------------------------
< 500 miles distance count: 				 0
< 500 miles distance away bat. R/9 p value: 		 nan
< 500 miles distance away bat. H/9 p value: 		 nan
< 500 miles distance away bat. BB/9 p value: 		 nan
< 500 miles distance away bat.  K/9 p value: 		 nan
< 500 miles distance away bat. BA p value: 		 nan
< 500 miles distance away bat. OBP p value: 		 nan
< 500 miles distance away bat. SLG p value: 		 nan
< 500 miles distance away bat. ISO p value: 		 nan
< 500 miles distance away pit. H/9 p value: 		 nan
< 500 miles distance away pit. BB/9 p value: 		 nan
< 500 miles distance away pit. K/9 p value: 		 nan
< 500 miles distance away pit. ERA p value: 		 nan
< 500 miles distance away pit. FIP p value: 		 nan


In [73]:
print ("p values at 500 - 1000 miles traveled")
print ("-------------------------------------")
print ("500 - 1000 miles distance count: \t\t\t\t",len(dfGameData500_1000.index))
print ("500 - 1000 miles distance away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData500_1000['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("500 - 1000 miles distance away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData500_1000['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("500 - 1000 miles distance away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData500_1000['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("500 - 1000 miles distance away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData500_1000['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("500 - 1000 miles distance away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData500_1000['awayBA'], dfGameData['awayBA'])[1]))
print ("500 - 1000 miles distance away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData500_1000['awayOBP'], dfGameData['awayOBP'])[1]))
print ("500 - 1000 miles distance away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData500_1000['awaySLG'], dfGameData['awaySLG'])[1]))
print ("500 - 1000 miles distance away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData500_1000['awayISO'], dfGameData['awayISO'])[1]))
print ("500 - 1000 miles distance away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData500_1000['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("500 - 1000 miles distance away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData500_1000['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("500 - 1000 miles distance away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData500_1000['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("500 - 1000 miles distance away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData500_1000['awayERA'], dfGameData['awayERA'])[1]))
print ("500 - 1000 miles distance away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData500_1000['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 500 - 1000 miles traveled
-------------------------------------
500 - 1000 miles distance count: 				 0
500 - 1000 miles distance away bat. R/9 p value: 		 nan
500 - 1000 miles distance away bat. H/9 p value: 		 nan
500 - 1000 miles distance away bat. BB/9 p value: 		 nan
500 - 1000 miles distance away bat.  K/9 p value: 		 nan
500 - 1000 miles distance away bat. BA p value: 		 nan
500 - 1000 miles distance away bat. OBP p value: 		 nan
500 - 1000 miles distance away bat. SLG p value: 		 nan
500 - 1000 miles distance away bat. ISO p value: 		 nan
500 - 1000 miles distance away pit. H/9 p value: 		 nan
500 - 1000 miles distance away pit. BB/9 p value: 		 nan
500 - 1000 miles distance away pit. K/9 p value: 		 nan
500 - 1000 miles distance away pit. ERA p value: 		 nan
500 - 1000 miles distance away pit. FIP p value: 		 nan


In [74]:
print ("p values at 1000 - 2000 miles traveled")
print ("--------------------------------------")
print ("1000 - 2000 miles distance count: \t\t\t\t",len(dfGameData1000_2000.index))
print ("1000 - 2000 miles distance away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("1000 - 2000 miles distance away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("1000 - 2000 miles distance away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("1000 - 2000 miles distance away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("1000 - 2000 miles distance away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['awayBA'], dfGameData['awayBA'])[1]))
print ("1000 - 2000 miles distance away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['awayOBP'], dfGameData['awayOBP'])[1]))
print ("1000 - 2000 miles distance away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['awaySLG'], dfGameData['awaySLG'])[1]))
print ("1000 - 2000 miles distance away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['awayISO'], dfGameData['awayISO'])[1]))
print ("1000 - 2000 miles distance away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("1000 - 2000 miles distance away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("1000 - 2000 miles distance away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("1000 - 2000 miles distance away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['awayERA'], dfGameData['awayERA'])[1]))
print ("1000 - 2000 miles distance away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData1000_2000['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 1000 - 2000 miles traveled
--------------------------------------
1000 - 2000 miles distance count: 				 0
1000 - 2000 miles distance away bat. R/9 p value: 		 nan
1000 - 2000 miles distance away bat. H/9 p value: 		 nan
1000 - 2000 miles distance away bat. BB/9 p value: 		 nan
1000 - 2000 miles distance away bat.  K/9 p value: 		 nan
1000 - 2000 miles distance away bat. BA p value: 		 nan
1000 - 2000 miles distance away bat. OBP p value: 		 nan
1000 - 2000 miles distance away bat. SLG p value: 		 nan
1000 - 2000 miles distance away bat. ISO p value: 		 nan
1000 - 2000 miles distance away pit. H/9 p value: 		 nan
1000 - 2000 miles distance away pit. BB/9 p value: 		 nan
1000 - 2000 miles distance away pit. K/9 p value: 		 nan
1000 - 2000 miles distance away pit. ERA p value: 		 nan
1000 - 2000 miles distance away pit. FIP p value: 		 nan


In [75]:
print ("p values at > 2000 miles traveled")
print ("---------------------------------")
print ("> 2000 miles distance count: \t\t\t\t",len(dfGameData2000.index))
print ("> 2000 miles distance away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData2000['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("> 2000 miles distance away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData2000['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("> 2000 miles distance away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData2000['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("> 2000 miles distance away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData2000['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("> 2000 miles distance away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData2000['awayBA'], dfGameData['awayBA'])[1]))
print ("> 2000 miles distance away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData2000['awayOBP'], dfGameData['awayOBP'])[1]))
print ("> 2000 miles distance away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData2000['awaySLG'], dfGameData['awaySLG'])[1]))
print ("> 2000 miles distance away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData2000['awayISO'], dfGameData['awayISO'])[1]))
print ("> 2000 miles distance away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData2000['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("> 2000 miles distance away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData2000['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("> 2000 miles distance away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData2000['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("> 2000 miles distance away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData2000['awayERA'], dfGameData['awayERA'])[1]))
print ("> 2000 miles distance away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData2000['awayFIP'], dfGameData['awayFIP'])[1]))

p values at > 2000 miles traveled
---------------------------------
> 2000 miles distance count: 				 0
> 2000 miles distance away bat. R/9 p value: 		 nan
> 2000 miles distance away bat. H/9 p value: 		 nan
> 2000 miles distance away bat. BB/9 p value: 		 nan
> 2000 miles distance away bat.  K/9 p value: 		 nan
> 2000 miles distance away bat. BA p value: 		 nan
> 2000 miles distance away bat. OBP p value: 		 nan
> 2000 miles distance away bat. SLG p value: 		 nan
> 2000 miles distance away bat. ISO p value: 		 nan
> 2000 miles distance away pit. H/9 p value: 		 nan
> 2000 miles distance away pit. BB/9 p value: 		 nan
> 2000 miles distance away pit. K/9 p value: 		 nan
> 2000 miles distance away pit. ERA p value: 		 nan
> 2000 miles distance away pit. FIP p value: 		 nan


In [76]:
print ("p values at 3 time zones traveled west")
print ("--------------------------------------")
print ("3 hours west traveled count: \t\t\t\t",len(dfGameData3hourswest.index))
print ("3 hours west traveled away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("3 hours west traveled away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("3 hours west traveled away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("3 hours west traveled away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("3 hours west traveled away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['awayBA'], dfGameData['awayBA'])[1]))
print ("3 hours west traveled away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['awayOBP'], dfGameData['awayOBP'])[1]))
print ("3 hours west traveled away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['awaySLG'], dfGameData['awaySLG'])[1]))
print ("3 hours west traveled away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['awayISO'], dfGameData['awayISO'])[1]))
print ("3 hours west traveled away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("3 hours west traveled away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("3 hours west traveled away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("3 hours west traveled away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['awayERA'], dfGameData['awayERA'])[1]))
print ("3 hours west traveled away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData3hourswest['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 3 time zones traveled west
--------------------------------------
3 hours west traveled count: 				 0
3 hours west traveled away bat. R/9 p value: 		 nan
3 hours west traveled away bat. H/9 p value: 		 nan
3 hours west traveled away bat. BB/9 p value: 		 nan
3 hours west traveled away bat.  K/9 p value: 		 nan
3 hours west traveled away bat. BA p value: 		 nan
3 hours west traveled away bat. OBP p value: 		 nan
3 hours west traveled away bat. SLG p value: 		 nan
3 hours west traveled away bat. ISO p value: 		 nan
3 hours west traveled away pit. H/9 p value: 		 nan
3 hours west traveled away pit. BB/9 p value: 		 nan
3 hours west traveled away pit. K/9 p value: 		 nan
3 hours west traveled away pit. ERA p value: 		 nan
3 hours west traveled away pit. FIP p value: 		 nan


In [77]:
print ("p values at 2 time zones traveled west")
print ("--------------------------------------")
print ("2 hours west traveled count: \t\t\t\t",len(dfGameData2hourswest.index))
print ("2 hours west traveled away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("2 hours west traveled away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("2 hours west traveled away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("2 hours west traveled away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("2 hours west traveled away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['awayBA'], dfGameData['awayBA'])[1]))
print ("2 hours west traveled away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['awayOBP'], dfGameData['awayOBP'])[1]))
print ("2 hours west traveled away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['awaySLG'], dfGameData['awaySLG'])[1]))
print ("2 hours west traveled away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['awayISO'], dfGameData['awayISO'])[1]))
print ("2 hours west traveled away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("2 hours west traveled away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("2 hours west traveled away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("2 hours west traveled away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['awayERA'], dfGameData['awayERA'])[1]))
print ("2 hours west traveled away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData2hourswest['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 2 time zones traveled west
--------------------------------------
2 hours west traveled count: 				 0
2 hours west traveled away bat. R/9 p value: 		 nan
2 hours west traveled away bat. H/9 p value: 		 nan
2 hours west traveled away bat. BB/9 p value: 		 nan
2 hours west traveled away bat.  K/9 p value: 		 nan
2 hours west traveled away bat. BA p value: 		 nan
2 hours west traveled away bat. OBP p value: 		 nan
2 hours west traveled away bat. SLG p value: 		 nan
2 hours west traveled away bat. ISO p value: 		 nan
2 hours west traveled away pit. H/9 p value: 		 nan
2 hours west traveled away pit. BB/9 p value: 		 nan
2 hours west traveled away pit. K/9 p value: 		 nan
2 hours west traveled away pit. ERA p value: 		 nan
2 hours west traveled away pit. FIP p value: 		 nan


In [78]:
print ("p values at 1 time zone traveled west")
print ("-------------------------------------")
print ("1 hour west traveled count: \t\t\t\t",len(dfGameData1hourswest.index))
print ("1 hour west traveled away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("1 hour west traveled away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("1 hour west traveled away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("1 hour west traveled away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("1 hour west traveled away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['awayBA'], dfGameData['awayBA'])[1]))
print ("1 hour west traveled away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['awayOBP'], dfGameData['awayOBP'])[1]))
print ("1 hour west traveled away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['awaySLG'], dfGameData['awaySLG'])[1]))
print ("1 hour west traveled away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['awayISO'], dfGameData['awayISO'])[1]))
print ("1 hour west traveled away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("1 hour west traveled away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("1 hour west traveled away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("1 hour west traveled away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['awayERA'], dfGameData['awayERA'])[1]))
print ("1 hour west traveled away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData1hourswest['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 1 time zone traveled west
-------------------------------------
1 hour west traveled count: 				 0
1 hour west traveled away bat. R/9 p value: 		 nan
1 hour west traveled away bat. H/9 p value: 		 nan
1 hour west traveled away bat. BB/9 p value: 		 nan
1 hour west traveled away bat.  K/9 p value: 		 nan
1 hour west traveled away bat. BA p value: 		 nan
1 hour west traveled away bat. OBP p value: 		 nan
1 hour west traveled away bat. SLG p value: 		 nan
1 hour west traveled away bat. ISO p value: 		 nan
1 hour west traveled away pit. H/9 p value: 		 nan
1 hour west traveled away pit. BB/9 p value: 		 nan
1 hour west traveled away pit. K/9 p value: 		 nan
1 hour west traveled away pit. ERA p value: 		 nan
1 hour west traveled away pit. FIP p value: 		 nan


In [79]:
print ("p values at 0 time zone traveled")
print ("--------------------------------")
print ("0 hours traveled count: \t\t\t\t",len(dfGameData0hours.index))
print ("0 hours traveled away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData0hours['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("0 hours traveled away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData0hours['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("0 hours traveled away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData0hours['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("0 hours traveled away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData0hours['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("0 hours traveled away bat. BA p value: \t\t\t",(stats.ttest_ind(dfGameData0hours['awayBA'], dfGameData['awayBA'])[1]))
print ("0 hours traveled away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData0hours['awayOBP'], dfGameData['awayOBP'])[1]))
print ("0 hours traveled away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData0hours['awaySLG'], dfGameData['awaySLG'])[1]))
print ("0 hours traveled away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData0hours['awayISO'], dfGameData['awayISO'])[1]))
print ("0 hours traveled away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData0hours['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("0 hours traveled away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData0hours['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("0 hours traveled away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData0hours['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("0 hours traveled away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData0hours['awayERA'], dfGameData['awayERA'])[1]))
print ("0 hours traveled away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData0hours['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 0 time zone traveled
--------------------------------
0 hours traveled count: 				 0
0 hours traveled away bat. R/9 p value: 		 nan
0 hours traveled away bat. H/9 p value: 		 nan
0 hours traveled away bat. BB/9 p value: 		 nan
0 hours traveled away bat.  K/9 p value: 		 nan
0 hours traveled away bat. BA p value: 			 nan
0 hours traveled away bat. OBP p value: 		 nan
0 hours traveled away bat. SLG p value: 		 nan
0 hours traveled away bat. ISO p value: 		 nan
0 hours traveled away pit. H/9 p value: 		 nan
0 hours traveled away pit. BB/9 p value: 		 nan
0 hours traveled away pit. K/9 p value: 		 nan
0 hours traveled away pit. ERA p value: 		 nan
0 hours traveled away pit. FIP p value: 		 nan


In [80]:
print ("p values at 1 time zone traveled east")
print ("-------------------------------------")
print ("1 hour east traveled count: \t\t\t\t",len(dfGameData1hourseast.index))
print ("1 hour east traveled away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("1 hour east traveled away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("1 hour east traveled away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("1 hour east traveled away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("1 hour east traveled away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['awayBA'], dfGameData['awayBA'])[1]))
print ("1 hour east traveled away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['awayOBP'], dfGameData['awayOBP'])[1]))
print ("1 hour east traveled away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['awaySLG'], dfGameData['awaySLG'])[1]))
print ("1 hour east traveled away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['awayISO'], dfGameData['awayISO'])[1]))
print ("1 hour east traveled away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("1 hour east traveled away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("1 hour east traveled away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("1 hour east traveled away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['awayERA'], dfGameData['awayERA'])[1]))
print ("1 hour east traveled away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData1hourseast['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 1 time zone traveled east
-------------------------------------
1 hour east traveled count: 				 0
1 hour east traveled away bat. R/9 p value: 		 nan
1 hour east traveled away bat. H/9 p value: 		 nan
1 hour east traveled away bat. BB/9 p value: 		 nan
1 hour east traveled away bat.  K/9 p value: 		 nan
1 hour east traveled away bat. BA p value: 		 nan
1 hour east traveled away bat. OBP p value: 		 nan
1 hour east traveled away bat. SLG p value: 		 nan
1 hour east traveled away bat. ISO p value: 		 nan
1 hour east traveled away pit. H/9 p value: 		 nan
1 hour east traveled away pit. BB/9 p value: 		 nan
1 hour east traveled away pit. K/9 p value: 		 nan
1 hour east traveled away pit. ERA p value: 		 nan
1 hour east traveled away pit. FIP p value: 		 nan


In [81]:
print ("p values at 2 time zones traveled east")
print ("--------------------------------------")
print ("2 hours east traveled count: \t\t\t\t",len(dfGameData2hourseast.index))
print ("2 hours east traveled away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("2 hours east traveled away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("2 hours east traveled away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("2 hours east traveled away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("2 hours east traveled away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['awayBA'], dfGameData['awayBA'])[1]))
print ("2 hours east traveled away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['awayOBP'], dfGameData['awayOBP'])[1]))
print ("2 hours east traveled away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['awaySLG'], dfGameData['awaySLG'])[1]))
print ("2 hours east traveled away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['awayISO'], dfGameData['awayISO'])[1]))
print ("2 hours east traveled away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("2 hours east traveled away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("2 hours east traveled away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("2 hours east traveled away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['awayERA'], dfGameData['awayERA'])[1]))
print ("2 hours east traveled away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData2hourseast['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 2 time zones traveled east
--------------------------------------
2 hours east traveled count: 				 0
2 hours east traveled away bat. R/9 p value: 		 nan
2 hours east traveled away bat. H/9 p value: 		 nan
2 hours east traveled away bat. BB/9 p value: 		 nan
2 hours east traveled away bat.  K/9 p value: 		 nan
2 hours east traveled away bat. BA p value: 		 nan
2 hours east traveled away bat. OBP p value: 		 nan
2 hours east traveled away bat. SLG p value: 		 nan
2 hours east traveled away bat. ISO p value: 		 nan
2 hours east traveled away pit. H/9 p value: 		 nan
2 hours east traveled away pit. BB/9 p value: 		 nan
2 hours east traveled away pit. K/9 p value: 		 nan
2 hours east traveled away pit. ERA p value: 		 nan
2 hours east traveled away pit. FIP p value: 		 nan


In [82]:
print ("p values at 3 time zones traveled east")
print ("--------------------------------------")
print ("3 hours east traveled count: \t\t\t\t",len(dfGameData3hourseast.index))
print ("3 hours east traveled away bat. R/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['away Batting R/9'], dfGameData['away Batting R/9'])[1]))
print ("3 hours east traveled away bat. H/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['away Batting H/9'], dfGameData['away Batting H/9'])[1]))
print ("3 hours east traveled away bat. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['away Batting BB/9'], dfGameData['away Batting BB/9'])[1]))
print ("3 hours east traveled away bat.  K/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['away Batting K/9'], dfGameData['away Batting K/9'])[1]))
print ("3 hours east traveled away bat. BA p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['awayBA'], dfGameData['awayBA'])[1]))
print ("3 hours east traveled away bat. OBP p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['awayOBP'], dfGameData['awayOBP'])[1]))
print ("3 hours east traveled away bat. SLG p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['awaySLG'], dfGameData['awaySLG'])[1]))
print ("3 hours east traveled away bat. ISO p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['awayISO'], dfGameData['awayISO'])[1]))
print ("3 hours east traveled away pit. H/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['away Pitching H/9'], dfGameData['away Pitching H/9'])[1]))
print ("3 hours east traveled away pit. BB/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['away Pitching BB/9'], dfGameData['away Pitching BB/9'])[1]))
print ("3 hours east traveled away pit. K/9 p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['away Pitching K/9'], dfGameData['away Pitching K/9'])[1]))
print ("3 hours east traveled away pit. ERA p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['awayERA'], dfGameData['awayERA'])[1]))
print ("3 hours east traveled away pit. FIP p value: \t\t",(stats.ttest_ind(dfGameData3hourseast['awayFIP'], dfGameData['awayFIP'])[1]))

p values at 3 time zones traveled east
--------------------------------------
3 hours east traveled count: 				 0
3 hours east traveled away bat. R/9 p value: 		 nan
3 hours east traveled away bat. H/9 p value: 		 nan
3 hours east traveled away bat. BB/9 p value: 		 nan
3 hours east traveled away bat.  K/9 p value: 		 nan
3 hours east traveled away bat. BA p value: 		 nan
3 hours east traveled away bat. OBP p value: 		 nan
3 hours east traveled away bat. SLG p value: 		 nan
3 hours east traveled away bat. ISO p value: 		 nan
3 hours east traveled away pit. H/9 p value: 		 nan
3 hours east traveled away pit. BB/9 p value: 		 nan
3 hours east traveled away pit. K/9 p value: 		 nan
3 hours east traveled away pit. ERA p value: 		 nan
3 hours east traveled away pit. FIP p value: 		 nan


In [131]:
#Adjusted run weights for each of the travel distances 
#and time zones, if significant, returns bat, pit

def run_change_miles(distance):
    
    if distance < 500:
        return [1, 1]
    
    if distance >= 500 & distance < 1000:
        return [1, 1]
    
    if distance >= 1000 & distance < 2000:
        return [1, 1]
    
    if distance >= 2000:
        return [(4.469/4.363), (4.261/4.146)]
    
def run_change_tz(time_zone):
    
    if time_zone < -2:
        return [(4.469/4.195), (4.261/3.994)]
    
    if time_zone == -2:
        return [1, 1]
    
    if time_zone == -1:
        return [(4.469/4.591), (4.261/4.391)]
    
    if time_zone == 0:
        return [1, 1]
    
    if time_zone == 1:
        return [(4.469/4.558), (4.261/4.344)]
    
    if time_zone == 2:
        return [1, 1]
    
    if time_zone > 2:
        return [1, 1]

In [84]:
dfGameData['adjAwayScoreMi'] = -1.0
dfGameData['adjAwayScoreTZ'] = -1.0
dfGameData['adjHomeScoreMi'] = -1.0
dfGameData['adjHomeScoreTZ'] = -1.0

In [132]:
#Loop through an calculate the adjusted scores for every game (home and away)

for i, row in dfGameData.iterrows():
    
    dist = row['distance']
    tz = row['timeZoneChange']
    
    adj_away_score_mi = row['awayScore']*(run_change_miles(dist)[0])
    dfGameData.set_value(i, 'adjAwayScoreMi', adj_away_score_mi)
    
    adj_away_score_tz = row['awayScore']*(run_change_tz(tz)[0])
    dfGameData.set_value(i, 'adjAwayScoreTZ', adj_away_score_tz)
    
    adj_home_score_mi = row['homeScore']*(run_change_miles(dist)[1])
    dfGameData.set_value(i, 'adjHomeScoreMi', adj_home_score_mi)
    
    adj_home_score_tz = row['homeScore']*(run_change_tz(tz)[1])
    dfGameData.set_value(i, 'adjHomeScoreTZ', adj_home_score_tz)

In [116]:
dfGameData['year'] = int(str(dfGameData['date'])[:4])
dfGameData['date'].head(100000)

2        20000403
3        20000403
4        20000403
6        20000403
7        20000403
8        20000403
9        20000403
10       20000403
11       20000403
12       20000403
13       20000403
14       20000404
15       20000404
16       20000404
17       20000404
19       20000404
20       20000404
21       20000404
22       20000404
23       20000404
24       20000404
25       20000404
26       20000405
27       20000405
28       20000405
29       20000405
31       20000405
32       20000405
33       20000405
34       20000405
           ...   
65289    19901001
65290    19901001
65291    19901001
65293    19901001
65294    19901001
65295    19901001
65296    19901002
65297    19901002
65298    19901002
65299    19901002
65300    19901002
65301    19901002
65302    19901002
65303    19901002
65304    19901002
65306    19901002
65307    19901002
65308    19901002
65309    19901003
65310    19901003
65311    19901003
65312    19901003
65313    19901003
65314    19901003
65315    1

In [117]:
dfYearData = dfGameData.groupby(['awayTeam']).sum()

In [118]:
dfYearData.head(50)

Unnamed: 0_level_0,date,awayScore,homeScore,lengthInOuts,attendance,awayAB,awayH,away2B,away3B,awayHR,...,away Pitching H/9,away Pitching BB/9,away Pitching K/9,awayERA,awayFIP,year,adjAwayScoreMi,adjAwayScoreTZ,adjHomeScoreMi,adjHomeScoreTZ
awayTeam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ANA,34040120682,8165,7746,90612,48276614.0,59583,15789,3131,310,1760,...,15735.689931,5874.653436,11439.16973,7684.834257,7868.533582,3392,8165.0,8163.850706,7746.0,7746.142853
ARI,31982423136,6793,7025,85819,50600025.0,55546,13752,2725,280,1609,...,14416.559303,5340.750887,11873.345167,6336.548826,6951.245589,3186,6793.0,6779.130108,7025.0,7010.180803
ATL,42724633203,9692,8789,114726,65646328.0,74440,19066,3754,360,2255,...,18956.132468,7095.239735,14862.810639,9061.203372,8881.607248,4264,9692.0,9789.840958,8789.0,8875.419625
BAL,44702877740,10334,10881,119216,62938608.0,78458,20428,4111,344,2371,...,21372.382203,8309.562075,14514.552669,9762.907699,10674.865854,4462,10334.0,10385.787,10881.0,10932.728194
BOS,44423044517,10558,9955,119367,70829275.0,78274,20299,3902,374,2526,...,19782.280124,7609.334788,15673.333364,9925.222453,9894.011039,4434,10558.0,10609.773403,9955.0,10003.861395
CAL,10542847060,2309,2539,28163,14330088.0,18408,4821,863,91,438,...,5089.027098,2054.973951,3074.486014,2139.88951,2517.192075,1058,2309.0,2459.814303,2539.0,2708.732849
CHA,44782435654,10385,10499,119897,62939744.0,78407,20557,3886,404,2298,...,21030.713861,7772.689948,14519.654658,9789.106181,10245.290718,4470,10385.0,10305.577786,10499.0,10418.140222
CHN,43126329178,9383,9671,115458,71497066.0,75244,19002,3774,400,2225,...,19265.76337,7863.4518,15647.074114,8739.113214,9547.852363,4304,9383.0,9288.396019,9671.0,9575.832878
CIN,43345644251,9510,9933,116331,66675676.0,75564,19240,3716,424,2166,...,20311.071632,7553.796888,14490.847685,8877.692942,9831.733495,4326,9510.0,9544.402052,9933.0,9960.876165
CLE,44802453580,10889,10610,119808,62988320.0,78852,20852,4173,426,2477,...,21232.983036,7646.317741,14980.614745,10319.943243,10182.840121,4472,10889.0,10887.537542,10610.0,10604.376925


In [121]:
dfWPChange = dfYearData[['awayScore', 'homeScore', 'adjAwayScoreMi', 'adjHomeScoreMi', 'adjAwayScoreTZ', 'adjHomeScoreTZ']].copy()

In [141]:
dfWPChange.head(35)

Unnamed: 0_level_0,awayScore,homeScore,adjAwayScoreMi,adjHomeScoreMi,adjAwayScoreTZ,adjHomeScoreTZ,baseWP,adjMilesWP,adjMilesTZ,adjTZWP,WinsChange
awayTeam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ANA,8165,7746,8165.0,7746.0,8163.850706,7746.142853,0.526316,0.526316,0.526236,0.526236,-0.00643
ARI,6793,7025,6793.0,7025.0,6779.130108,7010.180803,0.483215,0.483215,0.483249,0.483249,0.002745
ATL,9692,8789,9692.0,8789.0,9789.840958,8875.419625,0.548745,0.548745,0.548873,0.548873,0.010419
BAL,10334,10881,10334.0,10881.0,10385.787,10932.728194,0.474233,0.474233,0.474361,0.474361,0.010344
BOS,10558,9955,10558.0,9955.0,10609.773403,10003.861395,0.529371,0.529371,0.529368,0.529368,-0.000181
CAL,2309,2539,2309.0,2539.0,2459.814303,2708.732849,0.452664,0.452664,0.451951,0.451951,-0.057762
CHA,10385,10499,10385.0,10499.0,10305.577786,10418.140222,0.494541,0.494541,0.494569,0.494569,0.002199
CHN,9383,9671,9383.0,9671.0,9288.396019,9575.832878,0.484889,0.484889,0.484766,0.484766,-0.009891
CIN,9510,9933,9510.0,9933.0,9544.402052,9960.876165,0.478254,0.478254,0.478658,0.478658,0.032681
CLE,10889,10610,10889.0,10610.0,10887.537542,10604.376925,0.512975,0.512975,0.513173,0.513173,0.016019


In [138]:
dfWPChange['baseWP'] = (dfWPChange['awayScore']**2)/((dfWPChange['awayScore']**2)+(dfWPChange['homeScore']**2))
dfWPChange['adjMilesWP'] = (dfWPChange['adjAwayScoreMi']**2)/((dfWPChange['adjAwayScoreMi']**2)+(dfWPChange['adjHomeScoreMi']**2))
dfWPChange['adjTZWP'] = (dfWPChange['adjAwayScoreTZ']**2)/((dfWPChange['adjAwayScoreTZ']**2)+(dfWPChange['adjHomeScoreTZ']**2))
dfWPChange['WinsChange'] = (((dfWPChange['adjMilesWP']+dfWPChange['adjTZWP'])/2)*162)-(dfWPChange['baseWP']*162)