In [2]:
import pandas as pd

In [401]:
batting = pd.read_csv("baseballdatabank-master/core/batting.csv")
pitching = pd.read_csv("baseballdatabank-master/core/pitching.csv")
schedule2019 = pd.read_excel("baseballdatabank-master/core/2019.xlsx")

In [5]:
#Our with this initial step of the project is to create a strength of schedule statistic that is based off
#the batting average allowed by the pitchers on every team as another consideration for predicting batting average.
pitching.head()

Unnamed: 0,playerID,yearID,stint,teamID,lgID,W,L,G,GS,CG,...,IBB,WP,HBP,BK,BFP,GF,R,SH,SF,GIDP
0,bechtge01,1871,1,PH1,,1,2,3,3,2,...,,7,,0,146.0,0,42,,,
1,brainas01,1871,1,WS3,,12,15,30,30,30,...,,7,,0,1291.0,0,292,,,
2,fergubo01,1871,1,NY2,,0,0,1,0,0,...,,2,,0,14.0,0,9,,,
3,fishech01,1871,1,RC1,,4,16,24,24,22,...,,20,,0,1080.0,1,257,,,
4,fleetfr01,1871,1,NY2,,0,1,1,1,1,...,,0,,0,57.0,0,21,,,


In [6]:
#We want to get rid of 2020 because it was not a full season and we are going to be completely ignoring it for the sake out our project
no2020 = pitching[pitching['yearID'] != 2020]

In [7]:
#These are the only vital columns we need from the pitching table
lesscols = no2020[['playerID','yearID','stint','teamID','BAOpp','BFP']]

In [8]:
#Here we are taking all rows from 2019 in order to figure out all people who pitched during the year
pitching2019 = lesscols[lesscols["yearID"] == 2019]

In [9]:
#After we only have 2019 data we just need ID's of all players who pitched in 2019
pitch_id = pitching2019['playerID']

In [10]:
#Using the ID's of all players who pitched in 2019 we can extract the career data of every pitcher that pitched in 2019
all2019pitchers = lesscols[lesscols['playerID'].isin(pitch_id)]

In [11]:
#This is a step is making a column full of 1's for use in aggregating years played later
ones = [1] * len(all2019pitchers.index)
all2019pitchers['years'] = ones

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all2019pitchers['years'] = ones


In [12]:
all2019pitchers

Unnamed: 0,playerID,yearID,stint,teamID,BAOpp,BFP,years
34083,sabatcc01,2001,1,CLE,0.228,763.0,1
34667,perezol01,2002,1,SDN,0.218,387.0,1
34728,rodnefe01,2002,1,DET,0.329,89.0,1
34744,sabatcc01,2002,1,CLE,0.252,891.0,1
35165,jacksed01,2003,1,LAN,0.221,91.0,1
...,...,...,...,...,...,...,...
47624,zamorda01,2019,1,NYN,0.294,41.0,1
47625,zeuchtj01,2019,1,TOR,0.250,99.0,1
47626,zimmejo02,2019,1,DET,0.311,504.0,1
47627,zimmeky01,2019,1,KCA,0.337,102.0,1


In [13]:
#Our data goes up to the year 2019 but in our hypothetical scenario the 2019 season has not happened yet so we will remove the year 2019
existingpitchers = all2019pitchers[all2019pitchers['yearID'] != 2019]

In [14]:
#In order to figure out everyones BAOpp per year we need to average every stint each person had each year so we have overall year performance

#(We are only including the years column in this aggregation so it stays intact for the next stage)
stintavg = existingpitchers.groupby(['playerID','yearID']).agg({'BAOpp':'mean','BFP':'sum','years':'sum'})

In [15]:
stintavg

Unnamed: 0_level_0,Unnamed: 1_level_0,BAOpp,BFP,years
playerID,yearID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
abadfe01,2010,0.200,76.0,1
abadfe01,2011,0.326,99.0,1
abadfe01,2012,0.311,208.0,1
abadfe01,2013,0.271,166.0,1
abadfe01,2014,0.175,216.0,1
...,...,...,...,...
zimmejo02,2014,0.244,800.0,1
zimmejo02,2015,0.264,831.0,1
zimmejo02,2016,0.284,450.0,1
zimmejo02,2017,0.313,713.0,1


In [16]:
#Now that we have an accurate BAOpp for every year we want we want to average every year together to get their career BAOpp

#Additionally when we collect all the years into a career stat we are also finally suming the years column so we know how many years they played
#and we are summing BFP so we know how many hitters a pitcher faced their entire career
careeravg = stintavg.groupby(['playerID']).agg({'BAOpp':'mean','BFP':'sum', 'years':'sum'})

In [17]:
careeravg

Unnamed: 0_level_0,BAOpp,BFP,years
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
abadfe01,0.251688,1350.0,9
adamja01,0.246000,142.0,1
adamsau01,0.306333,267.0,3
adamsau02,0.230500,36.0,2
adamsch01,0.267000,34.0,1
...,...,...,...
yarbrry01,0.246000,628.0,1
yateski01,0.251400,913.0,6
ynoaga01,0.311000,235.0,2
zamorda01,0.194000,36.0,1


In [18]:
#In order to create a weighted average on BAOpp that emphasises players 2018 BAOpp we need to extract just their 2018 BAOpp
pitching2018 = existingpitchers[existingpitchers['yearID'] == 2018]

In [19]:
#This part is necessary in order to average every players stints in 2018 in order to get a total year performance
pitching2018avg = pitching2018.groupby(['playerID']).agg({'BAOpp':'mean'})

In [20]:
#Here we want to join each of the last two tables we made on each player so we can have each players career and 2018 BAOpp and side by side
pitchingBAOpp = careeravg.merge(pitching2018avg, on='playerID', how='left', suffixes=['_career', '_2018'])

In [21]:
#We noticed that some players did not play in 2018 so we want to fill in the 2018 column
pitchingBAOpp

Unnamed: 0_level_0,BAOpp_career,BFP,years,BAOpp_2018
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
abadfe01,0.251688,1350.0,9,
adamja01,0.246000,142.0,1,0.246
adamsau01,0.306333,267.0,3,
adamsau02,0.230500,36.0,2,0.250
adamsch01,0.267000,34.0,1,0.267
...,...,...,...,...
yarbrry01,0.246000,628.0,1,0.246
yateski01,0.251400,913.0,6,0.181
ynoaga01,0.311000,235.0,2,
zamorda01,0.194000,36.0,1,0.194


In [22]:
#This code fills all NaN values for players who didn't play in 2018 with their career BAOpp
noNulls = pd.DataFrame(pitchingBAOpp['BAOpp_career'], columns=pitchingBAOpp.columns).ffill(axis=1)
pitchingBAOpp = pitchingBAOpp.combine_first(noNulls)

In [23]:
#Here we are creating a weighted average of career BAOpp and last year BAOpp in order to get a whole view of their career while stressing last years performance
pitchingBAOpp['BAOpp_weighted'] = (2 * pitchingBAOpp['BAOpp_career'] + pitchingBAOpp['BAOpp_2018']) / 3

In [24]:
pitchingBAOpp

Unnamed: 0_level_0,BAOpp_career,BFP,years,BAOpp_2018,BAOpp_weighted
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abadfe01,0.251688,1350.0,9.0,0.251688,0.251688
adamja01,0.246000,142.0,1.0,0.246000,0.246000
adamsau01,0.306333,267.0,3.0,0.306333,0.306333
adamsau02,0.230500,36.0,2.0,0.250000,0.237000
adamsch01,0.267000,34.0,1.0,0.267000,0.267000
...,...,...,...,...,...
yarbrry01,0.246000,628.0,1.0,0.246000,0.246000
yateski01,0.251400,913.0,6.0,0.181000,0.227933
ynoaga01,0.311000,235.0,2.0,0.311000,0.311000
zamorda01,0.194000,36.0,1.0,0.194000,0.194000


In [25]:
#Now we have player ID's associated with a weighted BAOpp career value but it is missing team data
#so we need to narrow down our original 2019 pitchers table to only include the year 2019 so we know
#what all the pitching rosters were at the beginning of 2019
just2019pitchers = all2019pitchers[all2019pitchers['yearID'] == 2019]

In [26]:
#We only want stint 1 of the data because it would show strictly the rosters that people started on
unique2019pitchers = just2019pitchers[just2019pitchers['stint'] == 1]

In [27]:
#Now that we have weighted BAOpp information per player and team information at the beginning of 2019 we need to join them together
final_pitcherBAOpp = pitchingBAOpp.merge(unique2019pitchers, on='playerID', how='left', suffixes=['_career','_2019'])

In [28]:
final_pitcherBAOpp

Unnamed: 0,playerID,BAOpp_career,BFP_career,years_career,BAOpp_2018,BAOpp_weighted,yearID,stint,teamID,BAOpp,BFP_2019,years_2019
0,abadfe01,0.251688,1350.0,9.0,0.251688,0.251688,2019,1,SFN,0.196,49.0,1
1,adamja01,0.246000,142.0,1.0,0.246000,0.246000,2019,1,TOR,0.200,91.0,1
2,adamsau01,0.306333,267.0,3.0,0.306333,0.306333,2019,1,MIN,0.333,15.0,1
3,adamsau02,0.230500,36.0,2.0,0.250000,0.237000,2019,1,WAS,0.000,6.0,1
4,adamsch01,0.267000,34.0,1.0,0.267000,0.267000,2019,1,NYA,0.351,124.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
636,yarbrry01,0.246000,628.0,1.0,0.246000,0.246000,2019,1,TBA,0.228,563.0,1
637,yateski01,0.251400,913.0,6.0,0.181000,0.227933,2019,1,SDN,0.186,243.0,1
638,ynoaga01,0.311000,235.0,2.0,0.311000,0.311000,2019,1,BAL,0.280,480.0,1
639,zamorda01,0.194000,36.0,1.0,0.194000,0.194000,2019,1,NYN,0.294,41.0,1


In [29]:
#This step isnt completely necessary but it simply eliminates information from the table that is no longer useful to us
final_pitcherBAOpp = final_pitcherBAOpp[['playerID','teamID','BAOpp_weighted','BFP_career','years_career']]

In [30]:
final_pitcherBAOpp

Unnamed: 0,playerID,teamID,BAOpp_weighted,BFP_career,years_career
0,abadfe01,SFN,0.251688,1350.0,9.0
1,adamja01,TOR,0.246000,142.0,1.0
2,adamsau01,MIN,0.306333,267.0,3.0
3,adamsau02,WAS,0.237000,36.0,2.0
4,adamsch01,NYA,0.267000,34.0,1.0
...,...,...,...,...,...
636,yarbrry01,TBA,0.246000,628.0,1.0
637,yateski01,SDN,0.227933,913.0,6.0
638,ynoaga01,BAL,0.311000,235.0,2.0
639,zamorda01,NYN,0.194000,36.0,1.0


In [31]:
#Now we want to get BFP per year avg
final_pitcherBAOpp['BFPperyear'] = final_pitcherBAOpp['BFP_career'] / final_pitcherBAOpp['years_career']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_pitcherBAOpp['BFPperyear'] = final_pitcherBAOpp['BFP_career'] / final_pitcherBAOpp['years_career']


In [32]:
final_pitcherBAOpp

Unnamed: 0,playerID,teamID,BAOpp_weighted,BFP_career,years_career,BFPperyear
0,abadfe01,SFN,0.251688,1350.0,9.0,150.000000
1,adamja01,TOR,0.246000,142.0,1.0,142.000000
2,adamsau01,MIN,0.306333,267.0,3.0,89.000000
3,adamsau02,WAS,0.237000,36.0,2.0,18.000000
4,adamsch01,NYA,0.267000,34.0,1.0,34.000000
...,...,...,...,...,...,...
636,yarbrry01,TBA,0.246000,628.0,1.0,628.000000
637,yateski01,SDN,0.227933,913.0,6.0,152.166667
638,ynoaga01,BAL,0.311000,235.0,2.0,117.500000
639,zamorda01,NYN,0.194000,36.0,1.0,36.000000


In [33]:
#playerID is the key we want to use
pitchersbyteam = final_pitcherBAOpp.set_index(['playerID'])

In [34]:
#Here we are summing BFP_career for every player on each team to have team totals
totalBFPperyear = pitchersbyteam.groupby('teamID').agg({'BFPperyear':'sum'})

In [35]:
#Now we are joining the total BFP per team as a column on each player row
BFPteam = pitchersbyteam.merge(totalBFPperyear, on='teamID', how='left', suffixes=['_ind','_team']).set_index(pitchersbyteam.index)

In [36]:
#This creates a new column of the ratio of BFP for each player vs the entire team
BFPteam['BFPratio'] = BFPteam['BFPperyear_ind'] / BFPteam['BFPperyear_team']

In [37]:
BFPteam

Unnamed: 0_level_0,teamID,BAOpp_weighted,BFP_career,years_career,BFPperyear_ind,BFPperyear_team,BFPratio
playerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
abadfe01,SFN,0.251688,1350.0,9.0,150.000000,5932.280159,0.025285
adamja01,TOR,0.246000,142.0,1.0,142.000000,5922.755952,0.023975
adamsau01,MIN,0.306333,267.0,3.0,89.000000,5145.285714,0.017297
adamsau02,WAS,0.237000,36.0,2.0,18.000000,5426.750974,0.003317
adamsch01,NYA,0.267000,34.0,1.0,34.000000,5550.158594,0.006126
...,...,...,...,...,...,...,...
yarbrry01,TBA,0.246000,628.0,1.0,628.000000,4754.030303,0.132098
yateski01,SDN,0.227933,913.0,6.0,152.166667,4723.491667,0.032215
ynoaga01,BAL,0.311000,235.0,2.0,117.500000,4838.595238,0.024284
zamorda01,NYN,0.194000,36.0,1.0,36.000000,6026.703419,0.005973


In [38]:
#To get each persons weighted BAOpp as a team we need to multiply each players ratio by BAOpp
BFPteam['indBAOpp'] = BFPteam['BAOpp_weighted'] * BFPteam['BFPratio']

In [39]:
#Now to get the team totals we just need to add up each players BAOpp that was multiplied by their ratio
final_teamBAOpp = BFPteam.groupby('teamID').agg({'indBAOpp':'sum'}).sort_values(by='indBAOpp')

In [40]:
#Here we have a BAOpp value that is averaged for each team across all their pitchers and weighted by how much they are expected to contribute
final_teamBAOpp

Unnamed: 0_level_0,indBAOpp
teamID,Unnamed: 1_level_1
HOU,0.229675
LAN,0.233121
WAS,0.236953
NYA,0.2375
SLN,0.237583
ATL,0.240259
CHN,0.241354
MIL,0.243577
TBA,0.244541
BOS,0.24561


In [402]:
new_header = schedule2019.iloc[0]
schedule2019.columns = new_header
schedule2019 = schedule2019[1:]
schedule2019

Unnamed: 0,DATE,SEA,SF,OAK,LA,LAA,SD,ARI,COL,TEX,...,NYY,NYM,PHI,BAL,WAS,ATL,TB,MIA,DATE.1,# Games
1,2019-03-20 00:00:00,,,Mariners,,,,,,,...,,,,,,,,,2019-03-20 00:00:00,1
2,2019-03-21 00:00:00,,,Mariners,,,,,,,...,,,,,,,,,2019-03-21 00:00:00,1
3,2019-03-22 00:00:00,,,,,,,,,,...,,,,,,,,,2019-03-22 00:00:00,0
4,2019-03-23 00:00:00,,,,,,,,,,...,,,,,,,,,2019-03-23 00:00:00,0
5,2019-03-24 00:00:00,,,,,,,,,,...,,,,,,,,,2019-03-24 00:00:00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,2019-09-29 00:00:00,Athletics,Dodgers,,,Astros,,Padres,Brewers,Yankees,...,,Braves,Marlins,,Indians,,,,2019-09-29 00:00:00,15
195,Home Total,81,81,81,81,81,81,81,81,81,...,81,81,81,81,81,81,81,81,2019-09-30 00:00:00,2430
196,,"* - Tokyo, Japan",,,,,,,,,...,,,,,,,,,,
197,,** - Day/Night Doubleheader,,,,,,,,,...,,,,,,,,,1,


In [403]:
schedule2019.shape
schedule2019 = schedule2019.drop(columns = ['DATE' , '# Games'])
schedule2019

Unnamed: 0,SEA,SF,OAK,LA,LAA,SD,ARI,COL,TEX,HOU,...,TOR,BOS,NYY,NYM,PHI,BAL,WAS,ATL,TB,MIA
1,,,Mariners,,,,,,,,...,,,,,,,,,,
2,,,Mariners,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,Athletics,Dodgers,,,Astros,,Padres,Brewers,Yankees,,...,Rays,Orioles,,Braves,Marlins,,Indians,,,
195,81,81,81,81,81,81,81,81,81,81,...,81,81,81,81,81,81,81,81,81,81
196,"* - Tokyo, Japan",,,,,,,,,,...,,,,,,,,,,
197,** - Day/Night Doubleheader,,,,,,,,,,...,,,,,,,,,,


In [404]:
schedule2019 = schedule2019.apply(pd.value_counts)
schedule2019

Unnamed: 0,SEA,SF,OAK,LA,LAA,SD,ARI,COL,TEX,HOU,...,TOR,BOS,NYY,NYM,PHI,BAL,WAS,ATL,TB,MIA
Angels,9.0,,10.0,2.0,,,,,10.0,9.0,...,4.0,4.0,3.0,,,3.0,,,4.0,
Astros,9.0,,9.0,,10.0,,,2.0,10.0,,...,3.0,3.0,4.0,,,3.0,,,4.0,
Athletics,9.0,2.0,,,9.0,,,,9.0,10.0,...,3.0,3.0,3.0,,,4.0,,,3.0,
Blue Jays,3.0,2.0,3.0,3.0,3.0,,,3.0,3.0,3.0,...,,9.0,9.0,,,10.0,,2.0,10.0,
Cardinals,3.0,3.0,2.0,3.0,,3.0,3.0,3.0,3.0,,...,,,,4.0,3.0,,4.0,3.0,,3.0
Cubs,2.0,3.0,,4.0,,4.0,3.0,3.0,3.0,3.0,...,,,,3.0,3.0,,3.0,3.0,,3.0
Indians,3.0,,3.0,,3.0,,,,4.0,4.0,...,3.0,3.0,4.0,3.0,,3.0,3.0,,3.0,2.0
Orioles,4.0,,3.0,,4.0,2.0,3.0,3.0,3.0,3.0,...,9.0,10.0,9.0,,,,2.0,,9.0,
Padres,2.0,9.0,,10.0,,,10.0,10.0,,,...,3.0,,3.0,3.0,3.0,2.0,3.0,4.0,,3.0
Rangers,10.0,,10.0,,9.0,,2.0,,,9.0,...,3.0,4.0,3.0,,,4.0,,,3.0,


In [405]:
schedule2019 = schedule2019.drop([81, '* - Tokyo, Japan', '** - Day/Night Doubleheader','*** - Williamsport, PA','All Star Game'])
schedule2019

Unnamed: 0,SEA,SF,OAK,LA,LAA,SD,ARI,COL,TEX,HOU,...,TOR,BOS,NYY,NYM,PHI,BAL,WAS,ATL,TB,MIA
Angels,9.0,,10.0,2.0,,,,,10.0,9.0,...,4.0,4.0,3.0,,,3.0,,,4.0,
Astros,9.0,,9.0,,10.0,,,2.0,10.0,,...,3.0,3.0,4.0,,,3.0,,,4.0,
Athletics,9.0,2.0,,,9.0,,,,9.0,10.0,...,3.0,3.0,3.0,,,4.0,,,3.0,
Blue Jays,3.0,2.0,3.0,3.0,3.0,,,3.0,3.0,3.0,...,,9.0,9.0,,,10.0,,2.0,10.0,
Cardinals,3.0,3.0,2.0,3.0,,3.0,3.0,3.0,3.0,,...,,,,4.0,3.0,,4.0,3.0,,3.0
Cubs,2.0,3.0,,4.0,,4.0,3.0,3.0,3.0,3.0,...,,,,3.0,3.0,,3.0,3.0,,3.0
Indians,3.0,,3.0,,3.0,,,,4.0,4.0,...,3.0,3.0,4.0,3.0,,3.0,3.0,,3.0,2.0
Orioles,4.0,,3.0,,4.0,2.0,3.0,3.0,3.0,3.0,...,9.0,10.0,9.0,,,,2.0,,9.0,
Padres,2.0,9.0,,10.0,,,10.0,10.0,,,...,3.0,,3.0,3.0,3.0,2.0,3.0,4.0,,3.0
Rangers,10.0,,10.0,,9.0,,2.0,,,9.0,...,3.0,4.0,3.0,,,4.0,,,3.0,


In [406]:
index_names = list(['LAA','HOU','OAK','TOR','STL','CHC','CLE','BAL','SD','TEX','TB','BOS','CIN','KC','DET','MIN','CWS','NYY','COL','LA','ARI','PHI','PIT','ATL','NYM','MIL','MIA','WAS','SEA','SF'])
len(set(index_names))

30

In [407]:
schedule2019.index = index_names
schedule2019.sum(axis=1)

LAA    81.0
HOU    81.0
OAK    81.0
TOR    81.0
STL    81.0
CHC    81.0
CLE    81.0
BAL    81.0
SD     81.0
TEX    81.0
TB     81.0
BOS    81.0
CIN    81.0
KC     81.0
DET    81.0
MIN    81.0
CWS    81.0
NYY    81.0
COL    81.0
LA     81.0
ARI    80.0
PHI    81.0
PIT    81.0
ATL    81.0
NYM    81.0
MIL    81.0
MIA    81.0
WAS    81.0
SEA    81.0
SF     81.0
dtype: float64

In [408]:

schedule2019 = schedule2019.sort_index()
schedule2019 = schedule2019.sort_index(axis=1)
schedule2019

Unnamed: 0,ARI,ATL,BAL,BOS,CHC,CIN,CLE,COL,CWS,DET,...,PHI,PIT,SD,SEA,SF,STL,TB,TEX,TOR,WAS
ARI,,3.0,,,3.0,3.0,,10.0,,,...,3.0,4.0,9.0,,8.0,3.0,3.0,2.0,3.0,4.0
ATL,4.0,,,,4.0,3.0,3.0,3.0,,,...,10.0,3.0,3.0,,4.0,3.0,,,2.0,9.0
BAL,3.0,,,10.0,,,4.0,3.0,3.0,4.0,...,,,2.0,4.0,,,9.0,3.0,9.0,2.0
BOS,3.0,,9.0,,,,3.0,2.0,4.0,3.0,...,2.0,,3.0,4.0,,,10.0,3.0,10.0,
CHC,3.0,3.0,,,,10.0,,3.0,2.0,,...,3.0,10.0,4.0,2.0,3.0,9.0,,3.0,,3.0
CIN,3.0,4.0,,,9.0,,2.0,3.0,,,...,3.0,10.0,4.0,3.0,3.0,9.0,,,,3.0
CLE,,,3.0,3.0,,2.0,,,9.0,9.0,...,,,,3.0,,,3.0,4.0,3.0,3.0
COL,9.0,3.0,,2.0,3.0,3.0,,,,,...,3.0,3.0,9.0,,10.0,4.0,3.0,,,4.0
CWS,,3.0,3.0,3.0,2.0,,10.0,,,10.0,...,3.0,,,3.0,,,3.0,3.0,3.0,2.0
DET,,3.0,3.0,4.0,,,10.0,,9.0,,...,2.0,2.0,,4.0,,,3.0,3.0,4.0,


In [416]:
schedule2019 = schedule2019.fillna(0)
schedule2019['ARI'].sum()

81.0

In [410]:
def count_schedule(df):
    total_schedule = {}
    for i in df.index:
        temp = {}
        for c in df.columns:
            temp[c] = (df[c].loc[i] + df[i].loc[c])
        total_schedule[i] = temp
    return total_schedule

In [411]:
schedule_counts = count_schedule(schedule2019)

In [412]:
schedule_counts = pd.DataFrame(schedule_counts)

In [413]:
schedule2019.sum(axis=1)

ARI    80.0
ATL    81.0
BAL    81.0
BOS    81.0
CHC    81.0
CIN    81.0
CLE    81.0
COL    81.0
CWS    81.0
DET    81.0
HOU    81.0
KC     81.0
LA     81.0
LAA    81.0
MIA    81.0
MIL    81.0
MIN    81.0
NYM    81.0
NYY    81.0
OAK    81.0
PHI    81.0
PIT    81.0
SD     81.0
SEA    81.0
SF     81.0
STL    81.0
TB     81.0
TEX    81.0
TOR    81.0
WAS    81.0
dtype: float64

In [415]:
schedule_counts.sum(axis=1)

ARI    161.0
ATL    162.0
BAL    162.0
BOS    162.0
CHC    162.0
CIN    162.0
CLE    162.0
COL    162.0
CWS    162.0
DET    162.0
HOU    162.0
KC     162.0
LA     162.0
LAA    162.0
MIA    162.0
MIL    162.0
MIN    162.0
NYM    162.0
NYY    162.0
OAK    162.0
PHI    162.0
PIT    162.0
SD     162.0
SEA    162.0
SF     161.0
STL    162.0
TB     162.0
TEX    162.0
TOR    162.0
WAS    162.0
dtype: float64