# Tracking Single Players on Fangraphs

### MSP 8.5.2017

Can the FanGraphs player pages be read smoothly?

In [1]:
# Render a button that shows/hides the notebook's code inputs.
# The embedded script defines code_toggle(), which hides or shows every
# `div.input` element and flips the `code_show` flag; it is also registered to
# run once when the document is ready. The form's submit button calls
# code_toggle() again on each click.
# NOTE(review): the script uses `$`, so it presumably relies on jQuery being
# loaded by the notebook front end — confirm for the viewer in use.
from IPython.display import HTML
HTML('''<script>code_show=true; function code_toggle() { if (code_show){ $('div.input').hide();} else { $('div.input').show(); } code_show = !code_show} $( document ).ready(code_toggle);</script><form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
# boilerplate imports

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

In [3]:
# imports for scraping

from bs4 import BeautifulSoup
import requests


# basic birth year reader. tables[1] has good stuff in it!
def get_birth_year(tables):
    """Pull a player's birth year out of the parsed tables of a player page.

    Only ``tables[1].text`` is read. It is expected to contain a span that
    looks like ``Birthdate: M/D/YYYY (...)``: the text between the
    ``Birthdate:`` label and the next ``(`` is split on ``/`` and the third
    field is taken as the year.

    Parameters
    ----------
    tables : sequence
        Objects exposing a ``.text`` attribute (e.g. the result of
        ``soup.findAll('table')``); must have at least two entries.

    Returns
    -------
    str
        The year field with surrounding whitespace removed. Callers convert
        it with ``float(...)``.

    Raises
    ------
    ValueError
        If the ``Birthdate:`` label is missing, or the text after it does not
        contain three ``/``-separated fields.
    """
    text = tables[1].text
    label = "Birthdate:"

    birthdate_start = text.find(label)
    if birthdate_start == -1:
        # str.find returns -1 here; slicing from -1 would silently read the
        # tail of the string instead of failing, so stop explicitly.
        raise ValueError("no 'Birthdate:' label found in tables[1]")

    birthdate_end = text.find('(', birthdate_start)
    if birthdate_end == -1:
        # no opening parenthesis after the date: read through to the end
        # rather than letting -1 chop off the final character
        birthdate_end = len(text)

    fields = text[birthdate_start + len(label):birthdate_end].split('/')
    if len(fields) < 3:
        raise ValueError("could not split a birth year out of %r"
                         % text[birthdate_start:birthdate_end])

    return fields[2].strip()





In [4]:
# the listing of the teams of interest (all 30 clubs), kept in alphabetical
# order as lowercase, space-free slugs.
# NOTE(review): 'reds' follows the same slug style as the other entries —
# confirm it matches whatever URL scheme consumes this list.

teams = ['angels', 'astros', 'athletics', 'bluejays', 'braves',
         'brewers', 'cardinals', 'cubs', 'diamondbacks', 'dodgers',
         'giants', 'indians', 'mariners', 'marlins', 'mets', 'nationals',
         'orioles', 'padres', 'phillies', 'pirates', 'rangers',
         'rays', 'reds', 'redsox', 'rockies', 'royals', 'tigers', 'twins',
         'whitesox', 'yankees']



#### Compile Hitters

This cell only needs to be run in the event that player ID numbers change, or new players are added.

In [5]:

# Build HDict from data/batterdict.dat: for every line containing a '|',
# the text before the first '|' (whitespace-stripped) becomes the key and the
# text between the first and second '|' becomes the value.
HDict = {}

# `with` guarantees the file handle is closed once the table is loaded
with open('data/batterdict.dat') as f:
    for line in f:
        fields = line.split('|')
        if len(fields) < 2:
            # no '|' separator on this line (e.g. blank line): nothing to store
            continue
        # the value is stored exactly as read, so it may still carry the
        # trailing newline
        HDict[fields[0].strip()] = fields[1]


print('Found {} hitters by scraping teams.'.format(len(HDict.keys())))

Found 821 hitters by scraping teams.


#### Compile Pitchers

This cell only needs to be run in the event that player ID numbers change, or new players are added.

In [6]:

# Build PDict from data/pitcherdict.dat: for every line containing a '|',
# the text before the first '|' (whitespace-stripped) becomes the key and the
# text between the first and second '|' becomes the value.
PDict = {}

# `with` guarantees the file handle is closed once the table is loaded
with open('data/pitcherdict.dat') as f:
    for line in f:
        fields = line.split('|')
        if len(fields) < 2:
            # no '|' separator on this line (e.g. blank line): nothing to store
            continue
        # the value is stored exactly as read, so it may still carry the
        # trailing newline
        PDict[fields[0].strip()] = fields[1]


print('Found {} pitchers by scraping teams.'.format(len(PDict.keys())))

Found 648 pitchers by scraping teams.


### First Application: Aging Curves

For pitchers, how does K/9 evolve with age?

In [None]:
#
# K/9 versus age for the first 20 pitchers in PDict
#
# For each pitcher: fetch the stats page, find the table whose header row
# equals `itable`, and record (age, K/9) for every row that survives the
# filters below. Results land in PVals[player]['age'] / PVals[player]['k9'].
#

# header row identifying the table to read
itable = [u'Season', u'Team', u'W', u'L', u'SV', u'G', u'GS', \
          u'IP', u'K/9', u'BB/9', u'HR/9', u'BABIP', u'LOB%', \
          u'GB%', u'HR/FB', u'ERA', u'FIP', u'xFIP', u'WAR']
#             0         1        2    3     4      5     6  \
#            7      8       9       10       11          12     \
#            13     14       15       16       17      18


# these are categories to eliminate (matched exactly against the Team column)
projections = ['Depth Charts','Fans (14)','Steamer','Fans (12)',
               'ZiPS', 'ZiPS (R)','Steamer (R)','Depth Charts (R)','Average','Postseason']

# substrings of the Team column that flag a row to drop
minors = ['(AA)','(A)','(AAA)','(R)','(A+)','(A-)']

# substrings of the Season column that flag a row to drop
agg = ['Total','Postseason']


# now make a dictionary
PVals = {}

# list(...) so the slice works whether keys() returns a list or a view
for player in list(PDict.keys())[0:20]:
    PVals[player] = {}

    get_url = 'http://www.fangraphs.com/statss.aspx?playerid='+str(PDict[player].strip())

    r  = requests.get(get_url)

    data = r.text

    soup = BeautifulSoup(data)

    tables = soup.findAll('table')

    birth_year = float(get_birth_year(tables))

    PVals[player]['age'] = []
    PVals[player]['k9'] = []

    for table in tables:

        first_row = table.find("tr")
        if first_row is None:
            # a table with no rows cannot be the stats table
            continue

        if [th.get_text() for th in first_row.find_all("th")] != itable:
            continue

        print(player)

        for row in table.find_all("tr")[1:]:

            sav = [td.get_text() for td in row.find_all("td")]

            if len(sav) < len(itable):
                # header/spacer rows do not carry a full set of cells
                continue

            if sav[1] in projections:
                continue
            if any(x in sav[1] for x in minors):
                continue
            if any(x in sav[0] for x in agg):
                continue

            # current problem is that postseason is mixed in
            # NOTE(review): an earlier debugging line probed each row for td
            # cells with class "rgRow grid_postseason"; filtering on that
            # class may be the way to drop postseason rows — confirm against
            # the page markup.

            try:
                season = float(sav[0])
                innings = float(sav[7])
                strikeout_rate = float(sav[8])
            except ValueError:
                # blank or non-numeric cell: the row cannot be placed on the curve
                continue

            # keep rows with a positive K/9, more than 50 IP, and a
            # last column that is not a single blank
            if (strikeout_rate > 0.) and (innings > 50.) and (sav[-1] != ' '):
                PVals[player]['age'].append(season - birth_year)
                PVals[player]['k9'].append(strikeout_rate)


In [None]:
# small figure holding one black K/9-versus-age trace per pitcher in PVals
fig = plt.figure(figsize=(4, 3))

for player, series in PVals.items():
    ages = series['age']
    strikeout_rates = series['k9']
    plt.plot(ages, strikeout_rates, color='black')

plt.xlabel('Age', size=18)
plt.ylabel('K/9', size=18)

In [None]:
# dump each pitcher's name followed by the collected age and K/9 lists;
# print(...) with a single pre-formatted string behaves the same on
# Python 2 and Python 3
for pval in PVals.keys():
    print('{} {} {}'.format(pval, PVals[pval]['age'], PVals[pval]['k9']))


Demonstrate the different rows that are available for extracting year-on-year stats.

In [None]:
#
# for an individual player: print the header row of every table on the page,
# then collect per-season values from the table at index 10
#

player = 'Brad Peacock'


get_url = 'http://www.fangraphs.com/statss.aspx?playerid='+str(PDict[player].strip())

# NOTE(review): this assignment overrides the name-based lookup above, so the
# page actually fetched is the hard-coded playerid 5401 — confirm which of
# the two is intended.
get_url = 'http://www.fangraphs.com/statss.aspx?playerid=5401'



# row labels to eliminate (matched exactly against the second column)
projections = ['Depth Charts','Fans (14)','Steamer',\
               'ZiPS', 'ZiPS (R)','Steamer (R)','Depth Charts (R)','Average']

# substrings of the second column that flag a row to drop
minors = ['(AA)','(A)','(AAA)','(R)','(A+)','(A-)']

# substrings of the first column that flag a row to drop
agg = ['Total']

r  = requests.get(get_url)

data = r.text

soup = BeautifulSoup(data)


tables = soup.findAll('table')

birth_year = float(get_birth_year(tables))

age = []
k9 = []

for indx,table in enumerate(tables):

    first_row = table.find("tr")
    # a table without any rows simply has no header cells to show
    headers = [] if first_row is None else [th.get_text() for th in first_row.find_all("th")]
    print('{} {}'.format(indx, headers))

    # NOTE(review): the table position is hard-coded; presumably index 10 is
    # the table of interest on this particular page — confirm.
    if indx != 10:
        continue

    for row in table.find_all("tr")[1:]:
        sav = [td.get_text() for td in row.find_all("td")]

        if len(sav) < 3:
            # rows without enough td cells (e.g. header rows) carry no values
            continue

        if sav[1] in projections:
            continue
        if any(x in sav[1] for x in minors):
            continue
        if any(x in sav[0] for x in agg):
            continue

        age.append(float(sav[0])-birth_year)
        # NOTE(review): this stores column 2 in `k9`; which statistic that is
        # depends on the columns of table 10 — confirm it is the intended one.
        k9.append(float(sav[2]))
            


Now we have the ability to automatically grab the stat page for any player!