In [1]:
import pandas as pd
import requests
import statsapi
from bs4 import BeautifulSoup

import constants
import utils

### Scrape park factors from FantasyPros

In [2]:
PARK_FACTORS_URL = 'https://www.fantasypros.com/mlb/park-factors.php'
# Seconds to wait for the server before giving up. requests applies no timeout
# by default, so without one a stalled connection would hang the notebook.
REQUEST_TIMEOUT_SECONDS = 30

# Download the park-factors page and fail loudly on an HTTP error status
# instead of going on to parse an error page.
response = requests.get(PARK_FACTORS_URL, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()
site_data = response.text

# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed (and bs4 does not warn
# about having to guess one).
soup = BeautifulSoup(site_data, 'html.parser')

Get the table headers and append a `Splits` column label

In [3]:
# Take the first <table> on the page and use the text of its <th> cells as
# column names, with an extra 'Splits' label added at the end.
table = soup.find('table')
headers = [*(cell.text for cell in table.find_all('th')), 'Splits']
headers

['Park Name', 'Runs', 'HR', '1B', '2B', '3B', 'Splits']

Parse table data

In [4]:
# The first CSS class on each <tr> selects the split label stored with the row;
# any other (or missing) class falls back to 'All'.
splits_by_row_class = {'R': 'RHB', 'L': 'LHB'}

park_factors = []
for row in table.tbody.find_all('tr'):
    # One stripped text value per <td>, in table column order.
    park_factor = [data.text.strip() for data in row.find_all('td')]

    # `row.get('class')` is None when the <tr> has no class attribute (and may
    # be an empty list), so guard before taking the first class instead of
    # indexing blindly.
    row_classes = row.get('class') or []
    row_class = row_classes[0] if row_classes else None

    park_factor.append(splits_by_row_class.get(row_class, 'All'))
    park_factors.append(park_factor)
park_factors[:2]

[['Chase Field(Arizona Diamondbacks)',
  '0.994',
  '0.765',
  '1.095',
  '1.134',
  '1.303',
  'All'],
 ['Chase Field(Arizona Diamondbacks)',
  '1.025',
  '0.782',
  '1.072',
  '1.128',
  '1.453',
  'LHB']]

In [5]:
factors_df = pd.DataFrame(park_factors, columns=headers)

# The scraped 'Park Name' cell looks like 'Chase Field(Arizona Diamondbacks)'.
# Split once, from the right, so the trailing parenthesised group becomes the
# team even if the park name itself contains a '('. Splitting on every '('
# would yield extra columns and break the two-column assignment.
factors_df[['Park Name', 'Team']] = factors_df['Park Name'].str.rsplit('(', n=1, expand=True)

# Drop the closing ')' left at the end of the team name.
factors_df['Team'] = factors_df['Team'].str[:-1]
factors_df.head(2)

Unnamed: 0,Park Name,Runs,HR,1B,2B,3B,Splits,Team
0,Chase Field,0.994,0.765,1.095,1.134,1.303,All,Arizona Diamondbacks
1,Chase Field,1.025,0.782,1.072,1.128,1.453,LHB,Arizona Diamondbacks


In [6]:
# Hand the assembled DataFrame to the project's save helper under the filename
# configured in `constants` (presumably persists it to disk — confirm in utils).
utils.save_dataframe(factors_df, constants.PARK_FACTORS_FILENAME)