In [286]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import pandas as pd

In [276]:
def get_games(date, timeout=30):
    """Download the MLB scores page for one day and extract its game elements.

    Parameters
    ----------
    date : str
        Day to fetch. It is appended verbatim to the scores URL; the rest
        of this notebook passes it as 'YYYY-MM-DD'.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a single stalled connection would block a
        multi-day scrape indefinitely.

    Returns
    -------
    tuple
        ``(games, status)``: the list of game-card ``div`` elements and the
        list of game-state ``span`` elements found on the page. Both lists
        are searched page-wide and independently of each other.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    url = 'https://www.mlb.com/scores/' + date
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of parsing it and reporting
    # "no games" for that day.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    # NOTE(review): these look like build-generated CSS class names, so they
    # presumably need updating whenever the site's markup changes.
    # get every game card on page
    games = soup.find_all(
        'div',
        class_="ScoresGamestyle__PaddingWrapper-sc-7t80if-5 btOCDf")
    # get every game-state label on page
    status = soup.find_all(
        'span',
        class_="StatusLayerstyle__GameStateWrapper-sc-1s2c2o8-3 feaLYF")
    return games, status

In [281]:
def get_game_data(date):
    """Build column-oriented box-score data for the finished games of one day.

    Fetches the day's game cards and status labels through ``get_games`` and
    keeps only the games whose status text begins with 'Final'. Status labels
    are matched to game cards by position in their respective lists.

    Parameters
    ----------
    date : str
        Day to fetch; also stored unchanged in the 'date' column.

    Returns
    -------
    dict
        Maps each column name to a list with one entry per kept game.
        Runs, hits and errors are stored as the text found on the page.
    """
    columns = ('date', 'away_team', 'away_runs', 'away_hits', 'away_errors',
               'home_team', 'home_runs', 'home_hits', 'home_errors')
    data = {column: [] for column in columns}

    games, all_status = get_games(date)

    for index, game in enumerate(games):
        # Anything that is not final (or final after extra text) is skipped.
        if not all_status[index].text.startswith('Final'):
            continue

        team_cells = game.find_all(
            'div',
            class_="TeamWrappersstyle__DesktopTeamWrapper-sc-uqs6qh-0 fdaoCu")
        away_team = team_cells[0].text
        home_team = team_cells[1].text

        table = game.find(
            'div',
            class_="tablestyle__TableContainer-sc-wsl6eq-2 gNUrMZ")
        cells = table.findChildren()

        # The values are read from fixed positions in the list of the table
        # container's descendants: 12/14/16 for the away side and 19/21/23
        # for the home side.
        row = {
            'date': date,
            'away_team': away_team,
            'home_team': home_team,
            'away_runs': cells[12].text,
            'away_hits': cells[14].text,
            'away_errors': cells[16].text,
            'home_runs': cells[19].text,
            'home_hits': cells[21].text,
            'home_errors': cells[23].text,
        }
        for column, value in row.items():
            data[column].append(value)

    return data

In [235]:
def combine_dicts(dict1, dict2):
    """Return a copy of dict1 with dict2's value added onto each of its values.

    Only works for dicts with the same keys: every key of dict1 is looked up
    in dict2 (a missing key raises KeyError), and keys present only in dict2
    are not carried over. Neither input dict is modified.
    """
    merged = dict1.copy()
    merged.update((key, dict1[key] + dict2[key]) for key in dict1)
    return merged

In [279]:
def create_df_for_year(start_date, end_date):
    """Collect game data for every day from start_date through end_date.

    Calls ``get_game_data`` once per calendar day (both endpoints included),
    merges the per-day dicts with ``combine_dicts`` and wraps the result in
    a DataFrame.

    Parameters
    ----------
    start_date, end_date : str
        Dates in 'YYYY-MM-DD' form. Month and day may be written without
        zero padding; every date handed to ``get_game_data`` is normalized
        to the zero-padded form.

    Returns
    -------
    pandas.DataFrame
        The merged per-day data, with days appended in chronological order.

    Raises
    ------
    ValueError
        If either date cannot be parsed, or if end_date is earlier than
        start_date.
    """
    date_format = '%Y-%m-%d'
    first_day = datetime.strptime(start_date, date_format)
    last_day = datetime.strptime(end_date, date_format)
    # Compare real dates rather than strings: a string inequality test never
    # becomes false for an unpadded or earlier end date and would keep
    # fetching pages forever.
    if last_day < first_day:
        raise ValueError(
            'end_date %r is earlier than start_date %r' % (end_date, start_date))

    one_day = timedelta(days=1)
    current_day = first_day
    combined = get_game_data(current_day.strftime(date_format))
    while current_day < last_day:
        current_day += one_day
        day_data = get_game_data(current_day.strftime(date_format))
        combined = combine_dicts(combined, day_data)
    return pd.DataFrame(combined)

In [285]:
# Scrape every day from 2019-03-20 through 2019-09-29 (one page request per day).
df_2019 = create_df_for_year(start_date='2019-03-20', end_date='2019-09-29')

In [294]:
# Save the scraped scores as CSV, leaving the DataFrame's row index out of the file.
df_2019.to_csv(path_or_buf='2019_scores.csv', index=False)