In [1]:
import requests
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
import statsapi

In [6]:
#Gets player stat data from MLB stats API
def playerStats(group, seasons=range(2000, 2022), timeout=30):
    """Download season stat splits for every team and return them as one DataFrame.

    The team list is read from the MLB lookup service (2020 season), then one
    stats request is issued per team and per season.

    Parameters
    ----------
    group : str
        Value placed in the ``group`` query parameter of the stats endpoint
        (this notebook passes 'fielding', 'hitting' and 'pitching').
    seasons : iterable of int, optional
        Seasons to request. Defaults to 2000 through 2021 inclusive.
    timeout : float, optional
        Timeout in seconds handed to every HTTP request.

    Returns
    -------
    pandas.DataFrame
        One row per split. Column labels are cut down to the text after the
        last '.', so different nested fields can end up sharing a label (for
        example several 'id' columns); selecting such a label returns all of
        them. Columns that convert to int32 without loss are stored as int32,
        every other column keeps its dtype.

    Raises
    ------
    requests.HTTPError
        If any request comes back with an error status.
    """
    #Materialised so a one-shot iterator is not exhausted after the first team
    seasons = list(seasons)

    #NOTE(review): legacy lookup-service endpoint - confirm it is still being served
    teams_url = "http://lookup-service-prod.mlb.com/json/named.team_all_season.bam?sport_code='mlb'&all_star_sw='N'&sort_order='name_asc'&season=2020"

    splits = []
    #One session for all calls so connections to the same host are reused
    with requests.Session() as session:
        #Gets the team ids required for the player stats endpoint
        teams_response = session.get(teams_url, timeout=timeout)
        teams_response.raise_for_status()
        team_rows = teams_response.json()['team_all_season']['queryResults']['row']
        team_ids = pd.json_normalize(team_rows)['team_id']

        #Gets the player stats
        for team_id in team_ids:
            for season in seasons:
                stats_url = f"https://statsapi.mlb.com/api/v1/stats?stats=season&group={group}&gameType=R&season={season}&teamId={team_id}&playerPool=ALL"
                stats_response = session.get(stats_url, timeout=timeout)
                stats_response.raise_for_status()
                stats = stats_response.json().get('stats') or []
                #A team/season without stats contributes no rows instead of raising IndexError
                if stats:
                    splits += stats[0].get('splits') or []

    data_df = pd.json_normalize(splits)

    #Cleans up the data
    #Empty rows go first: a single NaN would make every integer cast below fail
    data_df = data_df.dropna(how='all')

    for col in data_df.columns:
        column = data_df[col]
        #Float columns holding fractions or NaN stay as they are rather than being truncated
        if pd.api.types.is_float_dtype(column) and not (column % 1 == 0).all():
            continue
        try:
            data_df[col] = column.astype('int32')
        except (ValueError, TypeError, OverflowError):
            #Not an integer column (names, ".300"-style rates, missing values): keep it untouched
            pass

    data_df.columns = data_df.columns.map(lambda x: x.split(".")[-1])

    return data_df



In [9]:
#Filters and renames the dataframe columns, then writes one CSV per stat group.
#playerStats cuts labels down to their last dotted component, so selecting 'id',
#'name' or 'abbreviation' can return several columns at once. That is why every
#relabel list is longer than its selection list and is applied purely by position.
#NOTE(review): the relabel lists presume 4 'id', 3 'name' and 2 'abbreviation'
#columns in a fixed order - verify against a live API response.

#Stat columns whose labels are kept as-is by the relabel step
fielding_stat_cols = ['assists','putOuts','errors','chances','fielding','innings','games',
    'gamesStarted','doublePlays','triplePlays','throwingErrors']

hitting_stat_cols = ['groundOuts','airOuts','runs','doubles','triples','homeRuns','strikeOuts',
    'baseOnBalls','intentionalWalks','hits','avg','atBats','obp','slg','ops','stolenBases',
    'caughtStealing','groundIntoDoublePlay','numberOfPitches','plateAppearances',
    'totalBases','rbi','leftOnBase','sacBunts','sacFlies','babip']

pitching_stat_cols = ['gamesStarted','groundOuts','airOuts','runs','doubles',
    'triples','homeRuns','strikeOuts','baseOnBalls','intentionalWalks','hits','hitByPitch',
    'avg','era','wins','losses','saves','saveOpportunities','holds','blownSaves','earnedRuns',
    'whip','battersFaced','outs','gamesPitched','completeGames','shutouts','strikes',
    'strikePercentage','hitBatsmen','balks','wildPitches','pickoffs','totalBases',
    'pitchesPerInning','gamesFinished','strikeoutWalkRatio','strikeoutsPer9Inn',
    'walksPer9Inn','runsScoredPer9','inheritedRunners','inheritedRunnersScored',
    'sacBunts','sacFlies']

#Fielding
fielding_selected = ['season','fullName','id','name'] + fielding_stat_cols
fielding_labels = ['season','fullName','team_id','player_id','league_id','rank',
    'team_name','league_abreviation','postionFullName'] + fielding_stat_cols
fielding_data_df = playerStats('fielding')[fielding_selected]
fielding_data_df.columns = fielding_labels

#Hitting
hitting_selected = ['season','fullName','name','id','abbreviation'] + hitting_stat_cols
hitting_labels = ['season','fullName','team','league1','fullPosition','team_id','player_id',
    'league_id','rank_id','leagueAbbreviation','pos_abbreviation'] + hitting_stat_cols
hitting_data_df = playerStats('hitting')[hitting_selected]
hitting_data_df.columns = hitting_labels

#Pitching
pitching_selected = ['season','fullName','id','name','abbreviation'] + pitching_stat_cols
pitching_labels = ['season','fullName','team_id','player_id','league_id','rank_id','team_name',
    'league_name','posion_name','leagueAbbreviation','pos_abbreviation'] + pitching_stat_cols
pitching_data_df = playerStats('pitching')[pitching_selected]
pitching_data_df.columns = pitching_labels

#Saves each table next to the notebook
for csv_name, stat_table in (
    ('fielding.csv', fielding_data_df),
    ('hitting.csv', hitting_data_df),
    ('pitching.csv', pitching_data_df),
):
    stat_table.to_csv(csv_name)