In [1]:
import requests
import json
from pandas import DataFrame, Series
import pandas as pd

# MLB DATA API Documentation - https://appac.github.io/mlb-data-api-docs/

In [2]:
full_mookie_url = "http://lookup-service-prod.mlb.com/json/named.search_player_all.bam?sport_code=%27mlb%27&active_sw=%27Y%27&name_part=%27mookie%25%27"

In [3]:
resp = requests.get(full_mookie_url)
resp

<Response [200]>

In [4]:
mookie_json = resp.json()
mookie_json

{'search_player_all': {'copyRight': ' Copyright 2022 MLB Advanced Media, L.P.  Use of any content on this page acknowledges agreement to the terms posted here http://gdx.mlb.com/components/copyright.txt  ',
  'queryResults': {'created': '2022-06-23T17:24:42',
   'totalSize': '1',
   'row': {'position': 'RF',
    'birth_country': 'USA',
    'weight': '180',
    'birth_state': 'TN',
    'name_display_first_last': 'Mookie Betts',
    'college': '',
    'height_inches': '9',
    'name_display_roster': 'Betts',
    'sport_code': 'mlb',
    'bats': 'R',
    'name_first': 'Mookie',
    'team_code': 'lan',
    'birth_city': 'Nashville',
    'height_feet': '5',
    'pro_debut_date': '2014-06-29T00:00:00',
    'team_full': 'Los Angeles Dodgers',
    'team_abbrev': 'LAD',
    'birth_date': '1992-10-07T00:00:00',
    'throws': 'R',
    'league': 'NL',
    'name_display_last_first': 'Betts, Mookie',
    'position_id': '9',
    'high_school': 'John Overton, Nashville, TN',
    'name_use': 'Mookie',


In [5]:
mookie_json['search_player_all']['queryResults']['row']

{'position': 'RF',
 'birth_country': 'USA',
 'weight': '180',
 'birth_state': 'TN',
 'name_display_first_last': 'Mookie Betts',
 'college': '',
 'height_inches': '9',
 'name_display_roster': 'Betts',
 'sport_code': 'mlb',
 'bats': 'R',
 'name_first': 'Mookie',
 'team_code': 'lan',
 'birth_city': 'Nashville',
 'height_feet': '5',
 'pro_debut_date': '2014-06-29T00:00:00',
 'team_full': 'Los Angeles Dodgers',
 'team_abbrev': 'LAD',
 'birth_date': '1992-10-07T00:00:00',
 'throws': 'R',
 'league': 'NL',
 'name_display_last_first': 'Betts, Mookie',
 'position_id': '9',
 'high_school': 'John Overton, Nashville, TN',
 'name_use': 'Mookie',
 'player_id': '605141',
 'name_last': 'Betts',
 'team_id': '119',
 'service_years': '',
 'active_sw': 'Y'}

# Restricting which variables you get back with ```col_in``` and ```col_ex```

#### to do that we add ```&search_player_all.col_ex={variable_we_don't_want}``` 
#### or ```&search_player_all.col_ex={variable_we_want}```
To add more then one variable seperate with a comma AND no space

In [6]:
# col_in
mookie_url2 = "http://lookup-service-prod.mlb.com/json/named.search_player_all.bam?sport_code=%27mlb%27&active_sw=%27Y%27&name_part=%27mookie%25%27&search_player_all.col_in=player_id,name_display_first_last,team_full,position,bats,throws"

In [7]:
resp2 = requests.get(mookie_url2)

In [8]:
mookie_json2 = resp2.json()
mookie_json2

{'search_player_all': {'copyRight': ' Copyright 2022 MLB Advanced Media, L.P.  Use of any content on this page acknowledges agreement to the terms posted here http://gdx.mlb.com/components/copyright.txt  ',
  'queryResults': {'created': '2022-06-23T17:24:42',
   'totalSize': '1',
   'row': {'position': 'RF',
    'bats': 'R',
    'throws': 'R',
    'name_display_first_last': 'Mookie Betts',
    'player_id': '605141',
    'team_full': 'Los Angeles Dodgers'}}}}

In [9]:
# col_ex
mookie_url3 = "http://lookup-service-prod.mlb.com/json/named.search_player_all.bam?sport_code=%27mlb%27&active_sw=%27Y%27&name_part=%27mookie%25%27&search_player_all.col_ex=player_id,name_display_first_last,team_full,position,bats,throws, position_id,name_use,name_last,team_id,name_display_roster,sport_code,name_first"

In [10]:
resp3 = requests.get(mookie_url3)
mookie_json3 = resp3.json()
mookie_json3

{'search_player_all': {'copyRight': ' Copyright 2022 MLB Advanced Media, L.P.  Use of any content on this page acknowledges agreement to the terms posted here http://gdx.mlb.com/components/copyright.txt  ',
  'queryResults': {'created': '2022-06-23T17:24:42',
   'totalSize': '1',
   'row': {'birth_country': 'USA',
    'weight': '180',
    'birth_state': 'TN',
    'team_abbrev': 'LAD',
    'birth_date': '1992-10-07T00:00:00',
    'college': '',
    'height_inches': '9',
    'league': 'NL',
    'name_display_last_first': 'Betts, Mookie',
    'position_id': '9',
    'high_school': 'John Overton, Nashville, TN',
    'team_code': 'lan',
    'height_feet': '5',
    'birth_city': 'Nashville',
    'pro_debut_date': '2014-06-29T00:00:00',
    'service_years': '',
    'active_sw': 'Y'}}}}

### If we look at some of the other urls in the documentation we'll notice they all start with the same thing. To make things easier we'll assign it to a constant variable

In [11]:
MLB_URL = 'http://lookup-service-prod.mlb.com/json/'

Now we can work with each endpoint separately

In [12]:
mookie_endpoint = 'named.search_player_all.bam?sport_code=%27mlb%27&active_sw=%27Y%27&name_part=%27mookie%25%27'

requests.get(MLB_URL + mookie_endpoint)

<Response [200]>

That's a bit easier but that endpoint is a bit overwhelming. Let's put it in a function. This will make it easier to think about

In [13]:
def player_search(to_search):
    search_endpoint = f'/named.search_player_all.bam?sport_code=%27mlb%27&active_sw=%27Y%27&name_part=%27{to_search}%25%27&search_player_all.col_in=player_id,name_display_first_last,team_full,position,bats,throws,team_id'
    
    resp = requests.get(MLB_URL + search_endpoint)
    try:
        return resp.json()['search_player_all']['queryResults']['row']
    except KeyError:
        print("ERROR: Not a player")
        

In [14]:
# Note how we're putting whatever is in ```to_search``` inside our URL with f strings
yelich = player_search('yelich')
yelich

{'position': 'LF',
 'bats': 'L',
 'throws': 'R',
 'name_display_first_last': 'Christian Yelich',
 'player_id': '592885',
 'team_id': '158',
 'team_full': 'Milwaukee Brewers'}

Always good to try out a few use cases and make sure it works properly. What if we put a players full name?

In [15]:
aaron_judge = player_search('aaron judge')
aaron_judge

{'position': 'RF',
 'bats': 'R',
 'throws': 'R',
 'name_display_first_last': 'Aaron Judge',
 'player_id': '592450',
 'team_id': '147',
 'team_full': 'New York Yankees'}

In [16]:
mike_trout = player_search('mike trout')
mike_trout

{'position': 'CF',
 'bats': 'R',
 'throws': 'R',
 'name_display_first_last': 'Mike Trout',
 'player_id': '545361',
 'team_id': '108',
 'team_full': 'Los Angeles Angels'}

In [17]:
bellingers = player_search('bellinger')
bellingers

[{'position': '1B',
  'bats': 'R',
  'throws': 'R',
  'name_display_first_last': 'Clay Bellinger',
  'player_id': '150083',
  'team_id': '108',
  'team_full': 'Los Angeles Angels'},
 {'position': 'CF',
  'bats': 'L',
  'throws': 'L',
  'name_display_first_last': 'Cody Bellinger',
  'player_id': '641355',
  'team_id': '119',
  'team_full': 'Los Angeles Dodgers'}]

In [18]:
pete_alonso = player_search('pete alonso')
pete_alonso

{'position': '1B',
 'bats': 'R',
 'throws': 'R',
 'name_display_first_last': 'Pete Alonso',
 'player_id': '624413',
 'team_id': '121',
 'team_full': 'New York Mets'}

In [19]:
not_a_player = player_search('askf')
not_a_player

ERROR: Not a player


# Hitting and Pitching Stats

There are endpoints for hitting and pitching stats, both by season and career. All endpoints take a numeric player id. The specific season htting and pitcher stas also take a year

In [20]:
# Get the current Roster of the Brewers 
brewers_url = MLB_URL + f'/named.roster_40.bam?team_id=%27158%27'

brewers_resp = requests.get(brewers_url)

brewers_dict = json.loads(brewers_resp.text)['roster_40']['queryResults']['row']
brewers_dict

[{'college': '',
  'end_date': '',
  'pro_debut_date': '2018-05-22T00:00:00',
  'status_code': 'A',
  'name_full': 'Adames, Willy',
  'name_display_first_last': 'Willy Adames',
  'birth_date': '1995-09-02T00:00:00',
  'height_inches': '0',
  'team_id': '158',
  'name_last': 'Adames',
  'bats': 'R',
  'player_id': '642715',
  'position_txt': 'SS',
  'primary_position': '6',
  'jersey_number': '27',
  'starter_sw': 'N',
  'start_date': '2021-05-21T00:00:00',
  'name_display_last_first': 'Adames, Willy',
  'name_first': 'Willy',
  'name_use': 'Willy',
  'weight': '210',
  'throws': 'R',
  'team_name': 'Milwaukee Brewers',
  'team_code': 'mil',
  'team_abbrev': 'MIL',
  'height_feet': '6'},
 {'college': 'Menlo, CA',
  'end_date': '',
  'pro_debut_date': '2022-06-01T00:00:00',
  'status_code': 'A',
  'name_full': 'Alexander, Jason',
  'name_display_first_last': 'Jason Alexander',
  'birth_date': '1993-03-01T00:00:00',
  'height_inches': '3',
  'team_id': '158',
  'name_last': 'Alexander',
 

In [21]:
# same but with the Yankees
yankees_url = MLB_URL + f'/named.roster_40.bam?team_id=%27147%27'

yankees_resp = requests.get(yankees_url)

yankees_dict = json.loads(yankees_resp.text)['roster_40']['queryResults']['row']
yankees_dict

[{'college': '',
  'end_date': '',
  'pro_debut_date': '2020-08-08T00:00:00',
  'status_code': 'RM',
  'name_full': 'Abreu, Albert',
  'name_display_first_last': 'Albert Abreu',
  'birth_date': '1995-09-26T00:00:00',
  'height_inches': '2',
  'team_id': '147',
  'name_last': 'Abreu',
  'bats': 'R',
  'player_id': '656061',
  'position_txt': 'P',
  'primary_position': '1',
  'jersey_number': '',
  'starter_sw': 'N',
  'start_date': '2022-06-21T00:00:00',
  'name_display_last_first': 'Abreu, Albert',
  'name_first': 'Albert',
  'name_use': 'Albert',
  'weight': '190',
  'throws': 'R',
  'team_name': 'New York Yankees',
  'team_code': 'nya',
  'team_abbrev': 'NYY',
  'height_feet': '6'},
 {'college': '',
  'end_date': '',
  'pro_debut_date': '2017-06-28T00:00:00',
  'status_code': 'RM',
  'name_full': 'Andujar, Miguel',
  'name_display_first_last': 'Miguel Andujar',
  'birth_date': '1995-03-02T00:00:00',
  'height_inches': '0',
  'team_id': '147',
  'name_last': 'Andujar',
  'bats': 'R',


In [22]:
mets_url = MLB_URL + f'/named.roster_40.bam?team_id=%27121%27'
mets_resp = requests.get(mets_url)

mets_dict = json.loads(mets_resp.text)['roster_40']['queryResults']['row']

In [23]:
# Url function
def current_40_man_roster(team_id):
    roster_url = MLB_URL + f'/named.roster_40.bam?team_id=%27{team_id}%27'
    
    resp = requests.get(roster_url)

    roster_dict = json.loads(resp.text)['roster_40']['queryResults']['row']
    return roster_dict

The only tricky thing is that running ```resp.json()``` on our response object returns a decoding error, which means the response we're getting back isn't valid json.

If we look at the text with ```resp.text``` we can see the problem is the JSON is wrapped in parenthesis and has a semi-colon on the end. We could find a replace these characters like this:

In [24]:
# Note: Calling multiple methods like this in a row is called chaining.
clean_resp_text = (yankees_resp.text
                  .replace(')', '')
                  .replace(')', '')
                  .replace(';', ''))

In [25]:
# When chaining multiple methods together, write them over multple lines to make it clearer
# As opposed to:
clean_resp_text2 = (resp.text.replace('(','').replace(')', '').replace(';', ''))

In [26]:
yankees_dict = (json.loads(clean_resp_text)['roster_40']['queryResults']['row'])
yankees_dict

[{'college': '',
  'end_date': '',
  'pro_debut_date': '2020-08-08T00:00:00',
  'status_code': 'RM',
  'name_full': 'Abreu, Albert',
  'name_display_first_last': 'Albert Abreu',
  'birth_date': '1995-09-26T00:00:00',
  'height_inches': '2',
  'team_id': '147',
  'name_last': 'Abreu',
  'bats': 'R',
  'player_id': '656061',
  'position_txt': 'P',
  'primary_position': '1',
  'jersey_number': '',
  'starter_sw': 'N',
  'start_date': '2022-06-21T00:00:00',
  'name_display_last_first': 'Abreu, Albert',
  'name_first': 'Albert',
  'name_use': 'Albert',
  'weight': '190',
  'throws': 'R',
  'team_name': 'New York Yankees',
  'team_code': 'nya',
  'team_abbrev': 'NYY',
  'height_feet': '6'},
 {'college': '',
  'end_date': '',
  'pro_debut_date': '2017-06-28T00:00:00',
  'status_code': 'RM',
  'name_full': 'Andujar, Miguel',
  'name_display_first_last': 'Miguel Andujar',
  'birth_date': '1995-03-02T00:00:00',
  'height_inches': '0',
  'team_id': '147',
  'name_last': 'Andujar',
  'bats': 'R',


In [27]:
yankees_dict[2]

{'college': '',
 'end_date': '',
 'pro_debut_date': '2015-07-02T00:00:00',
 'status_code': 'A',
 'name_full': 'Banuelos, Manny',
 'name_display_first_last': 'Manny Banuelos',
 'birth_date': '1991-03-13T00:00:00',
 'height_inches': '10',
 'team_id': '147',
 'name_last': 'Banuelos',
 'bats': 'R',
 'player_id': '544365',
 'position_txt': 'P',
 'primary_position': '1',
 'jersey_number': '68',
 'starter_sw': 'N',
 'start_date': '2022-05-26T00:00:00',
 'name_display_last_first': 'Banuelos, Manny',
 'name_first': 'Manuel',
 'name_use': 'Manny',
 'weight': '215',
 'throws': 'L',
 'team_name': 'New York Yankees',
 'team_code': 'nya',
 'team_abbrev': 'NYY',
 'height_feet': '5'}

In [28]:
yankees_roster_df = pd.DataFrame(yankees_dict)
yankees_roster_df.loc[(yankees_roster_df['position_txt'] == 'P') & (yankees_roster_df['status_code'] != 'RM')] 

Unnamed: 0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
2,,,2015-07-02T00:00:00,A,"Banuelos, Manny",Manny Banuelos,1991-03-13T00:00:00,10,147,Banuelos,...,2022-05-26T00:00:00,"Banuelos, Manny",Manuel,Manny,215,L,New York Yankees,nya,NYY,5
5,,,2015-04-06T00:00:00,A,"Castro, Miguel",Miguel Castro,1994-12-24T00:00:00,7,147,Castro,...,2022-04-03T00:00:00,"Castro, Miguel",Miguel,Miguel,205,R,New York Yankees,nya,NYY,6
6,,,2010-08-31T00:00:00,D15,"Chapman, Aroldis",Aroldis Chapman,1988-02-28T00:00:00,4,147,Chapman,...,2016-12-15T00:00:00,"Chapman, Aroldis",Albertin,Aroldis,218,L,New York Yankees,nya,NYY,6
7,UCLA,,2013-06-11T00:00:00,A,"Cole, Gerrit",Gerrit Cole,1990-09-08T00:00:00,4,147,Cole,...,2019-12-18T00:00:00,"Cole, Gerrit",Gerrit,Gerrit,220,R,New York Yankees,nya,NYY,6
8,,,2018-03-31T00:00:00,A,"Cortes, Nestor",Nestor Cortes,1994-12-10T00:00:00,11,147,Cortes,...,2021-05-30T00:00:00,"Cortes, Nestor",Nestor,Nestor,210,L,New York Yankees,nya,NYY,5
18,,,2018-04-06T00:00:00,A,"Holmes, Clay",Clay Holmes,1993-03-27T00:00:00,5,147,Holmes,...,2021-07-26T00:00:00,"Holmes, Clay",Clayton,Clay,245,R,New York Yankees,nya,NYY,6
21,Boston College,,2019-09-27T00:00:00,A,"King, Michael",Michael King,1995-05-25T00:00:00,3,147,King,...,2019-09-19T00:00:00,"King, Michael",Michael,Michael,210,R,New York Yankees,nya,NYY,6
23,,,2018-06-15T00:00:00,D15,"Loaisiga, Jonathan",Jonathan Loaisiga,1994-11-02T00:00:00,11,147,Loaisiga,...,2017-11-20T00:00:00,"Loaisiga, Jonathan",Jonathan,Jonathan,165,R,New York Yankees,nya,NYY,5
25,Rice,,2012-04-07T00:00:00,A,"Luetge, Lucas",Lucas Luetge,1987-03-24T00:00:00,4,147,Luetge,...,2021-03-31T00:00:00,"Luetge, Lucas",Lucas,Lucas,205,L,New York Yankees,nya,NYY,6
26,Delaware,,2022-04-09T00:00:00,A,"Marinaccio, Ron",Ron Marinaccio,1995-07-01T00:00:00,2,147,Marinaccio,...,2021-11-19T00:00:00,"Marinaccio, Ron",Ronald,Ron,205,R,New York Yankees,nya,NYY,6


In [29]:
yankees_roster_df.loc[(yankees_roster_df['position_txt'] != 'P') & (yankees_roster_df['status_code'] != 'RM')]

Unnamed: 0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
4,Texas Christian,,2011-06-04T00:00:00,A,"Carpenter, Matt",Matt Carpenter,1985-11-26T00:00:00,4,147,Carpenter,...,2022-05-26T00:00:00,"Carpenter, Matt",Matthew,Matt,210,R,New York Yankees,nya,NYY,6
9,Auburn,,2010-04-30T00:00:00,A,"Donaldson, Josh",Josh Donaldson,1985-12-08T00:00:00,1,147,Donaldson,...,2022-03-13T00:00:00,"Donaldson, Josh",Joshua,Josh,210,R,New York Yankees,nya,NYY,6
11,,,2015-06-02T00:00:00,A,"Gallo, Joey",Joey Gallo,1993-11-19T00:00:00,5,147,Gallo,...,2021-07-29T00:00:00,"Gallo, Joey",Joseph,Joey,250,R,New York Yankees,nya,NYY,6
15,,,2012-04-06T00:00:00,A,"Gonzalez, Marwin",Marwin Gonzalez,1989-03-14T00:00:00,1,147,Gonzalez,...,2022-04-07T00:00:00,"Gonzalez, Marwin",Marwin,Marwin,205,R,New York Yankees,nya,NYY,6
16,,,2013-04-01T00:00:00,A,"Hicks, Aaron",Aaron Hicks,1989-10-02T00:00:00,1,147,Hicks,...,2015-11-11T00:00:00,"Hicks, Aaron",Aaron,Aaron,205,R,New York Yankees,nya,NYY,6
17,,,2017-04-10T00:00:00,A,"Higashioka, Kyle",Kyle Higashioka,1990-04-20T00:00:00,1,147,Higashioka,...,2016-11-04T00:00:00,"Higashioka, Kyle",Kyle,Kyle,202,R,New York Yankees,nya,NYY,6
19,Fresno State,,2016-08-13T00:00:00,A,"Judge, Aaron",Aaron Judge,1992-04-26T00:00:00,7,147,Judge,...,2016-08-13T00:00:00,"Judge, Aaron",Aaron,Aaron,282,R,New York Yankees,nya,NYY,6
20,,,2018-04-10T00:00:00,A,"Kiner-Falefa, Isiah",Isiah Kiner-Falefa,1995-03-23T00:00:00,11,147,Kiner-Falefa,...,2022-03-13T00:00:00,"Kiner-Falefa, Isiah",Isiah,Isiah,190,R,New York Yankees,nya,NYY,5
22,Louisiana State,,2011-05-30T00:00:00,A,"LeMahieu, DJ",DJ LeMahieu,1988-07-13T00:00:00,4,147,LeMahieu,...,2021-01-27T00:00:00,"LeMahieu, DJ",David,DJ,220,R,New York Yankees,nya,NYY,6
32,,,2011-06-09T00:00:00,A,"Rizzo, Anthony",Anthony Rizzo,1989-08-08T00:00:00,3,147,Rizzo,...,2022-03-17T00:00:00,"Rizzo, Anthony",Anthony,Anthony,240,L,New York Yankees,nya,NYY,6


In [30]:
mets_roster_df = pd.DataFrame(mets_dict)
mets_roster_df.loc[(mets_roster_df['position_txt'] == 'P') & (mets_roster_df['status_code'] != 'RM')] 

Unnamed: 0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
1,Akron,,2014-08-30T00:00:00,A,"Bassitt, Chris",Chris Bassitt,1989-02-22T00:00:00,5,121,Bassitt,...,2022-03-12T00:00:00,"Bassitt, Chris",Christopher,Chris,217,R,New York Mets,nyn,NYM,6
4,,,2009-09-01T00:00:00,A,"Carrasco, Carlos",Carlos Carrasco,1987-03-21T00:00:00,4,121,Carrasco,...,2021-01-07T00:00:00,"Carrasco, Carlos",Carlos,Carlos,224,R,New York Mets,nyn,NYM,6
6,,,2016-06-06T00:00:00,A,"Diaz, Edwin",Edwin Diaz,1994-03-22T00:00:00,3,121,Diaz,...,2018-12-03T00:00:00,"Diaz, Edwin",Edwin,Edwin,165,R,New York Mets,nyn,NYM,6
9,Heartland CC,,2022-05-15T00:00:00,D15,"Holderman, Colin",Colin Holderman,1995-10-08T00:00:00,7,121,Holderman,...,2022-05-15T00:00:00,"Holderman, Colin",Colin,Colin,240,R,New York Mets,nyn,NYM,6
10,Alabama,,2008-08-01T00:00:00,A,"Hunter, Tommy",Tommy Hunter,1986-07-03T00:00:00,3,121,Hunter,...,2022-06-17T00:00:00,"Hunter, Tommy",Raymond,Tommy,250,R,New York Mets,nyn,NYM,6
14,,,2018-09-09T00:00:00,A,"Lopez, Yoan",Yoan Lopez,1993-01-02T00:00:00,3,121,Lopez,...,2022-03-29T00:00:00,"Lopez, Yoan",Yoan,Yoan,208,R,New York Mets,nyn,NYM,6
15,Centenary College of Louisiana,,2016-07-01T00:00:00,PL,"Lugo, Seth",Seth Lugo,1989-11-17T00:00:00,4,121,Lugo,...,2015-11-20T00:00:00,"Lugo, Seth",Jacob,Seth,225,R,New York Mets,nyn,NYM,6
21,,,2020-09-20T00:00:00,A,"Medina, Adonis",Adonis Medina,1996-12-18T00:00:00,1,121,Medina,...,2022-04-07T00:00:00,"Medina, Adonis",Adonis,Adonis,187,R,New York Mets,nyn,NYM,6
22,Arizona,,2021-06-23T00:00:00,D15,"Megill, Tylor",Tylor Megill,1995-07-28T00:00:00,7,121,Megill,...,2021-06-23T00:00:00,"Megill, Tylor",Tylor,Tylor,230,R,New York Mets,nyn,NYM,6
26,Northeastern,,2010-05-29T00:00:00,A,"Ottavino, Adam",Adam Ottavino,1985-11-22T00:00:00,5,121,Ottavino,...,2022-03-14T00:00:00,"Ottavino, Adam",Adam,Adam,246,R,New York Mets,nyn,NYM,6


In [31]:
mets_roster_df.loc[(mets_roster_df['position_txt'] == 'P')] 

Unnamed: 0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
1,Akron,,2014-08-30T00:00:00,A,"Bassitt, Chris",Chris Bassitt,1989-02-22T00:00:00,5,121,Bassitt,...,2022-03-12T00:00:00,"Bassitt, Chris",Christopher,Chris,217,R,New York Mets,nyn,NYM,6
2,,,,RM,"Butto, Jose",Jose Butto,1998-03-19T00:00:00,1,121,Butto,...,2021-11-19T00:00:00,"Butto, Jose",Jose,Jose,202,R,New York Mets,nyn,NYM,6
4,,,2009-09-01T00:00:00,A,"Carrasco, Carlos",Carlos Carrasco,1987-03-21T00:00:00,4,121,Carrasco,...,2021-01-07T00:00:00,"Carrasco, Carlos",Carlos,Carlos,224,R,New York Mets,nyn,NYM,6
6,,,2016-06-06T00:00:00,A,"Diaz, Edwin",Edwin Diaz,1994-03-22T00:00:00,3,121,Diaz,...,2018-12-03T00:00:00,"Diaz, Edwin",Edwin,Edwin,165,R,New York Mets,nyn,NYM,6
9,Heartland CC,,2022-05-15T00:00:00,D15,"Holderman, Colin",Colin Holderman,1995-10-08T00:00:00,7,121,Holderman,...,2022-05-15T00:00:00,"Holderman, Colin",Colin,Colin,240,R,New York Mets,nyn,NYM,6
10,Alabama,,2008-08-01T00:00:00,A,"Hunter, Tommy",Tommy Hunter,1986-07-03T00:00:00,3,121,Hunter,...,2022-06-17T00:00:00,"Hunter, Tommy",Raymond,Tommy,250,R,New York Mets,nyn,NYM,6
14,,,2018-09-09T00:00:00,A,"Lopez, Yoan",Yoan Lopez,1993-01-02T00:00:00,3,121,Lopez,...,2022-03-29T00:00:00,"Lopez, Yoan",Yoan,Yoan,208,R,New York Mets,nyn,NYM,6
15,Centenary College of Louisiana,,2016-07-01T00:00:00,PL,"Lugo, Seth",Seth Lugo,1989-11-17T00:00:00,4,121,Lugo,...,2015-11-20T00:00:00,"Lugo, Seth",Jacob,Seth,225,R,New York Mets,nyn,NYM,6
21,,,2020-09-20T00:00:00,A,"Medina, Adonis",Adonis Medina,1996-12-18T00:00:00,1,121,Medina,...,2022-04-07T00:00:00,"Medina, Adonis",Adonis,Adonis,187,R,New York Mets,nyn,NYM,6
22,Arizona,,2021-06-23T00:00:00,D15,"Megill, Tylor",Tylor Megill,1995-07-28T00:00:00,7,121,Megill,...,2021-06-23T00:00:00,"Megill, Tylor",Tylor,Tylor,230,R,New York Mets,nyn,NYM,6


In [32]:
mets_roster_df.loc[(mets_roster_df['position_txt'] != 'P') & (yankees_roster_df['status_code'] != 'RM')]

Unnamed: 0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
5,Cal State Fullerton,,2017-08-05T00:00:00,A,"Davis, J.D.",J.D. Davis,1993-04-27T00:00:00,3,121,Davis,...,2019-01-06T00:00:00,"Davis, J.D.",Jonathan,J.D.,218,R,New York Mets,nyn,NYM,6
7,,,2011-09-02T00:00:00,A,"Escobar, Eduardo",Eduardo Escobar,1989-01-05T00:00:00,10,121,Escobar,...,2021-12-01T00:00:00,"Escobar, Eduardo",Eduardo,Eduardo,193,R,New York Mets,nyn,NYM,5
8,,,2018-05-11T00:00:00,A,"Guillorme, Luis",Luis Guillorme,1994-09-27T00:00:00,10,121,Guillorme,...,2017-11-20T00:00:00,"Guillorme, Luis",Luis,Luis,190,R,New York Mets,nyn,NYM,5
11,SUNY Stony Brook,,2015-08-21T00:00:00,D10,"Jankowski, Travis",Travis Jankowski,1991-06-15T00:00:00,2,121,Jankowski,...,2022-04-06T00:00:00,"Jankowski, Travis",Travis,Travis,190,R,New York Mets,nyn,NYM,6
16,,,2012-07-26T00:00:00,A,"Marte, Starling",Starling Marte,1988-10-09T00:00:00,1,121,Marte,...,2021-11-30T00:00:00,"Marte, Starling",Starling,Starling,195,R,New York Mets,nyn,NYM,6
17,,,,RM,"Mauricio, Ronny",Ronny Mauricio,2001-04-04T00:00:00,3,121,Mauricio,...,2021-11-19T00:00:00,"Mauricio, Ronny",Ronny,Ronny,166,R,New York Mets,nyn,NYM,6
18,Stetson,,2021-04-11T00:00:00,A,"Mazeika, Patrick",Patrick Mazeika,1993-10-14T00:00:00,3,121,Mazeika,...,2020-08-25T00:00:00,"Mazeika, Patrick",Patrick,Patrick,210,R,New York Mets,nyn,NYM,6
19,Arkansas,,2014-09-01T00:00:00,D10,"McCann, James",James McCann,1990-06-13T00:00:00,3,121,McCann,...,2020-12-15T00:00:00,"McCann, James",James,James,220,R,New York Mets,nyn,NYM,6
20,Long Beach State,,2018-07-24T00:00:00,A,"McNeil, Jeff",Jeff McNeil,1992-04-08T00:00:00,1,121,McNeil,...,2018-07-24T00:00:00,"McNeil, Jeff",Jeff,Jeff,195,R,New York Mets,nyn,NYM,6
23,,,2017-09-13T00:00:00,A,"Nido, Tomas",Tomas Nido,1994-04-12T00:00:00,0,121,Nido,...,2016-11-18T00:00:00,"Nido, Tomas",Tomas,Tomas,211,R,New York Mets,nyn,NYM,6


To reverse making a DataFrame (maybe to write your own API that needs to return JSON or something) you can do:

In [33]:
yankees_roster_df.head(2).to_dict('records')

[{'college': '',
  'end_date': '',
  'pro_debut_date': '2020-08-08T00:00:00',
  'status_code': 'RM',
  'name_full': 'Abreu, Albert',
  'name_display_first_last': 'Albert Abreu',
  'birth_date': '1995-09-26T00:00:00',
  'height_inches': '2',
  'team_id': '147',
  'name_last': 'Abreu',
  'bats': 'R',
  'player_id': '656061',
  'position_txt': 'P',
  'primary_position': '1',
  'jersey_number': '',
  'starter_sw': 'N',
  'start_date': '2022-06-21T00:00:00',
  'name_display_last_first': 'Abreu, Albert',
  'name_first': 'Albert',
  'name_use': 'Albert',
  'weight': '190',
  'throws': 'R',
  'team_name': 'New York Yankees',
  'team_code': 'nya',
  'team_abbrev': 'NYY',
  'height_feet': '6'},
 {'college': '',
  'end_date': '',
  'pro_debut_date': '2017-06-28T00:00:00',
  'status_code': 'RM',
  'name_full': 'Andujar, Miguel',
  'name_display_first_last': 'Miguel Andujar',
  'birth_date': '1995-03-02T00:00:00',
  'height_inches': '0',
  'team_id': '147',
  'name_last': 'Andujar',
  'bats': 'R',


In [34]:
def clean_json(resp):
    return json.loads(resp.text
                  .replace(')', '')
                  .replace(')', '')
                  .replace(';', ''))

# CAUTION: 
### ```roster40``` 
below only has an endpoint for the current year AND always returns the current roster so doing ```pd.concat``` with ```teams_by_year``` is  controversial. For example, if you change ```teams_2020``` parameter to '1900' about 5 cells down, the function ```roster40``` will only be able to return CURRENT players (2022) to teams that were physically around in the year '1900'. To get a better idea of stats from a specific year, the endpoint from the API must include the ```season``` parameter to avoid confusion


In [35]:
def roster40(team_id):
    roster_url = MLB_URL + f'/named.roster_40.bam?team_id=%27{team_id}%27'
    
    resp = requests.get(roster_url)

    team_json = clean_json(resp)
    return DataFrame(team_json['roster_40']['queryResults']['row'])

In [36]:
YANKEES_CURRENT_ROSTER = roster40(147)
YANKEES_CURRENT_ROSTER

Unnamed: 0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
0,,,2020-08-08T00:00:00,RM,"Abreu, Albert",Albert Abreu,1995-09-26T00:00:00,2,147,Abreu,...,2022-06-21T00:00:00,"Abreu, Albert",Albert,Albert,190,R,New York Yankees,nya,NYY,6
1,,,2017-06-28T00:00:00,RM,"Andujar, Miguel",Miguel Andujar,1995-03-02T00:00:00,0,147,Andujar,...,2016-11-18T00:00:00,"Andujar, Miguel",Miguel,Miguel,211,R,New York Yankees,nya,NYY,6
2,,,2015-07-02T00:00:00,A,"Banuelos, Manny",Manny Banuelos,1991-03-13T00:00:00,10,147,Banuelos,...,2022-05-26T00:00:00,"Banuelos, Manny",Manuel,Manny,215,L,New York Yankees,nya,NYY,5
3,,,,RM,"Cabrera, Oswaldo",Oswaldo Cabrera,1999-03-01T00:00:00,10,147,Cabrera,...,2021-11-19T00:00:00,"Cabrera, Oswaldo",Oswaldo,Oswaldo,145,R,New York Yankees,nya,NYY,5
4,Texas Christian,,2011-06-04T00:00:00,A,"Carpenter, Matt",Matt Carpenter,1985-11-26T00:00:00,4,147,Carpenter,...,2022-05-26T00:00:00,"Carpenter, Matt",Matthew,Matt,210,R,New York Yankees,nya,NYY,6
5,,,2015-04-06T00:00:00,A,"Castro, Miguel",Miguel Castro,1994-12-24T00:00:00,7,147,Castro,...,2022-04-03T00:00:00,"Castro, Miguel",Miguel,Miguel,205,R,New York Yankees,nya,NYY,6
6,,,2010-08-31T00:00:00,D15,"Chapman, Aroldis",Aroldis Chapman,1988-02-28T00:00:00,4,147,Chapman,...,2016-12-15T00:00:00,"Chapman, Aroldis",Albertin,Aroldis,218,L,New York Yankees,nya,NYY,6
7,UCLA,,2013-06-11T00:00:00,A,"Cole, Gerrit",Gerrit Cole,1990-09-08T00:00:00,4,147,Cole,...,2019-12-18T00:00:00,"Cole, Gerrit",Gerrit,Gerrit,220,R,New York Yankees,nya,NYY,6
8,,,2018-03-31T00:00:00,A,"Cortes, Nestor",Nestor Cortes,1994-12-10T00:00:00,11,147,Cortes,...,2021-05-30T00:00:00,"Cortes, Nestor",Nestor,Nestor,210,L,New York Yankees,nya,NYY,5
9,Auburn,,2010-04-30T00:00:00,A,"Donaldson, Josh",Josh Donaldson,1985-12-08T00:00:00,1,147,Donaldson,...,2022-03-13T00:00:00,"Donaldson, Josh",Joshua,Josh,210,R,New York Yankees,nya,NYY,6


In [37]:
YANKEES_CURRENT_ROSTER.columns

Index(['college', 'end_date', 'pro_debut_date', 'status_code', 'name_full',
       'name_display_first_last', 'birth_date', 'height_inches', 'team_id',
       'name_last', 'bats', 'player_id', 'position_txt', 'primary_position',
       'jersey_number', 'starter_sw', 'start_date', 'name_display_last_first',
       'name_first', 'name_use', 'weight', 'throws', 'team_name', 'team_code',
       'team_abbrev', 'height_feet'],
      dtype='object')

In [38]:
YANKEES_CURRENT_ROSTER.loc[YANKEES_CURRENT_ROSTER['name_last'] == 'Judge']['player_id']

19    592450
Name: player_id, dtype: object

In [39]:
def teams_by_year(year):
    teams_url = MLB_URL + f'/named.team_all_season.bam?sport_code=%27mlb%27&all_star_sw=%27N%27&sort_order=name_asc&season=%27{year}%27'
    resp = requests.get(teams_url)
    teams_json = clean_json(resp)
    return DataFrame(teams_json['team_all_season']['queryResults']['row'])

In [40]:
# Specifically TEAMS by year!!! Not rosters on teams
teams_2020 = teams_by_year(2020)
teams_2020.head()

Unnamed: 0,venue_short,sport_id,league_abbrev,team_id,spring_league_id,active_sw,division,mlb_org_brief,season,first_year_of_play,...,address_city,team_code,mlb_org_abbrev,address_intl,time_zone_generic,website_url,sport_code_display,home_opener_time,mlb_org_short,league_id
0,Chase Field,1,NL,109,114,Y,W,D-backs,2020,1996,...,Phoenix,ari,ARI,N,MST,,Major League Baseball,9:40:00 PM,Arizona,104
1,Truist Park,1,NL,144,115,Y,E,Braves,2020,1871,...,Atlanta,atl,ATL,N,ET,,Major League Baseball,7:10:00 PM,Atlanta,104
2,Oriole Park,1,AL,110,115,Y,E,Orioles,2020,1901,...,Baltimore,bal,BAL,N,ET,,Major League Baseball,7:35:00 PM,Baltimore,103
3,Fenway Park,1,AL,111,115,Y,E,Red Sox,2020,1901,...,Boston,bos,BOS,N,ET,,Major League Baseball,7:30:00 PM,Boston,103
4,Wrigley Field,1,NL,112,114,Y,C,Cubs,2020,1874,...,Chicago,chn,CHC,N,CT,,Major League Baseball,7:10:00 PM,Chi Cubs,104


In [41]:
TEAMS_1900 = teams_by_year(1900)

In [42]:
teams_2020.columns

Index(['venue_short', 'sport_id', 'league_abbrev', 'team_id',
       'spring_league_id', 'active_sw', 'division', 'mlb_org_brief', 'season',
       'first_year_of_play', 'state', 'name_short', 'bis_team_code',
       'venue_id', 'name_display_short', 'name_display_long',
       'name_display_brief', 'sport_code_name', 'spring_league', 'league',
       'division_id', 'sport_code', 'time_zone_num', 'mlb_org',
       'name_display_full', 'all_star_sw', 'division_abbrev', 'name',
       'home_opener', 'phone_number', 'address_zip', 'time_zone_text',
       'venue_name', 'division_full', 'franchise_code', 'city',
       'time_zone_alt', 'address_state', 'name_abbrev', 'store_url',
       'file_code', 'address_line3', 'address_line2', 'address_province',
       'mlb_org_id', 'address_line1', 'spring_league_full',
       'spring_league_abbrev', 'last_year_of_play', 'address', 'league_full',
       'address_country', 'base_url', 'time_zone', 'address_city', 'team_code',
       'mlb_org_abbrev'

In [43]:
rosters_all = pd.concat([roster40(x) for x in teams_2020['mlb_org_id']], ignore_index=True)
rosters_all.columns

Index(['college', 'end_date', 'pro_debut_date', 'status_code', 'name_full',
       'name_display_first_last', 'birth_date', 'height_inches', 'team_id',
       'name_last', 'bats', 'player_id', 'position_txt', 'primary_position',
       'jersey_number', 'starter_sw', 'start_date', 'name_display_last_first',
       'name_first', 'name_use', 'weight', 'throws', 'team_name', 'team_code',
       'team_abbrev', 'height_feet'],
      dtype='object')

In [44]:
len(rosters_all.loc[rosters_all['college'] != ''])

601

Note the index on each ```roster40``` DataFrame isn't meaningful -- it's just a list of numbers 0 to 39 -- which is why we set the ```ignore_index``` to ```True```. This resets the index on the final, combined DataFrame.
Presumably, a player can only be on one team, and there shouldn't be any duplicates by player_id

In [45]:
rosters_all['player_id'].duplicated().any()

False

Perfect. Let's use ```player_id``` for our index value then.

In [46]:
rosters_all.set_index('player_id', inplace=True)
# rosters_all.columns

In [47]:
rosters_all.loc[rosters_all['name_full'] == 'Ford, Mike']

Unnamed: 0_level_0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
645801,Princeton,,2019-04-18T00:00:00,A,"Ford, Mike",Mike Ford,1992-07-04T00:00:00,0,144,Ford,...,2022-06-10T00:00:00,"Ford, Mike",Michael,Mike,225,R,Atlanta Braves,atl,ATL,6


Now that we've got ```player_ids``` for every player in the game, we're on to step(3): querying individual stats. The format is similar:

In [48]:
# Change the year in the 'season=' argument to see stats from different years
def season_hitting_data(player_id, season=2020):
    player_url = MLB_URL + f'/named.sport_hitting_tm.bam?league_list_id=%27mlb%27&game_type=%27R%27&season=%27{season}%27&player_id=%27{player_id}%27'
    resp = requests.get(player_url)
    return clean_json(resp)['sport_hitting_tm']['queryResults']['row']

The function only seems to work on certain players (hitters mainly)

In [49]:
# Juan Soto
season_hitting_data(665742)

{'sport_id': '1',
 'league_short': 'National',
 'hr': '13',
 'team_id': '120',
 'season': '2020',
 'ab': '154',
 'hldr': '27',
 'league': 'NL',
 'sport_code': 'mlb',
 'ao': '23',
 'slg': '.695',
 'team_full': 'Washington Nationals',
 'ops': '1.185',
 'team_abbrev': 'WSH',
 'hbp': '1',
 'rbi': '37',
 'go_ao': '2.13',
 'hfly': '9',
 'lob': '54',
 'xbh': '27',
 'end_date': '2021-04-20T00:00:00',
 'bb': '41',
 'np': '827',
 'hgnd': '18',
 'roe': '3',
 'sb': '6',
 'player_id': '665742',
 'avg': '.351',
 'sf': '0',
 'sac': '0',
 'wo': '0',
 'team_short': 'Washington',
 'hpop': '0',
 'so': '28',
 'gidp_opp': '29',
 'gidp': '1',
 'ppa': '4.22',
 'd': '14',
 'tpa': '196',
 'league_full': 'National League',
 'g': '47',
 'h': '54',
 'ibb': '12',
 'go': '49',
 'team_seq': '1.0',
 'tb': '107',
 'cs': '2',
 'r': '39',
 't': '0',
 'babip': '.363',
 'obp': '.490',
 'sport': 'MLB',
 'league_id': '104'}

In [50]:
# Change the year in the 'season=' argument to see stats from different years
def season_hitting_row(player_id, season=2020):
    player_url = MLB_URL + f'/named.sport_hitting_tm.bam?league_list_id=%27mlb%27&game_type=%27R%27&player_id=%27{player_id}%27&season=%27{season}%27'
    resp = requests.get(player_url)
    # .T returns the transpose!
    # .to_frame() turns a Series to DataFrame object
    return Series(clean_json(resp)['sport_hitting_tm']['queryResults']['row']).to_frame(player_id).T

In [51]:
# Juan Soto, 2020 stats
season_hitting_row(665742)

Unnamed: 0,sport_id,league_short,hr,team_id,season,ab,hldr,league,sport_code,ao,...,go,team_seq,tb,cs,r,t,babip,obp,sport,league_id
665742,1,National,13,120,2020,154,27,NL,mlb,23,...,49,1.0,107,2,39,0,0.363,0.49,MLB,104


In [52]:
# Juan Soto, 2022 stats
season_hitting_row(665742, season=2022)

Unnamed: 0,sport_id,league_short,hr,team_id,season,ab,hldr,league,sport_code,ao,...,go,team_seq,tb,cs,r,t,babip,obp,sport,league_id
665742,1,National,14,120,2022,248,18,NL,mlb,60,...,89,1.0,107,2,38,0,0.207,0.365,MLB,104


In [53]:
def season_hitting_row2(player_id, season=2020):
    player_url = MLB_URL + f'/named.sport_hitting_tm.bam?league_list_id=%27mlb%27&game_type=%27R%27&player_id=%27{player_id}%27&season=%27{season}%27'
    resp = requests.get(player_url)
    qr = clean_json(resp)['sport_hitting_tm']['queryResults']
    
    if 'row' in qr: 
        # .T returns the transpose!
        # .to_frame() turns a Series to DataFrame object
        return Series(qr['row']).to_frame(player_id).T
    else:
        return DataFrame()

In [54]:
season_hitting_row2(000000) # not a real player
# produces an empty DF

The second issue is if we get more than one row back, which happens if a player was traded in season (technically this data is at the player-season-team level). For example, Mike Ford (```player_id``` 645801) was traded mid 2022 from the Giants to the Mariners.

In [55]:
season_hitting_row2(645801, season=2022) #ford, traded midseason

Unnamed: 0,0,1
645801,"{'sport_id': '1', 'league_short': 'National', ...","{'sport_id': '1', 'league_short': 'American', ..."


In [56]:
season_hitting_data(645801, season=2022)

[{'sport_id': '1',
  'league_short': 'National',
  'hr': '0',
  'team_id': '137',
  'season': '2022',
  'ab': '4',
  'hldr': '1',
  'league': 'NL',
  'sport_code': 'mlb',
  'ao': '3',
  'slg': '.250',
  'team_full': 'San Francisco Giants',
  'ops': '.500',
  'team_abbrev': 'SF',
  'hbp': '0',
  'rbi': '2',
  'go_ao': '0.00',
  'hfly': '0',
  'lob': '1',
  'xbh': '0',
  'end_date': '2022-05-12T00:00:00',
  'bb': '0',
  'np': '13',
  'hgnd': '0',
  'roe': '0',
  'sb': '0',
  'player_id': '645801',
  'avg': '.250',
  'sf': '0',
  'sac': '0',
  'wo': '0',
  'team_short': 'San Francisco',
  'hpop': '0',
  'so': '0',
  'gidp_opp': '0',
  'gidp': '0',
  'ppa': '3.25',
  'd': '0',
  'tpa': '4',
  'league_full': 'National League',
  'g': '1',
  'h': '1',
  'ibb': '0',
  'go': '0',
  'team_seq': '2.0',
  'tb': '1',
  'cs': '0',
  'r': '0',
  't': '0',
  'babip': '.250',
  'obp': '.250',
  'sport': 'MLB',
  'league_id': '104'},
 {'sport_id': '1',
  'league_short': 'American',
  'hr': '0',
  'team

We can it's returning a *list* of hitting dicts. Whereas in the regular (non traded) case it's returning just the one dict. Let's handle this by (1) checking on what we get back from theJSON, and (2) wrapping it inside a list in the one player case sp we can treat both cases the same.
The relevant parts are in the two ```if``` statements

In [57]:
def season_hitting_rows(player_id, season = 2020):
    player_url = MLB_URL + f'/named.sport_hitting_tm.bam?league_list_id=%27mlb%27&game_type=%27R%27&season=%27{season}%27&player_id=%27{player_id}%27'
    resp = requests.get(player_url)
    qr = clean_json(resp)['sport_hitting_tm']['queryResults']
    
    if 'row' in qr:
        raw_data = qr['row']
        if type(raw_data) is dict:
            raw_data = [raw_data]
        return DataFrame(raw_data)
    else:
        return DataFrame()

In [58]:
# Mike Ford's stats for each team , nice. 
season_hitting_rows(645801, season=2022)

Unnamed: 0,sport_id,league_short,hr,team_id,season,ab,hldr,league,sport_code,ao,...,go,team_seq,tb,cs,r,t,babip,obp,sport,league_id
0,1,National,0,137,2022,4,1,NL,mlb,3,...,0,2.0,1,0,0,0,0.25,0.25,MLB,104
1,1,American,0,136,2022,29,3,AL,mlb,4,...,8,1.0,6,0,1,0,0.294,0.368,MLB,103


In [59]:
# Now, let's make sure this works for our regular case
season_hitting_rows(665742, season=2022) # Juan Soto, 2022 stats

Unnamed: 0,sport_id,league_short,hr,team_id,season,ab,hldr,league,sport_code,ao,...,go,team_seq,tb,cs,r,t,babip,obp,sport,league_id
0,1,National,14,120,2022,248,18,NL,mlb,60,...,89,1.0,107,2,38,0,0.207,0.365,MLB,104


Now we can go through our full roster, finding each's player's ```player_id```, and using to query thier 2020 hitting stats and bind them together

In [60]:
rosters_all.loc[(rosters_all['position_txt'] != 'P') & (rosters_all['name_last'] == 'Judge')]

Unnamed: 0_level_0,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,name_last,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
player_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
592450,Fresno State,,2016-08-13T00:00:00,A,"Judge, Aaron",Aaron Judge,1992-04-26T00:00:00,7,147,Judge,...,2016-08-13T00:00:00,"Judge, Aaron",Aaron,Aaron,282,R,New York Yankees,nya,NYY,6


In [61]:
hitting_2022 = pd.concat([season_hitting_rows(x, season=2022) for x in rosters_all.index], ignore_index=True)

In [62]:
for x in rosters_all.index:
    if x == '592450':
        print(type(x))

<class 'str'>


In [63]:
hitting_2022.columns

Index(['sport_id', 'league_short', 'hr', 'team_id', 'season', 'ab', 'hldr',
       'league', 'sport_code', 'ao', 'slg', 'team_full', 'ops', 'team_abbrev',
       'hbp', 'rbi', 'go_ao', 'hfly', 'lob', 'xbh', 'end_date', 'bb', 'np',
       'hgnd', 'roe', 'sb', 'player_id', 'avg', 'sf', 'sac', 'wo',
       'team_short', 'hpop', 'so', 'gidp_opp', 'gidp', 'ppa', 'd', 'tpa',
       'league_full', 'g', 'h', 'ibb', 'go', 'team_seq', 'tb', 'cs', 'r', 't',
       'babip', 'obp', 'sport', 'league_id'],
      dtype='object')

In [64]:
for x in hitting_2022['team_abbrev']:
    print(x)

ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
ARI
PIT
ARI
ARI
ARI
ARI
ATL
ATL
ATL
ATL
ATL
ATL
SF
SEA
ATL
ATL
ATL
ATL
ATL
ATL
ATL
STL
ATL
ATL
BOS
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BAL
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
BOS
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CHC
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CWS
CIN
CIN
CIN
CIN
SEA
SF
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CIN
NYM
CIN
CIN
CIN
CIN
CIN
CIN
CIN
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
CLE
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
COL
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
DET
HOU
HOU
HOU
HOU
HOU
HOU
SF
HOU
HOU
HOU
HOU
HOU
HOU
HOU
HOU
HOU
LAA
HOU
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
KC
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAA
LAD
LA

In [65]:
rosters_all['name_full']

player_id
666179            Beer, Seth
518516    Bumgarner, Madison
605200          Davies, Zach
666818           Frias, Luis
643316             Fry, Paul
                 ...        
665742            Soto, Juan
544931    Strasburg, Stephen
676194    Tetreault, Jackson
657041          Thomas, Lane
607179         Weems, Jordan
Name: name_full, Length: 1204, dtype: object

### I found out the ```hr``` and ```player_id``` columns was a string when I went to sort by highest homeruns so far because I knew Aaron Judge had 25 at the time of this (6/20/2022) so had to convert it to an int in order to be able to sort the values 

In [66]:
hitting_2022['hr'] = (hitting_2022['hr']).astype(int)
(hitting_2022.loc[hitting_2022['team_abbrev'] == 'NYY', ['player_id', 'avg', 'obp', 'slg', 'ops', 'hr']]).sort_values(by='hr', ascending=False)

Unnamed: 0,player_id,avg,obp,slg,ops,hr
357,592450,0.302,0.379,0.663,1.042,27
361,519203,0.231,0.336,0.51,0.846,19
362,519317,0.245,0.333,0.48,0.813,14
363,650402,0.26,0.309,0.512,0.821,13
353,608336,0.18,0.295,0.36,0.655,9
350,572761,0.265,0.375,0.824,1.199,6
351,518626,0.233,0.327,0.386,0.714,6
359,518934,0.26,0.346,0.399,0.746,6
364,624431,0.283,0.339,0.478,0.817,6
356,543309,0.176,0.222,0.324,0.546,4


In [67]:
hitting_2022.loc[hitting_2022['team_abbrev'] == 'WSH']

Unnamed: 0,sport_id,league_short,hr,team_id,season,ab,hldr,league,sport_code,ao,...,go,team_seq,tb,cs,r,t,babip,obp,sport,league_id
550,1,National,3,120,2022,72,5,NL,mlb,18,...,16,1.0,25,0,8,0,0.261,0.288,MLB,104
551,1,National,0,120,2022,30,3,NL,mlb,7,...,10,1.0,6,0,1,0,0.227,0.194,MLB,104
552,1,National,11,120,2022,254,35,NL,mlb,63,...,79,1.0,120,1,37,1,0.311,0.38,MLB,104
553,1,National,7,120,2022,242,28,NL,mlb,52,...,71,1.0,92,0,32,0,0.301,0.332,MLB,104
554,1,National,0,120,2022,118,15,NL,mlb,36,...,26,1.0,34,1,11,2,0.295,0.264,MLB,104
555,1,National,0,120,2022,25,0,NL,mlb,4,...,12,1.0,2,0,2,0,0.125,0.115,MLB,104
556,1,National,6,120,2022,266,39,NL,mlb,76,...,73,1.0,101,0,22,0,0.294,0.28,MLB,104
557,1,National,2,120,2022,82,13,NL,mlb,16,...,22,1.0,38,2,9,0,0.397,0.341,MLB,104
558,1,National,0,120,2022,293,38,NL,mlb,79,...,80,1.0,98,2,39,2,0.331,0.314,MLB,104
559,1,National,4,120,2022,178,28,NL,mlb,26,...,59,1.0,70,1,18,0,0.341,0.314,MLB,104


### Awesome, now we could run this on all players(not just the top 100) or all years if we want too. If we look at the other endpoints, there's also career hitting stats as well as (season and career) pitching stats. 

##### From the ```season_hitting_rows``` function, the next cell of the ```if 'row' in qr``` part is in its own separate helper function.

In [68]:
def qr_to_df(qr):
    if 'row' in qr:
        raw_data = qr['row']
        if type(raw_data) is dict:
            raw_data = [raw_data]
        return DataFrame(raw_data)
    else:
        return DataFrame()

In [69]:
# rewritten with the new 'qr_to_df' function
def season_hitting_rows(player_id, season=2020):
    player_url = MLB_URL + f'/named.sport_hitting_tm.bam?league_list_id=%27mlb%27&game_type=%27R%27&season=%27{season}%27&player_id=%27{player_id}%27'
    resp = requests.get(player_url)
    qr = clean_json(resp)['sport_hitting_tm']['queryResults']
    return qr_to_df(qr)

In [70]:
def season_pitching_rows(player_id, season=2020):
    player_url = MLB_URL + f'/named.sport_pitching_tm.bam?league_list_id=%27mlb%27&game_type=%27R%27&season={season}&player_id={player_id}'
    resp = requests.get(player_url)
    qr = clean_json(resp)['sport_pitching_tm']['queryResults']
    return qr_to_df(qr)

In [71]:
def career_hitting_rows(player_id):
    player_url = MLB_URL + f'/named.sport_career_hitting.bam?league_list_id=%27mlb%27&game_type=%27R%27&player_id={player_id}'
    resp = requests.get(player_url)
    qr = clean_json(resp)['sport_career_hitting']['queryResults']
    return qr_to_df(qr)

In [72]:
def career_pitching_rows(player_id):
    player_url = MLB_URL + f'/named.sport_career_pitching.bam?league_list_id=%27mlb%27&game_type=%27R%27&player_id={player_id}'
    resp = requests.get(player_url)
    qr = clean_json(resp)['sport_career_pitching']['queryResults']
    return qr_to_df(qr)

### Next, we call them. I'm going to skip the rewritten function```season_hitting_rows``` here

In [73]:
hitting_2022 = pd.concat([season_hitting_rows(x, season=2022) for x in rosters_all.index], ignore_index=True)
hitting_2022

Unnamed: 0,sport_id,league_short,hr,team_id,season,ab,hldr,league,sport_code,ao,...,go,team_seq,tb,cs,r,t,babip,obp,sport,league_id
0,1,National,1,109,2022,81,12,NL,mlb,18,...,23,1.0,23,0,4,0,.281,.301,MLB,104
1,1,National,0,109,2022,41,4,NL,mlb,8,...,10,1.0,10,0,3,0,.346,.319,MLB,104
2,1,National,0,109,2022,24,0,NL,mlb,5,...,15,1.0,2,0,2,0,.100,.154,MLB,104
3,1,National,0,109,2022,80,4,NL,mlb,13,...,26,1.0,17,0,6,0,.283,.244,MLB,104
4,1,National,2,109,2022,101,9,NL,mlb,18,...,27,1.0,32,0,10,2,.262,.308,MLB,104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559,1,National,4,120,2022,178,28,NL,mlb,26,...,59,1.0,70,1,18,0,.341,.314,MLB,104
560,1,National,1,120,2022,159,15,NL,mlb,41,...,42,1.0,50,0,24,1,.327,.303,MLB,104
561,1,National,2,120,2022,201,37,NL,mlb,66,...,63,1.0,70,0,18,0,.277,.314,MLB,104
562,1,National,14,120,2022,248,18,NL,mlb,60,...,89,1.0,107,2,38,0,.207,.365,MLB,104


In [74]:
pitching_2022 = pd.concat([season_pitching_rows(x, season=2022) for x in rosters_all.index], ignore_index=True)
pitching_2022

Unnamed: 0,sport_id,bqs,league_short,hr,team_id,whip,pip,season,rs9,ab,...,w,babip,bb9,hb,pk,obp,sport,league_id,db,tr
0,1,0,National,12,109,1.37,17.2,2022,3.15,297,...,3,.289,2.78,2,0,.323,MLB,104,21,2
1,1,0,National,10,109,1.20,16.6,2022,3.12,279,...,2,.258,3.00,2,0,.297,MLB,104,12,1
2,1,2,National,0,109,2.83,23.7,2022,4.50,27,...,0,.417,10.50,1,0,.514,MLB,104,3,1
3,1,0,American,1,110,1.33,17.1,2022,3.00,43,...,0,.258,5.25,2,0,.340,MLB,103,1,1
4,1,0,National,0,109,4.00,27.0,2022,0.00,5,...,0,.667,18.00,0,0,.571,MLB,104,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
627,1,2,National,7,120,1.41,15.2,2022,3.58,131,...,1,.311,1.93,4,0,.347,MLB,104,2,0
628,1,3,National,6,120,1.33,15.9,2022,4.78,101,...,2,.217,3.76,0,1,.313,MLB,104,3,0
629,1,0,National,1,120,2.14,17.8,2022,3.86,20,...,0,.500,3.86,1,0,.478,MLB,104,3,0
630,1,0,National,3,120,1.64,16.5,2022,9.82,48,...,1,.286,2.45,0,0,.346,MLB,104,1,0


In [75]:
hitting_career = pd.concat([career_hitting_rows(x) for x in rosters_all.index], ignore_index=True)
hitting_career

Unnamed: 0,bb,sport_id,np,hgnd,roe,hr,team_count,sb,player_id,avg,...,t,hbp,rbi,babip,go_ao,hfly,obp,sport,lob,xbh
0,9,1,370,4,0,2,1,0,666179,.233,...,0,3,12,.306,1.15,2,.320,MLB,35,6
1,49,1,2765,32,4,19,2,0,518516,.172,...,0,2,65,.269,1.43,23,.232,MLB,355,38
2,9,1,947,14,5,0,2,0,605200,.126,...,0,0,9,.201,2.32,0,.161,MLB,98,4
3,0,1,0,0,0,0,1,0,666818,,...,0,0,0,,,0,,MLB,0,0
4,0,1,0,0,0,0,1,0,643316,,...,0,0,0,,,0,,MLB,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
993,22,1,1123,17,2,6,2,3,660688,.259,...,0,4,31,.276,0.89,11,.318,MLB,152,22
994,430,1,9551,187,13,112,1,37,665742,.289,...,8,8,343,.313,1.47,118,.423,MLB,807,223
995,24,1,1895,30,7,4,1,0,544931,.152,...,0,1,29,.223,4.42,7,.198,MLB,230,14
996,64,1,2321,36,6,20,2,8,657041,.232,...,4,3,71,.280,0.97,26,.322,MLB,227,50


In [76]:
pitching_career = pd.concat([career_pitching_rows(x) for x in rosters_all.index], ignore_index=True)
pitching_career

Unnamed: 0,sport_id,bqs,hr,whip,pip,rs9,ab,qs,hldr,ip,...,s,w,babip,bb9,hb,pk,obp,sport,db,tr
0,1,36,241,1.13,15.7,4.36,7935,202,903,2108.1,...,21880,130,.286,2.15,81,30,.288,MLB,393,38
1,1,32,114,1.32,16.4,4.83,3461,60,484,906.2,...,9214,58,.291,2.97,28,6,.322,MLB,171,23
2,1,2,0,2.57,22.5,2.89,37,0,5,9.1,...,117,0,.375,11.57,1,0,.490,MLB,4,1
3,1,45,15,1.43,18.0,4.31,669,0,66,177.1,...,1929,7,.307,4.92,17,1,.345,MLB,23,2
4,1,10,43,1.18,16.8,3.97,1279,29,137,344.1,...,3667,14,.275,3.37,20,1,.298,MLB,59,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
683,1,41,111,1.25,15.5,4.09,2728,33,319,714.1,...,7239,34,.278,2.49,36,2,.310,MLB,125,6
684,1,3,22,1.44,16.2,4.60,343,2,35,88.0,...,898,5,.248,3.68,6,1,.344,MLB,12,2
685,1,21,149,1.10,15.9,5.12,5449,152,597,1470.0,...,15291,113,.296,2.41,53,1,.281,MLB,223,23
686,1,0,3,1.64,16.5,9.82,48,1,7,11.0,...,112,1,.286,2.45,0,0,.346,MLB,1,0


In [78]:
basic_hitting_stats = hitting_2022[['player_id', 'avg', 'obp', 'slg', 'ops', 'hr']]
basic_hitting_stats

Unnamed: 0,player_id,avg,obp,slg,ops,hr
0,666179,.210,.301,.284,.585,1
1,606993,.220,.319,.244,.563,0
2,660634,.083,.154,.083,.237,0
3,645444,.188,.244,.213,.457,0
4,669450,.178,.308,.317,.625,2
...,...,...,...,...,...,...
559,628450,.270,.314,.393,.707,4
560,645302,.239,.303,.314,.617,1
561,660688,.254,.314,.348,.662,2
562,665742,.214,.365,.431,.796,14


In [79]:
rosters_all_new_index = rosters_all.reset_index()
rosters_all_new_index

Unnamed: 0,player_id,college,end_date,pro_debut_date,status_code,name_full,name_display_first_last,birth_date,height_inches,team_id,...,start_date,name_display_last_first,name_first,name_use,weight,throws,team_name,team_code,team_abbrev,height_feet
0,666179,Clemson,,2021-09-10T00:00:00,RM,"Beer, Seth",Seth Beer,1996-09-18T00:00:00,3,109,...,2021-09-10T00:00:00,"Beer, Seth",Seth,Seth,213,R,Arizona Diamondbacks,ari,ARI,6
1,518516,,,2009-09-08T00:00:00,A,"Bumgarner, Madison",Madison Bumgarner,1989-08-01T00:00:00,4,109,...,2019-12-17T00:00:00,"Bumgarner, Madison",Madison,Madison,257,L,Arizona Diamondbacks,ari,ARI,6
2,605200,,,2015-09-02T00:00:00,A,"Davies, Zach",Zach Davies,1993-02-07T00:00:00,0,109,...,2022-03-24T00:00:00,"Davies, Zach",Zachary,Zach,180,R,Arizona Diamondbacks,ari,ARI,6
3,666818,,,2021-09-19T00:00:00,RM,"Frias, Luis",Luis Frias,1998-05-23T00:00:00,3,109,...,2020-11-20T00:00:00,"Frias, Luis",Luis,Luis,245,R,Arizona Diamondbacks,ari,ARI,6
4,643316,"St. Clair County CC, MI",,2018-06-29T00:00:00,RM,"Fry, Paul",Paul Fry,1992-07-26T00:00:00,0,109,...,2022-05-18T00:00:00,"Fry, Paul",Paul,Paul,205,L,Arizona Diamondbacks,ari,ARI,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1199,665742,,,2018-05-15T00:00:00,A,"Soto, Juan",Juan Soto,1998-10-25T00:00:00,2,120,...,2018-05-20T00:00:00,"Soto, Juan",Juan,Juan,224,L,Washington Nationals,was,WSH,6
1200,544931,San Diego State,,2010-06-08T00:00:00,D15,"Strasburg, Stephen",Stephen Strasburg,1988-07-20T00:00:00,5,120,...,2019-12-09T00:00:00,"Strasburg, Stephen",Stephen,Stephen,239,R,Washington Nationals,was,WSH,6
1201,676194,State College of Florida,,2022-06-14T00:00:00,A,"Tetreault, Jackson",Jackson Tetreault,1996-06-03T00:00:00,5,120,...,2022-06-14T00:00:00,"Tetreault, Jackson",Jackson,Jackson,189,R,Washington Nationals,was,WSH,6
1202,657041,,,2019-04-17T00:00:00,A,"Thomas, Lane",Lane Thomas,1995-08-23T00:00:00,0,120,...,2021-07-30T00:00:00,"Thomas, Lane",Lane,Lane,191,R,Washington Nationals,was,WSH,6


In [80]:
new_basic_hitting_stats = pd.merge(basic_hitting_stats, rosters_all_new_index[['player_id', 'name_full', 'team_abbrev']])

In [81]:
new_basic_hitting_stats['hr'] = (new_basic_hitting_stats['hr']).astype(int)
new_basic_hitting_stats['ops'] = (new_basic_hitting_stats['ops']).astype(float)
new_basic_hitting_stats['slg'] = [float(x) if x != '.---' else .000 for x in new_basic_hitting_stats['slg']]
new_basic_hitting_stats['avg'] = [float(x) if x != '.---' else .000 for x in new_basic_hitting_stats['avg']]
new_basic_hitting_stats['player_id'] = (new_basic_hitting_stats['player_id']).astype(int)
(new_basic_hitting_stats.loc[new_basic_hitting_stats['team_abbrev'] == 'NYY']).sort_values(by='ops', ascending =False)

Unnamed: 0,player_id,avg,obp,slg,ops,hr,name_full,team_abbrev
350,572761,0.265,.375,0.824,1.199,6,"Carpenter, Matt",NYY
357,592450,0.302,.379,0.663,1.042,27,"Judge, Aaron",NYY
361,519203,0.231,.336,0.51,0.846,19,"Rizzo, Anthony",NYY
363,650402,0.26,.309,0.512,0.821,13,"Torres, Gleyber",NYY
364,624431,0.283,.339,0.478,0.817,6,"Trevino, Jose",NYY
362,519317,0.245,.333,0.48,0.813,14,"Stanton, Giancarlo",NYY
360,641796,0.231,.333,0.462,0.795,1,"Locastro, Tim",NYY
359,518934,0.26,.346,0.399,0.746,6,"LeMahieu, DJ",NYY
351,518626,0.233,.327,0.386,0.714,6,"Donaldson, Josh",NYY
354,503556,0.244,.300,0.39,0.69,2,"Gonzalez, Marwin",NYY


In [82]:
from os import path
# import datetime
# year = datetime.date.today().year
# print(year)
DATA_DIR = "C:/Users/jake_/Desktop/baseball_etl/data"

In [83]:
# for df in [f'hitting_{year}', f'pitching_{year}', 'career_hitting', 'career_pitching']:
#     (df.to_csv(path.join(DATA_DIR, f'{df}.csv'), index=False))

In [84]:
# data_all = {
#     'data1': hitting_2020,
#     'data2': hitting_career,
#     'data3': pitching_2022,
#     'data4': pitching_career
# }

In [85]:
# for i in range(1, len(data_all) + 1):
#     data_i = data_all['data' + str(i)]
#     data_i.to_csv('data' + str(i) + '.csv')  

##### From here we can store them in a SQL Database, or as csv's or whatever. Note usually we *would* want to store this data, as opposed to treating mlb.com's API as our storage that we calll whenever we want to do some analysis.

##### First, it's faster. It's much more efficient to store data locally (or even get it directly from a database online) than it is to re-hit a networked API every time we need the data

##### Second, it's the polite thing to do. Hosting and maintaining an API costs money. It's usually not a big deal playing around with it or grabbing data occasionally, but we don't need to overload servers when we don't have to. 

##### Finally, storing the data means you'd have it if anything ever happened to the API.

In [86]:
pitching_career.to_csv(path.join(DATA_DIR, f"mlb_pitching_career.csv"), index=False)

In [87]:
pitching_2022.to_csv(path.join(DATA_DIR, f"mlb_pitching_2022.csv"), index=False)

In [88]:
hitting_career.to_csv(path.join(DATA_DIR, f"mlb_hitting_career.csv"), index=False)

In [89]:
hitting_2022.to_csv(path.join(DATA_DIR, f"mlb_hitting_2022.csv"), index=False)