# A05. Rosters
This extracts batting orders and rosters from the MLB Stats API.
- Type: Data
- Run Frequency: Pre-contest, refresh of yesterday
- Sources:
    - MLB Stats API
- Dates:
    - Created: 9/23/2023
    - Updated: 4/21/2024

##### 1. Batting Orders

In [1]:
# Creates dataframe of players and their spot in the batting order
def order(gamePk, teamId, date, team="away"):
    """Build a dataframe of one team's boxscore players and their batting-order value.

    Parameters
    ----------
    gamePk : game identifier passed to the "game" endpoint.
    teamId : team identifier; stored as-is in the ``teamId`` column.
    date : game date; stored as-is in the ``date`` column.
    team : which side of the boxscore to read, "away" (default) or "home".

    Returns
    -------
    pandas.DataFrame with columns ``id``, ``fullName``, ``position``,
    ``status``, ``order``, ``gamePk``, ``date``, ``teamId`` -- one row per
    player listed in the boxscore for that side.
    """
    # Fetch the game feed once; every player's fields are read from this single response
    players = statsapi.get("game", {"gamePk": gamePk})['liveData']['boxscore']['teams'][team]['players']

    rows = []
    for player in players.values():
        rows.append([
            player['person']['id'],
            player['person']['fullName'],
            player['position']['name'],
            player['status']['description'],
            # 'battingOrder' is absent for some players (presumably those not in the lineup)
            player.get('battingOrder', np.nan),
        ])

    # Create dataframe
    df = pd.DataFrame(rows, columns=['id', 'fullName', 'position', 'status', 'order'])

    # Game ID
    df['gamePk'] = gamePk
    # Date
    df['date'] = date
    # Team ID #
    df['teamId'] = teamId

    return df

In [None]:
def orders(team_map, game_df, row):
    """Save a batting-order CSV for each team in the game found at ``row`` of ``game_df``.

    ``team_map`` supplies the ``BBREFTEAM`` name (looked up by ``teamId``) used in
    each file name. Nothing is returned; the result is the files written under
    ``baseball_path``.
    """
    # Game details for this row
    game = game_df.loc[row]
    date = game['date']
    game_id = game['game_id']
    away_id = game['away_id']
    home_id = game['home_id']

    # Dated output folder (created if it does not exist yet)
    out_dir = os.path.join(baseball_path, "A05. Rosters", "1. Batting Orders", f"Batting Orders {date}")
    os.makedirs(out_dir, exist_ok=True)
    time.sleep(1)

    # Away team first, then home team
    for team_id in (away_id, home_id):
        side = "away" if team_id == away_id else "home"
        # Scrape this team's batting order
        order_df = order(game_id, team_id, date, side)
        # Team name used in the file name
        team_name = team_map.loc[team_map['teamId'] == team_id, 'BBREFTEAM'].values[0]
        # To csv
        csv_path = os.path.join(out_dir, f"Batting Order {team_name} {game_id}.csv")
        order_df.to_csv(csv_path, index=False, encoding='iso-8859-1')
        # To database
        # order_df.to_sql(f'Batting Order {team} {game_id}', con=engine, index=False, if_exists='replace')

##### 2. Rosters

In [5]:
# Creates a roster
def roster(teamId, date, rosterType):
    """Build a dataframe of a team's roster on a given date.

    Parameters
    ----------
    teamId : team identifier passed to the "team_roster" endpoint.
    date : date string, either "YYYYMMDD" or "YYYY-MM-DD".
    rosterType : roster type parameter (options include active, 40Man,
        depthChart, fullSeason, allTime, and more, available via
        ``statsapi.meta("rosterTypes")``).

    Returns
    -------
    pandas.DataFrame with columns ``id``, ``fullName``, ``firstName``,
    ``lastName``, ``position``, ``batSide``, ``pitchHand``, ``date`` (dashes
    removed) and ``teamId``. Fields missing from the response are filled with
    the string "Missing".
    """
    # Strip dashes first so the slicing below is correct for both accepted formats
    date = date.replace("-", "")
    # Reformat date to fit function
    date_dash = f"{date[:4]}-{date[4:6]}-{date[6:8]}"

    players = statsapi.get(
        "team_roster",
        {"teamId": teamId, "rosterType": rosterType, "date": date_dash, "hydrate": "person"},
    )['roster']

    # One record per rostered player, filling any missing field with "Missing"
    rows = []
    for player in players:
        person = player.get('person', {})
        rows.append({
            'id': person.get('id', 'Missing'),
            'fullName': person.get('fullName', 'Missing'),
            'firstName': person.get('firstName', 'Missing'),
            'lastName': person.get('lastName', 'Missing'),
            'position': player.get('position', {}).get('name', 'Missing'),
            'batSide': person.get('batSide', {}).get('description', 'Missing'),
            'pitchHand': person.get('pitchHand', {}).get('description', 'Missing'),
        })

    # Create the dataframe (explicit columns keep the schema even for an empty roster)
    df = pd.DataFrame(
        rows,
        columns=['id', 'fullName', 'firstName', 'lastName', 'position', 'batSide', 'pitchHand'],
    )

    # Date
    df['date'] = date
    # Team ID #
    df['teamId'] = teamId

    return df

In [None]:
def rosters(team_map, game_df, row):
    """Save a 40-man roster CSV for each team in the game found at ``row`` of ``game_df``.

    ``team_map`` supplies the ``BBREFTEAM`` name (looked up by ``teamId``) used in
    each file name. Nothing is returned; the result is the files written under
    ``baseball_path``.
    """
    ### Extract info
    game = game_df.loc[row]
    # Date
    date = game['date']
    # Team IDs
    away_id = game['away_id']
    home_id = game['home_id']

    # Create path
    out_dir = os.path.join(baseball_path, "A05. Rosters", "2. Rosters", f"Rosters {date}")
    os.makedirs(out_dir, exist_ok=True)
    # NOTE(review): purpose of this pause is not visible here -- confirm before removing
    time.sleep(1)

    # Loop over teams in matchup
    for team_id in (away_id, home_id):
        # Extract team name
        bbrefteam = team_map.loc[team_map['teamId'] == team_id, 'BBREFTEAM'].values[0]

        # Scrape roster, keeping one row per player id
        roster_df = roster(team_id, date, "40Man").drop_duplicates('id')
        # To csv
        roster_df.to_csv(os.path.join(out_dir, f"Roster {bbrefteam} {date}.csv"), index=False, encoding='iso-8859-1')
        # # To database
        # roster_df.to_sql(f'Roster {bbrefteam} {date}', con=engine, index=False, if_exists='replace')

In [3]:
# import statsapi
# rosterTypes = statsapi.meta("rosterTypes")
# rosterTypes

[{'description': '40 man roster for a team',
  'lookupName': '40man',
  'parameter': '40Man'},
 {'description': 'Full roster including active and inactive players for a season',
  'lookupName': 'fullSeason',
  'parameter': 'fullSeason'},
 {'description': 'Full roster including active and inactive players',
  'lookupName': 'full',
  'parameter': 'fullRoster'},
 {'description': 'Non-Roster Invitees',
  'lookupName': 'nonRosterInvitees',
  'parameter': 'nonRosterInvitees'},
 {'description': 'Active roster for a team',
  'lookupName': 'active',
  'parameter': 'active'},
 {'description': 'All Time roster for a team',
  'lookupName': 'alltime',
  'parameter': 'allTime'},
 {'description': 'Depth chart for a team',
  'lookupName': 'active',
  'parameter': 'depthChart'},
 {'description': 'Roster for day of game',
  'lookupName': 'active',
  'parameter': 'gameday'},
 {'description': 'Coach roster for a team',
  'lookupName': 'active',
  'parameter': 'coach'}]