# 06. Rosters
Source: <br>
1. MLB Stats API <br>

Description: This scrapes roster information from the MLB Stats API. <br>
It can retrieve historical data, although it might miss some players.

### Box Score

In [6]:
# Creates box score variables
def create_box(gamePk):
    """Return the weather and wind strings from a game's box score.

    Parameters
    ----------
    gamePk
        Game identifier, passed straight to ``statsapi.boxscore_data``.

    Returns
    -------
    tuple
        ``(weather, wind)``. Each is the ``value`` of the box-score entry
        whose ``label`` is "Weather" / "Wind". If that entry cannot be read
        as exactly one value, the defaults "75 degrees, Clear." and
        "0 mph, L To R." are used instead.
    """
    # Flatten the 'gameBoxInfo' records of the box score into a dataframe
    box = pd.json_normalize(statsapi.boxscore_data(gamePk, timecode=None), record_path='gameBoxInfo')

    def _value_for(label, default):
        # KeyError: the 'label'/'value' column is absent.
        # ValueError: .item() needs exactly one matching row.
        try:
            return box.loc[box['label'] == label, "value"].item()
        except (KeyError, ValueError):
            return default

    weather = _value_for("Weather", "75 degrees, Clear.")
    wind = _value_for("Wind", "0 mph, L To R.")

    return weather, wind

### Batting Order

In [7]:
# Creates dataframe of players and their spot in the batting order
def create_order(gamePk, teamId, date, team="away"):
    """Build a dataframe of one team's box-score players and batting order.

    Parameters
    ----------
    gamePk
        Game identifier used to request the game feed.
    teamId, date
        Not used by this function; kept so existing callers keep working.
    team
        Key under the box score's ``teams`` entry, "away" (default) or "home".

    Returns
    -------
    pandas.DataFrame
        Columns ``id``, ``fullName``, ``position``, ``status`` and ``order``
        (NaN when the player has no ``battingOrder`` entry), left-merged with
        the module-level ``chadwick`` frame on ``id`` == ``key_mlbam``.
    """
    # Fetch the game feed once; every player's data, including the batting
    # order, is already in this snapshot, so no per-player request is needed.
    players = statsapi.get("game", {"gamePk": gamePk})['liveData']['boxscore']['teams'][team]['players']

    rows = []
    for info in players.values():
        rows.append([
            info['person']['id'],
            info['person']['fullName'],
            info['position']['name'],
            info['status']['description'],
            # Players without a batting-order entry get NaN
            info.get('battingOrder', np.nan),
        ])

    df = pd.DataFrame(rows, columns=['id', 'fullName', 'position', 'status', 'order'])
    # chadwick is defined elsewhere in the notebook; it must provide 'key_mlbam'
    df = df.merge(chadwick, left_on='id', right_on='key_mlbam', how='left')

    return df

### Rosters

In [8]:
# Creates a roster
def create_roster(teamId, date):
    """Return a team's active roster on a given date as a dataframe.

    Requests the ``team_roster`` endpoint (``rosterType="activeRoster"``,
    hydrated with ``person``) and keeps one row per roster entry with the
    columns ``id``, ``fullName``, ``firstName``, ``lastName``, ``position``,
    ``batSide`` and ``pitchHand``. Any field absent from the response is
    recorded as the string "Missing".
    """
    query = {"teamId": teamId, "rosterType": "activeRoster", "date": date, "hydrate": "person"}
    entries = statsapi.get("team_roster", query)['roster']

    # One list per output column, in output order
    columns = {
        'id': [],
        'fullName': [],
        'firstName': [],
        'lastName': [],
        'position': [],
        'batSide': [],
        'pitchHand': [],
    }

    for entry in entries:
        person = entry['person']
        # Plain person-level fields share their name with the output column
        for key in ('id', 'fullName', 'firstName', 'lastName'):
            columns[key].append(person.get(key, 'Missing'))
        columns['position'].append(entry['position'].get('name', 'Missing'))
        # Handedness is nested: take the 'description' of each sub-dict
        for key in ('batSide', 'pitchHand'):
            columns[key].append(person.get(key, {}).get('description', 'Missing'))

    return pd.DataFrame(columns)

In [9]:
# Creates all rosters
def create_rosters(x, team_map):
    """Build and save the away and home roster CSVs for a single game.

    Parameters
    ----------
    x
        Game record indexable by key. Keys read: ``game_date`` (a string;
        dashes are stripped for folder/file names), ``game_id``, ``venue_id``,
        ``game_type``, ``game_num``, ``summary`` and, for each of away/home,
        ``<side>_id``, ``<side>_name`` and ``<side>_probable_pitcher``.
    team_map
        Dataframe with at least ``FULLNAME`` and ``BBREFTEAM`` columns, used
        to translate the full team name into its short code.

    Side effects
    ------------
    Creates ``<baseball_path>/6. Rosters/Rosters<date>`` if needed, writes one
    CSV per team into it, and sleeps 3 seconds after each team.
    """
    team_map = team_map[['FULLNAME', 'BBREFTEAM']]

    # Folder and file names use the game date without dashes
    date = x['game_date'].replace("-", "")

    # Create roster directory; an already-existing directory is fine, any
    # other failure is raised here instead of surfacing later at to_csv
    directory = "Rosters" + date
    os.makedirs(os.path.join(baseball_path, "6. Rosters", directory), exist_ok=True)

    # Weather and wind come from the box score and are shared by both teams
    weather, wind = create_box(x['game_id'])

    for side in ("away", "home"):
        _write_team_roster(x, side, team_map, date, directory, weather, wind)
        time.sleep(3)


def _write_team_roster(x, side, team_map, date, directory, weather, wind):
    """Assemble one team's roster ("away" or "home") for game ``x`` and write it to CSV."""
    # Active roster plus everyone listed in the box score for this team
    roster = create_roster(x[side + '_id'], x['game_date'])
    order = create_order(x['game_id'], x[side + '_id'], date, side)

    # Outer merge keeps players who are in the box score but not on the roster
    roster = roster.merge(order, on='id', how='outer', suffixes=("", "_fill"))

    # Fill in missings. Assign back to the column: an inplace fillna on a
    # column selection is chained assignment and is not reliable in pandas.
    roster['fullName'] = roster['fullName'].fillna(roster['fullName_fill'])
    roster['position'] = roster['position'].fillna(roster['position_fill'])
    roster['firstName'] = roster['firstName'].fillna(roster['name_first'])
    roster['lastName'] = roster['lastName'].fillna(roster['name_last'])
    # NOTE(review): create_roster stores the handedness 'description' field,
    # while the fill value here is "R" - confirm downstream code expects this.
    roster['batSide'] = roster['batSide'].fillna("R")
    roster['pitchHand'] = roster['pitchHand'].fillna("R")

    # Determine starting pitcher
    roster['starter'] = (roster['fullName'] == x[side + '_probable_pitcher']).astype('int')

    # Game-level fields are repeated on every row
    roster['teamName'] = x[side + '_name']
    for column in ('venue_id', 'game_date', 'game_type', 'game_num', 'summary'):
        roster[column] = x[column]
    roster['weather'] = weather
    roster['wind'] = wind

    # Acquire team name in short form. The inner merge leaves no rows when the
    # team name is not in team_map, in which case the code falls back to "Missing".
    roster = roster.merge(team_map, left_on=['teamName'], right_on=['FULLNAME'], how='inner')
    try:
        teamname = roster['BBREFTEAM'].iloc[0]
    except IndexError:
        teamname = "Missing"

    # Read in depth chart
    depthfolder = "Depth" + date
    depthfile = "Depth_Chart_" + teamname + "_" + date + ".csv"
    depth_chart = pd.read_csv(os.path.join(baseball_path, "5. Depth Charts", depthfolder, depthfile), encoding='iso-8859-1')
    depth_chart = depth_chart[['Name', 'Leverage']]

    # Merge rosters with depth chart; players not on the chart get Leverage 0
    roster = roster.merge(depth_chart, left_on='fullName', right_on='Name', how='left')
    roster['Leverage'] = roster['Leverage'].fillna(0)

    roster = roster.drop(columns=['teamName', 'Name', 'fullName_fill', 'position_fill', 'name_first', 'name_last'])

    # Flag rows whose position text is longer than two characters; these are
    # meant to be the players that were not on the roster and had their
    # position filled from the box score.
    # NOTE(review): assumes roster positions are at most two characters - confirm.
    roster['missing'] = np.where(roster['position'].str.len() > 2, 1, 0)
    # Change from Pitcher to P to match
    roster['position'] = np.where(roster['position'] == 'Pitcher', 'P', roster['position'])

    # Write to csv
    filename = teamname + date + ".csv"
    roster.to_csv(os.path.join(baseball_path, "6. Rosters", directory, filename), encoding='iso-8859-1')

Note: Certain players (e.g., Trey Mancini, Astros, 9/15/22) don't appear in the active roster, but they do appear in the batting order. Gathering id, fullName, position, and batting order from the box score (which includes Mancini) and outer-merging that onto the roster keeps such players, albeit with some missing information. <br>
That information can be filled in later, and it beats not knowing he's on the roster at all. <br>
You'll just need to fill in firstName, lastName, batSide, and pitchHand, unless you can find a way to do this all at once. <br>


Worth exploring the MLB Stats API's data for: <br>
1) Weather <br>
2) Probables <br>
    Updates very quickly. Can use for locks <br>
3) Live stats (dashboard, maybe?) <br>
4) Final scores <br>
5) Full rosters (not just active)