In [25]:
import requests
import pandas as pd
from connections import AWS

In [26]:
""" INITIALIZE AWS CONNECTION """
# create the project AWS helper and open its connection; later cells use
# `aws_connection` for every S3 read and write
aws_connection = AWS()
aws_connection.connect()

[AWS]: Port 5433 is free.
[AWS]: Connected to RDS endpoint.


$\textbf{Statcast: Player Info Querying (Development)}$

Pulls player height and weight from the MLB Stats API (`statsapi.mlb.com`) and converts them to metres and kilograms for post-model analysis.

In [27]:
# imperial -> metric conversion factors
_CM_PER_FOOT = 30.48
_CM_PER_INCH = 2.54
_KG_PER_POUND = 0.45359237


def _feet_inches_to_metres(height_str: str) -> float:
    """ Converts a feet/inches string (feet, apostrophe, inches, double quote) into metres. """
    feet_part, apostrophe, remainder = str(height_str).partition("'")
    if not apostrophe:
        raise ValueError(f"Unrecognised height format: {height_str!r}")

    # everything between the apostrophe and the closing double quote is the inches field;
    # a missing inches field (feet only) is treated as zero inches
    inches_part = remainder.partition('"')[0].strip()
    feet = int(feet_part)
    inches = int(inches_part) if inches_part else 0

    return (feet * _CM_PER_FOOT + inches * _CM_PER_INCH) / 100


def get_player_bio(
        mlbam_id: int,
        url_stem: str = "https://statsapi.mlb.com/api/v1/people",
        timeout: float = 10.0
) -> dict:
    """ 
    Fetches player biography information from MLB API.

    The response is read as imperial values (height as a feet/inches string,
    weight as a number of pounds) and converted to metric before returning.

    Args:
        mlbam_id (int): The MLBAM ID of the player.
        url_stem (str): The base URL for the MLB API endpoint.
        timeout (float): Seconds to wait for the API before the request is abandoned.
    Returns:
        dict: 'name' (str), 'height' (float, metres), and 'mass' (float, kilograms).
    Raises:
        requests.HTTPError: If the API answers with an error status code.
        KeyError, IndexError: If the response lacks the expected fields.
        ValueError: If the height or weight value cannot be parsed.
    """

    # create player URL
    url = f"{url_stem}/{mlbam_id}"

    # make request to API
        # NOTE: requests never times out by default; without `timeout` a single
        #       stalled connection would block the caller indefinitely
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # parse response (imperial units)
    person = data['people'][0]
    name = person['fullName']
    height_ft = person['height']
    weight = person['weight']

    # metric conversions
    height = _feet_inches_to_metres(height_ft)
    mass = int(weight) * _KG_PER_POUND

    return {
        'name': name,
        'height': height,
        'mass': mass,
    }


In [28]:
# load the injured cohort (csv) from S3
# NOTE: new AWS module (v0.2.8) loads this directly as a dataframe
cohort = aws_connection.load_s3_object('epidemiology/cohorts/injured/combined_0625.csv')

In [36]:
# initialize full bio storage
full_bios = []

# IDs whose fetch or upload raised, kept so they can be inspected or retried
# instead of being lost in the printed log
failed_ids = []

# iterate through cohort and get player bios
    # NOTE: dropna() skips rows with a missing ID rather than requesting ".../nan"
for player_id in cohort['mlbamid'].dropna().unique():
    try:
        # normalise to a plain int so a float-typed column cannot leak a
        # trailing ".0" into the request URL or the S3 key
        player_id = int(player_id)

        player_bio = get_player_bio(player_id)
        player_bio['mlbamid'] = player_id  # add mlbamid to the bio
        full_bios.append(player_bio)

        # save player bio to S3
        aws_connection.upload_to_s3(
            pd.DataFrame([player_bio]),
            f'epidemiology/subjects/{player_id}/demographics.csv'
        )

        print(f"Successfully fetched and saved data for player ID {player_id}.")

    except requests.HTTPError as e:
        failed_ids.append(player_id)
        print(f"Failed to fetch data for player ID {player_id}: {e}")

    except Exception as e:
        # best-effort: one bad player must not abort the whole cohort
        failed_ids.append(player_id)
        print(f"An error occurred for player ID {player_id}: {e}")

# surface the failures in one place so they are not buried in the per-player log
if failed_ids:
    print(f"{len(failed_ids)} player ID(s) failed: {failed_ids}")

[AWS]: Uploaded object to s3://pitch-ml/epidemiology/subjects/642678/demographics.csv
Successfully fetched and saved data for player ID 642678.
[AWS]: Uploaded object to s3://pitch-ml/epidemiology/subjects/641845/demographics.csv
Successfully fetched and saved data for player ID 641845.
[AWS]: Uploaded object to s3://pitch-ml/epidemiology/subjects/657570/demographics.csv
Successfully fetched and saved data for player ID 657570.
[AWS]: Uploaded object to s3://pitch-ml/epidemiology/subjects/623406/demographics.csv
Successfully fetched and saved data for player ID 623406.
[AWS]: Uploaded object to s3://pitch-ml/epidemiology/subjects/670155/demographics.csv
Successfully fetched and saved data for player ID 670155.
[AWS]: Uploaded object to s3://pitch-ml/epidemiology/subjects/641927/demographics.csv
Successfully fetched and saved data for player ID 641927.
[AWS]: Uploaded object to s3://pitch-ml/epidemiology/subjects/596106/demographics.csv
Successfully fetched and saved data for player ID 

In [37]:
# build one table from every fetched bio and write it to the cohort folder
full_bios_df = pd.DataFrame(full_bios)
aws_connection.upload_to_s3(full_bios_df, 'epidemiology/cohorts/injured/statcast_bios.csv')

[AWS]: Uploaded object to s3://pitch-ml/epidemiology/cohorts/injured/statcast_bios.csv


$\textbf{Close AWS Connection}$

In [39]:
# close the connection that was opened at the top of the notebook
aws_connection.close()

[AWS]: Database connection closed.
[AWS]: SSH tunnel stopped.
