In [3]:
import json
import logging
from datetime import datetime
import pandas as pd
from dotenv import load_dotenv
from utils import (
    get_game_logs,
    process_game_logs,
    get_boxscores,
    process_boxscores,
    store_games_in_rds,
    store_boxscores_in_rds
)

# Configure logging
# getLogger() with no name returns the root logger; its threshold is set to
# INFO so the logger.info(...) calls in this file are not dropped by level.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Load the .env file
# NOTE(review): presumably this supplies environment settings consumed by the
# helpers imported from utils -- confirm against utils.
load_dotenv()

def lambda_handler(event, context):
    """
    AWS Lambda handler function for the initial data ingestion.
        - Fetches game logs from the season start date up to yesterday.
        - Processes the game logs and stores the processed result in RDS.
        - Fetches boxscores for every unique game_id, then processes and
          stores them in RDS in a single batch.

    Parameters:
        - event (dict): Event data that triggers the Lambda function (not read by this handler).
        - context (object): Provides runtime information to the handler (not read by this handler).

    Returns:
        - dict: {'statusCode': 200, 'body': <JSON string>} on success (including
          the "no games found" case), or {'statusCode': 500, 'body': <JSON string>}
          when any unexpected exception escapes the ingestion steps.
    """
    try:
        logger.info("Starting initial data load...")

        # Set date range for initial fetching.
        # `datetime` is the class (from datetime import datetime), so the start
        # date must be built with its constructor; `datetime.date(2024, 10, 1)`
        # would call the instance method `date` on an int and raise TypeError.
        season_start_date = datetime(2024, 10, 1)
        season_start_date_str = season_start_date.strftime('%m/%d/%Y')
        yesterday = datetime.now() - pd.Timedelta(days=1)
        yesterday_str = yesterday.strftime('%m/%d/%Y')

        # Fetch game logs for the whole range: season start through yesterday
        logger.info(f"Fetching game logs from {season_start_date_str} to {yesterday_str}")

        game_logs_df = get_game_logs(season_start_date_str, yesterday_str)
        if not game_logs_df.empty:
            # Process the retrieved game logs
            clean_game_logs_df = process_game_logs(game_logs_df)
            logger.info(f"Retrieved and processed {len(clean_game_logs_df)} games.")

            # Store the *processed* game logs in RDS, mirroring the boxscore
            # path below, which also stores the processed frame.
            store_games_in_rds(clean_game_logs_df)
            logger.info("Game logs data successfully stored in RDS.")

            # Fetch boxscores for each unique game; a failure for one game is
            # logged and skipped so it does not abort the rest of the load.
            boxscores_list = []
            unique_games = clean_game_logs_df['game_id'].unique()
            for game_id in unique_games:
                try:
                    boxscore_df = get_boxscores(game_id)
                    if not boxscore_df.empty:
                        boxscores_list.append(boxscore_df)
                        logger.info(f"Box scores for game_id {game_id} retrieved successfully.")
                except Exception as box_e:
                    logger.error(f"Failed to process box scores for game_id {game_id}: {str(box_e)}")
                    # TODO Store failure and retry
                    continue

            # Process and store all retrieved boxscores in one batch
            if boxscores_list:
                boxscores_df = pd.concat(boxscores_list, ignore_index=True)
                clean_boxscores_df = process_boxscores(boxscores_df)
                store_boxscores_in_rds(clean_boxscores_df)
                logger.info(f"Stored {len(clean_boxscores_df)} boxscores in RDS successfully.")
            else:
                logger.info("No boxscores found to store.")

        else:
            logger.info("No games found for the specified date range.")

        logger.info("Initial data load completed successfully.")

        return {
            'statusCode': 200,
            'body': json.dumps('Initial data ingestion complete and stored successfully.')
        }

    except Exception as e:
        # Top-level boundary: log with traceback and report failure to the
        # caller instead of letting the exception propagate.
        logger.exception(f"Error in initial_load_handler: {str(e)}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Initial data ingestion failed: {str(e)}")
        }

# Entry Point (for local testing)
# When run as a script, invoke the handler once with an empty event and an
# empty dict in place of the Lambda context; the returned status dict is
# discarded.
if __name__ == "__main__":
    lambda_handler({}, {})

season_id              int64
team_id                int64
game_id               object
game_date     datetime64[ns]
matchup               object
wl                    object
min                    int64
pts                    int64
fgm                    int64
fga                    int64
fg3m                   int64
fg3a                   int64
ftm                    int64
fta                    int64
oreb                   int64
dreb                   int64
reb                    int64
ast                    int64
stl                    int64
blk                    int64
tov                    int64
pf                     int64
plus_minus           float64
dtype: object
GAME_ID: 0022401202
       GAME_ID     TEAM_ID TEAM_ABBREVIATION TEAM_CITY  PLAYER_ID  \
0   0022401202  1610612737               ATL   Atlanta    1630552   
1   0022401202  1610612737               ATL   Atlanta    1642258   
2   0022401202  1610612737               ATL   Atlanta     203991   
3   0022401202  1610612