In [19]:
import json
import boto3
import psycopg2
from psycopg2 import extras
from nba_api.stats.endpoints import leaguegamefinder, boxscoretraditionalv2
from nba_api.stats.static import teams, players
import os
from datetime import datetime, timedelta
import pandas as pd
import logging

# Configure logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

def lambda_handler(event, context):
    """
    AWS Lambda handler function to fetch NBA team, player, and game data, process it,
    and store it in Amazon RDS.
    
    Parameters:
    - event (dict): Event data that triggers the Lambda function.
    - context (object): Provides runtime information to the handler.
    
    Returns:
    - dict: Status message indicating success or failure.
    """
    
    try:
        # Determine if this is an initial load or incremental update
        is_initial_load = event.get('initial_load', False)
        
        if is_initial_load:
            # Fetch and store static data
            teams_df = fetch_teams()
            # store_teams_in_rds(teams_df)
            
            players_df = fetch_players()
            # store_players_in_rds(players_df)
            logger.info("Static data (Teams and Players) stored successfully")
        
        # Determine the date for game fetching
        yesterday = datetime.now() - timedelta(days=1)
        yesterday_str = yesterday.strftime('%m/%d/%Y')
        logger.info(f"Fetching NBA data up to {yesterday_str}")
        
        # Initialize the NBA Game Finder
        gamefinder = leaguegamefinder.LeagueGameFinder(
            league_id_nullable='00',            # '00' corresponds to the NBA
            season_nullable='2024-25',          # Get current season
            date_to_nullable=yesterday_str
        )
        
        # Fetch game data as a Pandas DataFrame
        game_logs = gamefinder.get_data_frames()[0]
        game_logs = process_game_logs(game_logs)
        print(game_logs)
        logger.info(f"Retrieved {len(game_logs)} games from NBA API")
#         # Process the fetched game data
#         teams_df, games_df, game_stats_df = process_games(games)
#         logger.info(f"Processed games data: {len(game_stats_df)} records after filtering")
        
#         # Store the processed data into Amazon RDS
#         store_game_stats_in_rds(game_stats_df)
#         store_games_in_rds(games_df)
#         logger.info("Game logs data successfully stored in Amazon RDS")
        
#         # Fetch and store box scores for each game
#         for game_id in games_df['game_id']:
#             try:
#                 box_scores = fetch_box_scores(game_id)
#                 box_scores_clean = process_box_scores(box_scores)
#                 store_player_stats_in_rds(box_scores_clean)
#                 logger.info(f"Box scores for game_id {game_id} stored successfully")
#             except Exception as box_e:
#                 logger.error(f"Failed to process box scores for game_id {game_id}: {str(box_e)}")
#                 continue    # Continue processing games
        
        return {
            'statusCode': 200,
            'body': json.dumps('Data ingestion complete and stored successfully.')
        }
    
    except Exception as e:
        logger.error(f"Error in lambda_handler: {str(e)}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Data ingestion failed: {str(e)}")
        }
    
def fetch_teams():
    """
    Fetches static team data from nba_api and returns it as a DataFrame.
    
    Parameters:
    - None
    
    Returns:
    - pd.DataFrame: Pandas dataframe comtaining team data
    """
    
    try:
        # Fetch teams data
        nba_teams = teams.get_teams()
        teams_df = pd.DataFrame(nba_teams)
        
        # Select relevant columns
        teams_df = teams_df[['id', 'full_name', 'nickname', 'abbreviation', 'city', 'state', 'year_founded']]
        teams_df.rename(columns={
            'id': 'team_id',
            'abbreviation': 'team_abbreviation',
            'full_name': 'team_name'
        }, inplace=True)
        
        logger.info(f"Fetched {len(teams_df)} teams from nba_api")
        
        return teams_df
    
    except Exception as e:
        logger.error(f"Error in fetch_and_store_teams: {str(e)}")
        raise e
        
def process_teams(df):
    """
    Processes static team data and returns the processed data as a dataframe
    
    Parameters:
    - pd.DataFrame: A Pandas dataframe containing raw team data
    
    Returns:
    - pd.DataFrame: A Pandas dataframe contaning processed team data
    """
    
    try:
        processed_df = df[['id', 'full_name', 'nickname', 'abbreviation', 'city', 'state', 'year_founded']]
        processed_df.rename(columns={
            'id': 'team_id',
            'abbreviation': 'team_abbreviation',
            'full_name': 'team_name'
        }, inplace=True)
        
        # Logging
        logger.info(f'Processed team data.')
        
        return players_df
    
    except Exception as e:
        logger.error(f'Error in process_teams: {str(e)}')
        raise e
    
        
def fetch_players():
    """
    Fetches static player data from nba_api and returns it as a Pandas dataframe.
    
    Parameters:
    - None
    
    Returns:
    - pd.DataFrame: a Pandass dataframe containing player data
    """
    
    try:
        # Fetch players data
        nba_players = players.get_players()
        players_df = pd.DataFrame(nba_players)
        
        # Process player data
        processed_players_df = process_players(players_df)
        logger.info(f"Fetched {len(players_df)} players from nba_api")
        
        return processed_players_df
    
    except Exception as e:
        logger.error(f"Error in fetch_and_store_players: {str(e)}")
        raise e
        
def process_players(df):
    """
    Processes static player data and returns the processed data as a Pandas dataframe.
    
    Paramters:
    - pd.DataFrame: A Pandas dataframe containing raw player data
    
    Returns:
    - pd.DataFrame: A Pandas dataframe containing processed player data
    """
    
    try:
        df_processed = df[df['is_active'] == True]    # Get active players
        df_processed.drop('is_active', axis=1, inplace=True)
        
        df_processed.rename(columns={
            'id': 'player_id',
            'full_name': 'player_name'
        }, inplace=True)
        
        # Logging
        logger.info('Processed player data.')
        
        return df_processed
    
    except Exception as e:
        logger.error(f'Error in process_players: {str(e)}')
        raise e
        
def process_game_logs(df):
    """
    Processes raw boxscore data and returns the processed data as a Pandas dataframe.
    
    Paramters:
    - pd.DataFrame: a Pandas dataframe containing raw box score data.
    
    Returns:
    - pd.DataFrame: A Pandas dataframe containing processed player data.
    """
    
    try:
        # Drop Unnecessary columns
        processed_df = df.drop(['TEAM_ABBREVIATION', 'TEAM_NAME'], axis=1)
        
        return processed_df
    
    except Exception as e:
        logger.error(f'Error in process_boxscores: {str(e)}')
        raise e


event = {
    'initial_load': True
}

lambda_handler(event, {})

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_processed.drop('is_active', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_processed.rename(columns={


    SEASON_ID     TEAM_ID     GAME_ID   GAME_DATE      MATCHUP WL  MIN  PTS  \
0       22024  1610612740  0022400330  2024-12-05  NOP vs. PHX  W  241  126   
1       22024  1610612760  0022400328  2024-12-05    OKC @ TOR  W  240  129   
2       22024  1610612742  0022400326  2024-12-05    DAL @ WAS  W  239  137   
3       22024  1610612759  0022400331  2024-12-05  SAS vs. CHI  L  240  124   
4       22024  1610612766  0022400327  2024-12-05    CHA @ NYK  L  240  101   
..        ...         ...         ...         ...          ... ..  ...  ...   
805     12024  1610612738  0012400001  2024-10-04    BOS @ DEN  W  240  107   
806     12024  1610612747  0012400003  2024-10-04  LAL vs. MIN  L  241  107   
807     12024  1610612762  0012400002  2024-10-04  UTA vs. NZB  W  238  116   
808     12024  1610612750  0012400003  2024-10-04    MIN @ LAL  W  240  124   
809     12024       15020  0012400002  2024-10-04    NZB @ UTA  L  239   87   

     FGM  FGA  ...  FT_PCT  OREB  DREB  REB  AST  S

{'statusCode': 200,
 'body': '"Data ingestion complete and stored successfully."'}