# **1. Import Libraries**

In [8]:
import requests
import numpy as np
import pandas as pd
import time
import joblib

# **2. Endpoints Research**

## **Players Related Info:**

- <pre><b>CommonAllPlayers</b>             # Get the IDs of all players from all seasons
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/commonallplayers.md
</pre>

- <pre><b>CommonPlayerInfo</b>             # Extra information related to the players
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/commonplayerinfo.md
</pre>

- <pre><b>PlayerAwards</b>                 # Check correlation between teams and All-Star players
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/playerawards.md
</pre>

- <pre><b>LeagueDashPlayerBioStats</b>     # Check correlation between physical aspects and performance
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/leaguedashplayerbiostats.md
</pre>

## **Teams Related Info:**

- <pre><b>CommonTeamRoster</b>             # Check each player position
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/commonteamroster.md
</pre>

- <pre><b>TeamDetails</b>                  # Check team awards
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/teamdetails.md
</pre>

- <pre><b>CumeStatsTeam</b>                # Explore Raptors stats
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/cumestatsteam.md
</pre>

## **Games Related Info:**

- <pre><b>BoxScoreSummaryV2</b>            # Check correlation between winning and points per quarter
https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/boxscoresummaryv2.md
</pre>

- CumeStatsTeam -> https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/cumestatsteam.md
- LeagueDashTeamStats -> https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/leaguedashteamstats.md

In [9]:
from nba_api.stats.endpoints import commonallplayers, commonplayerinfo, playerawards, leaguedashplayerbiostats, teamdetails, commonteamroster, boxscoresummaryv2

# **3. Functions**

## Testing leaguedash_season Function

In [10]:
def leaguedash_season(season, leaguedash_df):
    """Fetch one season of player bio stats and append it to ``leaguedash_df``.

    Requests ``LeagueDashPlayerBioStats`` for ``league_id='00'`` and the given
    season, tags every returned row with a ``SEASON`` column and concatenates
    those rows onto the accumulated frame. The outcome is reported with a
    timestamped message.

    Parameters
    ----------
    season : str
        Season label passed to the endpoint and written to the ``SEASON``
        column (e.g. ``'1996-97'``).
    leaguedash_df : pandas.DataFrame
        Rows collected so far. It is not modified in place.

    Returns
    -------
    pandas.DataFrame
        A new frame with the season appended, or ``leaguedash_df`` itself,
        unchanged, when the retrieval failed.
    """
    try:
        leaguedash_info = leaguedashplayerbiostats.LeagueDashPlayerBioStats(league_id='00', season=season)
        df_append = leaguedash_info.get_data_frames()[0]
        df_append['SEASON'] = season
        leaguedash_df = pd.concat([leaguedash_df, df_append], ignore_index=True)

    except Exception as exc:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit can still stop a long-running download.
        current_time = time.strftime("%H:%M:%S", time.localtime())

        print(f'ERROR: Failed to retrieve data from season {season} at {current_time}')
        # Surface the cause instead of discarding it.
        print(f'Cause: {exc!r}')

    else:
        current_time = time.strftime("%H:%M:%S", time.localtime())

        print(f'Data from season {season} has been retrieved successfully at {current_time}')

    return leaguedash_df

In [11]:
def leaguedash_allseasons(fisrt_season=1996, last_season=2023, api_wait=30, max_retries=None):
    """Download player bio stats for a range of seasons into one DataFrame.

    Every season label from ``fisrt_season`` up to (but not including)
    ``last_season`` is requested once through ``leaguedash_season``. Seasons
    that are absent from the result afterwards are requested again, round by
    round, until none is missing or ``max_retries`` rounds have been spent.

    Parameters
    ----------
    fisrt_season : int, default 1996
        Starting year of the first season. The misspelt name is kept so that
        existing keyword callers keep working.
    last_season : int, default 2023
        Exclusive upper bound for the starting year.
    api_wait : int or float, default 30
        Seconds to sleep after every request.
    max_retries : int or None, default None
        Maximum number of retry rounds over the missing seasons. ``None``
        keeps retrying until every season has been retrieved.

    Returns
    -------
    pandas.DataFrame
        All retrieved rows sorted by ``SEASON``; an empty frame when nothing
        could be retrieved.
    """
    seasons = [f'{x}-{str(x+1)[2:]}' for x in range(fisrt_season, last_season)]

    def missing_seasons(df):
        # A frame that never received any rows has no SEASON column at all,
        # so attribute access on it would raise instead of reporting "missing".
        retrieved = set(df['SEASON'].unique()) if 'SEASON' in df.columns else set()
        return [x for x in seasons if x not in retrieved]

    leaguedash_df = pd.DataFrame()

    for season in seasons:
        leaguedash_df = leaguedash_season(season, leaguedash_df)
        print(f'Sleeping for {api_wait} seconds...')
        time.sleep(api_wait)

    missing = missing_seasons(leaguedash_df)

    if missing:
        print(f'Total missing seasons: {len(missing)}')
        print(f'Starting the api`s recall for the following missing seasons: \n{missing}')

    retry_round = 0
    while missing and (max_retries is None or retry_round < max_retries):

        # Visit every missing season once per round so a single season that
        # keeps failing cannot starve the others.
        for season in missing:
            leaguedash_df = leaguedash_season(season, leaguedash_df)
            print(f'Sleeping for {api_wait} seconds...')
            time.sleep(api_wait)

        missing = missing_seasons(leaguedash_df)
        retry_round += 1

    if missing:
        print(f'WARNING: Gave up on the following seasons after {retry_round} retry round(s): \n{missing}')

    if 'SEASON' in leaguedash_df.columns:
        leaguedash_df = leaguedash_df.sort_values('SEASON', ignore_index=True)

    return leaguedash_df


In [5]:
#leaguedash_df = leaguedash_allseasons()

## Testing all_playersids Function

In [6]:
def all_playersids():
    """Return the ``PERSON_ID`` column of the ``CommonAllPlayers`` endpoint.

    The endpoint is queried for ``league_id='00'`` and the column is taken
    from the first data frame it returns.
    """
    endpoint_frames = commonallplayers.CommonAllPlayers(league_id='00').get_data_frames()
    return endpoint_frames[0].PERSON_ID

## Testing awards_perplayer Function

In [7]:
def awards_perplayer(player_id, player_awards_df, missing, mode=['first','recall']):
    """Fetch one player's awards and append them to ``player_awards_df``.

    Parameters
    ----------
    player_id
        Identifier passed to the ``PlayerAwards`` endpoint.
    player_awards_df : pandas.DataFrame
        Awards collected so far. It is not modified in place.
    missing : list
        Ids whose retrieval failed. In ``'first'`` mode a failing
        ``player_id`` is appended to this list in place.
    mode : {'first', 'recall'}
        ``'first'`` reports a failure, records the id in ``missing`` and
        returns normally. ``'recall'`` lets the exception propagate so the
        caller can drive its own retry logic. The list default only documents
        the accepted values; a mode must be passed explicitly.

    Returns
    -------
    tuple
        ``(player_awards_df, missing)``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'first'`` nor ``'recall'``. Previously such a
        call silently returned ``None``.
    """
    if mode not in ('first', 'recall'):
        raise ValueError(f"mode must be 'first' or 'recall', got {mode!r}")

    try:
        player_awards_info = playerawards.PlayerAwards(player_id=player_id)
        df_append = player_awards_info.get_data_frames()[0]
        player_awards_df = pd.concat([player_awards_df, df_append], ignore_index=True)

    except Exception as exc:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit can still abort the download.
        if mode == 'recall':
            raise

        current_time = time.strftime("%H:%M:%S", time.localtime())

        missing.append(player_id)

        print(f'ERROR: Failed to retrieve data from player {player_id} at {current_time}.\nAdded to the missing list.')
        # Surface the cause instead of discarding it.
        print(f'Cause: {exc!r}')

    return player_awards_df, missing

In [8]:
def awards_allplayers(all_players=None, attempts=5):
    """Collect the awards of every player, retrying failed requests.

    Each id is requested once through ``awards_perplayer`` in ``'first'``
    mode. Ids reported as missing by that pass are then requested again in
    ``'recall'`` mode, up to ``attempts`` times each.

    Parameters
    ----------
    all_players : iterable of ids, optional
        Ids to process. When ``None`` the ids come from ``all_playersids()``.
        Any sized iterable works, including a pandas Series.
    attempts : int, default 5
        Maximum number of recall requests per missing id.

    Returns
    -------
    tuple
        ``(player_awards_df, still_missing)`` where ``still_missing`` lists
        the ids whose request failed on the first pass and on every recall
        attempt. It is an empty list when nothing failed.
    """
    # ``is None`` rather than truthiness: a Series or ndarray has no
    # unambiguous truth value.
    if all_players is None:
        all_players = all_playersids()

    size = len(all_players)

    player_awards_df = pd.DataFrame()
    missing = []

    for i, player_id in enumerate(all_players):
        player_awards_df, missing = awards_perplayer(player_id, player_awards_df, missing, mode='first')
        print(f'Progress --> {i+1}/{size}')

    # Always defined, so the return below also works when nothing failed.
    still_missing = []

    if missing:

        size = len(missing)

        print(f'Total missing players: {size}')
        print(f'Starting the api`s recall for the following missing players: \n{missing}')

        # Iterate over a snapshot because ``missing`` is rebound inside the loop.
        for i, player_id in enumerate(list(missing)):

            for attempt in range(attempts):

                print(f'Attempt --> {attempt+1}')

                try:
                    player_awards_df, missing = awards_perplayer(player_id, player_awards_df, missing, mode='recall')

                except Exception:
                    current_time = time.strftime("%H:%M:%S", time.localtime())

                    print(f'ERROR: Failed to retrieve data from player {player_id} at {current_time}')

                else:
                    print(f'Progress --> {i+1}/{size}')
                    break

            else:
                # Every attempt failed. Recording the failure directly avoids
                # deriving it from the ids present in the frame, which breaks
                # on an empty frame and cannot tell a failed request from a
                # request that simply contributed no rows.
                still_missing.append(player_id)

    return player_awards_df, still_missing

In [None]:
# Run the full awards download; failed requests are recalled up to 5 times each.
player_awards_df, missing = awards_allplayers(attempts=5)

In [None]:
# Display the collected awards table.
player_awards_df

In [None]:
# Display the player ids returned as still missing by awards_allplayers.
missing

## Testing manage_data Function

In [2]:
def manage_data(filename, df=None, action=['store','load']):
    """Store a DataFrame to, or load one from, ``../dataframes/<filename>.pkl``.

    Parameters
    ----------
    filename : str
        File name without extension; ``.pkl`` is appended.
    df : pandas.DataFrame, optional
        Frame to persist. Only used when ``action == 'store'``. A missing or
        empty frame is not written; a message is printed instead.
    action : {'store', 'load'}
        Operation to perform. The list default only documents the accepted
        values; an action must be passed explicitly.

    Returns
    -------
    pandas.DataFrame or None
        The loaded object for ``'load'``; ``None`` for ``'store'``.

    Raises
    ------
    ValueError
        If ``action`` is neither ``'store'`` nor ``'load'``. Previously such a
        call silently did nothing.
    """
    if action not in ('store', 'load'):
        raise ValueError(f"action must be 'store' or 'load', got {action!r}")

    path = f'../dataframes/{filename}.pkl'

    if action == 'store':
        # ``None`` replaces the former ``pd.DataFrame()`` default, which was
        # created once at definition time and shared by every call.
        if df is None or df.empty:
            print('You must pass a Dataframe to be stored.')
        else:
            joblib.dump(df, path)
        return None

    # NOTE: joblib files are pickles; loading one can execute arbitrary code,
    # so only load files that come from a trusted source.
    return joblib.load(path)

In [None]:
# Persist the awards table to ../dataframes/playersawards.pkl with joblib.
manage_data(filename='playersawards', df=player_awards_df, action='store')