# DAT 205 Project 
## By Dennis Hung
## Version 1
## Last updated 2021-03-06

##  Reference

#### How to Get NBA Data Using the nba_api Python Module (Beginner). Retrieved from Playing Numbers: 

https://www.playingnumbers.com/2019/12/how-to-get-nba-data-using-the-nba_api-python-module-beginner/

#### Patel, S. (2020, August 19). swar / nba_api. Retrieved from GitHub: 

https://github.com/swar/nba_api/blob/master/docs/table_of_contents.md

#### Issues

https://github.com/swar/nba_api/issues/124



# NBA API: playergamelogs

# 2021-03-05 Currently pulls the player game stats for each specified season as one big file

# Section 0: Function definitions

hms_string(sec_elapsed)


In [1]:
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as 'H:MM:SS.ss'.

    Args:
        sec_elapsed: Elapsed time in seconds (int or float), expected to be
            non-negative.

    Returns:
        A string such as '0:20:44.31': hours unpadded, minutes zero-padded to
        two digits, seconds zero-padded with two decimal places.
    """
    # Round to hundredths *before* splitting into fields. Otherwise a value
    # such as 59.999 is split as 0 min / 59.999 s and the formatter then
    # rounds the seconds up to "60.00" without carrying into the minutes.
    total = round(sec_elapsed, 2)
    h = int(total / (60 * 60))
    m = int((total % (60 * 60)) / 60)
    s = total % 60
    return "{}:{:>02}:{:>05.2f}".format(h, m, s)



# Section 1: Import libraries

In [2]:
import pandas as pd
import numpy as np
import time
from nba_api.stats.endpoints import playergamelogs
import requests


# Wall-clock timestamp taken when this cell runs; the export cell subtracts
# it from the current time to report the total runtime.
start_time = time.time()

# Debugging switch. To add a validation step anywhere in the notebook,
# guard it with the flag like this:
#
#     # VALIDATION CODE
#     if debug_active == 'yes':
#         ...
#
# Inside such a guard, use display(df) where a cell would otherwise end with
# a bare "df", to retain the same output format.

# Set to 'yes' to activate code guarded as shown above.
debug_active = 'no'

# Section 2: Configuration of variables

Must manually set the following variables

gameTypeListed: a list containing one or more of the following: 'Pre Season', 'Regular Season', 'Playoffs'

seasonsListed: the game seasons to pull, each in the format '2015-16'. Must contain at least 2 values.

In [3]:
# Run configuration: edit the values in this cell before running the data pull.

# Game types to request. Each entry is sent to the API as the season type and
# is also written to the 'Game_Type' column of the combined result.
gameTypeListed = ['Pre Season', 'Regular Season', 'Playoffs']
# gameTypeListed = ['Pre Season']
# gameTypeListed = ['Regular Season']
# gameTypeListed = ['Playoffs']

# Option 1: For all currently possible seasons (1946-47 through 2020-21).
# A season label is '<start year>-<last two digits of the following year>',
# e.g. '1999-00'. The list is generated from the two start years below instead
# of being typed out, so extending the range is a one-number change.
firstSeasonStartYear = 1946
lastSeasonStartYear = 2020
seasonsListed = [
    f"{startYear}-{(startYear + 1) % 100:02d}"
    for startYear in range(firstSeasonStartYear, lastSeasonStartYear + 1)
]

# Testing only
# seasonsListed = ['2015-16', '2016-17']

# First and last season, used to build the export file name
seasonStart = seasonsListed[0]
seasonEnd = seasonsListed[-1]

# Combined result of the data pull. Initialised as an empty DataFrame under the
# name the pull and export cells actually use (it was previously created as
# 'df_gamelogs_player', a misspelling that nothing else referenced), so the
# export still has a DataFrame to write if the pull loop never runs.
df_gamelogs_players = pd.DataFrame()

# 0 until the first season/game type has been stored in df_gamelogs_players
countFirstYear = 0

# Section 3: Data Pull

In [4]:
# Pull the player game logs for every (season, game type) combination and
# combine them into a single DataFrame, df_gamelogs_players.
#
# Each request's frame is collected in a list and everything is concatenated
# once after the loop. Concatenating inside the loop copies all previously
# downloaded rows again on every request.
gamelogFrames = []

for seasonSelected in seasonsListed:
    for gameType in gameTypeListed:
        # Start counter for processing the current season / game type
        start_time_counter_Season = time.time()

        gamelogs_players = playergamelogs.PlayerGameLogs(
            season_nullable=seasonSelected,
            season_type_nullable=gameType,
        )
        df_gamelogs_players_currSeason = pd.DataFrame(gamelogs_players.get_data_frames()[0])

        # Tag every row with the game type it was requested under
        df_gamelogs_players_currSeason['Game_Type'] = gameType
        gamelogFrames.append(df_gamelogs_players_currSeason)

        time_took_Season = time.time() - start_time_counter_Season
        print("")
        print("Processed: Season =", seasonSelected, "| Game_Type =", gameType, "| ",  f"Process time: {hms_string(time_took_Season)}")
        # Pause between requests (presumably to avoid hammering the stats
        # endpoint; confirm before shortening)
        time.sleep(2)

# pd.concat raises ValueError when given an empty list, so fall back to an
# empty DataFrame when no season/game type was requested.
if gamelogFrames:
    df_gamelogs_players = pd.concat(gamelogFrames, ignore_index=True)
else:
    df_gamelogs_players = pd.DataFrame()


Processed: Season = 1946-47 | Game_Type = Pre Season |  Process time: 0:00:00.67

Processed: Season = 1946-47 | Game_Type = Regular Season |  Process time: 0:00:01.19

Processed: Season = 1946-47 | Game_Type = Playoffs |  Process time: 0:00:00.68

Processed: Season = 1947-48 | Game_Type = Pre Season |  Process time: 0:00:00.64

Processed: Season = 1947-48 | Game_Type = Regular Season |  Process time: 0:00:01.27

Processed: Season = 1947-48 | Game_Type = Playoffs |  Process time: 0:00:00.77

Processed: Season = 1948-49 | Game_Type = Pre Season |  Process time: 0:00:00.58

Processed: Season = 1948-49 | Game_Type = Regular Season |  Process time: 0:00:01.55

Processed: Season = 1948-49 | Game_Type = Playoffs |  Process time: 0:00:00.66

Processed: Season = 1949-50 | Game_Type = Pre Season |  Process time: 0:00:00.62

Processed: Season = 1949-50 | Game_Type = Regular Season |  Process time: 0:00:01.64

Processed: Season = 1949-50 | Game_Type = Playoffs |  Process time: 0:00:00.94

Process

# Section 4: Export data to CSV

In [5]:
# Write the combined game logs to a CSV in the same folder as the notebook.
# The file name records the first and last season pulled, for example
# './HistoricalGameLogs_1946-47_to_2020-21_ALL.csv'.
# Alternative name for a single game type:
# filename = f"./HistoricalGameLogs_{seasonStart}_to_{seasonEnd}_{gameType}.csv"
filename = f"./HistoricalGameLogs_{seasonStart}_to_{seasonEnd}_ALL.csv"
df_gamelogs_players.to_csv(filename)

# Report the total wall-clock time elapsed since start_time was captured
time_took = time.time() - start_time
for bannerLine in ("", "", "PROCESSING COMPLETE"):
    print(bannerLine)
print(f"Total Runtime: {hms_string(time_took)}")



PROCESSING COMPLETE
Total Runtime: 0:20:44.31


# End of Code