ANN Final Project
Authors: Caleb Johnson, Gabe Schwartz, Evan Kates
Data Processing Module

In [1]:
# Gather player data for training
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players
import pandas as pd
import time

# Build one training record per active player: rookie-season stats (suffix
# `_r`) paired with sophomore-season stats (suffix `_s`).
all_players = players.get_active_players()
player_data = []

# Columns copied from each season row (hoisted: they never change per player).
stat_fields = [
    'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
    'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
    'BLK', 'TOV', 'PF', 'PTS'
]
# Fields stored as reported; every other field is divided by games played.
raw_fields = {'PLAYER_AGE', 'GP', 'GS', 'FG_PCT', 'FG3_PCT', 'FT_PCT'}

print("Gathering player data for...")
for p in all_players:
    try:
        player_id = p['id']
        full_name = p['full_name']
        stats = playercareerstats.PlayerCareerStats(player_id=player_id)

        # keep only rows for league '00' (the NBA)
        df = stats.get_data_frames()[0]
        df = df[df['LEAGUE_ID'] == '00']

        # A player traded mid-season has one row per team plus a combined
        # 'TOT' row for that season. Collapse to exactly one row per season,
        # preferring the 'TOT' row, so positional indexing below really means
        # "first season" / "second season" and uses full-season totals.
        is_total = df['TEAM_ABBREVIATION'] == 'TOT'
        df = (
            pd.concat([df[is_total], df[~is_total]])
            .drop_duplicates('SEASON_ID')
            .sort_index()  # restore the order the API returned the seasons in
        )

        # take only players with 2 seasons in the NBA
        if len(df) < 2:
            continue

        # gather data from rookie and sophomore seasons
        rookie = df.iloc[0]
        sophomore = df.iloc[1]

        data = {'PLAYER_ID': player_id, 'PLAYER_NAME': full_name}

        for field in stat_fields:
            if field in raw_fields:
                data[f'{field}_r'] = rookie[field]
                data[f'{field}_s'] = sophomore[field]
            else:
                # convert season totals to per-game averages
                data[f'{field}_r'] = rookie[field] / rookie['GP']
                data[f'{field}_s'] = sophomore[field] / sophomore['GP']

        player_data.append(data)

        print(f"Success for player: {p['full_name']}")

    except Exception as e:
        print(f"Failed for player {p['full_name']}: {e}")

    finally:
        # Throttle after every request, including skipped players and
        # failures, so consecutive API calls are always spaced out.
        time.sleep(0.6)



Gathering player data for...
Success for player: Precious Achiuwa
Success for player: Steven Adams
Success for player: Bam Adebayo
Success for player: Ochai Agbaji
Success for player: Santi Aldama
Success for player: Nickeil Alexander-Walker
Success for player: Grayson Allen
Success for player: Jarrett Allen
Success for player: Jose Alvarado
Success for player: Kyle Anderson
Success for player: Giannis Antetokounmpo
Success for player: Cole Anthony
Success for player: OG Anunoby
Success for player: Deni Avdija
Success for player: Deandre Ayton
Success for player: Marvin Bagley III
Success for player: Patrick Baldwin Jr.
Success for player: LaMelo Ball
Success for player: Lonzo Ball
Success for player: Mo Bamba
Success for player: Paolo Banchero
Success for player: Desmond Bane
Success for player: Dalano Banton
Success for player: Dominick Barlow
Success for player: Harrison Barnes
Success for player: Scottie Barnes
Success for player: RJ Barrett
Success for player: Charles Bassey
Succe

In [2]:
# Write the collected training records to training.csv, leaving out any
# player whose record contains a missing value. The DataFrame's row index is
# written as the first column (pandas default).
df = pd.DataFrame(player_data).dropna()
df.to_csv('training.csv')

In [5]:
# Gather player data for predictions
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players
import pandas as pd
import time

# Build one prediction record per active player from rookie-season stats
# only (suffix `_r`).
all_players = players.get_active_players()
player_data = []

# Columns copied from the rookie season row (hoisted: they never change per
# player).
stat_fields = [
    'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
    'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
    'BLK', 'TOV', 'PF', 'PTS'
]
# Fields stored as reported; every other field is divided by games played.
raw_fields = {'PLAYER_AGE', 'GP', 'GS', 'FG_PCT', 'FG3_PCT', 'FT_PCT'}

print("Gathering player data for...")
for p in all_players:
    try:
        player_id = p['id']
        full_name = p['full_name']
        stats = playercareerstats.PlayerCareerStats(player_id=player_id)

        # keep only rows for league '00' (the NBA)
        df = stats.get_data_frames()[0]
        df = df[df['LEAGUE_ID'] == '00']

        # A player traded mid-season has one row per team plus a combined
        # 'TOT' row for that season. Collapse to exactly one row per season,
        # preferring the 'TOT' row, so the first row is the full rookie
        # season rather than a single team's partial line.
        is_total = df['TEAM_ABBREVIATION'] == 'TOT'
        df = (
            pd.concat([df[is_total], df[~is_total]])
            .drop_duplicates('SEASON_ID')
            .sort_index()  # restore the order the API returned the seasons in
        )

        # take only players with at least one year in the NBA
        if len(df) < 1:
            continue

        # gather data from the rookie season
        rookie = df.iloc[0]

        data = {'PLAYER_ID': player_id, 'PLAYER_NAME': full_name}

        for field in stat_fields:
            if field in raw_fields:
                data[f'{field}_r'] = rookie[field]
            else:
                # convert season totals to per-game averages
                data[f'{field}_r'] = rookie[field] / rookie['GP']

        player_data.append(data)

        print(f"Success for player: {p['full_name']}")

    except Exception as e:
        print(f"Failed for player {p['full_name']}: {e}")

    finally:
        # Throttle after every request, including skipped players and
        # failures, so consecutive API calls are always spaced out.
        time.sleep(0.6)

Gathering player data for...


KeyboardInterrupt: 

In [None]:
# Write the collected prediction records to prediction.csv, leaving out any
# player whose record contains a missing value. The DataFrame's row index is
# written as the first column (pandas default).
df = pd.DataFrame(player_data).dropna()
df.to_csv('prediction.csv')