In [1]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

def get_nba_db():
    """
    Creates a SQLAlchemy engine for the NBA PostgreSQL database.

    Connection settings are read from the environment variables
    NBA_DB_HOST, NBA_DB_PORT, NBA_DB_NAME, NBA_DB_USER and NBA_DB_PASSWORD.

    Returns:
        The engine returned by ``create_engine`` for the assembled
        ``postgresql://`` URL.

    Raises:
        RuntimeError: if any of the variables above is unset. Without this
            check the literal text "None" would be embedded in the URL.
    """
    from urllib.parse import quote

    names = (
        "NBA_DB_HOST",
        "NBA_DB_PORT",
        "NBA_DB_NAME",
        "NBA_DB_USER",
        "NBA_DB_PASSWORD",
    )
    settings = {name: os.getenv(name) for name in names}

    missing = [name for name in names if settings[name] is None]
    if missing:
        raise RuntimeError(
            "Missing NBA database environment variable(s): " + ", ".join(missing)
        )

    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # password would otherwise be read as URL delimiters. safe="" also encodes
    # '/', and quote() (unlike quote_plus) never emits '+' for a space.
    user = quote(settings["NBA_DB_USER"], safe="")
    password = quote(settings["NBA_DB_PASSWORD"], safe="")
    host = settings["NBA_DB_HOST"]
    port = settings["NBA_DB_PORT"]
    database = settings["NBA_DB_NAME"]

    connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
    return create_engine(connection_string)

def query(sql):
    """
    Executes SQL query against the NBA database and returns results as a pandas DataFrame.

    Args:
        sql: The SQL statement passed to ``pd.read_sql``.

    Returns:
        The DataFrame produced by ``pd.read_sql``.
    """
    engine = get_nba_db()
    try:
        return pd.read_sql(sql, engine)
    finally:
        # A new engine (with its own connection pool) is built on every call,
        # so release its connections once the result has been read.
        engine.dispose()

# Function to display available tables
def list_tables():
    """Return the table names found in the database's ``public`` schema as a list."""
    table_name_column = query(
        "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
    )['table_name']
    return table_name_column.tolist()

In [2]:
# Show the names of the tables in the database's public schema.
list_tables()

['common_player_info',
 'draft_combine_stats',
 'draft_history',
 'game',
 'game_info',
 'game_summary',
 'games',
 'inactive_players',
 'leagueschedule24_25',
 'leagueschedule25_26',
 'line_score',
 'officials',
 'other_stats',
 'play_by_play',
 'player',
 'playeroftheweek',
 'playeroftheweek2',
 'players',
 'playerstatistics',
 'team',
 'team_details',
 'team_history',
 'team_info_common',
 'teamhistories',
 'teamstatistics']

In [3]:
# Preview 10 rows of playerstatistics to see its columns.
# There is no ORDER BY, so which rows come back is up to the database.
query("""
    SELECT * FROM playerstatistics LIMIT 10
""")

Unnamed: 0,firstname,lastname,personid,gameid,gamedate,playerteamcity,playerteamname,opponentteamcity,opponentteamname,gametype,...,threepointerspercentage,freethrowsmade,freethrowsattempted,freethrowspercentage,reboundsoffensive,reboundsdefensive,reboundstotal,foulspersonal,turnovers,plusminuspoints
0,Jamal,Murray,1627750.0,22500248.0,2025-11-17T21:00:00Z,Denver,Nuggets,Chicago,Bulls,,...,0.455,5.0,5.0,1.0,0.0,11.0,11.0,3.0,2.0,-1.0
1,Bruce,Brown,1628971.0,22500248.0,2025-11-17T21:00:00Z,Denver,Nuggets,Chicago,Bulls,,...,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,0.0,-17.0
2,Jevon,Carter,1628975.0,22500248.0,2025-11-17T21:00:00Z,Chicago,Bulls,Denver,Nuggets,,...,0.5,0.0,0.0,0.0,1.0,3.0,4.0,2.0,1.0,20.0
3,Kevin,Huerter,1628989.0,22500248.0,2025-11-17T21:00:00Z,Chicago,Bulls,Denver,Nuggets,,...,0.444,2.0,2.0,1.0,0.0,2.0,2.0,0.0,1.0,-21.0
4,Jalen,Pickett,1629618.0,22500248.0,2025-11-17T21:00:00Z,Denver,Nuggets,Chicago,Bulls,,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,9.0
5,Cameron,Johnson,1629661.0,22500248.0,2025-11-17T21:00:00Z,Denver,Nuggets,Chicago,Bulls,,...,0.714,2.0,2.0,1.0,2.0,2.0,4.0,0.0,0.0,13.0
6,Isaac,Okoro,1630171.0,22500248.0,2025-11-17T21:00:00Z,Chicago,Bulls,Denver,Nuggets,,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,-11.0
7,Patrick,Williams,1630172.0,22500248.0,2025-11-17T21:00:00Z,Chicago,Bulls,Denver,Nuggets,,...,0.25,2.0,2.0,1.0,1.0,1.0,2.0,2.0,0.0,4.0
8,Jalen,Smith,1630188.0,22500248.0,2025-11-17T21:00:00Z,Chicago,Bulls,Denver,Nuggets,,...,0.5,1.0,1.0,1.0,1.0,7.0,8.0,2.0,1.0,15.0
9,Zeke,Nnaji,1630192.0,22500248.0,2025-11-17T21:00:00Z,Denver,Nuggets,Chicago,Bulls,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Minutes per game, averaged per player per week

sql_stats = """
SELECT 
    personid,
    numminutes,
    gamedate
FROM playerstatistics
WHERE gamedate >= '2024-01-01'
"""
stats = query(sql_stats)
print("Loaded stats:", stats.shape)

# Parse as UTC (unparseable values become NaT), then strip the timezone so
# the resulting index is tz-naive.
parsed_gamedate = pd.to_datetime(stats['gamedate'], errors='coerce', utc=True)
stats['gamedate'] = parsed_gamedate.dt.tz_localize(None)

# Drop the rows whose date failed to parse and index by date so that
# pd.Grouper(freq='W') can bucket on the index.
stats = stats.dropna(subset=['gamedate']).set_index('gamedate')

# One group per (player, week); the mean is over that player's games in the week.
minutes_by_player_week = stats.groupby(['personid', pd.Grouper(freq='W')])['numminutes']
weekly_minutes = minutes_by_player_week.mean().reset_index()
weekly_minutes = weekly_minutes.rename(columns={'numminutes': 'weekly_minutes'})

print("Weekly minutes:", weekly_minutes.shape)
weekly_minutes.head()


Loaded stats: (67824, 3)
Weekly minutes: (3401, 3)


Unnamed: 0,personid,gamedate,weekly_minutes
0,2544.0,2025-10-05,
1,2544.0,2025-10-19,
2,42824.0,2025-10-05,
3,101108.0,2025-10-12,18.885
4,101108.0,2025-10-19,18.26


In [None]:
# Wins vs all-nba players
#
# For each player-game row dated 2024 or 2025, flag whether the opposing
# team's roster in that game contained an All-NBA player, and whether the
# player's team won that game.

import pandas as pd  # repeated here so this cell does not need the database cell

first = pd.read_csv("all-nba-first-team.csv")
second = pd.read_csv("all-nba-second-team.csv")
third = pd.read_csv("all-nba-third-team.csv")
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. NOTE(review): this targets the warning the
# original read_csv call emitted, presumably the mixed-type DtypeWarning.
stats = pd.read_csv("player-statistics.csv", low_memory=False)

# Reassign instead of inplace=True so re-running the cell is predictable.
stats = stats.rename(columns={"gameId": "game_id"})

# Keep rows whose gameDate string starts with 2024- or 2025-.
# na=False makes rows with a missing date count as "not selected".
game_date = stats["gameDate"].str
in_selected_years = (
    game_date.startswith("2024-", na=False)
    | game_date.startswith("2025-", na=False)
)
STATS_THIS_SEASON = stats[in_selected_years].copy()


# Every player id appearing on any of the three All-NBA team files.
all_nba = pd.concat([first, second, third], ignore_index=True)
all_nba_player_ids = set(all_nba["player_id"].unique())


# One row per (game, team) holding the set of player ids that appear for that
# team in that game.
rosters = (
    STATS_THIS_SEASON
    .groupby(["game_id", "playerteamName"])["player_id"]
    .apply(set)
    .reset_index()
    .rename(columns={"playerteamName": "team_name", "player_id": "roster"})
)


# Attach the player's own team roster. It is not used further in this cell,
# but is kept so `df` retains its `player_roster` column.
df = STATS_THIS_SEASON.merge(
    rosters.rename(columns={"team_name": "playerteamName",
                            "roster": "player_roster"}),
    on=["game_id", "playerteamName"],
    how="left"
)

# Attach the opposing team's roster for the same game.
df = df.merge(
    rosters.rename(columns={"team_name": "opponentteamName",
                            "roster": "opponent_roster"}),
    on=["game_id", "opponentteamName"],
    how="left"
)


def opponent_has_all_nba(roster):
    """
    Return 1 if `roster` is a set sharing at least one id with
    `all_nba_player_ids`, otherwise 0.

    A non-set value (e.g. the NaN left by the left merge when no opponent
    roster was found) yields 0.
    """
    if isinstance(roster, set):
        # isdisjoint avoids building the intersection just to test emptiness.
        return int(not roster.isdisjoint(all_nba_player_ids))
    return 0

df["opponent_has_all_nba"] = df["opponent_roster"].apply(opponent_has_all_nba)


# 1 only when the player's team won AND the opponent had an All-NBA player.
df["wins_vs_team_with_all_nba_player"] = (
    (df["win"] == 1) & (df["opponent_has_all_nba"] == 1)
).astype(int)


df["player_name"] = df["full_name"]


output = df[[
    "player_id",
    "player_name",
    "game_id",
    "playerteamName",
    "opponentteamName",
    "win",
    "opponent_has_all_nba",
    "wins_vs_team_with_all_nba_player"
]].sort_values("game_id")

display(output.head(40))


  stats = pd.read_csv("player-statistics.csv")


Unnamed: 0,player_id,player_name,game_id,playerteamName,opponentteamName,win,opponent_has_all_nba,wins_vs_team_with_all_nba_player
45333,204001,Kristaps Porzingis,12400001,Celtics,Nuggets,1,1,1
45309,1631128,Christian Braun,12400001,Nuggets,Celtics,0,1,0
45308,1631124,Julian Strawther,12400001,Nuggets,Celtics,0,1,0
45307,1631120,JD Davison,12400001,Celtics,Nuggets,1,1,1
45306,1630573,Sam Hauser,12400001,Celtics,Nuggets,1,1,1
45305,1630531,Jaden Springer,12400001,Celtics,Nuggets,1,1,1
45304,1630214,Xavier Tillman,12400001,Celtics,Nuggets,1,1,1
45303,1630202,Payton Pritchard,12400001,Celtics,Nuggets,1,1,1
45301,1629674,Neemias Queta,12400001,Celtics,Nuggets,1,1,1
45310,1631199,Ron Harper Jr.,12400001,Celtics,Nuggets,1,1,1
