In [33]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load environment variables from .env file before anything calls os.getenv,
# so the NBA_DB_* settings read in get_nba_db() can come from that file.
load_dotenv()

def get_nba_db():
    """
    Creates a SQLAlchemy engine for the NBA PostgreSQL database.

    Connection settings are read from environment variables:

    * ``NBA_DB_HOST``, ``NBA_DB_NAME``, ``NBA_DB_USER`` -- required.
    * ``NBA_DB_PORT`` -- optional, falls back to 5432 when unset or empty.
    * ``NBA_DB_PASSWORD`` -- optional; left out of the URL when unset.

    Returns:
        Whatever ``create_engine`` returns for the assembled
        ``postgresql://`` URL.

    Raises:
        RuntimeError: if any required variable is unset or empty.
    """
    # Local import keeps this function self-contained; only the stdlib is needed.
    from urllib.parse import quote

    required = {
        name: os.getenv(name)
        for name in ("NBA_DB_HOST", "NBA_DB_NAME", "NBA_DB_USER")
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        # Fail here with a readable message instead of building a URL that
        # contains the literal text "None".
        raise RuntimeError(
            "Missing database settings: " + ", ".join(sorted(missing))
        )

    host = required["NBA_DB_HOST"]
    database = required["NBA_DB_NAME"]
    port = os.getenv("NBA_DB_PORT") or "5432"

    # Percent-encode credentials so characters such as '@', '/' or ':' in a
    # password cannot be mistaken for URL delimiters. safe="" also encodes '/'.
    credentials = quote(required["NBA_DB_USER"], safe="")
    password = os.getenv("NBA_DB_PASSWORD")
    if password is not None:
        credentials += ":" + quote(password, safe="")

    connection_string = f"postgresql://{credentials}@{host}:{port}/{database}"
    return create_engine(connection_string)

def query(sql, params=None):
    """
    Executes SQL query against the NBA database and returns results as a pandas DataFrame.

    Args:
        sql: SQL text to run.
        params: Optional bind parameters, forwarded unchanged to
            ``pandas.read_sql``. Prefer this over formatting values into
            ``sql`` by hand.

    Returns:
        pandas.DataFrame holding the query result.
    """
    engine = get_nba_db()
    try:
        return pd.read_sql(sql, engine, params=params)
    finally:
        # get_nba_db() hands back a new engine on every call; dispose of it so
        # its pooled connections are closed now rather than whenever the
        # engine happens to be garbage-collected.
        engine.dispose()

# Function to display available tables
def list_tables():
    """Return the table names found in the database's ``public`` schema as a list."""
    catalog_sql = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
    catalog = query(catalog_sql)
    # Single-column result -> plain Python list of names.
    return catalog['table_name'].to_list()

In [None]:
# Show the public-schema table names so the queries below can reference them.
list_tables()

In [34]:
# Preview ten rows of playerstatistics to see its columns and value formats.
# There is no ORDER BY, so which ten rows come back is up to the database.
query("""
    SELECT * FROM playerstatistics LIMIT 10
""")

Unnamed: 0,firstname,lastname,personid,gameid,gamedate,playerteamcity,playerteamname,opponentteamcity,opponentteamname,gametype,...,threepointerspercentage,freethrowsmade,freethrowsattempted,freethrowspercentage,reboundsoffensive,reboundsdefensive,reboundstotal,foulspersonal,turnovers,plusminuspoints
0,Kawhi,Leonard,202695.0,42400175.0,2025-04-29 22:00:00,Los Angeles,Clippers,Denver,Nuggets,Playoffs,...,0.25,3.0,7.0,0.429,2.0,7.0,9.0,2.0,1.0,-7.0
1,Aaron,Gordon,203932.0,42400175.0,2025-04-29 22:00:00,Denver,Nuggets,Los Angeles,Clippers,Playoffs,...,0.5,8.0,9.0,0.889,2.0,2.0,4.0,4.0,1.0,13.0
2,Dario,Saric,203967.0,42400175.0,2025-04-29 22:00:00,Denver,Nuggets,Los Angeles,Clippers,Playoffs,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bogdan,Bogdanovic,203992.0,42400175.0,2025-04-29 22:00:00,Los Angeles,Clippers,Denver,Nuggets,Playoffs,...,0.5,4.0,4.0,1.0,1.0,2.0,3.0,5.0,2.0,-8.0
4,Nikola,Jokic,203999.0,42400175.0,2025-04-29 22:00:00,Denver,Nuggets,Los Angeles,Clippers,Playoffs,...,0.25,4.0,5.0,0.8,1.0,9.0,10.0,1.0,2.0,18.0
5,Jaylen,Brown,1627759.0,42400115.0,2025-04-29 20:30:00,Boston,Celtics,Orlando,Magic,Playoffs,...,0.5,3.0,5.0,0.6,1.0,5.0,6.0,3.0,2.0,11.0
6,Jayson,Tatum,1628369.0,42400115.0,2025-04-29 20:30:00,Boston,Celtics,Orlando,Magic,Playoffs,...,0.8,11.0,11.0,1.0,0.0,8.0,8.0,2.0,2.0,34.0
7,Jonathan,Isaac,1628371.0,42400115.0,2025-04-29 20:30:00,Orlando,Magic,Boston,Celtics,Playoffs,...,0.0,1.0,2.0,0.5,0.0,1.0,1.0,1.0,2.0,-8.0
8,Derrick,White,1628401.0,42400115.0,2025-04-29 20:30:00,Boston,Celtics,Orlando,Magic,Playoffs,...,0.0,1.0,1.0,1.0,0.0,4.0,4.0,3.0,1.0,8.0
9,Luke,Kornet,1628436.0,42400115.0,2025-04-29 20:30:00,Boston,Celtics,Orlando,Magic,Playoffs,...,0.0,3.0,3.0,1.0,0.0,4.0,4.0,2.0,1.0,23.0


In [35]:

# Columns pulled from playerstatistics for games on or after 2024-01-01.
sql = """
SELECT 
    personid,
    firstname,
    lastname,
    playerteamname,
    opponentteamname,
    points,
    assists,
    reboundstotal,
    reboundsoffensive,
    reboundsdefensive,
    fieldgoalspercentage,
    threepointerspercentage,
    freethrowspercentage,
    numminutes,
    turnovers,
    foulspersonal,
    plusminuspoints,
    gamedate
FROM playerstatistics
WHERE gamedate >= '2024-01-01'
"""

# Stats that get averaged per player per week.
WEEKLY_STAT_COLUMNS = (
    'points',
    'assists',
    'reboundstotal',
    'turnovers',
    'numminutes',
    'plusminuspoints',
    'fieldgoalspercentage',
    'threepointerspercentage',
    'freethrowspercentage',
)


def weekly_player_averages(games, stat_columns=WEEKLY_STAT_COLUMNS):
    """Average the given stat columns per player per calendar week.

    Parameters
    ----------
    games : pandas.DataFrame
        Must contain ``personid``, a datetime64 ``gamedate`` column and every
        column named in ``stat_columns``.
    stat_columns : iterable of str
        Columns to average; defaults to ``WEEKLY_STAT_COLUMNS``.

    Returns
    -------
    pandas.DataFrame
        One row per (personid, week) with ``personid``, ``gamedate`` and the
        averaged stats as columns. ``freq='W'`` buckets by week and labels
        each bucket with its closing Sunday.

    Notes
    -----
    The percentage columns are plain means of per-game percentages, not
    makes divided by attempts over the week.
    """
    return (
        games.groupby(['personid', pd.Grouper(key='gamedate', freq='W')])
             .agg({column: 'mean' for column in stat_columns})
             .reset_index()
    )


# Load inside this cell, before any use of `df`, so the cell runs on a fresh
# kernel (Restart & Run All) instead of relying on a `df` left over from an
# earlier session.
df = query(sql)
print("Loaded data:", df.shape)

# Kept from the original cell: exposes the row position as an `index` column,
# in case later cells expect it.
df = df.reset_index()

# Parse as UTC, then drop the timezone so pd.Grouper sees naive timestamps;
# values that cannot be parsed become NaT and are removed below.
df['gamedate'] = pd.to_datetime(df['gamedate'], errors='coerce', utc=True).dt.tz_localize(None)
df = df.dropna(subset=['gamedate'])
print("gamedate dtype:", df['gamedate'].dtype)

weekly = weekly_player_averages(df)

print("Weekly aggregation complete:", weekly.shape)
display(weekly.head(50))


gamedate dtype: datetime64[ns]
 Weekly aggregation complete: (21411, 11)


Unnamed: 0,personid,gamedate,points,assists,reboundstotal,turnovers,numminutes,plusminuspoints,fieldgoalspercentage,threepointerspercentage,freethrowspercentage
0,2544.0,2024-01-07,23.0,7.666667,6.333333,3.333333,38.0,-7.666667,0.456667,0.259,0.422333
1,2544.0,2024-01-14,16.0,10.5,5.0,3.5,29.5,-12.5,0.372,0.2,0.619
2,2544.0,2024-01-21,25.5,6.0,7.75,3.75,33.75,9.25,0.50875,0.426,0.675
3,2544.0,2024-01-28,30.5,12.0,12.0,4.0,41.0,4.0,0.543,0.625,1.0
4,2544.0,2024-02-04,22.333333,7.666667,6.666667,1.666667,37.0,-2.0,0.512667,0.377667,0.606333
5,2544.0,2024-02-11,24.0,9.333333,5.666667,2.666667,37.0,3.666667,0.485667,0.444333,0.847333
6,2544.0,2024-02-18,25.0,8.0,1.0,4.0,30.0,9.0,0.6,0.5,1.0
7,2544.0,2024-02-25,29.0,10.5,7.0,2.5,35.0,6.5,0.6115,0.5835,0.625
8,2544.0,2024-03-03,30.333333,8.666667,4.666667,3.333333,37.333333,1.666667,0.573,0.461,0.571333
9,2544.0,2024-03-10,26.333333,10.0,8.0,3.333333,34.333333,8.333333,0.583333,0.361,0.859333


Loaded data: (62777, 18)
Weekly aggregation complete: (21411, 11)


Unnamed: 0,personid,gamedate,points,assists,reboundstotal,turnovers,numminutes,plusminuspoints,fieldgoalspercentage,threepointerspercentage,freethrowspercentage
0,2544.0,2024-01-07,23.0,7.666667,6.333333,3.333333,38.0,-7.666667,0.456667,0.259,0.422333
1,2544.0,2024-01-14,16.0,10.5,5.0,3.5,29.5,-12.5,0.372,0.2,0.619
2,2544.0,2024-01-21,25.5,6.0,7.75,3.75,33.75,9.25,0.50875,0.426,0.675
3,2544.0,2024-01-28,30.5,12.0,12.0,4.0,41.0,4.0,0.543,0.625,1.0
4,2544.0,2024-02-04,22.333333,7.666667,6.666667,1.666667,37.0,-2.0,0.512667,0.377667,0.606333
5,2544.0,2024-02-11,24.0,9.333333,5.666667,2.666667,37.0,3.666667,0.485667,0.444333,0.847333
6,2544.0,2024-02-18,25.0,8.0,1.0,4.0,30.0,9.0,0.6,0.5,1.0
7,2544.0,2024-02-25,29.0,10.5,7.0,2.5,35.0,6.5,0.6115,0.5835,0.625
8,2544.0,2024-03-03,30.333333,8.666667,4.666667,3.333333,37.333333,1.666667,0.573,0.461,0.571333
9,2544.0,2024-03-10,26.333333,10.0,8.0,3.333333,34.333333,8.333333,0.583333,0.361,0.859333


In [36]:
# Raise pandas' global row cap to 1000 (this stays in effect for later cells),
# then render the weekly frame; frames longer than the cap still show truncated.
pd.options.display.max_rows = 1000
display(weekly)


Unnamed: 0,personid,gamedate,points,assists,reboundstotal,turnovers,numminutes,plusminuspoints,fieldgoalspercentage,threepointerspercentage,freethrowspercentage
0,2544.0,2024-01-07,23.000000,7.666667,6.333333,3.333333,38.000,-7.666667,0.456667,0.259000,0.422333
1,2544.0,2024-01-14,16.000000,10.500000,5.000000,3.500000,29.500,-12.500000,0.372000,0.200000,0.619000
2,2544.0,2024-01-21,25.500000,6.000000,7.750000,3.750000,33.750,9.250000,0.508750,0.426000,0.675000
3,2544.0,2024-01-28,30.500000,12.000000,12.000000,4.000000,41.000,4.000000,0.543000,0.625000,1.000000
4,2544.0,2024-02-04,22.333333,7.666667,6.666667,1.666667,37.000,-2.000000,0.512667,0.377667,0.606333
...,...,...,...,...,...,...,...,...,...,...,...
21406,1642530.0,2025-03-16,0.000000,0.000000,0.000000,0.000000,,0.000000,0.000000,0.000000,0.000000
21407,1642530.0,2025-03-23,0.000000,0.000000,0.000000,0.000000,,0.000000,0.000000,0.000000,0.000000
21408,1642530.0,2025-03-30,0.000000,0.000000,0.000000,0.000000,,0.000000,0.000000,0.000000,0.000000
21409,1642530.0,2025-04-06,0.000000,0.000000,0.000000,0.000000,,0.000000,0.000000,0.000000,0.000000
