In [1]:
import requests
import lxml
from typing import Optional
import cloudscraper
import pandas as pd
from lxml.html import HTMLParser
import re
import requests
from io import StringIO
from lxml import html
import numpy as np

In [2]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Creates a connection to the NBA PostgreSQL database
def get_nba_db():
    host = os.getenv("NBA_DB_HOST")
    port = os.getenv("NBA_DB_PORT")
    database = os.getenv("NBA_DB_NAME")
    user = os.getenv("NBA_DB_USER")
    password = os.getenv("NBA_DB_PASSWORD")
    
    connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
    return create_engine(connection_string)

# Executes SQL query against the NBA database and returns results as a pandas DataFrame.
def query(sql):
    engine = get_nba_db()
    return pd.read_sql(sql, engine)

# Function to display available tables in database
def list_tables():
    tables = query("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'")
    return tables['table_name'].tolist()

def get_csv_df(csv_name):
    url = f"https://storage.googleapis.com/nba_award_predictor/nba_data/{csv_name}"

    r = requests.get(url)
    r.raise_for_status()

    csv_text = r.content.decode('utf-8')
    df = pd.read_csv(StringIO(csv_text))
    return df

In [3]:
def strip_or_null(text: Optional[str]) -> Optional[str]:
    if text is not None:
        return text.strip() or None
    
MOJI = re.compile(r"[ÃÂÄÅ]")  # markers that tell us mojibake happened
    
def fix_name(name: str) -> str:
    if not isinstance(name, str):
        return name
    s = name.strip()
    # Try standard mojibake repair
    if MOJI.search(s):
        try:
            s = s.encode("latin1").decode("utf-8")
        except Exception:
            pass  # fall through to dangling fix
    # Fix dangling lead byte "Ä" at the end (lost second byte \x87 => ć)
    if s.endswith("Ä"):
        s = s[:-1] + "ć"
    return s

def fix_mojibake(s):
    try:
        return s.encode('latin1').decode('utf8')
    except:
        return s

def extract_weekly_winners(doc):
    def get_names(p, conf=None):
        if conf:
            conf_path = f'[contains(@data-desc, "{conf}")]'
        else:
            conf_path = ""
        names = [strip_or_null(t) for t in p.xpath(f".//a{conf_path}/text()") if strip_or_null(t)]
        return names
    
    def create_rows(out, season, month, date, names, c):
        tie = 0 if len(names) == 1 else 1
        for n in names:
            out.append({
                        "season": season,
                        "month": month,
                        "date": date,
                        "conference": c,
                        "player": n,
                        "tie": tie
                        })

    out = []
    # Each season block
    for group in doc.xpath("//div[@class='data_grid_group']"):
        season = strip_or_null(group.xpath("string(.//h3[1])"))
        if not season:
            continue
        # Each month box within the season group
        for box in group.xpath(".//div[contains(@class,'data_grid_box')]"):
            month = strip_or_null(box.xpath("string(.//div[@class='gridtitle'])"))
            if not month:
                continue
            for p in box.xpath(".//div/p[.//strong] | .//p[.//strong]"):
                date = strip_or_null(p.xpath("string(.//strong[1])"))
                if not date:
                    continue
                
                e_names = get_names(p, 'Eastern')
                if e_names:
                    create_rows(out, season, month, date, e_names, 'E')
                
                w_names = get_names(p, 'Western')
                if w_names:
                    create_rows(out, season, month, date, w_names, 'W')
                
                if not e_names and not w_names:
                    names = get_names(p)
                    create_rows(out, season, month, date, names, 'A')
    return out

MONTH_MAP = {
            "January": 1,
            "February": 2,
            "March": 3,
            "April": 4,
            "May": 5,
            "June": 6,
            "July": 7,
            "August": 8,
            "September": 9,
            "October": 10,
            "November": 11,
            "December": 12
           }
def get_player_of_week_df():
    def extract_year(row):
        if 6 < row['month'] <= 12:
            return int(row['season'][:4])
        else:
            return int(row['season'][:4]) + 1
    
    URL = "https://www.basketball-reference.com/awards/pow.html"
    scraper = cloudscraper.create_scraper(browser={'custom': 'Chrome/124'})  # mimics a browser
    r = scraper.get(URL, timeout=20)
    parser = HTMLParser(encoding="utf-8")
    doc = html.fromstring(r.content, parser=parser)

    out = extract_weekly_winners(doc)
    pow_df = pd.DataFrame(out)
    pow_df['month'] = pow_df['month'].apply(lambda x: MONTH_MAP[x])
    pow_df['day'] = pow_df['date'].str.split().str[-1].astype(int)
    pow_df['year'] = pow_df.apply(extract_year, axis=1)
    pow_df['firstname'] = pow_df['player'].str.split().str[0]
    pow_df['lastname'] = pow_df['player'].str.split().str[-1]
    pow_df["date"] = pd.to_datetime(pow_df[["year", "month", "day"]])
    pow_df["week"] = pow_df["date"].dt.isocalendar().week

    return pow_df[['season', 'date', 'year', 'month', 'week', 'day', 'conference', 'player', 'firstname', 'lastname', 'tie']]

import numpy as np
import pandas as pd

def build_team_games(df, filter=None):
    df = df.copy()

    # Parse datetime with mixed formats
    df['gamedate'] = pd.to_datetime(
        df['gamedate'],
        utc=True,
        errors='coerce',
        format='mixed',
        dayfirst=True,
    )

    if filter:
        df = df.query(filter)

    # Normalize: make two rows per game (home + away)
    home = df.rename(columns={
        'hometeamname': 'team',
        'hometeamid':   'teamid',
        'homescore':    'team_score',
        'awayscore':    'opp_score'
    })[['gameid','gamedate','team','teamid','team_score','opp_score','winner',
        'awayteamname','awayteamid']].assign(
            home = 1,
            opponent  = lambda x: x['awayteamname'],
            opponentid= lambda x: x['awayteamid'],
        ).drop(columns=['awayteamname','awayteamid'])

    away = df.rename(columns={
        'awayteamname': 'team',
        'awayteamid':   'teamid',
        'awayscore':    'team_score',
        'homescore':    'opp_score'
    })[['gameid','gamedate','team','teamid','team_score','opp_score','winner',
        'hometeamname','hometeamid']].assign(
            home = 0,
            opponent  = lambda x: x['hometeamname'],
            opponentid= lambda x: x['hometeamid'],
        ).drop(columns=['hometeamname','hometeamid'])

    long = pd.concat([home, away], ignore_index=True)

    # Outcome flags
    long['is_win']  = (long['teamid'] == long['winner']).astype(int)
    long['outcome'] = long['is_win'].map({1: 'win', 0: 'loss'})
    long['is_home_win'] = ((long['home'] == 1) & (long['is_win'] == 1)).astype(int)
    long['is_away_win'] = ((long['home'] == 0) & (long['is_win'] == 1)).astype(int)

    # Season (season starts in July)
    year = long['gamedate'].dt.year
    month = long['gamedate'].dt.month
    long['season'] = np.where(month >= 7, year, year - 1)

    # Sort for rolling calculations
    long = long.sort_values(['teamid', 'season', 'gamedate', 'gameid'], kind='mergesort')

    # Group by team + season for all season-based stats
    g = long.groupby(['teamid', 'season'], group_keys=False)

    # Games played prior (per season)
    long['games_prior'] = g.cumcount()

    # Wins / losses prior (per season)
    long['wins_prior'] = g['is_win'].transform(
        lambda s: s.shift().fillna(0).cumsum().astype(int)
    )
    long['losses_prior'] = long['games_prior'] - long['wins_prior']

    # Home / away games prior (per season)
    long['home_games_prior'] = g['home'].transform(
        lambda s: s.shift(fill_value=0).cumsum()
    )
    long['away_games_prior'] = long['games_prior'] - long['home_games_prior']

    # Home / away wins prior (per season)
    long['home_wins_prior'] = g['is_home_win'].transform(
        lambda s: s.shift().fillna(0).cumsum().astype(int)
    )
    long['away_wins_prior'] = g['is_away_win'].transform(
        lambda s: s.shift().fillna(0).cumsum().astype(int)
    )

    # Home / away losses prior (per season)
    long['home_losses_prior'] = long['home_games_prior'] - long['home_wins_prior']
    long['away_losses_prior'] = long['away_games_prior'] - long['away_wins_prior']

    # Win streaks prior (per season)
    def streak_prior(series):
        prior = series.shift().fillna(0).astype(int)
        # reset when we see a 0
        return prior.groupby((prior == 0).cumsum()).cumsum().astype(int)

    long['win_streak_prior'] = g['is_win'].transform(streak_prior)
    long['home_win_streak_prior'] = g['is_home_win'].transform(streak_prior)
    long['away_win_streak_prior'] = g['is_away_win'].transform(streak_prior)

    # Record strings (per season)
    long['record_prior'] = long['wins_prior'].astype(str) + '-' + long['losses_prior'].astype(str)
    long['home_record_prior'] = long['home_wins_prior'].astype(str) + '-' + long['home_losses_prior'].astype(str)
    long['away_record_prior'] = long['away_wins_prior'].astype(str) + '-' + long['away_losses_prior'].astype(str)

    # Opponent's prior record for same game (still season-based because wins_prior/losses_prior are)
    opp_prior = long[['gameid','teamid','wins_prior','losses_prior']].rename(
        columns={
            'teamid': 'opponentid',
            'wins_prior': 'opp_wins_prior',
            'losses_prior': 'opp_losses_prior'
        }
    )

    long = long.merge(opp_prior, on=['gameid','opponentid'], how='left')

    # Opponent winrate prior to the game
    opp_games_prior = long['opp_wins_prior'] + long['opp_losses_prior']
    long['opp_winrate_prior'] = long['opp_wins_prior'] / opp_games_prior.where(opp_games_prior > 0)

    # Flag for wins vs > .500 opponent (per game)
    long['is_win_vs_over_500'] = (
        (long['is_win'] == 1) &
        (long['opp_winrate_prior'] > 0.5)
    ).astype(int)

    # Per-team, per-season cumulative PRIOR wins vs > .500
    long['wins_vs_over_500_prior'] = (
        long.groupby(['teamid', 'season'])['is_win_vs_over_500']
            .transform(lambda s: s.shift().fillna(0).cumsum().astype(int))
    )

    # Week-based (Mon–Sun) record PRIOR, per-season
    iso = long['gamedate'].dt.isocalendar()
    long['week_year'] = iso['year']
    long['week_num']  = iso['week']

    gw = long.groupby(['teamid','season','week_year','week_num'], group_keys=False)

    long['week_games_prior'] = gw.cumcount()
    long['week_wins_prior'] = gw['is_win'].transform(
        lambda s: s.shift().fillna(0).cumsum().astype(int)
    )
    long['week_losses_prior'] = long['week_games_prior'] - long['week_wins_prior']

    long['week_record_prior'] = (
        long['week_wins_prior'].astype(str)
        + '-' +
        long['week_losses_prior'].astype(str)
    )

    # Final output
    out = long[['gameid','gamedate',
                'team','teamid',
                'opponent','opponentid',
                'outcome','home',
                'team_score','opp_score',
                'games_prior','wins_prior','losses_prior','record_prior',
                'home_games_prior','home_wins_prior','home_losses_prior','home_record_prior',
                'away_games_prior','away_wins_prior','away_losses_prior','away_record_prior',
                'win_streak_prior','home_win_streak_prior','away_win_streak_prior',
                'opp_wins_prior','opp_losses_prior','opp_winrate_prior',
                'is_win_vs_over_500','wins_vs_over_500_prior',
                'week_games_prior','week_wins_prior','week_losses_prior','week_record_prior',
                'season'
               ]]
    out.loc[:, 'gameid'] = out['gameid'].astype(int)
    out = out.rename(columns={'gameid': 'gameId'})

    return out.sort_values(['gamedate','gameId','home']).reset_index(drop=True)

SPECIAL_LASTNAMES = {
                    #"Dončić": "Doncic",
                    "Şengün": "Sengun",
                    #"Porziņģis": "Porzingis",
                    #"Jokić": "Jokic",
                    #"Vučević": "Vucevic",
                    "Dragić": "Dragic",
                    "Peković": "Pekovic",
                    "Bogdanović": "Bogdanovic",
                    #"Schröder": "Schroder",
                    "Vásquez": "Vasquez",
                    "İlyasova": "Ilyasova",
                    "Ginóbili": "Ginobili",
                    "Türkoğlu": "Turkoglu",
                    "Stojaković": "Stojakovic",
                    "Petrović": "Petrovic",
                    "Peace": "World Peace",
                    }

SPECIAL_FIRSTNAMES = {
                     "Dražen": "Drazen",
                     "Fat": "Lafayette",
                     "J.R.": "JR",
                     "Cliff": "Clifford",
                     "Hot": "John",
                     "Tiny": "Nate",
                     }

def remove_special_names(row):
    if row['lastname'] == "Jr.":
        row['lastname'] = row['player'].split()[-2]
    if row['player'] == "Steve Smith":
        row['firstname'] = "Steven"
    if row['player'] == 'Billy Ray Bates':
        row['firstname'] = "Billyray"
    if row['player'] == "Joe Barry Carroll":
        row['firstname'] = "Joe Barry"
    if row['player'] == "Michael Ray Richardson":
        row['firstname'] = "Micheal Ray"
    if row['player'] == "Cliff Robinson":
        row['firstname'] = "Clifford"
    if row['player'] == "Jimmy Butler":
        row['lastname'] = "Butler III"
    row['firstname'] = SPECIAL_FIRSTNAMES.get(row['firstname'], row['firstname'])
    row['lastname'] = SPECIAL_LASTNAMES.get(row['lastname'], row['lastname'])
    row['player'] = row['firstname'] + " " + row['lastname']
    return row

def compute_player_wins(df):
    df = df.copy()

    df['player_played'] = df['player_id'].notna().astype(int)

    df['player_win'] = ((df['player_played'] == 1) & (df['outcome'] == 'win')).astype(int)

    g_season = df.groupby(['player_id', 'season'], dropna=False)

    df['player_games_season_prior'] = g_season['player_played'].transform(
    lambda s: s.shift().fillna(0).cumsum())

    df['player_wins_season_prior'] = g_season['player_win'].transform(
    lambda s: s.shift().fillna(0).cumsum())

    g_week = df.groupby(['player_id', 'season', 'week'], dropna=False)

    df['player_games_week_prior'] = g_week['player_played'].transform(
    lambda s: s.shift().fillna(0).cumsum())

    df['player_wins_week_prior'] = g_week['player_win'].transform(
        lambda s: s.shift().fillna(0).cumsum())
    
    return df

In [4]:
game = query("""SELECT * FROM games""")
"""
game['year'] = game['gamedate'].apply(lambda x: int(x[:4]))
l2 = len(game)
game = game[game['year'] >= 1979]
l1 = len(game)
"""
game = build_team_games(game, "gamedate.dt.year >= 1979")
#print(f"Original row count: {l1} \nNew row count: {len(game)} \nCorrect output count: {l1 * 2 == len(game)}")

In [5]:
game.columns

Index(['gameId', 'gamedate', 'team', 'teamid', 'opponent', 'opponentid',
       'outcome', 'home', 'team_score', 'opp_score', 'games_prior',
       'wins_prior', 'losses_prior', 'record_prior', 'home_games_prior',
       'home_wins_prior', 'home_losses_prior', 'home_record_prior',
       'away_games_prior', 'away_wins_prior', 'away_losses_prior',
       'away_record_prior', 'win_streak_prior', 'home_win_streak_prior',
       'away_win_streak_prior', 'opp_wins_prior', 'opp_losses_prior',
       'opp_winrate_prior', 'is_win_vs_over_500', 'wins_vs_over_500_prior',
       'week_games_prior', 'week_wins_prior', 'week_losses_prior',
       'week_record_prior', 'season'],
      dtype='object')

In [6]:
game.isnull().sum()

gameId                       0
gamedate                     0
team                         0
teamid                       0
opponent                     0
opponentid                   0
outcome                      0
home                         0
team_score                   0
opp_score                    0
games_prior                  0
wins_prior                   0
losses_prior                 0
record_prior                 0
home_games_prior             0
home_wins_prior              0
home_losses_prior            0
home_record_prior            0
away_games_prior             0
away_wins_prior              0
away_losses_prior            0
away_record_prior            0
win_streak_prior             0
home_win_streak_prior        0
away_win_streak_prior        0
opp_wins_prior               0
opp_losses_prior             0
opp_winrate_prior         1340
is_win_vs_over_500           0
wins_vs_over_500_prior       0
week_games_prior             0
week_wins_prior              0
week_los

In [233]:
pow_df = get_player_of_week_df()
pow_df = pow_df.apply(remove_special_names, axis=1)

In [None]:
pow_3 = get_player_of_week_df()
pow_3 = pow_3.apply(remove_special_names, axis=1)

In [None]:
pow_3.head()

Unnamed: 0,season,date,year,month,week,day,conference,player,firstname,lastname,tie,player_id,player_name,_merge
0,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,Giannis Antetokounmpo,both
1,2025-26,2025-10-26,2025,10,43,26,W,Victor Wembanyama,Victor,Wembanyama,0,1641705,Victor Wembanyama,both
2,2025-26,2025-11-02,2025,11,44,2,E,Tyrese Maxey,Tyrese,Maxey,0,1630178,Tyrese Maxey,both
3,2025-26,2025-11-02,2025,11,44,2,W,Shai Gilgeous-Alexander,Shai,Gilgeous-Alexander,0,1628983,Shai Gilgeous-Alexander,both
4,2025-26,2025-11-09,2025,11,45,9,E,Cade Cunningham,Cade,Cunningham,0,1630595,Cade Cunningham,both


In [212]:
player_id_df = get_csv_df('nba_player_lookup.csv')

In [235]:
l1 = len(pow_df)
id_df = player_id_df[['player_id', 'player_name']]
pow_df = pd.merge(pow_df, id_df, left_on="player", right_on="player_name", how="left", indicator=True)
print(f"Successfully added player IDs: {l1 == len(pow_df)}")

Successfully added player IDs: False


In [236]:
pow_df[pow_df['_merge'] != "both"]

Unnamed: 0,season,date,year,month,week,day,conference,player,firstname,lastname,tie,player_id,player_name,_merge


In [237]:
unique = pow_df.groupby("player")['player_id'].nunique()
unique[unique > 1]

player
Eddie Johnson    2
Glen Rice        2
Larry Johnson    2
Mike Dunleavy    2
Mike James       2
Patrick Ewing    2
Steven Smith     2
Name: player_id, dtype: int64

In [218]:
player_statistics_df = get_csv_df("player-statistics.csv")

  df = pd.read_csv(StringIO(csv_text))


In [296]:
player_statistics_df.columns

Index(['firstName', 'lastName', 'full_name', 'player_id', 'gameId', 'gameDate',
       'playerteamCity', 'playerteamName', 'opponentteamCity',
       'opponentteamName', 'gameType', 'gameLabel', 'gameSubLabel',
       'seriesGameNumber', 'win', 'home', 'numMinutes', 'points', 'assists',
       'blocks', 'steals', 'fieldGoalsAttempted', 'fieldGoalsMade',
       'fieldGoalsPercentage', 'threePointersAttempted', 'threePointersMade',
       'threePointersPercentage', 'freeThrowsAttempted', 'freeThrowsMade',
       'freeThrowsPercentage', 'reboundsDefensive', 'reboundsOffensive',
       'reboundsTotal', 'foulsPersonal', 'turnovers', 'plusMinusPoints'],
      dtype='object')

In [301]:
player_unique_count = pow_df['player'].nunique()
select_player_stats_df = player_statistics_df[player_statistics_df['numMinutes'] > 0][['player_id', 'gameId', 'gameDate', 'playerteamName']]
select_pow_df = pow_df[['season', 'date', 'year', 'month', 'week', 'day', 'conference', 'player', 'firstname', 'lastname', 'tie', 'player_id']]
pow_games_df = pd.merge(select_pow_df, select_player_stats_df, on='player_id')
print(f"Successfully merged player of week and game stats: {player_unique_count == pow_games_df['player'].nunique()}")


Successfully merged player of week and game stats: True


In [302]:
pow_games_df['gameDate'] = pd.to_datetime(
        pow_games_df['gameDate'],
        utc=True,
        errors='coerce',
        format='mixed',
        dayfirst=True,
    )
pow_games_df['gameYear'] = pow_games_df['gameDate'].dt.year
pow_games_df['gameWeek'] = pow_games_df['gameDate'].dt.isocalendar().week
pow_games_df = pow_games_df.rename(columns={'playerteamName': 'team'})


In [303]:
pow_games_df.isnull().sum()

season        0
date          0
year          0
month         0
week          0
day           0
conference    0
player        0
firstname     0
lastname      0
tie           0
player_id     0
gameId        0
gameDate      0
team          0
gameYear      0
gameWeek      0
dtype: int64

In [304]:
pow_games_df.head()

Unnamed: 0,season,date,year,month,week,day,conference,player,firstname,lastname,tie,player_id,gameId,gameDate,team,gameYear,gameWeek
0,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500203,2025-11-10 20:30:00+00:00,Bucks,2025,46
1,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500191,2025-11-09 15:30:00+00:00,Bucks,2025,45
2,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500035,2025-11-07 20:00:00+00:00,Bucks,2025,45
3,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500165,2025-11-04 19:30:00+00:00,Bucks,2025,45
4,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500157,2025-11-03 19:00:00+00:00,Bucks,2025,45


In [305]:
game_pow = pow_games_df[(pow_games_df['week'] == pow_games_df['gameWeek']) & (pow_games_df['year'] == pow_games_df['gameYear'])]

In [306]:
game_pow.head()

Unnamed: 0,season,date,year,month,week,day,conference,player,firstname,lastname,tie,player_id,gameId,gameDate,team,gameYear,gameWeek
7,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500107,2025-10-26 18:00:00+00:00,Bucks,2025,43
8,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500092,2025-10-24 18:30:00+00:00,Bucks,2025,43
9,2025-26,2025-10-26,2025,10,43,26,E,Giannis Antetokounmpo,Giannis,Antetokounmpo,0,203507,22500086,2025-10-22 20:00:00+00:00,Bucks,2025,43
1007,2025-26,2025-10-26,2025,10,43,26,W,Victor Wembanyama,Victor,Wembanyama,0,1641705,22500105,2025-10-26 14:00:00+00:00,Spurs,2025,43
1008,2025-26,2025-10-26,2025,10,43,26,W,Victor Wembanyama,Victor,Wembanyama,0,1641705,22500095,2025-10-24 20:00:00+00:00,Spurs,2025,43


In [None]:
game = game.drop(columns=['season'])
game_pow = game_pow.drop(columns=['gameYear', 'gameWeek', 'gameDate'])
combined_df = pd.merge(game, game_pow, on=["gameId", 'team'], how="left")

In [309]:
combined_df

Unnamed: 0,gameId,gamedate,team,teamid,opponent,opponentid,outcome,home,team_score,opp_score,...,year,month,week,day,conference,player,firstname,lastname,tie,player_id
0,27800397,1979-01-01 20:00:00+00:00,Warriors,1610612744,SuperSonics,1610612760,win,0,110,97,...,,,,,,,,,,
1,27800397,1979-01-01 20:00:00+00:00,SuperSonics,1610612760,Warriors,1610612744,loss,1,97,110,...,,,,,,,,,,
2,27800398,1979-01-02 20:00:00+00:00,Nuggets,1610612743,Kings,1610612758,win,0,99,97,...,,,,,,,,,,
3,27800398,1979-01-02 20:00:00+00:00,Kings,1610612758,Nuggets,1610612743,loss,1,97,99,...,,,,,,,,,,
4,27800399,1979-01-02 20:00:00+00:00,Bulls,1610612741,Bullets,1610612764,loss,0,86,109,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115564,22500222,2025-11-12 21:30:00+00:00,Thunder,1610612760,Lakers,1610612747,win,1,121,92,...,,,,,,,,,,
115565,22500223,2025-11-12 22:00:00+00:00,Hawks,1610612737,Kings,1610612758,win,0,133,100,...,,,,,,,,,,
115566,22500223,2025-11-12 22:00:00+00:00,Kings,1610612758,Hawks,1610612737,loss,1,100,133,...,,,,,,,,,,
115567,22500224,2025-11-12 22:30:00+00:00,Nuggets,1610612743,Clippers,1610612746,win,0,130,116,...,,,,,,,,,,


In [310]:
combined_df.columns

Index(['gameId', 'gamedate', 'team', 'teamid', 'opponent', 'opponentid',
       'outcome', 'home', 'team_score', 'opp_score', 'games_prior',
       'wins_prior', 'losses_prior', 'record_prior', 'home_games_prior',
       'home_wins_prior', 'home_losses_prior', 'home_record_prior',
       'away_games_prior', 'away_wins_prior', 'away_losses_prior',
       'away_record_prior', 'win_streak_prior', 'home_win_streak_prior',
       'away_win_streak_prior', 'opp_wins_prior', 'opp_losses_prior',
       'opp_winrate_prior', 'is_win_vs_over_500', 'wins_vs_over_500_prior',
       'week_games_prior', 'week_wins_prior', 'week_losses_prior',
       'week_record_prior', 'season', 'date', 'year', 'month', 'week', 'day',
       'conference', 'player', 'firstname', 'lastname', 'tie', 'player_id'],
      dtype='object')

In [315]:
df = compute_player_wins(combined_df)

In [316]:
df[df['player_played'] != 0]

Unnamed: 0,gameId,gamedate,team,teamid,opponent,opponentid,outcome,home,team_score,opp_score,...,firstname,lastname,tie,player_id,player_played,player_win,player_games_season_prior,player_wins_season_prior,player_games_week_prior,player_wins_week_prior
1188,27900035,1979-10-17 20:00:00+00:00,76ers,1610612755,Nets,1610612751,win,0,113,88,...,Julius,Erving,0.0,76681.0,1,1,0.0,0.0,0.0,0.0
1213,27900047,1979-10-19 20:00:00+00:00,76ers,1610612755,Pistons,1610612765,win,1,112,104,...,Julius,Erving,0.0,76681.0,1,1,1.0,1.0,1.0,1.0
1224,27900053,1979-10-20 20:00:00+00:00,76ers,1610612755,Knicks,1610612752,win,0,136,111,...,Julius,Erving,0.0,76681.0,1,1,2.0,2.0,2.0,2.0
1247,27900064,1979-10-23 20:00:00+00:00,Knicks,1610612752,Pacers,1610612754,win,1,136,112,...,Micheal Ray,Richardson,0.0,77952.0,1,1,0.0,0.0,0.0,0.0
1276,27900079,1979-10-26 20:00:00+00:00,Knicks,1610612752,76ers,1610612755,loss,0,116,127,...,Micheal Ray,Richardson,0.0,77952.0,1,0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115454,22500178,2025-11-05 21:00:00+00:00,Nuggets,1610612743,Heat,1610612748,win,1,122,112,...,Nikola,Jokić,0.0,203999.0,1,1,1.0,1.0,1.0,1.0
115469,22500031,2025-11-07 19:30:00+00:00,Pistons,1610612765,Nets,1610612751,win,0,125,107,...,Cade,Cunningham,0.0,1630595.0,1,1,2.0,2.0,2.0,2.0
115482,22500037,2025-11-07 22:00:00+00:00,Nuggets,1610612743,Warriors,1610612744,win,1,129,104,...,Nikola,Jokić,0.0,203999.0,1,1,2.0,2.0,2.0,2.0
115498,22500189,2025-11-08 21:00:00+00:00,Nuggets,1610612743,Pacers,1610612754,win,1,117,100,...,Nikola,Jokić,0.0,203999.0,1,1,3.0,3.0,3.0,3.0


In [321]:
df.columns

Index(['gameId', 'gamedate', 'team', 'teamid', 'opponent', 'opponentid',
       'outcome', 'home', 'team_score', 'opp_score', 'games_prior',
       'wins_prior', 'losses_prior', 'record_prior', 'home_games_prior',
       'home_wins_prior', 'home_losses_prior', 'home_record_prior',
       'away_games_prior', 'away_wins_prior', 'away_losses_prior',
       'away_record_prior', 'win_streak_prior', 'home_win_streak_prior',
       'away_win_streak_prior', 'opp_wins_prior', 'opp_losses_prior',
       'opp_winrate_prior', 'is_win_vs_over_500', 'wins_vs_over_500_prior',
       'week_games_prior', 'week_wins_prior', 'week_losses_prior',
       'week_record_prior', 'season', 'date', 'year', 'month', 'week', 'day',
       'conference', 'player', 'firstname', 'lastname', 'tie', 'player_id',
       'player_played', 'player_win', 'player_games_season_prior',
       'player_wins_season_prior', 'player_games_week_prior',
       'player_wins_week_prior'],
      dtype='object')

In [322]:
df.head()

Unnamed: 0,gameId,gamedate,team,teamid,opponent,opponentid,outcome,home,team_score,opp_score,...,firstname,lastname,tie,player_id,player_played,player_win,player_games_season_prior,player_wins_season_prior,player_games_week_prior,player_wins_week_prior
0,27800397,1979-01-01 20:00:00+00:00,Warriors,1610612744,SuperSonics,1610612760,win,0,110,97,...,,,,,0,0,0.0,0.0,0.0,0.0
1,27800397,1979-01-01 20:00:00+00:00,SuperSonics,1610612760,Warriors,1610612744,loss,1,97,110,...,,,,,0,0,0.0,0.0,0.0,0.0
2,27800398,1979-01-02 20:00:00+00:00,Nuggets,1610612743,Kings,1610612758,win,0,99,97,...,,,,,0,0,0.0,0.0,0.0,0.0
3,27800398,1979-01-02 20:00:00+00:00,Kings,1610612758,Nuggets,1610612743,loss,1,97,99,...,,,,,0,0,0.0,0.0,0.0,0.0
4,27800399,1979-01-02 20:00:00+00:00,Bulls,1610612741,Bullets,1610612764,loss,0,86,109,...,,,,,0,0,0.0,0.0,0.0,0.0
