In [1]:
from nba_api.stats.endpoints import SynergyPlayTypes, LeagueDashPtStats, CommonPlayerInfo, CommonTeamYears , CommonTeamRoster, CommonAllPlayers
from supabase import create_client, Client
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
import time

In [17]:
# Load Supabase credentials from the local env file into the process
# environment, then read them into module-level constants.
load_dotenv("../.env.local")

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# os.getenv returns None for unset variables; fail here with a clear message
# instead of letting the client constructor fail later on a None argument.
_missing_env = [
    name
    for name, value in (("SUPABASE_URL", SUPABASE_URL), ("SUPABASE_KEY", SUPABASE_KEY))
    if not value
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env)
        + " (expected in ../.env.local or the process environment)"
    )

In [18]:
# Shared Supabase client used by the upload cells below.
supabase: Client = create_client(
    supabase_url=SUPABASE_URL,
    supabase_key=SUPABASE_KEY,
)

In [4]:
# Season to pull, written as "YYYY-YY"
season = "2025-26"

print(f"Pulling players for season {season}")

# Query the league-wide player index for the chosen season
response = CommonAllPlayers(
    season=season,
    league_id="00",
    is_only_current_season=1,
)
all_players = response.get_data_frames()[0]

# Keep only the rows whose ROSTERSTATUS flag is 1 (currently rostered)
on_roster = all_players["ROSTERSTATUS"].eq(1)
df = all_players.loc[on_roster]

print(f"Collected {len(df)} active players for {season}")
df

Pulling players for season 2025-26
Collected 524 active players for 2025-26


Unnamed: 0,PERSON_ID,DISPLAY_LAST_COMMA_FIRST,DISPLAY_FIRST_LAST,ROSTERSTATUS,FROM_YEAR,TO_YEAR,PLAYERCODE,PLAYER_SLUG,TEAM_ID,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_SLUG,TEAM_CODE,GAMES_PLAYED_FLAG,OTHERLEAGUE_EXPERIENCE_CH
0,1630173,"Achiuwa, Precious",Precious Achiuwa,1,2020,2025,precious_achiuwa,precious_achiuwa,1610612758,Sacramento,Kings,SAC,kings,kings,Y,00
1,203500,"Adams, Steven",Steven Adams,1,2013,2025,steven_adams,steven_adams,1610612745,Houston,Rockets,HOU,rockets,rockets,Y,00
2,1628389,"Adebayo, Bam",Bam Adebayo,1,2017,2025,bam_adebayo,bam_adebayo,1610612748,Miami,Heat,MIA,heat,heat,Y,00
3,1630534,"Agbaji, Ochai",Ochai Agbaji,1,2022,2025,ochai_agbaji,ochai_agbaji,1610612761,Toronto,Raptors,TOR,raptors,raptors,Y,00
4,1630583,"Aldama, Santi",Santi Aldama,1,2021,2025,santi_aldama,santi_aldama,1610612763,Memphis,Grizzlies,MEM,grizzlies,grizzlies,Y,01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
541,1642911,"Zikarsky, Rocco",Rocco Zikarsky,1,2025,2025,rocco_zikarsky,rocco_zikarsky,1610612750,Minnesota,Timberwolves,MIN,timberwolves,timberwolves,Y,11
542,1627826,"Zubac, Ivica",Ivica Zubac,1,2016,2025,ivica_zubac,ivica_zubac,1610612746,LA,Clippers,LAC,clippers,clippers,Y,01
543,1641783,"da Silva, Tristan",Tristan da Silva,1,2024,2025,tristan_da_silva,tristan_da_silva,1610612753,Orlando,Magic,ORL,magic,magic,Y,00
544,1642365,"Đurišić, Nikola",Nikola Đurišić,1,2025,2025,nikola_durisic,nikola_đurišić,1610612737,Atlanta,Hawks,ATL,hawks,hawks,Y,11


In [None]:
# IDs of every player in the active-roster frame
player_ids = df["PERSON_ID"].tolist()

# Players already uploaded, read from a local CSV
# (presumably an export of the `players` table - confirm)
uploaded_players = pd.read_csv("players_rows.csv")

# Flag roster players whose ID already appears in the uploaded set,
# then keep the IDs of everyone else.
already_uploaded = df["PERSON_ID"].isin(uploaded_players["player_id"])
unuploaded_player_ids = df.loc[~already_uploaded, "PERSON_ID"].tolist()

print(unuploaded_player_ids)



[1630828, 1642846, 1642964, 1642926, 1642866, 1642886, 1641780, 1642868, 1642928, 1630619, 1642383, 1642363, 1631451, 1642907, 1643018, 1642885, 1630621, 1642856, 1642845, 1642400, 1642855, 1642847, 1642843, 1642853, 1642066, 1642884, 1642864, 1642844, 1642935, 1642857, 1642883, 1643052, 1642848, 1642938, 1642880, 1642357, 1641750, 1642939, 1642851, 1642404, 1642502, 1631126, 1642863, 1642918, 1642942, 1643024, 1642862, 1642867, 1642948, 1642854, 1642949, 1642950, 1642877, 1642869, 1643007, 1642962, 1642878, 1642852, 1642875, 1642954, 1642859, 1642860, 1642920, 1642879, 1642917, 1642914, 1642850, 1642876, 1642910, 1642849, 1642364, 1642873, 1642874, 1642905, 1642959, 1642911, 1642365]


In [11]:

# Fetch bio/draft details for every player that still needs uploading.
#
# Each player gets up to PLAYER_MAX_ATTEMPTS tries. The run stops early once
# PLAYER_MAX_CONSECUTIVE_FAILURES players in a row fail every try, on the
# assumption that the API is blocking or down rather than one ID being bad.
PLAYER_MAX_ATTEMPTS = 3
PLAYER_RETRY_BASE_DELAY = 30        # seconds to wait before retrying a failed request
PLAYER_REQUEST_DELAY = 5            # seconds to wait between players
PLAYER_MAX_CONSECUTIVE_FAILURES = 2

# Output column -> CommonPlayerInfo column it is read from
PLAYER_INFO_COLUMNS = {
    "player_id": "PERSON_ID",
    "player_name": "DISPLAY_FIRST_LAST",
    "position": "POSITION",
    "height": "HEIGHT",
    "weight": "WEIGHT",
    "school": "SCHOOL",
    "birthdate": "BIRTHDATE",
    "draft_year": "DRAFT_YEAR",
    "draft_round": "DRAFT_ROUND",
    "draft_number": "DRAFT_NUMBER",
}

detailed_players = []
failed_ids = []
consecutive_failures = 0
processed_count = 0  # how many IDs were attempted before the loop ended

for pid in unuploaded_player_ids:
    processed_count += 1
    success = False  # becomes True if any attempt succeeds

    for attempt in range(PLAYER_MAX_ATTEMPTS):
        try:
            info = CommonPlayerInfo(league_id_nullable="00", player_id=pid).get_data_frames()[0]

            row = {out_col: info.loc[0, src_col] for out_col, src_col in PLAYER_INFO_COLUMNS.items()}
            row["player_id"] = str(row["player_id"])

            detailed_players.append(row)
            success = True
            print(pid)
            consecutive_failures = 0  # reset streak on success
            break  # exit retry loop on success

        except Exception as e:
            print(f"Attempt {attempt + 1} failed for {pid}: {e}")
            # Only wait when another attempt will follow; sleeping after the
            # final attempt just delays giving up on this player.
            if attempt < PLAYER_MAX_ATTEMPTS - 1:
                time.sleep(PLAYER_RETRY_BASE_DELAY + attempt)

    if not success:
        failed_ids.append(pid)
        consecutive_failures += 1

    if consecutive_failures >= PLAYER_MAX_CONSECUTIVE_FAILURES:
        print("\n Two players in a row failed completely. Exiting loop early.")
        break

    time.sleep(PLAYER_REQUEST_DELAY)  # base delay between each player fetch

# Surface anything that did not make it into the result so it can be retried.
skipped_ids = unuploaded_player_ids[processed_count:]
if failed_ids:
    print(f"Failed to fetch {len(failed_ids)} player(s): {failed_ids}")
if skipped_ids:
    print(f"Skipped {len(skipped_ids)} player(s) after early exit: {skipped_ids}")

# Convert to DataFrame; explicit columns keep the schema stable even when
# nothing was fetched, so later cells can still select "player_id".
player_info_df = pd.DataFrame(detailed_players, columns=list(PLAYER_INFO_COLUMNS))

print(player_info_df)

1630828
1642846
1642964
1642926
1642866
1642886
1641780
1642868
1642928
1630619
1642383
1642363
1631451
1642907
1643018
1642885
1630621
1642856
1642845
1642400
1642855
1642847
1642843
1642853
1642066
1642884
1642864
1642844
1642935
1642857
1642883
1643052
1642848
1642938
1642880
1642357
1641750
1642939
1642851
1642404
1642502
1631126
1642863
1642918
1642942
1643024
1642862
1642867
1642948
1642854
1642949
1642950
1642877
1642869
1643007
1642962
1642878
1642852
1642875
1642954
1642859
1642860
1642920
1642879
1642917
1642914
1642850
1642876
1642910
1642849
1642364
1642873
1642874
1642905
1642959
1642911
1642365
   player_id         player_name position height weight  \
0    1630828  Alex Antetokounmpo  Forward    6-8    214   
1    1642846          Ace Bailey  Forward    6-9    200   
2    1642964    Brooks Barnhizer    Guard    6-5    230   
3    1642926         Tamar Bates    Guard    6-4    195   
4    1642866       Joan Beringer  Forward   6-11    230   
..       ...                 .

In [13]:
# Save the IDs that still need uploading as a one-column CSV
unuploaded_ids_frame = pd.DataFrame({"PERSON_ID": unuploaded_player_ids})
unuploaded_ids_frame.to_csv("player_ids.csv", index=False)
# Save the fetched player details to CSV, without the index column.
player_info_df.to_csv("player_info_full.csv", index=False)

In [16]:
# Write the player info frame to CSV, without the index column.
# NOTE(review): the preceding cell already ends with this exact export, so
# this cell looks redundant - confirm before removing.
player_info_df.to_csv("player_info_full.csv", index=False)

In [19]:
# Clean the player frame for upload:
#   1. parse player_id as a number (unparseable values become NaN),
#   2. drop rows without a usable ID and cast the rest to int,
#   3. keep one row per player_id,
#   4. swap NaN for None so missing values are sent as null (the original
#      note says this avoids "input syntax" errors on insert).
# Built as one chain so no step assigns into the result of a previous filter.
player_info_df = (
    player_info_df
    .assign(player_id=lambda frame: pd.to_numeric(frame["player_id"], errors="coerce"))
    .dropna(subset=["player_id"])
    .astype({"player_id": int})
    .drop_duplicates(subset=["player_id"])
    .replace({np.nan: None})
)

records = player_info_df.to_dict(orient="records")

# NOTE(review): insert is not idempotent - re-running this cell after a
# partial success will presumably hit duplicate-key errors for rows that
# already landed. Confirm the table's key before switching to an upsert.
chunk_size = 50
failed_chunks = []    # 1-based numbers of chunks that raised
failed_records = []   # rows from failed chunks, kept so they can be retried

for i in range(0, len(records), chunk_size):
    chunk = records[i:i + chunk_size]
    try:
        supabase.table("players").insert(chunk).execute()
        print(f"Inserted chunk {i // chunk_size + 1}")
        time.sleep(0.3)  # short pause between requests
    except Exception as e:
        print(f"Error inserting chunk {i // chunk_size + 1}: {e}")
        failed_chunks.append(i // chunk_size + 1)
        failed_records.extend(chunk)

# Make partial failure obvious instead of leaving it buried in the log above.
if failed_chunks:
    print(
        f"{len(failed_chunks)} chunk(s) failed ({len(failed_records)} rows): "
        f"{failed_chunks}. Rows are kept in `failed_records` for retry."
    )

Inserted chunk 1
Inserted chunk 2


In [21]:
# Pull the roster of every team active in `season` and flatten the results
# into one record per (season, team, player).
#
# Each team gets up to TEAM_MAX_ATTEMPTS tries. The run stops early once
# TEAM_MAX_CONSECUTIVE_FAILURES teams in a row fail every try.
TEAM_MAX_ATTEMPTS = 3
TEAM_RETRY_BASE_DELAY = 30          # seconds to wait before retrying a failed request
TEAM_REQUEST_DELAY = 5              # seconds to wait between teams
TEAM_MAX_CONSECUTIVE_FAILURES = 2

ROSTER_COLUMNS = ["season", "team_id", "player_id", "player_name", "position", "age", "experience"]

nba_teams = CommonTeamYears(league_id='00').get_data_frames()[0]

# Derive the year from `season` ("2025-26" -> "2025") instead of hard-coding
# it, so changing the season cannot silently leave this filter stale.
season_start_year = season.split("-")[0]
active_nba_teams = nba_teams[nba_teams["MAX_YEAR"].astype(str) == season_start_year]

team_ids = active_nba_teams["TEAM_ID"].tolist()

team_rosters = []
failed_team_ids = []
consecutive_failures = 0

for team_id in team_ids:

    success = False

    for attempt in range(TEAM_MAX_ATTEMPTS):
        try:
            info = CommonTeamRoster(team_id=team_id, season=season).get_data_frames()[0]

            # Collect this team's rows first so a failure part-way through
            # cannot leave a partial roster (or duplicates after a retry).
            roster_rows = [
                {
                    "season": season,
                    "team_id": team_id,
                    "player_id": str(row["PLAYER_ID"]),
                    "player_name": row["PLAYER"],
                    "position": row["POSITION"],
                    "age": row["AGE"],
                    "experience": row["EXP"],
                }
                for _, row in info.iterrows()
            ]
            team_rosters.extend(roster_rows)

            success = True
            consecutive_failures = 0  # reset failure streak
            print(team_id)
            break  # success, break retry loop

        except Exception as e:
            print(f"Attempt {attempt + 1} failed for team {team_id} in {season}: {e}")
            # Only wait when another attempt will follow.
            if attempt < TEAM_MAX_ATTEMPTS - 1:
                time.sleep(TEAM_RETRY_BASE_DELAY + attempt)

    if not success:
        failed_team_ids.append(team_id)
        consecutive_failures += 1
        if consecutive_failures >= TEAM_MAX_CONSECUTIVE_FAILURES:
            print("Two consecutive team failures — exiting loop early.")
            break

    time.sleep(TEAM_REQUEST_DELAY)

# Report teams whose roster never came back so they can be retried.
if failed_team_ids:
    print(f"Failed to fetch {len(failed_team_ids)} team(s): {failed_team_ids}")

# Convert to DataFrame; explicit columns keep the schema stable even when
# no roster was fetched.
team_roster_df = pd.DataFrame(team_rosters, columns=ROSTER_COLUMNS)
print(f"Finished with {len(team_roster_df)} player-team-season records.")
print(team_roster_df)

1610612737
1610612738
1610612739
1610612740
1610612741
1610612742
1610612743
1610612744
1610612745
1610612746
1610612747
1610612748
1610612749
1610612750
1610612751
1610612752
1610612753
1610612754
1610612755
1610612756
1610612757
1610612758
1610612759
1610612760
1610612761
1610612762
1610612763
1610612764
1610612765
1610612766
Finished with 524 player-team-season records.
      season     team_id player_id     player_name position   age experience
0    2025-26  1610612737   1630552   Jalen Johnson        F  24.0          4
1    2025-26  1610612737   1630811  Keaton Wallace        G  26.0          1
2    2025-26  1610612737    203468     CJ McCollum        G  34.0         12
3    2025-26  1610612737   1628379    Luke Kennard        G  29.0          8
4    2025-26  1610612737   1630700   Dyson Daniels        G  22.0          3
..       ...         ...       ...             ...      ...   ...        ...
519  2025-26  1610612766   1630544        Tre Mann        G  24.0          4
520  202

In [22]:
# Normalise the key columns (numeric IDs, string season), then keep one row
# per (season, team_id, player_id).
team_roster_df = (
    team_roster_df
    .assign(
        player_id=lambda frame: pd.to_numeric(frame["player_id"], errors='coerce'),
        team_id=lambda frame: pd.to_numeric(frame["team_id"], errors='coerce'),
        season=lambda frame: frame["season"].astype(str),
    )
    .drop_duplicates(subset=["season", "team_id", "player_id"])
)
print(team_roster_df)

      season     team_id  player_id     player_name position   age experience
0    2025-26  1610612737    1630552   Jalen Johnson        F  24.0          4
1    2025-26  1610612737    1630811  Keaton Wallace        G  26.0          1
2    2025-26  1610612737     203468     CJ McCollum        G  34.0         12
3    2025-26  1610612737    1628379    Luke Kennard        G  29.0          8
4    2025-26  1610612737    1630700   Dyson Daniels        G  22.0          3
..       ...         ...        ...             ...      ...   ...        ...
519  2025-26  1610612766    1630544        Tre Mann        G  24.0          4
520  2025-26  1610612766    1641706  Brandon Miller        F  23.0          2
521  2025-26  1610612766    1642354      KJ Simpson        G  23.0          1
522  2025-26  1610612766    1642275  Tidjane Salaün        F  20.0          1
523  2025-26  1610612766    1642862   Liam McNeeley        F  20.0          R

[524 rows x 7 columns]


In [23]:
# Experience arrives as text, with "R" in place of a number for some players;
# map "R" to 0 and cast the whole column to int.
experience_years = team_roster_df["experience"].replace("R", 0).astype(int)

# Attach the cleaned column and drop any fully identical rows
team_roster_df = team_roster_df.assign(experience=experience_years).drop_duplicates()
print(team_roster_df)

      season     team_id  player_id     player_name position   age  experience
0    2025-26  1610612737    1630552   Jalen Johnson        F  24.0           4
1    2025-26  1610612737    1630811  Keaton Wallace        G  26.0           1
2    2025-26  1610612737     203468     CJ McCollum        G  34.0          12
3    2025-26  1610612737    1628379    Luke Kennard        G  29.0           8
4    2025-26  1610612737    1630700   Dyson Daniels        G  22.0           3
..       ...         ...        ...             ...      ...   ...         ...
519  2025-26  1610612766    1630544        Tre Mann        G  24.0           4
520  2025-26  1610612766    1641706  Brandon Miller        F  23.0           2
521  2025-26  1610612766    1642354      KJ Simpson        G  23.0           1
522  2025-26  1610612766    1642275  Tidjane Salaün        F  20.0           1
523  2025-26  1610612766    1642862   Liam McNeeley        F  20.0           0

[524 rows x 7 columns]


In [25]:
# Save the cleaned roster frame to a local CSV, without the index column.
team_roster_df.to_csv("team_info_full26.csv", index=False)

In [24]:
# Convert to record dicts. NaN is swapped for None first, matching the
# players upload: `age` and the coerced ID columns can hold NaN, and missing
# values should be sent as null rather than NaN.
team_records = team_roster_df.replace({np.nan: None}).to_dict(orient="records")

# Upload in chunks, remembering any chunk that fails so it can be retried
# instead of being lost in the log output.
chunk_size = 1000
failed_team_chunks = []    # 1-based numbers of chunks that raised
failed_team_records = []   # rows from failed chunks

for i in range(0, len(team_records), chunk_size):
    chunk = team_records[i:i + chunk_size]
    try:
        supabase.table("teams_players").insert(chunk).execute()
        print(f"Inserted chunk {i // chunk_size + 1}")
        time.sleep(1)  # pause between requests
    except Exception as e:
        print(f"Error inserting chunk {i // chunk_size + 1}: {e}")
        failed_team_chunks.append(i // chunk_size + 1)
        failed_team_records.extend(chunk)

if failed_team_chunks:
    print(
        f"{len(failed_team_chunks)} chunk(s) failed ({len(failed_team_records)} rows): "
        f"{failed_team_chunks}. Rows are kept in `failed_team_records` for retry."
    )


Inserted chunk 1
