In [3]:
import pandas as pd
import sqlite3
import os
from nba_api.stats.endpoints import commonplayerinfo
from time import sleep

In [4]:
# Absolute path of the SQLite database: BALL.db one directory above the
# kernel's current working directory (not relative to this file's location).
DB_PATH = os.path.abspath(os.path.join(os.getcwd(), '..', 'BALL.db'))

# Load the id of every row in `players`. The connection is closed in
# `finally` so a failing query does not leave the database handle open.
conn = sqlite3.connect(DB_PATH)
try:
    player_ids = pd.read_sql(
        "SELECT id FROM players",
        conn
    )
finally:
    conn.close()

# Plain Python list of the ids from the `id` column.
player_ids_list = player_ids["id"].tolist()

In [None]:
def get_player_birthdate(player_id: int) -> str | None:
    try:
        response = commonplayerinfo.CommonPlayerInfo(
            player_id=player_id,
            timeout=30
        )

        df = response.get_data_frames()[0]
        birthdate_raw = df.loc[0, "BIRTHDATE"]

        # THe format is "YYYY-MM-DDTHH:MM:SS"
        # We dont need the time part
        return birthdate_raw.split("T")[0]

    except Exception as e:
        print(f"[ERROR] player_id={player_id}: {e}")
        return None


In [19]:
# Collect the ids of all players whose `birthdate` column is still NULL.
conn = sqlite3.connect(DB_PATH)
try:
    cur = conn.cursor()
    cur.execute("""
        SELECT id
        FROM players
        WHERE birthdate IS NULL
    """)

    # Each fetched row is a 1-tuple; keep only the id itself.
    missing_ids = [row[0] for row in cur.fetchall()]
finally:
    # Release the database handle even if the query raises.
    conn.close()

In [22]:
# Number of player ids whose birthdate was NULL when `missing_ids` was built.
len(missing_ids)

3821

In [None]:
# Pause after every API call, in seconds (presumably to avoid hammering
# the stats API — tune if requests start failing).
REQUEST_DELAY_SECONDS = 0.3
# A progress line is printed each time this many players were processed.
PROGRESS_EVERY = 10

# Backfill `players.birthdate` for every id in `missing_ids`.
conn = sqlite3.connect(DB_PATH)
try:
    cursor = conn.cursor()
    total = len(missing_ids)

    for i, player_id in enumerate(missing_ids, start=1):
        birthdate = get_player_birthdate(player_id)

        # A falsy result means the lookup failed or had no date; the row
        # is left untouched so it is still NULL for a later run.
        if birthdate:
            cursor.execute(
                """
                UPDATE players
                SET birthdate = ?
                WHERE id = ?
                """,
                (birthdate, player_id)
            )
            # Commit per player so an interrupted run keeps what it has
            # already written.
            conn.commit()

        sleep(REQUEST_DELAY_SECONDS)

        if i % PROGRESS_EVERY == 0:
            print(f"[INFO] processed {i}/{total}")
finally:
    # The loop is long-running; make sure an exception or a kernel
    # interrupt does not leave the connection open.
    conn.close()