In [1]:
import pandas as pd
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so the
# NBA_DB_* settings read by get_nba_db() via os.getenv are available.
load_dotenv()

def get_nba_db():
    """
    Build a SQLAlchemy engine for the NBA PostgreSQL database.

    Connection settings are read from the environment variables
    ``NBA_DB_HOST``, ``NBA_DB_PORT``, ``NBA_DB_NAME``, ``NBA_DB_USER`` and
    ``NBA_DB_PASSWORD``.

    Returns:
        The object returned by ``create_engine`` for the assembled
        ``postgresql://`` URL.

    Raises:
        RuntimeError: If any of the environment variables above is unset.
    """
    # Function-scope import keeps the cell's import list untouched.
    from urllib.parse import quote

    settings = {
        name: os.getenv(name)
        for name in (
            "NBA_DB_HOST",
            "NBA_DB_PORT",
            "NBA_DB_NAME",
            "NBA_DB_USER",
            "NBA_DB_PASSWORD",
        )
    }

    # Fail early with a clear message instead of building a URL that
    # contains the literal text "None".
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    host = settings["NBA_DB_HOST"]
    port = settings["NBA_DB_PORT"]
    database = settings["NBA_DB_NAME"]
    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # user name or password would otherwise break the URL structure.
    user = quote(settings["NBA_DB_USER"], safe="")
    password = quote(settings["NBA_DB_PASSWORD"], safe="")

    connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
    return create_engine(connection_string)

def query(sql):
    """
    Run a SQL statement against the NBA database and return the rows.

    Args:
        sql: The SQL to execute; passed unchanged to ``pandas.read_sql``.

    Returns:
        pandas.DataFrame: The result set produced by ``pandas.read_sql``.
    """
    engine = get_nba_db()
    try:
        return pd.read_sql(sql, engine)
    finally:
        # get_nba_db() builds a brand-new engine on every call, so release its
        # pooled connections here rather than leaving them open until the
        # engine happens to be garbage-collected.
        engine.dispose()

# Function to display available tables
def list_tables():
    """Return the names of all tables in the database's ``public`` schema as a list."""
    # information_schema.tables is queried through the shared query() helper.
    catalog_sql = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
    catalog = query(catalog_sql)
    name_column = catalog['table_name']
    return name_column.tolist()

In [2]:
# Show which tables exist in the database before querying them.
list_tables()

['common_player_info',
 'draft_combine_stats',
 'draft_history',
 'game',
 'game_info',
 'game_summary',
 'games',
 'inactive_players',
 'leagueschedule24_25',
 'leagueschedule25_26',
 'line_score',
 'officials',
 'other_stats',
 'play_by_play',
 'player',
 'playeroftheweek',
 'players',
 'playerstatistics',
 'team',
 'team_details',
 'team_history',
 'team_info_common',
 'teamhistories',
 'teamstatistics']

In [3]:
# Weekly box-score totals per player and team for ISO year 2025 (first 5 rows).
# - EXTRACT('week' ...) is the ISO-8601 week number, so it is paired with
#   'isoyear' rather than the calendar year; otherwise the days around New Year
#   would be added to the wrong week bucket.
# - ORDER BY makes the LIMIT-ed preview deterministic.
# NOTE(review): rows are grouped by player name, not personid; two players who
# share a name and team nickname in the same week would be merged — confirm
# whether personid should be part of the grouping.
query("""
WITH BaseData AS (

SELECT
firstname
,lastname
,CAST(personid AS INT) AS personid
,CAST(gameid AS INT) AS gameid
,CAST(gamedate AS DATE) AS gamedate
,CAST(EXTRACT('week' FROM CAST(gamedate AS DATE)) AS INT) AS week
,CAST(EXTRACT('month' FROM CAST(gamedate AS DATE)) AS INT) AS month
,CAST(EXTRACT('year' FROM CAST(gamedate AS DATE)) AS INT) AS year
,CAST(EXTRACT('isoyear' FROM CAST(gamedate AS DATE)) AS INT) AS isoyear
,CONCAT(CAST(EXTRACT('week' FROM CAST(gamedate AS DATE)) AS INT) , '-' ,CAST(EXTRACT('isoyear' FROM CAST(gamedate AS DATE)) AS INT)) AS weekyear
,playerteamcity
,playerteamname
,opponentteamcity
,opponentteamname
,gametype
,gamelabel
,gamesublabel
,seriesgamenumber
,CAST(win AS INT) AS win
,CAST(home AS INT) AS home
,numminutes
,CAST(points AS INT) AS points
,CAST(assists AS INT) AS assists
,CAST(blocks AS INT) AS blocks
,CAST(steals AS INT) AS steals
,CAST(fieldgoalsmade AS INT) AS fieldgoalsmade
,CAST(fieldgoalsattempted AS INT) AS fieldgoalsattempted
,fieldgoalspercentage
,CAST(threepointersmade AS INT) AS threepointersmade
,CAST(threepointersattempted AS INT) AS threepointersattempted
,threepointerspercentage
,CAST(freethrowsmade AS INT) AS freethrowsmade
,CAST(freethrowsattempted AS INT) AS freethrowsattempted
,freethrowspercentage
,CAST(reboundsoffensive AS INT) AS reboundsoffensive
,CAST(reboundsdefensive AS INT) AS reboundsdefensive
,CAST(reboundstotal AS INT) AS reboundstotal
,CAST(foulspersonal AS INT) AS foulspersonal
,CAST(turnovers AS INT) AS turnovers
,CAST(plusminuspoints AS INT) AS plusminuspoints

FROM playerstatistics

)

SELECT

firstname
,lastname
,week
,playerteamname
,SUM(numminutes) AS numminutes
,SUM(points) AS points
,SUM(assists) AS assists
,SUM(blocks) AS blocks
,SUM(steals) AS steals
,SUM(fieldgoalsmade) AS fieldgoalsmade
,SUM(fieldgoalsattempted) AS fieldgoalsattempted
,SUM(threepointersmade) AS threepointersmade
,SUM(threepointersattempted) AS threepointersattempted
,SUM(freethrowsmade) AS freethrowsmade
,SUM(freethrowsattempted) AS freethrowsattempted
,SUM(reboundsoffensive) AS reboundsoffensive
,SUM(reboundsdefensive) AS reboundsdefensive
,SUM(reboundstotal) AS reboundstotal
,SUM(foulspersonal) AS foulspersonal
,SUM(turnovers) AS turnovers
,SUM(plusminuspoints) AS plusminuspoints

FROM BaseData
WHERE isoyear = 2025
GROUP BY firstname, lastname, week, playerteamname
ORDER BY firstname, lastname, week, playerteamname
LIMIT 5

""")

Unnamed: 0,firstname,lastname,week,playerteamname,numminutes,points,assists,blocks,steals,fieldgoalsmade,...,threepointersmade,threepointersattempted,freethrowsmade,freethrowsattempted,reboundsoffensive,reboundsdefensive,reboundstotal,foulspersonal,turnovers,plusminuspoints
0,Aaron,Gordon,2,Nuggets,18.22,13,2,1,0,6,...,1,2,0,0,1,5,6,0,1,23
1,Aaron,Gordon,3,Nuggets,60.86,33,3,1,1,10,...,1,7,12,13,2,10,12,4,6,17
2,Aaron,Gordon,4,Nuggets,63.11,35,6,0,2,12,...,6,11,5,6,3,2,5,3,4,6
3,Aaron,Gordon,5,Nuggets,113.82,38,10,1,2,16,...,2,8,4,4,6,8,14,6,5,24
4,Aaron,Gordon,6,Nuggets,77.06,27,23,2,1,7,...,3,8,10,13,4,13,17,4,1,40


In [14]:
#!/usr/bin/env python3
"""
NBA Player of the Week Scraper - Improved Version

This script scrapes the NBA Player of the Week data from basketball.realgm.com
with improved table detection methods.
"""

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import datetime
import time
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so the
# NBA_DB_* settings read by get_nba_db() via os.getenv are available.
load_dotenv()

def get_nba_db():
    """
    Build a SQLAlchemy engine for the NBA PostgreSQL database.

    Connection settings are read from the environment variables
    ``NBA_DB_HOST``, ``NBA_DB_PORT``, ``NBA_DB_NAME``, ``NBA_DB_USER`` and
    ``NBA_DB_PASSWORD``.

    Returns:
        The object returned by ``create_engine`` for the assembled
        ``postgresql://`` URL.

    Raises:
        RuntimeError: If any of the environment variables above is unset.
    """
    # Function-scope import keeps the cell's import list untouched.
    from urllib.parse import quote

    settings = {
        name: os.getenv(name)
        for name in (
            "NBA_DB_HOST",
            "NBA_DB_PORT",
            "NBA_DB_NAME",
            "NBA_DB_USER",
            "NBA_DB_PASSWORD",
        )
    }

    # Fail early with a clear message instead of building a URL that
    # contains the literal text "None".
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    host = settings["NBA_DB_HOST"]
    port = settings["NBA_DB_PORT"]
    database = settings["NBA_DB_NAME"]
    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # user name or password would otherwise break the URL structure.
    user = quote(settings["NBA_DB_USER"], safe="")
    password = quote(settings["NBA_DB_PASSWORD"], safe="")

    connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
    return create_engine(connection_string)

# Create one module-level SQLAlchemy engine for this cell; main() passes it
# to DataFrame.to_sql as the `con` argument.
engine = get_nba_db()

def _find_potw_table(soup):
    """Return the awards table from the parsed page, or None if no candidate is found."""
    # Method 1: the table's CSS class.
    table = soup.find('table', class_='tablesaw')
    if table is not None:
        return table

    all_tables = soup.find_all('table')

    # Method 2: any table whose header cells include the expected column names.
    print("Trying to find table by headers...")
    for candidate in all_tables:
        header_texts = [th.text.strip() for th in candidate.find_all('th')]
        if all(name in header_texts for name in ('Player', 'Season', 'Date')):
            print("Found table by headers!")
            return candidate

    # Last resort: describe every table for debugging and take the first one
    # that is large enough to plausibly be the awards table.
    print(f"Could not find the Player of the Week table. Found {len(all_tables)} tables on the page.")
    if all_tables:
        print("Tables found on the page:")
    for number, candidate in enumerate(all_tables, start=1):
        header_texts = [th.text.strip() for th in candidate.find_all('th')]
        row_count = len(candidate.find_all('tr'))
        print(f"Table {number}: {row_count} rows, Headers: {header_texts[:5]}{'...' if len(header_texts) > 5 else ''}")
        if row_count > 5 and len(header_texts) > 5:
            print(f"Table {number} looks promising. Attempting to use it.")
            return candidate

    print("Could not find a suitable table. Please inspect the HTML manually.")
    return None


def _parse_potw_table(table):
    """Turn the table's rows into a DataFrame of strings, or return None if it has no usable rows."""
    table_rows = table.find_all('tr')
    if not table_rows:
        print("Could not extract headers")
        return None

    # The first row supplies the column names; some tables use <td> for headers.
    column_names = [cell.text.strip() for cell in table_rows[0].find_all(['th', 'td'])]
    if not column_names:
        print("Could not extract headers")
        return None
    if 'Player' not in column_names:
        print("Headers don't look right (no 'Player' column); using the first row as headers anyway.")
    print(f"Found headers: {column_names}")

    width = len(column_names)
    data = []
    for tr in table_rows[1:]:
        cells = [cell.text.strip() for cell in tr.find_all(['td', 'th'])]
        # Skip short rows (spacers, sub-headings) and trim extra trailing cells.
        if len(cells) >= width:
            data.append(cells[:width])

    if not data:
        print("Could not extract any data rows")
        return None

    print(f"Extracted {len(data)} rows of data")
    return pd.DataFrame(data, columns=column_names)


def _coerce_potw_dtypes(df):
    """Convert columns whose names contain known numeric/date keywords; returns the same frame."""
    numeric_keywords = (
        'Weight', 'Age', 'Draft Yr', 'YOS', 'Year', 'Years',
        'Points', 'Rebounds', 'Assists', 'Steals', 'Blocks'
    )
    date_keywords = ('Date', 'Award Date', 'Week Of')

    for col in df.columns:
        # Keyword matching is by substring, so e.g. any column containing "Date" is parsed.
        if any(keyword in col for keyword in numeric_keywords):
            df[col] = pd.to_numeric(df[col], errors='coerce')
        if any(keyword in col for keyword in date_keywords):
            try:
                df[col] = pd.to_datetime(df[col], errors='coerce')
            except Exception as exc:  # keep the column as text, but never swallow KeyboardInterrupt
                print(f"Could not convert {col} to datetime ({exc})")
    return df


def scrape_nba_potw(
    url="https://basketball.realgm.com/nba/awards/by-type/player-of-the-week/30",
    timeout=30,
):
    """
    Scrape the NBA Player of the Week data from basketball.realgm.com.

    The raw HTML of the response is also written to ``webpage.html`` in the
    working directory so the page can be inspected when table detection fails.

    Args:
        url (str, optional): Page to fetch. Defaults to the RealGM Player of
            the Week listing.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 30.

    Returns:
        pandas.DataFrame or None: One row per table row with the page's column
        headers, or None when the request fails or no usable table is found.
    """
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    print(f"Fetching data from {url}...")
    try:
        # Without a timeout a stalled server would hang the cell indefinitely.
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return None

    # Print status code for debugging
    print(f"Response status code: {response.status_code}")

    # Save HTML to a file for inspection if needed
    with open("webpage.html", "w", encoding="utf-8") as f:
        f.write(response.text)
    print("Saved HTML to webpage.html for inspection")

    soup = BeautifulSoup(response.text, 'html.parser')

    table = _find_potw_table(soup)
    if table is None:
        return None

    df = _parse_potw_table(table)
    if df is None:
        return None

    return _coerce_potw_dtypes(df)

def save_to_csv(df, filename=None):
    """
    Write the DataFrame to a CSV file without its index.

    Args:
        df (pandas.DataFrame): Data to write; when None nothing is written.
        filename (str, optional): Output path. When None, a name of the form
            ``nba_player_of_the_week_<YYYYmmdd_HHMMSS>.csv`` is generated.
    """
    if df is None:
        print("No data to save")
    else:
        out_path = filename
        if out_path is None:
            # Timestamped default so successive exports get distinct names.
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            out_path = f"nba_player_of_the_week_{stamp}.csv"

        df.to_csv(out_path, index=False)
        print(f"Data saved to {out_path}")

def main():
    """Scrape the Player of the Week table, print a preview, and store it in Postgres.

    On success the frame replaces the ``playeroftheweek`` table through the
    module-level ``engine``; on failure troubleshooting tips are printed.
    """
    print("=== NBA Player of the Week Scraper - Improved Version ===")

    # Short pause before the request is sent.
    print("Waiting a few seconds before scraping...")
    time.sleep(2)

    df = scrape_nba_potw()

    if df is None:
        failure_lines = (
            "Failed to scrape the data.",
            "\nTroubleshooting tips:",
            "1. Open the saved webpage.html file in a browser",
            "2. Use browser developer tools (F12) to inspect the table",
            "3. Look for the table's class, id, or other identifying attributes",
            "4. Update the script with the correct selectors",
        )
        for line in failure_lines:
            print(line)
        return

    # Preview of the scraped rows.
    print("\nFirst few rows of the data:")
    print(df.head())

    # Basic facts about the frame.
    print("\nDataFrame information:")
    print(f"Shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")

    # save_to_csv(df) is available for a CSV export but is not called here.

    # Replace the Postgres table with the fresh scrape; the index is not written.
    df.to_sql(name='playeroftheweek', con=engine, if_exists='replace', index=False)

# Run the scraper when this code executes as the top-level module; the
# recorded output below shows the guard was satisfied when the cell ran.
if __name__ == "__main__":
    main()

=== NBA Player of the Week Scraper - Improved Version ===
Waiting a few seconds before scraping...
Fetching data from https://basketball.realgm.com/nba/awards/by-type/player-of-the-week/30...
Response status code: 200
Saved HTML to webpage.html for inspection
Trying to find table by headers...
Found table by headers!
Found headers: ['Season', 'Player', 'Conference', 'Date', 'Team', 'Pos', 'Height', 'Weight', 'Age', 'Pre-Draft Team', 'Draft Yr', 'YOS']
Extracted 1562 rows of data

First few rows of the data:
      Season                 Player Conference       Date  \
0  2025-2026  Giannis Antetokounmpo       East 2025-10-27   
1  2025-2026      Victor Wembanyama       West 2025-10-27   
2  2024-2025  Giannis Antetokounmpo       East 2025-04-14   
3  2024-2025           James Harden       West 2025-04-14   
4  2024-2025  Giannis Antetokounmpo       East 2025-04-07   

                   Team Pos Height  Weight  Age  \
0       Milwaukee Bucks   F   6-11     243   31   
1     San Antonio 

In [7]:
# Load a previously exported scrape for inspection. The timestamped name
# follows the default pattern generated by save_to_csv(); the file must
# already exist in the working directory.
pd.read_csv('nba_player_of_the_week_20251101_192256.csv')

Unnamed: 0,Season,Player,Conference,Date,Team,Pos,Height,Weight,Age,Pre-Draft Team,Draft Yr,YOS
0,2025-2026,Giannis Antetokounmpo,East,2025-10-27,Milwaukee Bucks,F,6-11,243,31,Filathlitikos Div II Greece (Greece),2013,12
1,2025-2026,Victor Wembanyama,West,2025-10-27,San Antonio Spurs,F,7-4,235,22,Boulogne-Levallois (France),2023,2
2,2024-2025,Giannis Antetokounmpo,East,2025-04-14,Milwaukee Bucks,F,6-11,243,30,Filathlitikos Div II Greece (Greece),2013,11
3,2024-2025,James Harden,West,2025-04-14,Los Angeles Clippers,SG,6-5,220,35,Arizona State,2009,15
4,2024-2025,Giannis Antetokounmpo,East,2025-04-07,Milwaukee Bucks,F,6-11,243,30,Filathlitikos Div II Greece (Greece),2013,11
...,...,...,...,...,...,...,...,...,...,...,...,...
1557,1979-1980,Phil Ford,,1979-11-18,Kansas City Kings,G,6-2,175,24,North Carolina,1978,1
1558,1979-1980,Magic Johnson,,1979-11-11,Los Angeles Lakers,PG,6-9,255,20,Michigan State,1979,0
1559,1979-1980,Marques Johnson,,1979-11-04,Milwaukee Bucks,GF,6-7,218,24,UCLA,1977,2
1560,1979-1980,Micheal Ray Richardson,,1979-10-28,New York Knicks,PG,6-5,189,24,Montana,1978,1


In [18]:
# Count playerstatistics rows per (firstname, lastname) and keep names that
# occur on more than one row.
# NOTE(review): COUNT(personid) counts rows, not distinct players. If the goal
# is to find names shared by several personid values, COUNT(DISTINCT personid)
# would be needed — confirm the intent.
query("""
SELECT firstname, lastname, COUNT(personid)
FROM playerstatistics
GROUP BY firstname, lastname
HAVING COUNT(personid) > 1
--ORDER BY COUNT(personid) DESC

""")

Unnamed: 0,firstname,lastname,count
0,Awvee,Storey,169
1,Milt,Williams,68
2,Marco,Belinelli,1102
3,James,Palmer Jr.,2
4,Michael,Devoe,2
...,...,...,...
5307,Anthony,Bonner,341
5308,Ahmad,Caver,8
5309,Cuonzo,Martin,14
5310,Mel,Riebe,32


In [3]:
"""
NBA Player of the Week Scraper - Improved Version

This script scrapes the NBA Player of the Week data from basketball.realgm.com
with improved table detection methods.
"""

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import datetime
import time
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so the
# NBA_DB_* settings read by get_nba_db() via os.getenv are available.
load_dotenv()

def get_nba_db():
    """
    Build a SQLAlchemy engine for the NBA PostgreSQL database.

    Connection settings are read from the environment variables
    ``NBA_DB_HOST``, ``NBA_DB_PORT``, ``NBA_DB_NAME``, ``NBA_DB_USER`` and
    ``NBA_DB_PASSWORD``.

    Returns:
        The object returned by ``create_engine`` for the assembled
        ``postgresql://`` URL.

    Raises:
        RuntimeError: If any of the environment variables above is unset.
    """
    # Function-scope import keeps the cell's import list untouched.
    from urllib.parse import quote

    settings = {
        name: os.getenv(name)
        for name in (
            "NBA_DB_HOST",
            "NBA_DB_PORT",
            "NBA_DB_NAME",
            "NBA_DB_USER",
            "NBA_DB_PASSWORD",
        )
    }

    # Fail early with a clear message instead of building a URL that
    # contains the literal text "None".
    missing = [name for name, value in settings.items() if value is None]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    host = settings["NBA_DB_HOST"]
    port = settings["NBA_DB_PORT"]
    database = settings["NBA_DB_NAME"]
    # Percent-encode the credentials: characters such as '@', ':' or '/' in a
    # user name or password would otherwise break the URL structure.
    user = quote(settings["NBA_DB_USER"], safe="")
    password = quote(settings["NBA_DB_PASSWORD"], safe="")

    connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
    return create_engine(connection_string)

# Create one module-level SQLAlchemy engine for this cell; main() passes it
# to DataFrame.to_sql as the `con` argument.
engine = get_nba_db()

def _find_potw_table(soup):
    """Return the awards table from the parsed page, or None if no candidate is found."""
    # Method 1: the table's CSS class.
    table = soup.find('table', class_='tablesaw')
    if table is not None:
        return table

    all_tables = soup.find_all('table')

    # Method 2: any table whose header cells include the expected column names.
    print("Trying to find table by headers...")
    for candidate in all_tables:
        header_texts = [th.text.strip() for th in candidate.find_all('th')]
        if all(name in header_texts for name in ('Player', 'Season', 'Date')):
            print("Found table by headers!")
            return candidate

    # Last resort: describe every table for debugging and take the first one
    # that is large enough to plausibly be the awards table.
    print(f"Could not find the Player of the Week table. Found {len(all_tables)} tables on the page.")
    if all_tables:
        print("Tables found on the page:")
    for number, candidate in enumerate(all_tables, start=1):
        header_texts = [th.text.strip() for th in candidate.find_all('th')]
        row_count = len(candidate.find_all('tr'))
        print(f"Table {number}: {row_count} rows, Headers: {header_texts[:5]}{'...' if len(header_texts) > 5 else ''}")
        if row_count > 5 and len(header_texts) > 5:
            print(f"Table {number} looks promising. Attempting to use it.")
            return candidate

    print("Could not find a suitable table. Please inspect the HTML manually.")
    return None


def _parse_potw_table(table):
    """Turn the table's rows into a DataFrame of strings, or return None if it has no usable rows."""
    table_rows = table.find_all('tr')
    if not table_rows:
        print("Could not extract headers")
        return None

    # The first row supplies the column names; some tables use <td> for headers.
    column_names = [cell.text.strip() for cell in table_rows[0].find_all(['th', 'td'])]
    if not column_names:
        print("Could not extract headers")
        return None
    if 'Player' not in column_names:
        print("Headers don't look right (no 'Player' column); using the first row as headers anyway.")
    print(f"Found headers: {column_names}")

    width = len(column_names)
    data = []
    for tr in table_rows[1:]:
        cells = [cell.text.strip() for cell in tr.find_all(['td', 'th'])]
        # Skip short rows (spacers, sub-headings) and trim extra trailing cells.
        if len(cells) >= width:
            data.append(cells[:width])

    if not data:
        print("Could not extract any data rows")
        return None

    print(f"Extracted {len(data)} rows of data")
    return pd.DataFrame(data, columns=column_names)


def _coerce_potw_dtypes(df):
    """Convert columns whose names contain known numeric/date keywords; returns the same frame."""
    numeric_keywords = (
        'Weight', 'Age', 'Draft Yr', 'YOS', 'Year', 'Years',
        'Points', 'Rebounds', 'Assists', 'Steals', 'Blocks'
    )
    date_keywords = ('Date', 'Award Date', 'Week Of')

    for col in df.columns:
        # Keyword matching is by substring, so e.g. any column containing "Date" is parsed.
        if any(keyword in col for keyword in numeric_keywords):
            df[col] = pd.to_numeric(df[col], errors='coerce')
        if any(keyword in col for keyword in date_keywords):
            try:
                df[col] = pd.to_datetime(df[col], errors='coerce')
            except Exception as exc:  # keep the column as text, but never swallow KeyboardInterrupt
                print(f"Could not convert {col} to datetime ({exc})")
    return df


def scrape_nba_potw(
    url="https://basketball.realgm.com/nba/awards/by-type/Player-of-the-Week/30",
    timeout=30,
):
    """
    Scrape the NBA Player of the Week data from basketball.realgm.com.

    The raw HTML of the response is also written to ``webpage.html`` in the
    working directory so the page can be inspected when table detection fails.

    Args:
        url (str, optional): Page to fetch. Defaults to the RealGM Player of
            the Week listing.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 30.

    Returns:
        pandas.DataFrame or None: One row per table row with the page's column
        headers, or None when the request fails or no usable table is found.
    """
    # Browser-like request headers. "Accept-Encoding" is deliberately left to
    # requests: advertising "br" by hand invites a Brotli-compressed body that
    # can only be decoded when an optional Brotli library is installed.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Referer": "https://basketball.realgm.com/",
        "Upgrade-Insecure-Requests": "1"
    }

    print(f"Fetching data from {url}...")
    try:
        # Without a timeout a stalled server would hang the cell indefinitely.
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return None

    # Print status code for debugging
    print(f"Response status code: {response.status_code}")

    # Save HTML to a file for inspection if needed
    with open("webpage.html", "w", encoding="utf-8") as f:
        f.write(response.text)
    print("Saved HTML to webpage.html for inspection")

    soup = BeautifulSoup(response.text, 'html.parser')

    table = _find_potw_table(soup)
    if table is None:
        return None

    df = _parse_potw_table(table)
    if df is None:
        return None

    return _coerce_potw_dtypes(df)

def save_to_csv(df, filename=None):
    """
    Write the DataFrame to a CSV file without its index.

    Args:
        df (pandas.DataFrame): Data to write; when None nothing is written.
        filename (str, optional): Output path. When None, a name of the form
            ``nba_player_of_the_week_<YYYYmmdd_HHMMSS>.csv`` is generated.
    """
    if df is None:
        print("No data to save")
    else:
        out_path = filename
        if out_path is None:
            # Timestamped default so successive exports get distinct names.
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            out_path = f"nba_player_of_the_week_{stamp}.csv"

        df.to_csv(out_path, index=False)
        print(f"Data saved to {out_path}")

def main():
    """Scrape the Player of the Week table, print a preview, and store it in Postgres.

    On success the frame replaces the ``playeroftheweek`` table through the
    module-level ``engine``; on failure troubleshooting tips are printed.
    """
    print("=== NBA Player of the Week Scraper - Improved Version ===")

    # Short pause before the request is sent.
    print("Waiting a few seconds before scraping...")
    time.sleep(2)

    df = scrape_nba_potw()

    if df is None:
        failure_lines = (
            "Failed to scrape the data.",
            "\nTroubleshooting tips:",
            "1. Open the saved webpage.html file in a browser",
            "2. Use browser developer tools (F12) to inspect the table",
            "3. Look for the table's class, id, or other identifying attributes",
            "4. Update the script with the correct selectors",
        )
        for line in failure_lines:
            print(line)
        return

    # Preview of the scraped rows.
    print("\nFirst few rows of the data:")
    print(df.head())

    # Basic facts about the frame.
    print("\nDataFrame information:")
    print(f"Shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")

    # save_to_csv(df) is available for a CSV export but is not called here.

    # Replace the Postgres table with the fresh scrape; the index is not written.
    df.to_sql(name='playeroftheweek', con=engine, if_exists='replace', index=False)

# Run the scraper when this code executes as the top-level module; the
# recorded output below shows the guard was satisfied when the cell ran.
if __name__ == "__main__":
    main()

=== NBA Player of the Week Scraper - Improved Version ===
Waiting a few seconds before scraping...
Fetching data from https://basketball.realgm.com/nba/awards/by-type/Player-of-the-Week/30...
Error fetching the webpage: 403 Client Error: Forbidden for url: https://basketball.realgm.com/nba/awards/by-type/Player-of-the-Week/30
Failed to scrape the data.

Troubleshooting tips:
1. Open the saved webpage.html file in a browser
2. Use browser developer tools (F12) to inspect the table
3. Look for the table's class, id, or other identifying attributes
4. Update the script with the correct selectors


In [4]:
# Install the nba_api client imported by the next cell.
# NOTE(review): the version is unpinned; the recorded output shows 1.10.2 was
# installed. Consider `%pip install nba_api==1.10.2` so the install targets the
# kernel's environment and is reproducible.
!pip install nba_api

Collecting nba_api
  Downloading nba_api-1.10.2-py3-none-any.whl.metadata (5.8 kB)
Downloading nba_api-1.10.2-py3-none-any.whl (286 kB)
Installing collected packages: nba_api
Successfully installed nba_api-1.10.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [5]:
#!/usr/bin/env python3
"""
NBA Players of the Week Extractor

This script extracts all NBA Players of the Week from the NBA API.
It uses the nba_api package to:
1. Get all NBA players
2. Extract player awards for each player
3. Filter to show only Player of the Week awards
4. Output results to a CSV file
"""

import pandas as pd
import time
from nba_api.stats.static import players
from nba_api.stats.endpoints import playerawards

def get_all_players():
    """
    Fetch the complete player list from nba_api's static player data.

    Returns:
        Whatever ``players.get_players()`` returns; callers in this cell read
        the ``'id'`` and ``'full_name'`` keys of each entry.
    """
    print("Retrieving all NBA players...")
    roster = players.get_players()
    return roster

def get_player_awards(player_id):
    """
    Fetch the awards table for one player.

    Args:
        player_id: Player identifier forwarded to ``playerawards.PlayerAwards``.

    Returns:
        DataFrame: The first data frame of the endpoint response, or an empty
        DataFrame when the request raises (the error is printed, not re-raised).
    """
    try:
        # Pause before every request to stay under the API's rate limit.
        time.sleep(0.5)
        endpoint = playerawards.PlayerAwards(player_id=player_id)
        frames = endpoint.get_data_frames()
        result = frames[0]
    except Exception as e:
        print(f"Error retrieving awards for player ID {player_id}: {e}")
        result = pd.DataFrame()
    return result

def extract_players_of_week():
    """
    Extract all NBA Player of the Week awards.

    Iterates over every player from ``get_all_players()``, fetches each
    player's awards with ``get_player_awards()``, and keeps the rows whose
    ``DESCRIPTION`` contains "Player of the Week" (case-insensitive).

    Returns:
        DataFrame: All matching award rows with a fresh 0..n-1 index, or an
        empty DataFrame when no player has such an award.
    """
    all_players = get_all_players()
    total_players = len(all_players)

    print(f"Total players to process: {total_players}")

    # Collect the per-player frames and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop re-copies every earlier row on
    # each iteration.
    pow_frames = []

    for i, player in enumerate(all_players):
        # Print progress every 100 players
        if i % 100 == 0:
            print(f"Processing player {i}/{total_players}: {player['full_name']}")

        awards = get_player_awards(player['id'])
        if awards.empty:
            continue

        # The DESCRIPTION column contains the award name; na=False treats
        # missing descriptions as non-matches.
        is_pow = awards['DESCRIPTION'].str.contains('Player of the Week', case=False, na=False)
        pow_awards = awards[is_pow]
        if not pow_awards.empty:
            pow_frames.append(pow_awards)

    # pd.concat raises on an empty list, so return an empty frame explicitly.
    if not pow_frames:
        return pd.DataFrame()
    return pd.concat(pow_frames, ignore_index=True)

def main():
    """
    Run the extraction, save the awards to CSV, and print a summary.

    When no Player of the Week rows are retrieved (for example because every
    API request failed), a message is printed and nothing is written; the
    summary below would otherwise fail on the missing 'SEASON' column.
    """
    print("Starting extraction of NBA Players of the Week...")

    # Extract all Players of the Week
    pow_data = extract_players_of_week()

    if pow_data.empty:
        print("No Player of the Week awards were retrieved; nothing to save or summarize.")
        return

    # Save to CSV
    output_file = "nba_players_of_week.csv"
    pow_data.to_csv(output_file, index=False)

    print(f"Extraction complete. Found {len(pow_data)} Player of the Week awards.")
    print(f"Data saved to {output_file}")

    # Display summary of results
    print("\nSummary of Player of the Week awards by season:")
    season_counts = pow_data['SEASON'].value_counts().sort_index()
    print(season_counts)

    # Display a sample of the data
    print("\nSample data:")
    print(pow_data[['SEASON', 'FIRST_NAME', 'LAST_NAME', 'TEAM', 'CONFERENCE']].head(10))

# Run the extraction when this code executes as the top-level module; the
# recorded output below shows the guard was satisfied when the cell ran.
if __name__ == "__main__":
    main()

Starting extraction of NBA Players of the Week...
Retrieving all NBA players...
Total players to process: 5135
Processing player 0/5135: Alaa Abdelnaby
Error retrieving awards for player ID 76001: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)


KeyboardInterrupt: 

In [15]:
import wget
# Fetch the player box-score CSV; the recorded cell output reports the saved
# file name as 'playerstatistics.csv'.
wget.download('https://storage.googleapis.com/nba_award_predictor/nba_data/playerstatistics.csv')

'playerstatistics.csv'

In [18]:
# Preview the downloaded box-score file.
# NOTE(review): in the rows shown below, gameDate mixes 'YYYY-MM-DDTHH:MM:SSZ'
# and 'YYYY-MM-DD HH:MM:SS' strings; normalise before doing date arithmetic.
# NOTE(review): the echoed source line in the recorded output suggests pandas
# emitted a warning while reading (possibly a DtypeWarning) — confirm.
pd.read_csv('playerstatistics.csv')

  pd.read_csv('playerstatistics.csv')


Unnamed: 0,firstName,lastName,personId,gameId,gameDate,playerteamCity,playerteamName,opponentteamCity,opponentteamName,gameType,...,threePointersPercentage,freeThrowsAttempted,freeThrowsMade,freeThrowsPercentage,reboundsDefensive,reboundsOffensive,reboundsTotal,foulsPersonal,turnovers,plusMinusPoints
0,Chris,Paul,101108,22500170,2025-11-04T23:00:00Z,LA,Clippers,Oklahoma City,Thunder,,...,0.000,0.0,0.0,0.0,1.0,0.0,1.0,2.0,1.0,-24.0
1,Kris,Dunn,1627739,22500170,2025-11-04T23:00:00Z,LA,Clippers,Oklahoma City,Thunder,,...,0.333,2.0,1.0,0.5,5.0,0.0,5.0,2.0,3.0,-24.0
2,Ivica,Zubac,1627826,22500170,2025-11-04T23:00:00Z,LA,Clippers,Oklahoma City,Thunder,,...,0.000,2.0,1.0,0.5,3.0,4.0,7.0,2.0,2.0,7.0
3,Derrick,Jones Jr.,1627884,22500170,2025-11-04T23:00:00Z,LA,Clippers,Oklahoma City,Thunder,,...,0.429,3.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0
4,Alex,Caruso,1627936,22500170,2025-11-04T23:00:00Z,Oklahoma City,Thunder,LA,Clippers,,...,0.400,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,22.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1632904,Fred,Sheffield,78131,24600052,1946-11-26 19:00:00,Philadelphia,Warriors,Boston,Celtics,Regular Season,...,0.000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632905,Connie,Simmons,78153,24600052,1946-11-26 19:00:00,Boston,Celtics,Philadelphia,Warriors,Regular Season,...,0.000,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632906,Johnny,Simmons,78154,24600052,1946-11-26 19:00:00,Boston,Celtics,Philadelphia,Warriors,Regular Season,...,0.000,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1632907,Virgil,Vaughn,78411,24600052,1946-11-26 19:00:00,Boston,Celtics,Philadelphia,Warriors,Regular Season,...,0.000,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
