In [2]:
import requests
import pandas as pd
import time
import os

# Constants
BASE_URL = (
    "https://stats.nba.com/stats/leaguedashlineups?"
    "Conference=&DateFrom=&DateTo=&Division=&GameSegment=&"
    "GroupQuantity={lineup_count}&ISTRound=&LastNGames=0&LeagueID=00&"
    "Location=&MeasureType={measure_type}&Month=0&OpponentTeamID=0&Outcome=&PORound=0&"
    "PaceAdjust=N&PerMode=PerGame&Period=0&PlusMinus=N&Rank=N&Season={season}&"
    "SeasonSegment=&SeasonType={season_type}&ShotClockRange=&TeamID=0&VsConference=&VsDivision="
)
HEADERS = {
        "Host": "stats.nba.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://stats.nba.com/",
        "Origin": "https://stats.nba.com",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
    }
SEASON_TYPES = ["Regular Season", "Playoffs"]
LINEUP_COUNTS = [2, 3, 4, 5]
MEASURE_TYPES = ["Advanced", "Scoring", "Opponent", "Misc", "Four%20Factors", "Traditional"]

def season_list(start_year=2007, end_year=None):
    if end_year is None:
        end_year = pd.Timestamp.today().year
    seasons = []
    for y in range(start_year, 2008):
        seasons.append(f"{y}-{str(y+1)[-2:]}")
    return seasons

def fetch_lineup_data(season, season_type, lineup_count, measure_type):
    url = BASE_URL.format(
        season=season,
        season_type=season_type.replace(" ", "+"),
        lineup_count=lineup_count,
        measure_type=measure_type
    )
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.raise_for_status()
    data = resp.json()
    headers = data["resultSets"][0]["headers"]
    rows = data["resultSets"][0]["rowSet"]
    df = pd.DataFrame(rows, columns=headers)
    df["SEASON"] = season
    df["SEASON_TYPE"] = season_type
    df["LINEUP_COUNT"] = lineup_count
    df["MEASURE_TYPE"] = measure_type
    return df

def merge_measure_types(measure_dfs):
    """Merge dataframes from different measure types by GROUP_ID, keeping only new columns"""
    if not measure_dfs:
        return pd.DataFrame()
    
    # Start with the first dataframe
    merged_df = measure_dfs[0].copy()
    base_columns = set(merged_df.columns)
    
    # Merge each subsequent dataframe
    for df in measure_dfs[1:]:
        if df.empty:
            continue
            
        # Find columns that don't already exist (except GROUP_ID which is the merge key)
        new_columns = [col for col in df.columns if col not in base_columns or col == 'GROUP_ID']
        df_to_merge = df[new_columns]
        
        # Merge on GROUP_ID
        merged_df = pd.merge(merged_df, df_to_merge, on='GROUP_ID', how='outer')
        
        # Update base columns set
        base_columns.update(df_to_merge.columns)
    
    return merged_df

def save_data_by_year(all_dfs, end_year):
    """Save data organized by year with folder structure"""
    folder_name = str(end_year)
    
    # Create folder if it doesn't exist
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    
    # Save complete dataset
    final_df = pd.concat(all_dfs, ignore_index=True)
    all_filename = f"all_lineups_{'-'.join(map(str, LINEUP_COUNTS))}.csv"
    final_df.to_csv(os.path.join(folder_name, all_filename), index=False)
    print(f"✅ All data saved to {folder_name}/{all_filename}")
    
    # Group by season and team for individual saves
    for season in final_df['SEASON'].unique():
        season_data = final_df[final_df['SEASON'] == season]
        
        for season_type in SEASON_TYPES:
            season_type_data = season_data[season_data['SEASON_TYPE'] == season_type]
            
            if season_type_data.empty:
                continue
                
            for lineup_count in LINEUP_COUNTS:
                lineup_data = season_type_data[season_type_data['LINEUP_COUNT'] == lineup_count]
                
                if lineup_data.empty:
                    continue
                
                # Group by team
                for team_id in lineup_data['TEAM_ID'].unique():
                    team_data = lineup_data[lineup_data['TEAM_ID'] == team_id]
                    
                    # Create filename
                    suffix = 'ps' if season_type == 'Playoffs' else ''
                    filename = f"{team_id}_{lineup_count}{suffix}.csv"
                    filepath = os.path.join(folder_name, filename)
                    
                    team_data.to_csv(filepath, index=False)
                    print(f"  📁 Saved {filename}")

def main():
    all_dfs = []
    seasons = season_list(2007)
    
    # Get end year from the last season
    end_year = pd.Timestamp.today().year
    
    for season in seasons:
        for season_type in SEASON_TYPES:
            for lineup_count in LINEUP_COUNTS:
                print(f"Fetching: {season} | {season_type} | {lineup_count}-man lineup | All measure types")
                
                # Collect data for all measure types for this combination
                measure_dfs = []
                
                for measure_type in MEASURE_TYPES:
                    try:
                        df = fetch_lineup_data(season, season_type, lineup_count, measure_type)
                        measure_dfs.append(df)
                        time.sleep(0.5)  # shorter sleep between measure types
                        print(f"    ✓ {measure_type}")
                    except Exception as e:
                        print(f"    ✗ Failed for {measure_type}: {e}")
                
                # Merge all measure types for this combination
                if measure_dfs:
                    try:
                        merged_df = merge_measure_types(measure_dfs)
                        if not merged_df.empty:
                            all_dfs.append(merged_df)
                        print(f"    ✅ Merged {len(measure_dfs)} measure types")
                    except Exception as e:
                        print(f"    ✗ Failed to merge measure types: {e}")
                
                time.sleep(1.2)  # be kind to their servers between combinations
    
    # Save all data organized by year
    save_data_by_year(all_dfs, end_year)

if __name__ == "__main__":
    main()

Fetching: 2007-08 | Regular Season | 2-man lineup | All measure types
    ✓ Advanced
    ✓ Scoring
    ✓ Opponent
    ✓ Misc
    ✗ Failed for Four-Factors: 400 Client Error: Bad Request for url: https://stats.nba.com/stats/leaguedashlineups?Conference=&DateFrom=&DateTo=&Division=&GameSegment=&GroupQuantity=2&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType=Four-Factors&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlusMinus=N&Rank=N&Season=2007-08&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&TeamID=0&VsConference=&VsDivision=
    ✗ Failed for Traditional: 400 Client Error: Bad Request for url: https://stats.nba.com/stats/leaguedashlineups?Conference=&DateFrom=&DateTo=&Division=&GameSegment=&GroupQuantity=2&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType=Traditional&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlusMinus=N&Rank=N&Season=2007-08&SeasonSegment=&SeasonType=Regular+Season&Sh


KeyboardInterrupt

