In [4]:
#team stats JSON scraper
import requests
import json
import re
import time

# URL template for a team's stats page; {team_slug} receives "<abbr>/<slug>"
base_url = 'https://www.espn.com/nba/team/stats/_/name/{team_slug}'

# Teams to scrape, in scrape order: ESPN abbreviation -> URL slug
teams = {
    'bos': 'boston-celtics',
    'bkn': 'brooklyn-nets',
    'ny': 'new-york-knicks',
    'phi': 'philadelphia-76ers',
    'tor': 'toronto-raptors',
    'chi': 'chicago-bulls',
    'cle': 'cleveland-cavaliers',
    'det': 'detroit-pistons',
    'ind': 'indiana-pacers',
    'mil': 'milwaukee-bucks',
    'atl': 'atlanta-hawks',
    'cha': 'charlotte-hornets',
    'mia': 'miami-heat',
    'orl': 'orlando-magic',
    'wsh': 'washington-wizards',
    'den': 'denver-nuggets',
    'min': 'minnesota-timberwolves',
    'okc': 'oklahoma-city-thunder',
    'por': 'portland-trail-blazers',
    'utah': 'utah-jazz',
    'gs': 'golden-state-warriors',
    'lac': 'la-clippers',
    'lal': 'los-angeles-lakers',
    'phx': 'phoenix-suns',
    'sac': 'sacramento-kings',
    'dal': 'dallas-mavericks',
    'hou': 'houston-rockets',
    'mem': 'memphis-grizzlies',
    'no': 'new-orleans-pelicans',
    'sa': 'san-antonio-spurs',
}

# Stat names kept from a "Player Stats" block
player_stat_keys = [
    'gamesPlayed',
    'gamesStarted',
    'avgMinutes',
    'avgPoints',
    'avgOffensiveRebounds',
    'avgDefensiveRebounds',
    'avgRebounds',
    'avgAssists',
    'avgSteals',
    'avgBlocks',
    'avgTurnovers',
    'avgFouls',
    'assistTurnoverRatio',
]

# Stat names kept from a "Shooting Stats" block
shooting_stat_keys = [
    'avgFieldGoalsMade',
    'avgFieldGoalsAttempted',
    'fieldGoalPct',
    'avgThreePointFieldGoalsMade',
    'avgThreePointFieldGoalsAttempted',
    'threePointPct',
    'avgFreeThrowsMade',
    'avgFreeThrowsAttempted',
    'freeThrowPct',
    'avgTwoPointFieldGoalsMade',
    'avgTwoPointFieldGoalsAttempted',
    'twoPointFieldGoalPct',
    'scoringEfficiency',
    'shootingEfficiency',
]

# Helper function to extract stats
def extract_stat_block(stats_list, keys):
    """Pick the wanted stats out of a list of stat dicts.

    Args:
        stats_list: Iterable of dicts, each expected to carry a ``'name'`` and
            optionally a ``'displayValue'``. ``None`` or an empty list is
            treated as "no stats".
        keys: Collection of stat names to keep.

    Returns:
        dict mapping each kept stat name to its ``'displayValue'`` (``''`` when
        the entry has none). Entries without a ``'name'`` are skipped instead
        of raising ``KeyError``.
    """
    return {
        stat['name']: stat.get('displayValue', '')
        for stat in (stats_list or [])
        if 'name' in stat and stat['name'] in keys
    }

# Master dictionary to store all teams, keyed by ESPN abbreviation
all_data = {}

# Headers for the request (browser-like User-Agent)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0'
}

# Give up on a single request after this many seconds instead of hanging the cell
REQUEST_TIMEOUT = 10

# Stat names kept for individual players. Built once so the inner loop does a
# set lookup instead of concatenating the two lists for every stat.
tracked_player_stats = set(player_stat_keys) | set(shooting_stat_keys)

# Main loop: one request per team; a failing team is reported and skipped
for team_abbr, slug in teams.items():
    try:
        url = base_url.format(team_slug=f"{team_abbr}/{slug}")
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Report HTTP errors as such rather than as a "Could not find JSON" miss
        response.raise_for_status()
        html = response.text

        # Get JSON from embedded script
        match = re.search(r"window\['__espnfitt__'\]\s*=\s*({.*?})\s*;\s*</script>", html, re.DOTALL)
        if not match:
            print(f"Could not find JSON for {team_abbr.upper()}")
            continue
        data = json.loads(match.group(1))

        page_content = data['page']['content']
        team_leaders = page_content['stats']['teamLeaders']['leaders']
        team_stats_blocks = page_content['stats']['teamStats']['team']
        player_stats_groups = page_content['stats']['playerStats']

        # Team leaders
        leaders_data = [{
            'stat': leader['label'],
            'player': leader['athlete']['name'],
            'value': leader['value']
        } for leader in team_leaders]

        # Team totals: each titled block contributes only its own key list
        team_totals = {}
        for block in team_stats_blocks:
            if block['title'] == 'Player Stats':
                team_totals.update(extract_stat_block(block['stats'], player_stat_keys))
            elif block['title'] == 'Shooting Stats':
                team_totals.update(extract_stat_block(block['stats'], shooting_stat_keys))

        # Player stats: a player can appear in several groups, so entries are
        # merged into one dict per player name
        players_data = {}
        for group in player_stats_groups:
            for player_entry in group:
                name = player_entry['athlete']['name']
                stat_group = player_entry['statGroups']
                player_stats = players_data.setdefault(name, {})

                for stat in stat_group['stats']:
                    if stat['name'] in tracked_player_stats:
                        player_stats[stat['name']] = stat.get('displayValue', '')

        # Save to final structure
        all_data[team_abbr] = {
            "teamName": slug.replace('-', ' ').title(),
            "teamLeaders": leaders_data,
            "teamTotals": team_totals,
            "playerStats": players_data
        }

        print(f"Scraped: {team_abbr.upper()} - {slug}")

    except Exception as e:
        # Best-effort scrape: report the team and move on to the next one
        print(f"Error processing {team_abbr.upper()}: {e}")

    finally:
        # Be nice to the server: pause after every attempt, including failed
        # or skipped ones, so errors do not turn into back-to-back requests
        time.sleep(0.5)

# Save to JSON (explicit encoding so the file does not depend on the OS default)
with open("all_team_stat_data.json", "w", encoding="utf-8") as f:
    json.dump(all_data, f, indent=2)

print("\nAll data saved to all_team_stat_data.json")


✅ Scraped: BOS - boston-celtics
✅ Scraped: BKN - brooklyn-nets
✅ Scraped: NY - new-york-knicks
✅ Scraped: PHI - philadelphia-76ers
✅ Scraped: TOR - toronto-raptors
✅ Scraped: CHI - chicago-bulls
✅ Scraped: CLE - cleveland-cavaliers
✅ Scraped: DET - detroit-pistons
✅ Scraped: IND - indiana-pacers
✅ Scraped: MIL - milwaukee-bucks
✅ Scraped: ATL - atlanta-hawks
✅ Scraped: CHA - charlotte-hornets
✅ Scraped: MIA - miami-heat
✅ Scraped: ORL - orlando-magic
✅ Scraped: WSH - washington-wizards
✅ Scraped: DEN - denver-nuggets
✅ Scraped: MIN - minnesota-timberwolves
✅ Scraped: OKC - oklahoma-city-thunder
✅ Scraped: POR - portland-trail-blazers
✅ Scraped: UTAH - utah-jazz
✅ Scraped: GS - golden-state-warriors
✅ Scraped: LAC - la-clippers
✅ Scraped: LAL - los-angeles-lakers
✅ Scraped: PHX - phoenix-suns
✅ Scraped: SAC - sacramento-kings
✅ Scraped: DAL - dallas-mavericks
✅ Scraped: HOU - houston-rockets
✅ Scraped: MEM - memphis-grizzlies
✅ Scraped: NO - new-orleans-pelicans
✅ Scraped: SA - san-anto

In [2]:
#Roster data scraper
import requests
import re
import json
import time

# Roster-page URL suffix ("<abbr>/<slug>") -> display name, in scrape order
teams = {
    'bos/boston-celtics': 'Boston Celtics',
    'bkn/brooklyn-nets': 'Brooklyn Nets',
    'ny/new-york-knicks': 'New York Knicks',
    'phi/philadelphia-76ers': 'Philadelphia 76ers',
    'tor/toronto-raptors': 'Toronto Raptors',
    'chi/chicago-bulls': 'Chicago Bulls',
    'cle/cleveland-cavaliers': 'Cleveland Cavaliers',
    'det/detroit-pistons': 'Detroit Pistons',
    'ind/indiana-pacers': 'Indiana Pacers',
    'mil/milwaukee-bucks': 'Milwaukee Bucks',
    'atl/atlanta-hawks': 'Atlanta Hawks',
    'cha/charlotte-hornets': 'Charlotte Hornets',
    'mia/miami-heat': 'Miami Heat',
    'orl/orlando-magic': 'Orlando Magic',
    'wsh/washington-wizards': 'Washington Wizards',
    'den/denver-nuggets': 'Denver Nuggets',
    'min/minnesota-timberwolves': 'Minnesota Timberwolves',
    'okc/oklahoma-city-thunder': 'Oklahoma City Thunder',
    'por/portland-trail-blazers': 'Portland Trail Blazers',
    'utah/utah-jazz': 'Utah Jazz',
    'gs/golden-state-warriors': 'Golden State Warriors',
    'lac/la-clippers': 'LA Clippers',
    'lal/los-angeles-lakers': 'Los Angeles Lakers',
    'phx/phoenix-suns': 'Phoenix Suns',
    'sac/sacramento-kings': 'Sacramento Kings',
    'dal/dallas-mavericks': 'Dallas Mavericks',
    'hou/houston-rockets': 'Houston Rockets',
    'mem/memphis-grizzlies': 'Memphis Grizzlies',
    'no/new-orleans-pelicans': 'New Orleans Pelicans',
    'sa/san-antonio-spurs': 'San Antonio Spurs',
}

# Request headers: a browser-like User-Agent plus a roster-page Referer
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
    ),
    "Referer": "https://www.espn.com/nba/team/roster/_/name/bos/boston-celtics",
}

# Rosters for every team, keyed by display name
all_teams_data = {}

# Fields copied from each athlete record; a field the record lacks becomes None
ROSTER_FIELDS = (
    "name", "id", "height", "weight", "age", "position",
    "jersey", "salary", "birthDate", "headshot", "experience", "college",
)

# Give up on a single request after this many seconds instead of hanging the cell
REQUEST_TIMEOUT = 10

for suffix, team_name in teams.items():
    print(f"Scraping: {team_name}")
    url = f"https://www.espn.com/nba/team/roster/_/name/{suffix}"

    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Report HTTP errors as such rather than as a "JSON not found" miss
        response.raise_for_status()
        html = response.text

        # The page embeds its data as a JSON object assigned to window['__espnfitt__']
        match = re.search(r"window\['__espnfitt__'\]\s*=\s*({.*?});\s*</script>", html, re.DOTALL)
        if not match:
            print(f"JSON not found for {team_name}")
            continue

        espn_data = json.loads(match.group(1))
        athletes = espn_data['page']['content']['roster']['athletes']

        team_roster = [
            {field: player.get(field) for field in ROSTER_FIELDS}
            for player in athletes
        ]

        all_teams_data[team_name] = team_roster

    except Exception as e:
        # Best-effort scrape: report the team and move on to the next one
        print(f"Error with {team_name}: {e}")

    finally:
        # Be polite to ESPN’s servers: pause after every attempt, including
        # failed or skipped ones
        time.sleep(1)

# Save to file
with open("team_roster_data.json", "w", encoding="utf-8") as f:
    json.dump(all_teams_data, f, indent=4)

print("All team data saved to team_roster_data.json")


Scraping: Boston Celtics
Scraping: Brooklyn Nets
Scraping: New York Knicks
Scraping: Philadelphia 76ers
Scraping: Toronto Raptors
Scraping: Chicago Bulls
Scraping: Cleveland Cavaliers
Scraping: Detroit Pistons
Scraping: Indiana Pacers
Scraping: Milwaukee Bucks
Scraping: Atlanta Hawks
Scraping: Charlotte Hornets
Scraping: Miami Heat
Scraping: Orlando Magic
Scraping: Washington Wizards
Scraping: Denver Nuggets
Scraping: Minnesota Timberwolves
Scraping: Oklahoma City Thunder
Scraping: Portland Trail Blazers
Scraping: Utah Jazz
Scraping: Golden State Warriors
Scraping: LA Clippers
Scraping: Los Angeles Lakers
Scraping: Phoenix Suns
Scraping: Sacramento Kings
Scraping: Dallas Mavericks
Scraping: Houston Rockets
Scraping: Memphis Grizzlies
Scraping: New Orleans Pelicans
Scraping: San Antonio Spurs
All team data saved to team_roster_data.json


In [1]:
#depth data scraper
import requests
import re
import json
import time

# ESPN abbreviation -> URL slug for every team, in scrape order
teams = {
    "bos": "boston-celtics",
    "bkn": "brooklyn-nets",
    "ny": "new-york-knicks",
    "phi": "philadelphia-76ers",
    "tor": "toronto-raptors",
    "chi": "chicago-bulls",
    "cle": "cleveland-cavaliers",
    "det": "detroit-pistons",
    "ind": "indiana-pacers",
    "mil": "milwaukee-bucks",
    "atl": "atlanta-hawks",
    "cha": "charlotte-hornets",
    "mia": "miami-heat",
    "orl": "orlando-magic",
    "wsh": "washington-wizards",
    "den": "denver-nuggets",
    "min": "minnesota-timberwolves",
    "okc": "oklahoma-city-thunder",
    "por": "portland-trail-blazers",
    "utah": "utah-jazz",
    "gs": "golden-state-warriors",
    "lac": "la-clippers",
    "lal": "los-angeles-lakers",
    "phx": "phoenix-suns",
    "sac": "sacramento-kings",
    "dal": "dallas-mavericks",
    "hou": "houston-rockets",
    "mem": "memphis-grizzlies",
    "no": "new-orleans-pelicans",
    "sa": "san-antonio-spurs",
}

# Cookies sent with each request
cookies = {
    "edition": "espn-en-us",
    "region": "ccpa",
    # Placeholder: add the rest of your cookies here if needed
}

# Minimal request headers
headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "https://www.espn.com/nba/team/stats",
}

# Storage for all teams, keyed by URL slug
all_depth_data = {}

# Give up on a single request after this many seconds instead of hanging the cell
REQUEST_TIMEOUT = 10

# Scrape each team
for abbr, slug in teams.items():
    url = f'https://www.espn.com/nba/team/depth/_/name/{abbr}/{slug}'
    print(f"Fetching depth chart for: {slug}...")

    try:
        response = requests.get(url, cookies=cookies, headers=headers, timeout=REQUEST_TIMEOUT)
        # Report HTTP errors as such rather than as a missing-JSON message
        response.raise_for_status()
        html = response.text

        # The page embeds its data as a JSON object assigned to window['__espnfitt__']
        match = re.search(r"window\['__espnfitt__'\]\s*=\s*({.*?});\s*</script>", html, re.DOTALL)
        if not match:
            print(f"Could not find embedded JSON for {slug}")
            continue

        espn_data = json.loads(match.group(1))
        # NOTE(review): 'dethTeamGroups' looks like a typo, but the recorded run
        # of this cell shows no lookup errors, so it appears to be the key the
        # page actually serves — verify before "correcting" it.
        depth_rows = espn_data['page']['content']['depth']['dethTeamGroups'][0]['rows']

        team_depth_chart = {}
        for row in depth_rows:
            # First cell of a row is the position; the rest are players in depth order
            position = row[0]
            players = row[1:]

            team_depth_chart[position] = [
                {
                    'depth': depth,
                    'name': player['name'],
                    'injuries': player.get('injuries', [])
                }
                for depth, player in enumerate(players, start=1)
            ]

        all_depth_data[slug] = team_depth_chart

    except Exception as e:
        # Best-effort scrape: report the team and move on to the next one
        print(f"Error processing {slug}: {e}")

    finally:
        # Respectful delay to avoid triggering anti-bot measures; applied after
        # every attempt, including failed or skipped ones
        time.sleep(1)

# Save to JSON
with open('depth_stat_data.json', 'w', encoding='utf-8') as f:
    json.dump(all_depth_data, f, indent=2, ensure_ascii=False)

print("All data saved to depth_stat_data.json")


Fetching depth chart for: boston-celtics...
Fetching depth chart for: brooklyn-nets...
Fetching depth chart for: new-york-knicks...
Fetching depth chart for: philadelphia-76ers...
Fetching depth chart for: toronto-raptors...
Fetching depth chart for: chicago-bulls...
Fetching depth chart for: cleveland-cavaliers...
Fetching depth chart for: detroit-pistons...
Fetching depth chart for: indiana-pacers...
Fetching depth chart for: milwaukee-bucks...
Fetching depth chart for: atlanta-hawks...
Fetching depth chart for: charlotte-hornets...
Fetching depth chart for: miami-heat...
Fetching depth chart for: orlando-magic...
Fetching depth chart for: washington-wizards...
Fetching depth chart for: denver-nuggets...
Fetching depth chart for: minnesota-timberwolves...
Fetching depth chart for: oklahoma-city-thunder...
Fetching depth chart for: portland-trail-blazers...
Fetching depth chart for: utah-jazz...
Fetching depth chart for: golden-state-warriors...
Fetching depth chart for: la-clippers..

In [4]:
#transaction data scraper
import json
import os
import re
import time
from collections import defaultdict

import requests

# ESPN abbreviation -> URL slug for every team, in scrape order
teams = dict(
    bos="boston-celtics",
    bkn="brooklyn-nets",
    ny="new-york-knicks",
    phi="philadelphia-76ers",
    tor="toronto-raptors",
    chi="chicago-bulls",
    cle="cleveland-cavaliers",
    det="detroit-pistons",
    ind="indiana-pacers",
    mil="milwaukee-bucks",
    atl="atlanta-hawks",
    cha="charlotte-hornets",
    mia="miami-heat",
    orl="orlando-magic",
    wsh="washington-wizards",
    den="denver-nuggets",
    min="minnesota-timberwolves",
    okc="oklahoma-city-thunder",
    por="portland-trail-blazers",
    utah="utah-jazz",
    gs="golden-state-warriors",
    lac="la-clippers",
    lal="los-angeles-lakers",
    phx="phoenix-suns",
    sac="sacramento-kings",
    dal="dallas-mavericks",
    hou="houston-rockets",
    mem="memphis-grizzlies",
    no="new-orleans-pelicans",
    sa="san-antonio-spurs",
)

# Cookies sent with each request
cookies = {
    'edition': 'espn-en-us',
    'region': 'ccpa',
    'usprivacy': '1YNY',
    'country': 'us',
}

# NOTE(review): the SWID cookie used to be hard-coded here. Its GUID-shaped
# value looks like a per-user identifier, so it is no longer committed in the
# notebook; set the ESPN_SWID environment variable to send one. Confirm the
# transactions page still responds without it.
_swid = os.environ.get('ESPN_SWID')
if _swid:
    cookies['SWID'] = _swid

# Minimal request headers
headers = {
    'user-agent': 'Mozilla/5.0',
    'accept-language': 'en-US,en;q=0.9',
}

# Transactions per team, keyed by the title-cased team name
all_team_transactions = defaultdict(list)

# Give up on a single request after this many seconds instead of hanging the cell
REQUEST_TIMEOUT = 10

# Pause between requests, matching the polite delay used by the other scrapers
REQUEST_DELAY = 1

for abbr, name in teams.items():
    # One display form for both the log messages and the output key. The key is
    # unchanged from before, so str.title() still yields e.g. "Philadelphia 76Ers".
    display_name = name.replace('-', ' ').title()
    print(f"Scraping {display_name}...")
    url = f'https://www.espn.com/nba/team/transactions/_/name/{abbr}/{name}'
    try:
        response = requests.get(url, headers=headers, cookies=cookies, timeout=REQUEST_TIMEOUT)
        # Report HTTP errors as such rather than as a "No data found" skip
        response.raise_for_status()

        # The page embeds its data as a JSON object assigned to window['__espnfitt__']
        match = re.search(r"window\['__espnfitt__'\]\s*=\s*({.*?});\s*</script>", response.text, re.DOTALL)
        if not match:
            print(f"Skipping {display_name} - No data found")
            continue

        data = json.loads(match.group(1))
        # presumably a mapping of month -> list of transaction records; verify
        transactions = data['page']['content']['transactions']['transactions']
        for month in transactions:
            for item in transactions[month]:
                all_team_transactions[display_name].append({
                    'date': item['date'],
                    'description': item['description']
                })

    except Exception as e:
        # Best-effort scrape: report the team and move on to the next one
        print(f"Failed to process {display_name}: {e}")

    finally:
        # Pause after every attempt, including failed or skipped ones
        time.sleep(REQUEST_DELAY)

# Save to file (explicit encoding so the file does not depend on the OS default)
with open("team_transaction_data.json", "w", encoding="utf-8") as f:
    json.dump(all_team_transactions, f, indent=2)

print("Done! Data saved to 'team_transaction_data.json'")


Scraping Boston-Celtics...
Scraping Brooklyn-Nets...
Scraping New-York-Knicks...
Scraping Philadelphia-76Ers...
Scraping Toronto-Raptors...
Scraping Chicago-Bulls...
Scraping Cleveland-Cavaliers...
Scraping Detroit-Pistons...
Scraping Indiana-Pacers...
Scraping Milwaukee-Bucks...
Scraping Atlanta-Hawks...
Scraping Charlotte-Hornets...
Scraping Miami-Heat...
Scraping Orlando-Magic...
Scraping Washington-Wizards...
Scraping Denver-Nuggets...
Scraping Minnesota-Timberwolves...
Scraping Oklahoma-City-Thunder...
Scraping Portland-Trail-Blazers...
Scraping Utah-Jazz...
Scraping Golden-State-Warriors...
Scraping La-Clippers...
Scraping Los-Angeles-Lakers...
Scraping Phoenix-Suns...
Scraping Sacramento-Kings...
Scraping Dallas-Mavericks...
Scraping Houston-Rockets...
Scraping Memphis-Grizzlies...
Scraping New-Orleans-Pelicans...
Scraping San-Antonio-Spurs...
Done! Data saved to 'team_transaction_data.json'
