In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def extract_player_data_team(team_str: str, timeout: float = 30.0) -> dict:
    """Scrape a team's 2015-16 season roster from English Wikipedia.

    Args:
        team_str: Team name as it appears in the article URL, e.g.
            ``"Boston_Celtics"``. It is interpolated into the URL unchanged.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict keyed by player name. Each value is a dict with the string
        fields ``'position'``, ``'number'`` and ``'name'``. Rows whose name
        text is identical overwrite one another.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        ValueError: If the roster anchor or the roster table cannot be
            located in the page.
    """
    url = f"https://en.wikipedia.org/wiki/2015%E2%80%9316_{team_str}_season"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 404/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Match any element carrying id="Roster" so the lookup does not depend on
    # the anchor being a <span> (heading markup differs between renderings).
    roster_anchor = soup.find(id='Roster')
    if roster_anchor is None:
        raise ValueError(f"No element with id 'Roster' found at {url}")

    roster_table = roster_anchor.find_next('table', class_='toccolours')
    if roster_table is None:
        raise ValueError(f"No 'toccolours' roster table found at {url}")

    team_dict = {}
    # Skip the first two <tr> elements, which are assumed to be title/header
    # rows of the roster table -- TODO confirm against the live page layout.
    for row in roster_table.find_all('tr')[2:]:
        cols = row.find_all('td')
        # Three cells (position, number, name) are read below; rows with
        # fewer cells carry no player data and are skipped.
        if len(cols) < 3:
            continue
        position = cols[0].text.strip()
        number = cols[1].text.strip()
        name = cols[2].text.strip()
        team_dict[name] = {'position': position, 'number': number, 'name': name}

    return team_dict

In [None]:
import re

def get_team_colors(team_str: str, timeout: float = 30.0) -> list:
    """Scrape a team's colour names from its English Wikipedia infobox.

    Args:
        team_str: Article title as used in the URL, e.g. ``"Chicago_Bulls"``.
            It is interpolated into the URL unchanged.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of lower-case strings, one per comma-separated entry of the
        infobox cell, with every non-letter character removed (so spaces,
        digits and bracketed markers are dropped).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
        ValueError: If the infobox, the colours label, or the cell following
            the label cannot be located.
    """
    url = f"https://en.wikipedia.org/wiki/{team_str}"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 404/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the infobox containing the team details.
    infobox = soup.find('table', class_='infobox vcard')
    if infobox is None:
        raise ValueError(f"No 'infobox vcard' table found at {url}")

    # Accept both spellings of the label ("Team colors" / "Team colours") so
    # no team needs a hard-coded special case. \A...\Z keeps the match exact.
    label = infobox.find(string=re.compile(r'\ATeam colou?rs\Z'))
    if label is None:
        raise ValueError(f"No 'Team colors'/'Team colours' label found at {url}")

    colors_section = label.find_next('td')
    if colors_section is None:
        raise ValueError(f"No table cell follows the team colours label at {url}")

    # Keep letters only, then lower-case; the substitution also removes any
    # surrounding whitespace, so no separate strip is needed.
    return [
        re.sub(r'[^a-zA-Z]', '', color).lower()
        for color in colors_section.text.split(',')
    ]

In [33]:
# Team identifiers in the form used as Wikipedia URL path segments
# (30 entries, laid out five per line).
nba_teams = [
    "Boston_Celtics", "Brooklyn_Nets", "New_York_Knicks", "Philadelphia_76ers", "Toronto_Raptors",
    "Chicago_Bulls", "Cleveland_Cavaliers", "Detroit_Pistons", "Indiana_Pacers", "Milwaukee_Bucks",
    "Atlanta_Hawks", "Charlotte_Hornets", "Miami_Heat", "Orlando_Magic", "Washington_Wizards",
    "Denver_Nuggets", "Minnesota_Timberwolves", "Oklahoma_City_Thunder", "Portland_Trail_Blazers", "Utah_Jazz",
    "Golden_State_Warriors", "Los_Angeles_Clippers", "Los_Angeles_Lakers", "Phoenix_Suns", "Sacramento_Kings",
    "Dallas_Mavericks", "Houston_Rockets", "Memphis_Grizzlies", "New_Orleans_Pelicans", "San_Antonio_Spurs",
]

# Build one entry per team, keyed by the lower-cased identifier, and store the
# scraped roster under 'players'. One HTTP request is issued per team.
all_team_rosters = {}
for team in nba_teams:
    all_team_rosters[team.lower()] = {}
    all_team_rosters[team.lower()].update(players=extract_player_data_team(team))

In [None]:
# Add each team's scraped colour list to its existing entry; the team
# identifier is printed first so progress (and the failing team, if any)
# is visible in the cell output.
for team in nba_teams:
    print(team)
    all_team_rosters[team.lower()]['team_colors'] = get_team_colors(team)

In [34]:
import json

# Persist the collected data as JSON with 4-space indentation; the file is
# created in the current working directory and overwritten if it exists.
with open('team_rosters.json', 'w') as f:
    f.write(json.dumps(all_team_rosters, indent=4))