In [0]:
# Pivot up to 10,000 rows of the bronze player stats: one output row per
# (game_id, team_id, player_id) and one column per distinct stat_type.
table = (
    spark.read.table("nba.bronze.game_player_stats")
    .limit(10000)
    .groupBy('game_id', 'team_id', 'player_id')
    .pivot('stat_type')
    # 'first' picks one stat_value for each pivoted cell
    .agg({'stat_value': 'first'})
)
display(table)

In [0]:
from pyspark.sql.functions import col, explode, expr, collect_set, concat_ws, filter as spark_filter

# Aggregates applied per player group: the distinct positions (dropping the
# literal string 'None') and the distinct jersey numbers, each joined with commas.
positions_col = concat_ws(
    ",",
    spark_filter(collect_set("position"), lambda pos: pos != 'None'),
).alias("positions")
jersey_numbers_col = concat_ws(",", collect_set("jersey_num")).alias("jersey_numbers")

# One row per distinct combination of the identity columns below.
player_list = (
    spark.read.table("nba.bronze.game_players")
    .groupBy("player_id", "first_name", "family_name", "name_i", "status", "loaded_date")
    .agg(positions_col, jersey_numbers_col)
)
display(player_list)

In [0]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def get_box_scores(date, team1, team2, timeout=10):
    """Scrape the basic box-score tables for one game from basketball-reference.com.

    The daily box-score index for ``date`` is fetched, a game link is chosen by
    looking for the team codes in its href, and every table on the game page
    whose id ends in ``basic`` is parsed and stacked into one frame.

    Args:
        date: Game date; anything ``pd.to_datetime`` can parse (e.g. '2024-04-14').
        team1: Team code to look for in the box-score link (e.g. 'CLE').
        team2: Second team code to look for in the box-score link.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A pandas DataFrame with all matching tables concatenated
        (``ignore_index=True``), or an empty DataFrame when no game link or
        no matching table is found.

    Raises:
        requests.HTTPError: if either page answers with an error status.
        requests.RequestException: on connection failures or timeouts.
    """
    from io import StringIO  # local import keeps the cell self-contained

    # Build the query from the parsed date rather than slicing the raw string,
    # so datetime objects and non-ISO strings work as well.
    game_day = pd.to_datetime(date)
    url = (
        "https://www.basketball-reference.com/boxscores/"
        f"?month={game_day.strftime('%m')}"
        f"&day={game_day.strftime('%d')}"
        f"&year={game_day.strftime('%Y')}"
    )
    res = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of parsing them and silently
    # returning an empty frame.
    res.raise_for_status()
    soup = BeautifulSoup(res.content, "html.parser")
    links = [a['href'] for a in soup.select('a[href^="/boxscores/"]') if 'html' in a['href']]

    # First choice (original behaviour): a link whose href names both teams.
    game_link = next((href for href in links if team1 in href and team2 in href), None)
    if game_link is None:
        # NOTE(review): box-score hrefs on this site appear to carry only the
        # home team's code (e.g. /boxscores/202404140CLE.html), in which case
        # the both-teams test can never match. Fall back to the first link
        # naming either team — confirm the URL format, and note that this
        # fallback does not verify the opponent.
        game_link = next((href for href in links if team1 in href or team2 in href), None)
    if not game_link:
        return pd.DataFrame()

    box_url = f"https://www.basketball-reference.com{game_link}"
    box_res = requests.get(box_url, timeout=timeout)
    box_res.raise_for_status()
    box_soup = BeautifulSoup(box_res.content, "html.parser")
    tables = box_soup.find_all('table', {'id': lambda x: x and x.endswith('basic')})

    # read_html is given a file-like object: passing literal HTML as a plain
    # string is deprecated in recent pandas releases.
    dfs = [pd.read_html(StringIO(str(html_table)))[0] for html_table in tables]
    if not dfs:
        # pd.concat raises ValueError on an empty list; keep the "nothing
        # found" result consistent with the missing-link case above.
        return pd.DataFrame()
    return pd.concat(dfs, ignore_index=True)

# Try the scraper on a single date / pair of team codes and show the result.
box_scores = get_box_scores(date='2024-04-14', team1='CLE', team2='TOR')
display(box_scores)

In [0]:
import requests
# Outbound-connectivity probe: print the HTTP status on success, or the
# exception if the request could not be completed within 5 seconds.
try:
    probe = requests.get("https://www.google.com", timeout=5)
    print("Success:", probe.status_code)
except Exception as err:
    print("Failed:", err)

In [0]:
from nba_api.stats.endpoints.scoreboardv2 import ScoreboardV2
from nba_api.live.nba.endpoints import boxscore,scoreboard

class Game:
  """Wrapper around the live box-score payload of a single game."""

  def __init__(self,game_id):
    """Store ``game_id`` and fetch the 'game' section of its live box score."""
    self.game_id = game_id
    payload = boxscore.BoxScore(game_id).get_dict()
    self.game_data = payload['game']

  def game_info(self):
    """Return a flat dict with team ids, game id, gameTimeUTC, scores and regulation periods."""
    home = self.game_data['homeTeam']
    away = self.game_data['awayTeam']
    return {
        'home_team_id': home['teamId'],
        'away_team_id': away['teamId'],
        'game_id': self.game_id,
        'date': self.game_data['gameTimeUTC'],
        'home_team_result': home['score'],
        'away_team_result': away['score'],
        'regulation_time': self.game_data['regulationPeriods'],
    }

def list_games_for_date(date):
    """Return the game ids listed on the NBA scoreboard for ``date``.

    Args:
        date: Game date, passed straight to ``ScoreboardV2(game_date=...)``.

    Returns:
        list: one value per row of the scoreboard's game-header data set,
        taken from its ``GAME_ID`` column.

    Raises:
        ValueError: if the game-header data set has no ``GAME_ID`` column.
    """
    game_header = ScoreboardV2(game_date=date).game_header.data
    # Locate the id column by name instead of assuming it is column 0.
    # NOTE(review): in the GameHeader result set the first column is believed
    # to be GAME_DATE_EST, not GAME_ID, so the positional lookup would return
    # dates rather than ids — confirm against the endpoint documentation.
    game_id_idx = game_header['headers'].index('GAME_ID')
    return [row[game_id_idx] for row in game_header['data']]

# Collect the game_info() summary of every game returned for the date,
# keyed by the ids list_games_for_date produced.
game_list = list_games_for_date('2025-12-02')
games = {game_id: Game(game_id).game_info() for game_id in game_list}

# One DataFrame row per game; the dict keys become the index.
a = pd.DataFrame.from_dict(games, orient='index')
print(a)

In [0]:
import requests
# Raw probe of the scoreboardv3 endpoint. A timeout is set because requests
# otherwise waits indefinitely when the server never answers, which would
# block the notebook.
requests.get("https://stats.nba.com/stats/scoreboardv3?GameDate=2025-12-02", timeout=10)

In [0]:
# Notebook-scoped dependency installs.
# The nba_api install below is commented out — presumably the package is
# already available in this environment; TODO confirm.
#%pip install nba_api
%pip install 'PyArrow >= 4.0.0'

In [0]:
import requests

# balldontlie endpoint root and API key. The key is read from the
# "nba_secrets" secret scope so it never appears in the notebook source.
BASE_URL = "https://api.balldontlie.io/v1"
API_KEY = dbutils.secrets.get(scope="nba_secrets", key="balldontlie_api_key")

def get_games_by_date(date, timeout=10):
    """Fetch games from the balldontlie ``/games`` endpoint for the given date(s).

    Args:
        date: A date string, or a list of date strings, sent as the
            ``dates[]`` query parameter.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        On success, a list of dicts with the keys ``game_id``, ``date``,
        ``home_team`` and ``away_team``. If any request answers with a
        non-200 status, that status code (an int) is returned instead, so
        callers must check the type before iterating.
    """
    url = f"{BASE_URL}/games"
    headers = {"Authorization": API_KEY}
    # Request the largest page size and follow the cursor so multi-date
    # queries are not silently cut off at the first page.
    # NOTE(review): relies on the API's documented per_page / cursor /
    # meta.next_cursor pagination — confirm against the current API docs.
    params = {"dates[]": date, "per_page": 100}

    results = []
    while True:
        r = requests.get(url, headers=headers, params=params, timeout=timeout)
        print("Status:", r.status_code)
        if r.status_code != 200:
            return r.status_code
        payload = r.json()
        results.extend(
            {
                "game_id": g["id"],
                "date": g["date"],
                "home_team": g["home_team"]["full_name"],
                "away_team": g["visitor_team"]["full_name"],
            }
            for g in payload.get("data", [])
        )
        next_cursor = (payload.get("meta") or {}).get("next_cursor")
        # Stop when there is no further page, or if the server repeats the
        # cursor we just used (guards against looping forever).
        if next_cursor is None or next_cursor == params.get("cursor"):
            break
        params["cursor"] = next_cursor
    return results

games = get_games_by_date(["2025-12-02", "2025-12-03"])

# get_games_by_date returns the HTTP status code (an int) instead of a list
# when the request fails; iterating over that would raise TypeError.
if isinstance(games, list):
    for game in games:
        print(game)
else:
    print("Request failed with status:", games)

In [0]:
import pandas as pd
# Read the box-score parquet file under /Volumes/<catalog>/<schema> and show
# the full frame, then just its game_id column.
catalog = 'nba'
source_schema = 'source'
boxscore_path = f"/Volumes/{catalog}/{source_schema}/game_boxscores/game_boxscore.parquet"
df = spark.read.parquet(boxscore_path)
display(df)
display(df.select("game_id"))

In [0]:
def _drop_all_in_schema(object_kind, schema, name_column):
    """Drop every object that ``SHOW <object_kind>S IN <schema>`` lists.

    Args:
        object_kind: SQL object keyword, ``"VOLUME"`` or ``"TABLE"``.
        schema: Fully qualified schema, e.g. ``"nba.bronze"``.
        name_column: Column of the SHOW result that holds the object name.
    """
    for row in spark.sql(f"SHOW {object_kind}S IN {schema}").collect():
        # Backtick-quote the name (doubling embedded backticks) so names with
        # special characters still produce a valid DROP statement.
        quoted_name = "`" + row[name_column].replace("`", "``") + "`"
        spark.sql(f"DROP {object_kind} IF EXISTS {schema}.{quoted_name}")


# DESTRUCTIVE: running this cell removes data.
# Delete all volumes under nba/source using Databricks SQL
_drop_all_in_schema("VOLUME", "nba.source", "volume_name")

# Delete all tables under nba/bronze using Databricks SQL
_drop_all_in_schema("TABLE", "nba.bronze", "tableName")

# Delete all tables under nba/silver using Databricks SQL
_drop_all_in_schema("TABLE", "nba.silver", "tableName")

In [0]:
# DESTRUCTIVE: delete all tables under nba/bronze using Databricks SQL.
# NOTE: this repeats the bronze cleanup performed by the previous cell.
for row in spark.sql("SHOW TABLES IN nba.bronze").collect():
    # Backtick-quote the name (doubling embedded backticks) so names with
    # special characters still produce a valid DROP statement.
    quoted_name = "`" + row['tableName'].replace("`", "``") + "`"
    spark.sql(f"DROP TABLE IF EXISTS nba.bronze.{quoted_name}")