In [1]:
import pandas as pd
import requests
import httpx
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from concurrent.futures import ThreadPoolExecutor, as_completed
import concurrent.futures
from random import choice
import sqlite3
pd.set_option('display.max_columns', None)



### Game Data Endpoint: [https://site.web.api.espn.com/apis/site/v2/sports/basketball/nba/summary?event={event_id}](https://site.web.api.espn.com/apis/site/v2/sports/basketball/nba/summary?event=401705073)

#### Data included:
- Team Boxscore
- Player Boxscore
- Game Info(Venue, Referees, Attendance)
- Stat Leaders for both teams
- Season series score up to that date
- ESPN Predictor info
- Odds for the game
- Win probability as the game goes on
- Play by Play
- ESPN Video Highlights
- ESPN Article on the game

## Grab Conference Data

In [65]:
season = 2025
base_conference_url = f"https://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/seasons/{season}/types/2/groups/"
base_conference_data = httpx.get(base_conference_url).json().get('items',[])

conference_urls = [x['$ref'] for x in base_conference_data]

conference_df = []
for conference_url in conference_urls:
    conference_data = httpx.get(conference_url).json()
    conference_dict = {
        'conference_id' : conference_data.get('id'),
        'conference_name' : conference_data.get('name')
    }
    conference_df.append(conference_dict)

conference_ids = [x['conference_id'] for x in conference_df]

division_urls = []
for conference_id in conference_ids:
    children_url = f"https://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/seasons/{season}/types/2/groups/{conference_id}/children?lang=en&region=us"
    children_data = httpx.get(children_url).json().get('items',[])

    division_urls.extend([x['$ref'] for x in children_data])

conference_df = pd.DataFrame(conference_df)
division_df = []
for division_url in division_urls:
    division_data = httpx.get(division_url).json()
    conference_id = division_data.get('parent',{}).get('$ref','').split('?')[0].split('/')[-1]
    division_dict = {
        'division_id' : division_data.get('id'),
        'division_name' : division_data.get('name'),
        'division_abbreviation' : division_data.get('abbreviation'),
        'conference_id' : conference_id,
        'conference_name' : conference_df[conference_df['conference_id'] == conference_id]['conference_name'].iloc[0]

    }
    division_df.append(division_dict)



conn = sqlite3.connect("nbaDatasets/nba_database.db")
cursor = conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS Divisions (
               division_id INTEGER PRIMARY KEY,
               division_name TEXT,
               division_abbreviation TEXT,
               conference_id INTEGER,
               conference_name TEXT
               )
               """)

for row in division_df:
    cursor.execute("""
    INSERT OR REPLACE INTO Divisions (
               division_id,
               division_name,
               division_abbreviation,
               conference_id,
               conference_name
               )
    VALUES (?, ?, ?, ?, ?)
                   """, (
                       row['division_id'],
                       row['division_name'],
                       row['division_abbreviation'],
                       row['conference_id'],
                       row['conference_name']

            ))
conn.commit()
conn.close()


### Grab Team Data

In [2]:
sport_name = 'basketball'
league_name = 'nba'
team_urls = httpx.get(f'https://sports.core.api.espn.com/v2/sports/{sport_name}/leagues/{league_name}/teams?limit=1000').json().get('items')
team_urls = [team_url['$ref'] for team_url in team_urls]
team_url = team_urls[0]

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=10))
def get_team_data(team_url):
    try:
        team_response = httpx.get(team_url)
        team_response.raise_for_status()
        team_data = team_response.json()

        team_dict = {
            'team.id' : team_data.get('id'), # Primary Key
            'team.guid' : team_data.get('guid'),
            'team.uid' : team_data.get('uid'),
            'team.slug' : team_data.get('slug'),
            'team.location' : team_data.get('location'),
            'team.name' : team_data.get('name'),
            'team.abbreviation' : team_data.get('abbreviation'),
            'team.displayName' : team_data.get('displayName'),
            'team.color' : team_data.get('color'),
            'team.alternateColor' : team_data.get('alternateColor'),
            'team.venue.id' : team_data.get('venue',{}).get('id'),
            'team.logo' : f"https://a.espncdn.com/i/teamlogos/nba/500/{team_data.get('abbreviation')}.png",
            'conference.id' : team_data.get('groups',{}).get('$ref').split('?')[0].split('/')[-1]
        }
        return team_dict
    except Exception as e:
        print(f"Error fetching data for URL {team_url}: {e}")

def get_all_teams_data(team_urls):
    teams_df = []
    with ThreadPoolExecutor(max_workers=30) as executor:
        # Prepare future tasks for each combination of team ID and date range
        future_to_sched = {
            executor.submit(get_team_data, team_url): (team_urls)
            for team_url in team_urls
        }
        # As each future completes, append its result to fullSched
        for future in as_completed(future_to_sched):
            sched = future.result()
            teams_df.append(sched)
    return teams_df

teams_df = pd.DataFrame(get_all_teams_data(team_urls))
teams_df['team.id'] = teams_df['team.id'].astype(int)
teams_df = teams_df.sort_values(by='team.id')
teams_df.to_csv('nbaDatasets/basicTeamsDF.csv', index=False)

In [3]:
conn = sqlite3.connect("nbaDatasets/nba_database.db")
cursor = conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS Teams (
               team_id INTEGER PRIMARY KEY,
               team_guid TEXT,
               team_uid TEXT,
               team_slug TEXT,
               team_location TEXT,
               team_abbreviation TEXT,
               team_displayName TEXT,
               team_color TEXT,
               team_alternateColor TEXT,
               team_venue_id INTEGER,
               team_logo TEXT,
               conference_id INTEGER
               )
               """)

for index, row in teams_df.iterrows():
    cursor.execute("""
    INSERT OR REPLACE INTO Teams (
                   team_id, 
                   team_guid, 
                   team_uid, 
                   team_slug, 
                   team_location, 
                   team_abbreviation, 
                   team_displayName, 
                   team_color, 
                   team_alternateColor,
                   team_venue_id,
                   team_logo,
                   conference_id)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                   """, (
                       row['team.id'],
                       row['team.guid'],
                       row['team.uid'],
                       row['team.slug'],
                       row['team.location'],
                       row['team.abbreviation'],
                       row['team.displayName'],
                       row['team.color'],
                       row['team.alternateColor'],
                       row['team.venue.id'],
                       row['team.logo'],
                       row['conference.id']

            ))
conn.commit()
conn.close()

### Grab Player Data

In [4]:
sport_name = 'basketball'
league_name = 'nba'

athletes_url = f"https://sports.core.api.espn.com/v2/sports/{sport_name}/leagues/{league_name}/athletes?limit=1000"
athlete_response = httpx.get(athletes_url)
base_athlete_data = athlete_response.json()

if int(base_athlete_data.get('pageCount')) > 1:
    print("Over 1")
else:
    athlete_url_list = [url['$ref'] for url in base_athlete_data.get('items')]



@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=10))
def get_athlete_data(athlete_url):
    try:
        athlete_response = httpx.get(athlete_url)
        athlete_response.raise_for_status()
        athlete_data = athlete_response.json()

        athlete_dict = {
            'athlete_id' : athlete_data.get('id'),
            'firstName' : athlete_data.get('firstName'),
            'lastName' : athlete_data.get('lastName'),
            'fullName' : athlete_data.get('fullName'),
            'displayName' : athlete_data.get('displayName'),
            'weight' : athlete_data.get('weight'),
            'displayWeight' : athlete_data.get('displayWeight'),
            'height' : athlete_data.get('height'),
            'displayHeight' : athlete_data.get('displayHeight'),
            'age' : athlete_data.get('age'),
            'dateOfBirth' : athlete_data.get('dateOfBirth'),
            'birthPlace.city' : athlete_data.get('birthPlace',{}).get('city'),
            'birthPlace.state' : athlete_data.get('birthPlace',{}).get('state'),
            'birthPlace.country' : athlete_data.get('birthPlace',{}).get('country'),
            'jerseyNumber' : athlete_data.get('jersey'),
            'position.id' : athlete_data.get('position',{}).get('id'),
            'position.displayName' : athlete_data.get('position',{}).get('displayName'),
            'position.abbreviation' : athlete_data.get('position',{}).get('abbreviation'),
            'yearsExperience' : athlete_data.get('experience',{}).get('years'),
            'isAthleteActive' : athlete_data.get('active'),
            'status.id' : athlete_data.get('status',{}).get('id'),
            'status.name' : athlete_data.get('status',{}).get('name'),
            'status.abbreviation' : athlete_data.get('status',{}).get('abbreviation')

        }
        return athlete_dict
    except Exception as e:
        print(f"Error fetching data for URL {athlete_url}: {e}")


def get_all_players_data(athlete_url_list):
    athlete_df = []
    with ThreadPoolExecutor(max_workers=30) as executor:
        # Prepare future tasks for each combination of team ID and date range
        future_to_sched = {
            executor.submit(get_athlete_data, athlete_url): (athlete_url_list)
            for athlete_url in athlete_url_list
        }
        # As each future completes, append its result to fullSched
        for future in as_completed(future_to_sched):
            sched = future.result()
            athlete_df.append(sched)
    return athlete_df


athletes_df = pd.DataFrame(get_all_players_data(athlete_url_list))


In [5]:
conn = sqlite3.connect("nbaDatasets/nba_database.db")
cursor = conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS ActivePlayers (
               athlete_id INTEGER PRIMARY KEY,
               firstName TEXT,
               lastName TEXT,
               fullName TEXT,
               displayName TEXT,
               weight INTEGER,
               displayWeight TEXT,
               height INTEGER,
               displayHeight TEXT,
               age INTEGER,
               dateOfBirth TEXT,
               birthPlace_city TEXT,
               birthPlace_state TEXT,
               birthPlace_country TEXT,
               jerseyNumber INTEGER,
               position_id INTEGER,
               position_displayName TEXT,
               position_abbreviation TEXT,
               yearsExperience INTEGER,
               isAthleteActive BOOL,
               status_id INTEGER,
               status_name TEXT,
               status_abbreviation TEXT     
               )
               """)

for index, row in athletes_df.iterrows():
    cursor.execute("""
    INSERT OR REPLACE INTO ActivePlayers (
               athlete_id,
               firstName,
               lastName,
               fullName,
               displayName,
               weight,
               displayWeight,
               height,
               displayHeight,
               age,
               dateOfBirth,
               birthPlace_city,
               birthPlace_state,
               birthPlace_country,
               jerseyNumber,
               position_id,
               position_displayName,
               position_abbreviation,
               yearsExperience,
               isAthleteActive,
               status_id,
               status_name,
               status_abbreviation     
               )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                   """, (
                       row['athlete_id'],
                       row['firstName'],
                       row['lastName'],
                       row['fullName'],
                       row['displayName'],
                       row['weight'],
                       row['displayWeight'],
                       row['height'],
                       row['displayHeight'],
                       row['age'],
                       row['dateOfBirth'],
                       row['birthPlace.city'],
                       row['birthPlace.state'],
                       row['birthPlace.country'],
                       row['jerseyNumber'],
                       row['position.id'],
                       row['position.displayName'],
                       row['position.abbreviation'],
                       row['yearsExperience'],
                       row['isAthleteActive'],
                       row['status.id'],
                       row['status.name'],
                       row['status.abbreviation']

            ))
conn.commit()
conn.close()

In [6]:
team_ids = list(teams_df['team.id'].unique())



def get_sched_for_team(season, team_id):

    sched_url = f"https://site.api.espn.com/apis/site/v2/sports/basketball/nba/teams/{team_id}/schedule?season={season}"


    try:
        sched_response = httpx.get(sched_url)
        sched_response.raise_for_status()
        sched_data = sched_response.json()
        event_data = sched_data.get('events',[])

        sched_df = []
        for event in event_data:
            competition_data = event.get('competitions',[{}])[0]
            event_dict = {
                'event.id' : competition_data.get('id'),
                'event.date' : competition_data.get('date'),
                'event.season' : season,
                'event.neutralSite' : competition_data.get('neutralSite'),
                'event.boxScoreAvailable' : competition_data.get('boxscoreAvailable'),
                'venue.name' : competition_data.get('venue',{}).get('fullName')
            }
            for competitor in competition_data.get('competitors',[]):
                homeAway = competitor.get('homeAway')
                event_dict[f"{homeAway}.id"] = competitor.get('id')
                event_dict[f"{homeAway}.score"] = competitor.get('score',{}).get('value')
            
            event_dict['status.completed'] = competition_data.get('status',{}).get('type',{}).get('completed')
            event_dict['status.description'] = competition_data.get('status',{}).get('type',{}).get('description')

            sched_df.append(event_dict)
        return sched_df
    except Exception as e:
        print(f"Error fetching data for URL {sched_url}: {e}")

def get_all_teams_sched(team_ids):
    sched_df = []
    with ThreadPoolExecutor(max_workers=30) as executor:
        # Prepare future tasks for each combination of team ID and date range
        future_to_sched = {
            executor.submit(get_sched_for_team, season, team_id): (team_ids)
            for team_id in team_ids
            for season in range(2022,2026)
        }
        # As each future completes, append its result to fullSched
        for future in as_completed(future_to_sched):
            sched = future.result()
            sched_df.extend(sched)
    return sched_df

sched_df = get_all_teams_sched(team_ids)


In [7]:
sched_df = pd.DataFrame(sched_df)

In [8]:
sched_df = sched_df.drop_duplicates(keep='first')

In [9]:
# Assuming your dataframe is named 'df'
sched_df['temp_datetime'] = pd.to_datetime(sched_df['event.date']) - pd.Timedelta(hours=2)

# Extract date and time into separate columns
sched_df['event.startDate'] = sched_df['temp_datetime'].dt.date
sched_df['event.startTime'] = sched_df['temp_datetime'].dt.time

# Drop the original and temporary columns
sched_df = sched_df.drop(['event.date', 'temp_datetime'], axis=1)
sched_df = sched_df.sort_values(by=['event.startDate','event.startTime'])


In [10]:
sched_df = sched_df[sched_df['status.completed']]


In [11]:

# Connect to the database
conn = sqlite3.connect("nbaDatasets/nba_database.db")
cursor = conn.cursor()

# Create the Schedule table with TEXT for event_startTime
cursor.execute("""
CREATE TABLE IF NOT EXISTS Schedule (
    event_id INTEGER PRIMARY KEY,
    event_season INTEGER,
    event_neutralSite BOOL,
    event_boxScoreAvailable BOOL,
    venue_name TEXT,
    home_id INTEGER,
    home_score INTEGER,
    away_id INTEGER,
    away_score INTEGER,
    status_completed TEXT,
    status_description TEXT,
    event_startDate TEXT,
    event_startTime TEXT
)
""")

# Loop through the DataFrame rows and insert data
for index, row in sched_df.iterrows():
    # Convert `event_startTime` to string (if it's not already a string)
    start_time = row['event.startTime']
    if not isinstance(start_time, str):
        start_time = start_time.strftime("%H:%M:%S")  # Assuming it's a `datetime.time` object

    cursor.execute("""
    INSERT OR REPLACE INTO Schedule (
        event_id,
        event_season,
        event_neutralSite,
        event_boxScoreAvailable,
        venue_name,
        home_id,
        home_score,
        away_id,
        away_score,
        status_completed,
        status_description,
        event_startDate,
        event_startTime
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        row['event.id'],
        row['event.season'],
        row['event.neutralSite'],
        row['event.boxScoreAvailable'],
        row['venue.name'],
        row['home.id'],
        row['home.score'],
        row['away.id'],
        row['away.score'],
        row['status.completed'],
        row['status.description'],
        row['event.startDate'],
        start_time  # Insert the time as a string
    ))

# Commit changes and close the connection
conn.commit()
conn.close()


  cursor.execute("""


In [12]:
event_ids = list(sched_df[sched_df['event.boxScoreAvailable']]['event.id'].unique())

event_id = choice(event_ids)
def grab_event_data(event_id):

    event_url = f"https://site.web.api.espn.com/apis/site/v2/sports/basketball/nba/summary?event={event_id}"



    try:
        event_response = httpx.get(event_url, timeout=10)
        event_response.raise_for_status()

        event_data = event_response.json()

        team_boxScores = event_data.get('boxscore',{}).get('teams',[])
        player_boxScores = event_data.get('boxscore',{}).get('players',[])

        
        teamBoxScores_df = []
        for team in team_boxScores:
            team_dict = team.get('team',{})
            team_BoxScore_dict = {
                'event_id' : event_id,
                'team_id' : team_dict.get('id')
            }
            for statistic in team.get('statistics',[]):
                if '-' not in statistic.get('name'):
                    team_BoxScore_dict[f"{statistic.get('name')}"] = statistic.get('displayValue')
                else:

                    key1 = statistic.get('name').split('-')[0]
                    key2 = statistic.get('name').split('-')[1]
                    value1 = statistic.get('displayValue').split('-')[0]
                    value2 = statistic.get('displayValue').split('-')[1]

                    team_BoxScore_dict[f"{key1}"] = value1
                    team_BoxScore_dict[f"{key2}"] = value2




            teamBoxScores_df.append(team_BoxScore_dict)
        
        playerBoxScores_df = []
        for team in player_boxScores:
            team_dict = team.get('team',{})
            player_stats = team.get('statistics',[{}])[0]
            player_dict_keys = player_stats.get('keys')

            for athlete in player_stats.get('athletes',[]):

                athlete_boxScore_dict = {
                    'event_id' : event_id,
                    'team_id' : team_dict.get('id'),
                    'team_displayName' : team_dict.get('displayName'),
                    'athlete_active' : athlete.get('active'),
                    'athlete_starter' : athlete.get('starter'),
                    'athlete_didNotPlay' : athlete.get('didNotPlay'),
                    'athlete_reason' : athlete.get('reason'),
                    'athlete_ejected' : athlete.get('ejected'),
                    'athlete_id' : athlete.get('athlete',{}).get('id'),
                    'athlete_displayName' : athlete.get('athlete',{}).get('displayName')
                }

                for index, stat in enumerate(athlete.get('stats',[])):
                    if '-' not in player_dict_keys[index]:
                        athlete_boxScore_dict[f"{player_dict_keys[index]}"] = stat
                    else:
                        key1 = player_dict_keys[index].split('-')[0]
                        key2 = player_dict_keys[index].split('-')[1]

                        value1 = stat.split('-')[0]
                        value2 = stat.split('-')[1]
                        athlete_boxScore_dict[key1] = value1
                        athlete_boxScore_dict[key2] = value2
                

                playerBoxScores_df.append(athlete_boxScore_dict)
        predictor_df = []
        base_predictor_dict = event_data.get('predictor',{})
        predictor_dict = {'event_id' : event_id}
        for team in ['awayTeam', 'homeTeam']:
            team_predictor_dict = base_predictor_dict.get(team,{})
            predictor_dict[f"{team}_id"] = team_predictor_dict.get('id')
            if 'gameProjection' in team_predictor_dict:
                predictor_dict[f"{team}_gameProjection"] = team_predictor_dict['gameProjection']
            else:
                otherTeam = [ot for ot in ['awayTeam', 'homeTeam'] if ot != team][0]
                predictor_dict[f"{team}_gameProjection"] = base_predictor_dict.get(otherTeam,{}).get('teamChanceLoss')
        predictor_df.append(predictor_dict)


        play_by_play_df = []

        for play in event_data.get('plays',[]):

            play_by_play_dict = {
                'game_id' : event_id,
                'play_id' : play.get('id'),
                'sequenceNumber' : play.get('sequenceNumber'),
                'play_type_id' : play.get('type',{}).get('id'),
                'play_type_text' : play.get('type',{}).get('text'),
                'play_text' : play.get('text'),
                'awayScore' : play.get('awayScore'),
                'homeScore' : play.get('homeScore'),
                'period_number' : play.get('period',{}).get('number'),
                'period_displayValue' : play.get('period',{}).get('displayValue'),
                'clock_displayValue' : play.get('clock',{}).get('displayValue'),
                'isScoringPlay' : play.get('scoringPlay'),
                'scoreValue' : play.get('scoreValue'),
                'offenseTeam' : play.get('team',{}).get('id'),
                'isShootingPlay' : play.get('shootingPlay'),
                'x_coordinate' : play.get('coordinate',{}).get('x'),
                'y_coordinate' : play.get('coordinate',{}).get('y')

            }
            play_by_play_df.append(play_by_play_dict)


        return teamBoxScores_df, playerBoxScores_df, predictor_df, play_by_play_df

        

    except Exception as e:
        print(f"Error fetching data for URL {event_url}: {e}")
        return [], [], [], []



teamBoxScores_df, playerBoxScores_df, predictor_df, play_by_play_df = grab_event_data(event_id)

In [21]:
teamBoxScores_all, playerBoxScores_all, predictor_all, play_by_play_all = [], [], [], []

with ThreadPoolExecutor(max_workers=100) as executor:  # Adjust max_workers as needed
    future_to_event = {executor.submit(grab_event_data, event_id): event_id for event_id in event_ids}

    for future in as_completed(future_to_event):
        event_id = future_to_event[future]
        try:
            teamBoxScores, playerBoxScores, predictor, play_by_play = future.result()
            teamBoxScores_all.extend(teamBoxScores)
            playerBoxScores_all.extend(playerBoxScores)
            predictor_all.extend(predictor)
            play_by_play_all.extend(play_by_play)
        except Exception as e:
            print(f"Error processing event {event_id}: {e}")

# Convert results to DataFrames
# teamBoxScores_df = pd.DataFrame(teamBoxScores_all)
# playerBoxScores_df = pd.DataFrame(playerBoxScores_all)
# predictor_df = pd.DataFrame(predictor_all)
# play_by_play_df = pd.DataFrame(play_by_play_all)

In [27]:
pd.DataFrame(play_by_play_all)

Unnamed: 0,game_id,play_id,sequenceNumber,play_type_id,play_type_text,play_text,awayScore,homeScore,period_number,period_displayValue,clock_displayValue,isScoringPlay,scoreValue,offenseTeam,isShootingPlay,x_coordinate,y_coordinate
0,401358773,4013587734,4,615,Jumpball,Nic Claxton vs. Brook Lopez (Giannis Antetokou...,0,0,1,1st Quarter,12:00,False,0,15,False,-214748340,-214748365
1,401358773,4013587737,7,92,Jump Shot,Grayson Allen misses 27-foot three point jumper,0,0,1,1st Quarter,11:42,False,0,15,True,9,21
2,401358773,4013587738,8,155,Defensive Rebound,Kevin Durant defensive rebound,0,0,1,1st Quarter,11:39,False,0,17,False,9,21
3,401358773,4013587739,9,44,Shooting Foul,Giannis Antetokounmpo shooting foul,0,0,1,1st Quarter,11:27,False,0,15,False,21,14
4,401358773,40135877311,11,98,Free Throw - 1 of 2,Nic Claxton misses free throw 1 of 2,0,0,1,1st Quarter,11:27,False,0,17,True,-214748340,-214748365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2002435,401705108,401705108691,691,584,Substitution,Daniel Theis enters the game for Jose Alvarado,119,120,4,4th Quarter,6.1,False,0,3,False,-214748340,-214748365
2002436,401705108,401705108696,696,145,Driving Floating Bank Jump Shot,CJ McCollum misses two point shot,119,120,4,4th Quarter,1.1,False,0,3,True,28,4
2002437,401705108,401705108697,697,156,Offensive Rebound,Pelicans offensive team rebound,119,120,4,4th Quarter,1.1,False,0,3,False,28,4
2002438,401705108,401705108698,698,412,End Period,End of the 4th Quarter,119,120,4,4th Quarter,0.0,False,0,,False,-214748340,-214748365


In [28]:
conn = sqlite3.connect("nbaDatasets/nba_database.db")
cursor = conn.cursor()


# Create and Add Player Boxscores
cursor.execute("""
CREATE TABLE IF NOT EXISTS playerBoxScores (
    event_id TEXT,
    team_id INTEGER,
    team_displayName TEXT,
    athlete_active BOOLEAN,
    athlete_starter BOOLEAN,
    athlete_didNotPlay BOOLEAN,
    athlete_reason TEXT,
    athlete_ejected BOOLEAN,
    athlete_id INTEGER,
    athlete_displayName TEXT,
    minutes INTEGER,
    fieldGoalsMade INTEGER,
    fieldGoalsAttempted INTEGER,
    threePointFieldGoalsMade INTEGER,
    threePointFieldGoalsAttempted INTEGER,
    freeThrowsMade INTEGER,
    freeThrowsAttempted INTEGER,
    offensiveRebounds INTEGER,
    defensiveRebounds INTEGER,
    rebounds INTEGER,
    assists INTEGER,
    steals INTEGER,
    blocks INTEGER,
    turnovers INTEGER,
    fouls INTEGER,
    plusMinus INTEGER,
    points INTEGER,
    PRIMARY KEY (event_id, athlete_id)
);
""")

# # Loop through the DataFrame rows and insert data
for index, row in enumerate(playerBoxScores_all):

    cursor.execute("""
    INSERT OR IGNORE INTO playerBoxScores (
    event_id,
    team_id,
    team_displayName,
    athlete_active,
    athlete_starter,
    athlete_didNotPlay,
    athlete_reason,
    athlete_ejected,
    athlete_id,
    athlete_displayName,
    minutes,
    fieldGoalsMade,
    fieldGoalsAttempted,
    threePointFieldGoalsMade,
    threePointFieldGoalsAttempted,
    freeThrowsMade,
    freeThrowsAttempted,
    offensiveRebounds,
    defensiveRebounds,
    rebounds,
    assists,
    steals,
    blocks,
    turnovers,
    fouls,
    plusMinus,
    points
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? , ? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,?)
    """, (
        row['event_id'],
        row['team_id'],
        row['team_displayName'],
        row['athlete_active'],
        row['athlete_starter'],
        row['athlete_didNotPlay'],
        row['athlete_reason'],
        row['athlete_ejected'],
        row['athlete_id'],
        row['athlete_displayName'],
        row.get('minutes'),
        row.get('fieldGoalsMade'),
        row.get('fieldGoalsAttempted'),
        row.get('threePointFieldGoalsMade'),
        row.get('threePointFieldGoalsAttempted'),
        row.get('freeThrowsMade'),
        row.get('freeThrowsAttempted'),
        row.get('offensiveRebounds'),
        row.get('defensiveRebounds'),
        row.get('rebounds'),
        row.get('assists'),
        row.get('steals'),
        row.get('blocks'),
        row.get('turnovers'),
        row.get('fouls'),
        row.get('plusMinus'),
        row.get('points'),
    ))

# Create and Add Team Boxscores
cursor.execute("""
CREATE TABLE IF NOT EXISTS teamBoxScores (
    event_id INTEGER,
    team_id INTEGER,
    fieldGoalsMade INTEGER,
    fieldGoalsAttempted INTEGER,
    fieldGoalPct REAL,
    threePointFieldGoalsMade INTEGER,
    threePointFieldGoalsAttempted INTEGER,
    threePointFieldGoalPct REAL,
    freeThrowsMade INTEGER,
    freeThrowsAttempted INTEGER,
    freeThrowPct REAL,
    totalRebounds INTEGER,
    offensiveRebounds INTEGER,
    defensiveRebounds INTEGER,
    assists INTEGER,
    steals INTEGER,
    blocks INTEGER,
    turnovers INTEGER,
    teamTurnovers INTEGER,
    totalTurnovers INTEGER,
    technicalFouls INTEGER,
    totalTechnicalFouls INTEGER,
    flagrantFouls INTEGER,
    turnoverPoints INTEGER,
    fastBreakPoints INTEGER,
    pointsInPaint INTEGER,
    fouls INTEGER,
    largestLead INTEGER,
    PRIMARY KEY (event_id, team_id)
);
""")

for row in teamBoxScores_all:
    cursor.execute("""
    INSERT OR IGNORE INTO teamBoxScores (
        event_id,
        team_id,
        fieldGoalsMade,
        fieldGoalsAttempted,
        fieldGoalPct,
        threePointFieldGoalsMade,
        threePointFieldGoalsAttempted,
        threePointFieldGoalPct,
        freeThrowsMade,
        freeThrowsAttempted,
        freeThrowPct,
        totalRebounds,
        offensiveRebounds,
        defensiveRebounds,
        assists,
        steals,
        blocks,
        turnovers,
        teamTurnovers,
        totalTurnovers,
        technicalFouls,
        totalTechnicalFouls,
        flagrantFouls,
        turnoverPoints,
        fastBreakPoints,
        pointsInPaint,
        fouls,
        largestLead
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)

    """, (
        row['event_id'],
        row['team_id'],
        row['fieldGoalsMade'],
        row['fieldGoalsAttempted'],
        row['fieldGoalPct'],
        row['threePointFieldGoalsMade'],
        row['threePointFieldGoalsAttempted'],
        row['threePointFieldGoalPct'],
        row['freeThrowsMade'],
        row['freeThrowsAttempted'],
        row['freeThrowPct'],
        row['totalRebounds'],
        row['offensiveRebounds'],
        row['defensiveRebounds'],
        row['assists'],
        row['steals'],
        row['blocks'],
        row['turnovers'],
        row['teamTurnovers'],
        row['totalTurnovers'],
        row['technicalFouls'],
        row['totalTechnicalFouls'],
        row['flagrantFouls'],
        row['turnoverPoints'],
        row['fastBreakPoints'],
        row['pointsInPaint'],
        row['fouls'],
        row.get('largestLead')
    ))


# Create and Add Prediction
cursor.execute("""
CREATE TABLE IF NOT EXISTS Predictions (
    event_id INTEGER,
    awayTeam_id INTEGER,
    awayTeam_gameProjection REAL,
    homeTeam_id INTEGER,
    homeTeam_gameProjection REAL,
    PRIMARY KEY (event_id, awayTeam_id, homeTeam_id)
)
               """)


cursor.executemany("""
INSERT OR IGNORE INTO Predictions (
    event_id,
    awayTeam_id,
    awayTeam_gameProjection,
    homeTeam_id,
    homeTeam_gameProjection
) VALUES (?, ?, ?, ?, ?)
""", [
    (
        pred["event_id"],
        pred["awayTeam_id"],
        pred["awayTeam_gameProjection"],
        pred["homeTeam_id"],
        pred["homeTeam_gameProjection"]
    )
    for pred in predictor_all
])

# Create and Add Play by Play

cursor.execute("""
CREATE TABLE IF NOT EXISTS playByPlay (
    game_id INTEGER,
    play_id INTEGER,
    sequenceNumber INTEGER,
    play_type_id INTEGER,
    play_type_text TEXT,
    play_text TEXT,
    awayScore INTEGER,
    homeScore INTEGER,
    period_number INTEGER,
    period_displayValue TEXT,
    clock_displayValue TEXT,
    isScoringPlay BOOLEAN,
    scoreValue INTEGER,
    offenseTeam INTEGER,
    isShootingPlay BOOLEAN,
    x_coordinate REAL,
    y_coordinate REAL,
    PRIMARY KEY (game_id, play_id)
);
""")

cursor.executemany("""
INSERT OR IGNORE INTO playByPlay (
    game_id,
    play_id,
    sequenceNumber,
    play_type_id,
    play_type_text,
    play_text,
    awayScore,
    homeScore,
    period_number,
    period_displayValue,
    clock_displayValue,
    isScoringPlay,
    scoreValue,
    offenseTeam,
    isShootingPlay,
    x_coordinate,
    y_coordinate
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", [
    (
        play["game_id"],
        play["play_id"],
        play["sequenceNumber"],
        play["play_type_id"],
        play["play_type_text"],
        play["play_text"],
        play["awayScore"],
        play["homeScore"],
        play["period_number"],
        play["period_displayValue"],
        play["clock_displayValue"],
        play["isScoringPlay"],
        play["scoreValue"],
        play["offenseTeam"],
        play["isShootingPlay"],
        play["x_coordinate"],
        play["y_coordinate"]
    )
    for play in play_by_play_all
])


conn.commit()
conn.close()

In [48]:
conn = sqlite3.connect("nbaDatasets/nba_database.db")
existing_sched = pd.read_sql_query("SELECT * FROM Schedule", conn)
# existing_sched = pd.read_sql_query("SELECT * FROM playerBoxScores", conn)

conn.close()
existing_sched = pd.DataFrame(existing_sched)
existing_sched = existing_sched.sort_values(by=['event_startDate', 'event_startTime'])

event_ids = list(existing_sched['event_id'].unique())

In [None]:
event_id = choice(event_ids)
event_id
odds_url = f"https://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/events/{event_id}/competitions/{event_id}/odds"

try:
    odds_response = httpx.get(odds_url)
    odds_response.raise_for_status()
    odds_data = odds_response.json()
    ESPN_DICT = False
    CONSENSUS_DICT = False
    for provider in odds_data.get('items',[]):
        if provider.get('provider',{}).get('id') == '58':
            ESPN_DICT = provider
        if provider.get('provider',{}).get('id') == '1004':
            CONSENSUS_DICT = provider
        
        print(provider.get('provider',{}).get('name'))
    
    odds_dict = {}

    if ESPN_DICT:
        odds_dict['event_id'] = event_id
        odds_dict['provider_id'] = ESPN_DICT.get('provider',{}).get('id')
        odds_dict['provider_name'] = ESPN_DICT.get('provider',{}).get('name')
        odds_dict['details'] = ESPN_DICT.get('details')
        odds_dict['overUnder'] = ESPN_DICT.get('overUnder')
        odds_dict['spread'] = ESPN_DICT.get('spread')
        odds_dict['overOdds'] = ESPN_DICT.get('overOdds')
        odds_dict['underOdds'] = ESPN_DICT.get('underOdds')

    print(odds_url)
except:
    print(odds_url)


ESPN BET
BetfairSportsbook
Caesars Sportsbook (Colorado)
Caesars Sportsbook (New Jersey)
Caesars Sportsbook (New Jersey) - Live Odds
Caesars Sportsbook (Tennessee)
DraftKings
MGM
PointsBet
SugarHouse
Titanbets
Unibet
accuscore
https://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/events/401584793/competitions/401584793/odds


In [53]:

CONSENSUS_DICT

{'$ref': 'http://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/events/400828889/competitions/400828889/odds/1004?lang=en&region=us',
 'provider': {'$ref': 'http://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/providers/1004?lang=en&region=us',
  'id': '1004',
  'name': 'consensus',
  'priority': 0},
 'details': 'SAC -3',
 'spread': 3.0,
 'initialSpread': 3.0,
 'initialOverUnder': 217.0,
 'awayTeamOdds': {'winPercentage': 58.0,
  'favorite': True,
  'underdog': False,
  'current': {'pointSpread': {'alternateDisplayValue': '-3',
    'american': '-3'}},
  'team': {'$ref': 'http://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/seasons/2016/teams/23?lang=en&region=us'}},
 'homeTeamOdds': {'winPercentage': 42.0,
  'favorite': False,
  'underdog': True,
  'current': {'pointSpread': {'alternateDisplayValue': '+3',
    'american': '+3'}},
  'team': {'$ref': 'http://sports.core.api.espn.com/v2/sports/basketball/leagues/nba/seasons/2016/teams/13?lang=en&region=u

In [33]:
conn.commit()
conn.close()

ProgrammingError: Cannot operate on a closed database.

In [26]:
conn.close()

In [29]:
conn = sqlite3.connect("nbaDatasets/nba_database.db")
view_df = pd.read_sql_query("SELECT * FROM playByPlay", conn)

conn.close()
view_df = pd.DataFrame(view_df)
# view_df['team_displayName'].unique()

In [30]:
view_df

Unnamed: 0,game_id,play_id,sequenceNumber,play_type_id,play_type_text,play_text,awayScore,homeScore,period_number,period_displayValue,clock_displayValue,isScoringPlay,scoreValue,offenseTeam,isShootingPlay,x_coordinate,y_coordinate
0,401358773,4013587734,4,615,Jumpball,Nic Claxton vs. Brook Lopez (Giannis Antetokou...,0,0,1,1st Quarter,12:00,0,0,15.0,0,-214748340.0,-214748365.0
1,401358773,4013587737,7,92,Jump Shot,Grayson Allen misses 27-foot three point jumper,0,0,1,1st Quarter,11:42,0,0,15.0,1,9.0,21.0
2,401358773,4013587738,8,155,Defensive Rebound,Kevin Durant defensive rebound,0,0,1,1st Quarter,11:39,0,0,17.0,0,9.0,21.0
3,401358773,4013587739,9,44,Shooting Foul,Giannis Antetokounmpo shooting foul,0,0,1,1st Quarter,11:27,0,0,15.0,0,21.0,14.0
4,401358773,40135877311,11,98,Free Throw - 1 of 2,Nic Claxton misses free throw 1 of 2,0,0,1,1st Quarter,11:27,0,0,17.0,1,-214748340.0,-214748365.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2002435,401705108,401705108691,691,584,Substitution,Daniel Theis enters the game for Jose Alvarado,119,120,4,4th Quarter,6.1,0,0,3.0,0,-214748340.0,-214748365.0
2002436,401705108,401705108696,696,145,Driving Floating Bank Jump Shot,CJ McCollum misses two point shot,119,120,4,4th Quarter,1.1,0,0,3.0,1,28.0,4.0
2002437,401705108,401705108697,697,156,Offensive Rebound,Pelicans offensive team rebound,119,120,4,4th Quarter,1.1,0,0,3.0,0,28.0,4.0
2002438,401705108,401705108698,698,412,End Period,End of the 4th Quarter,119,120,4,4th Quarter,0.0,0,0,,0,-214748340.0,-214748365.0
