# NBA daily game scores importer

## Import Libraries

In [4]:
# Install the third-party packages this notebook uses into the kernel's environment.
# NOTE(review): versions are unpinned — consider pinning them for reproducible runs.
# NOTE(review): dnspython is not imported directly; presumably pymongo needs it for
# mongodb+srv:// connection strings — confirm.
%pip install pandas nba_api pymongo python-dotenv dnspython

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import pandas as pd
from datetime import date, timedelta
from nba_api.stats.endpoints import scoreboardv3
import os
from dotenv import load_dotenv
from pymongo import MongoClient, UpdateOne

# NBA game Scores

### Get scores for Yesterday
Yesterday's games are fetched so that every game has finished and its score is final.

> To get today's live scores instead, pass `today_date` as `game_date`.

In [6]:
# Reference dates. `today_date` is kept so it can be passed as `game_date` for
# live scores; yesterday is derived from the same value so the two can never
# disagree if the cell happens to run across midnight.
today_date = date.today()
yesterday_date = today_date - timedelta(days=1)

# Request yesterday's scoreboard. league_id "00" selects the NBA
# (per nba_api's LeagueID constants); the date is sent as "YYYY-MM-DD".
scoreboard = scoreboardv3.ScoreboardV3(
    league_id="00",
    game_date=yesterday_date.isoformat(),
)

# One row per game (status, clock, tip-off time) and one row per team per game.
game_header = scoreboard.game_header.get_data_frame()
line_score = scoreboard.line_score.get_data_frame()

# Preview a game. Days without games yield empty frames, so guard the lookup
# instead of failing on `.iloc[0]`.
if line_score.empty:
    print(f"No games found for {yesterday_date}.")
else:
    example_game_id = line_score["gameId"].iloc[0]
    display(game_header[game_header["gameId"] == example_game_id])
    display(line_score[line_score["gameId"] == example_game_id])


Unnamed: 0,gameId,gameCode,gameStatus,gameStatusText,period,gameClock,gameTimeUTC,gameEt,regulationPeriods,seriesGameNumber,gameLabel,gameSubLabel,seriesText,ifNecessary,seriesConference,poRoundDesc,gameSubtype,isNeutral
0,22500811,20260221/ORLPHX,3,Final/OT2,6,,2026-02-21T22:00:00Z,2026-02-21T17:00:00Z,4,,,,,False,,,,False


Unnamed: 0,gameId,teamId,teamCity,teamName,teamTricode,teamSlug,wins,losses,score,seed,inBonus,timeoutsRemaining
0,22500811,1610612756,Phoenix,Suns,PHX,suns,33,24,113,0,,0
1,22500811,1610612753,Orlando,Magic,ORL,magic,29,26,110,0,,0


## Build Game-Level NBA Results Table

This code transforms raw NBA scoreboard data into a clean, game-level dataset.

### 1. Identify Home and Away Teams
Using the `gameCode` from `game_header`, the script extracts:
- Away team tricode (first 3 letters after the `/`)
- Home team tricode (last 3 letters)

Each row in `line_score` is then labeled as either `home` or `away`.
> Example: 20260222/CLEOKC means away CLE, home OKC

In [7]:
gh = game_header.copy()

# Look up each team row's gameCode once, then keep the matchup part after the
# "/" (e.g. "20260222/CLEOKC" -> "CLEOKC": away tricode followed by home tricode).
game_code = line_score["gameId"].map(gh.set_index("gameId")["gameCode"])
matchup = game_code.str.split("/").str[1]

# Enrich a copy of line_score; later keyword arguments can read the columns
# created by earlier ones.
ls = line_score.assign(
    home_tricode=matchup.str[-3:],
    away_tricode=matchup.str[:3],
    is_home=lambda teams: teams["teamTricode"] == teams["home_tricode"],
    side=lambda teams: teams["is_home"].map({True: "home", False: "away"}),
)

### 2. Select Relevant Fields
From the enriched `line_score`, only the necessary columns are kept:
- Game ID
- Team info
- Wins / Losses
- Score
- Home/Away side

In [8]:
# Keep only what the game-level table needs: keys, team identity, record and score.
team_columns = ["teamId", "teamCity", "teamName", "teamTricode"]
record_columns = ["wins", "losses", "score"]
ls_small = ls[["gameId", "side", *team_columns, *record_columns]]

### 3. Reshape to One Row per Game
The dataset is pivoted from:
- Two rows per game (one per team)

Into:
- One row per game
- Separate columns for home and away statistics

Example:
- `teamName_home`
- `teamName_away`
- `score_home`
- `score_away`

In [9]:
# Pivot from two rows per game (home + away) to a single wide row per game.
game_level_df = (
    ls_small
    .set_index(["gameId", "side"])
    .unstack(level="side")
    # Collapse the two-level header: ("score", "home") -> "score_home".
    .pipe(lambda wide: wide.set_axis(
        [f"{field}_{home_or_away}" for field, home_or_away in wide.columns],
        axis="columns",
    ))
    .reset_index()
)

### 4. Add Game Metadata
The reshaped table is merged with `game_header` to include:
- Game status
- Period
- Game clock
- Game time

In [10]:
# Game-status fields taken from the header; the left join keeps every game row
# of the reshaped table.
header_fields = ["gameStatus", "gameStatusText", "period", "gameClock", "gameTimeUTC", "gameEt"]
game_scores_df = pd.merge(
    game_level_df,
    gh[["gameId", *header_fields]],
    how="left",
    on="gameId",
)

### Output table
A clean, structured DataFrame where:
- Each row represents one NBA game
- Home and away team statistics are clearly separated
- Game status information is included

In [11]:
print(game_scores_df.columns)

# Show each statistic as an away/home pair, in reading order.
paired_stats = ["teamName", "score", "wins", "losses"]
preview_columns = ["gameId"] + [
    f"{stat}_{home_or_away}"
    for stat in paired_stats
    for home_or_away in ("away", "home")
]
preview_df = game_scores_df[preview_columns]
preview_df.head()

Index(['gameId', 'teamId_away', 'teamId_home', 'teamCity_away',
       'teamCity_home', 'teamName_away', 'teamName_home', 'teamTricode_away',
       'teamTricode_home', 'wins_away', 'wins_home', 'losses_away',
       'losses_home', 'score_away', 'score_home', 'gameStatus',
       'gameStatusText', 'period', 'gameClock', 'gameTimeUTC', 'gameEt'],
      dtype='str')


Unnamed: 0,gameId,teamName_away,teamName_home,score_away,score_home,wins_away,wins_home,losses_away,losses_home
0,22500811,Magic,Suns,110,113,29,33,26,24
1,22500812,76ers,Pelicans,111,126,30,16,26,42
2,22500813,Grizzlies,Heat,120,136,21,31,34,27
3,22500814,Pistons,Bulls,126,110,42,24,13,33
4,22500815,Kings,Spurs,122,139,12,40,46,16


# Upload to Mongo

### Connect to Mongo

In [2]:
# Read connection settings from the environment; a local .env file is loaded first.
load_dotenv()

MONGO_URI = os.environ.get("MONGO_URI")
MONGO_DB = os.environ.get("MONGO_DB", "nba")
MONGO_COLLECTION = os.environ.get("MONGO_COLLECTION", "scoreboard_games")

# Fail fast when no connection string is configured
# (message: "MONGO_URI is missing from the .env file").
if not MONGO_URI:
    raise ValueError("Falta MONGO_URI en el archivo .env")

# Handles for the target database and collection.
client = MongoClient(MONGO_URI)
database = client[MONGO_DB]
col = database[MONGO_COLLECTION]

### Insert scores using upsert
* Uses `gameId` as the unique key for each game.
* Inserts games that are not yet in the collection.
* Updates previously added games if new values are found.

In [13]:
# One dict per game, keyed by column name.
df_out = game_scores_df.copy()
records = df_out.to_dict("records")

# Upsert on gameId: insert games that are not stored yet, overwrite the fields
# of games that already are.
ops = [
    UpdateOne({"gameId": record["gameId"]}, {"$set": record}, upsert=True)
    for record in records
]

result = None
if ops:
    # ordered=False: the operations are independent, so every one is attempted
    # even if another fails.
    result = col.bulk_write(ops, ordered=False)

    print("Load result:")
    print(f"New inserted: {result.upserted_count}")
    print(f"Modified existing: {result.modified_count}")
    print(f"Total processed: {len(ops)}")
else:
    print("No rows to update.")

Load result:
New inserted: 0
Modified existing: 0
Total processed: 6
