In [None]:
import nba_api.stats.endpoints as nbaapi
import rdshelp
import pandas as pd

# ✅ RDS CONNECTION NOW WORKING! (Fixed security group)
# print("🔍 Testing RDS connection after security group fix...")

conn = rdshelp.connect_to_rds('thebigone', 'ajwin', 'CharlesBark!23', 'nba-rds-instance.c9wwc0ukkiu5.us-east-1.rds.amazonaws.com')


In [44]:
# 🔍 COMPREHENSIVE MISSING GAME ANALYSIS
# Compare master NBA games with BoxScore Traditional table + failed API tracking

import json
import psycopg2

print("🔍 NBA GAMES vs BOXSCORE TRADITIONAL ANALYSIS")
print("=" * 60)

# Load database config
with open('endpoints/config/database_config.json', 'r') as f:
    config = json.load(f)

# Connect to database
conn = psycopg2.connect(
    host=config['host'],
    database=config['name'],
    user=config['user'],
    password=config['password'],
    port=int(config['port'])
)

cursor = conn.cursor()

# 1. Get all NBA game IDs from master table
cursor.execute("SELECT DISTINCT gameid FROM nba_games ORDER BY gameid;")
nba_master_gameids = {row[0] for row in cursor.fetchall()}
print(f"📊 Master NBA games table: {len(nba_master_gameids):,} unique game IDs")

# 2. Get all game IDs from BoxScore Traditional table
cursor.execute("SELECT DISTINCT gameid FROM nba_boxscoretraditionalv3_1 ORDER BY gameid;")
boxscore_gameids = {row[0] for row in cursor.fetchall()}
print(f"📊 BoxScore Traditional table: {len(boxscore_gameids):,} unique game IDs")

# 3. Get failed game IDs for BoxScore Traditional endpoint
cursor.execute("""
    SELECT DISTINCT id_value 
    FROM failed_api_calls 
    WHERE id_column = 'gameid' 
    AND endpoint_prefix LIKE '%boxscoretraditional%'
    ORDER BY id_value;
""")
failed_boxscore_gameids = {row[0] for row in cursor.fetchall()}
print(f"📊 Failed BoxScore Traditional calls: {len(failed_boxscore_gameids):,} unique game IDs")

print(f"\n🎯 ANALYSIS RESULTS:")
print("-" * 40)

# Calculate missing game IDs
missing_gameids = nba_master_gameids - boxscore_gameids
print(f"❌ Missing from BoxScore: {len(missing_gameids):,} game IDs")

# Calculate coverage
coverage_percentage = (len(boxscore_gameids) / len(nba_master_gameids)) * 100
print(f"✅ BoxScore coverage: {coverage_percentage:.1f}%")

# Check overlap between missing and failed
missing_and_failed = missing_gameids & failed_boxscore_gameids
missing_not_failed = missing_gameids - failed_boxscore_gameids
failed_but_have_data = failed_boxscore_gameids & boxscore_gameids

print(f"\n🔍 DETAILED BREAKDOWN:")
print(f"   Missing + Failed: {len(missing_and_failed):,} (failed attempts)")
print(f"   Missing + Not Failed: {len(missing_not_failed):,} (never attempted)")  
print(f"   Failed but have data: {len(failed_but_have_data):,} (retry succeeded)")

# Show sample missing game IDs
if missing_gameids:
    sample_missing = sorted(list(missing_gameids))[:10]
    print(f"\n🔍 Sample missing game IDs:")
    for gameid in sample_missing:
        status = "FAILED" if gameid in failed_boxscore_gameids else "NOT ATTEMPTED"
        print(f"   {gameid} - {status}")

# Check if missing games are from specific time periods
if missing_not_failed:
    sample_not_attempted = sorted(list(missing_not_failed))[:5]
    print(f"\n🎯 Sample games that were never attempted:")
    for gameid in sample_not_attempted:
        print(f"   {gameid}")

cursor.close()
conn.close()

print(f"\n📈 SUMMARY:")
print(f"   Total NBA games: {len(nba_master_gameids):,}")
print(f"   BoxScore collected: {len(boxscore_gameids):,}")
print(f"   Still missing: {len(missing_gameids):,}")
print(f"   Coverage: {coverage_percentage:.1f}%")

🔍 NBA GAMES vs BOXSCORE TRADITIONAL ANALYSIS
📊 Master NBA games table: 52,782 unique game IDs
📊 Master NBA games table: 52,782 unique game IDs
📊 BoxScore Traditional table: 25,812 unique game IDs
📊 Failed BoxScore Traditional calls: 150 unique game IDs

🎯 ANALYSIS RESULTS:
----------------------------------------
❌ Missing from BoxScore: 26,970 game IDs
✅ BoxScore coverage: 48.9%

🔍 DETAILED BREAKDOWN:
   Missing + Failed: 150 (failed attempts)
   Missing + Not Failed: 26,820 (never attempted)
   Failed but have data: 0 (retry succeeded)

🔍 Sample missing game IDs:
   0010500004 - FAILED
   0010500006 - FAILED
   0010500012 - FAILED
   0010500014 - FAILED
   0010500015 - FAILED
   0010500022 - FAILED
   0010500028 - FAILED
   0010500031 - FAILED
   0010500033 - FAILED
   0010500044 - FAILED

🎯 Sample games that were never attempted:
   0022000334
   0022000335
   0022000336
   0022000337
   0022000338

📈 SUMMARY:
   Total NBA games: 52,782
   BoxScore collected: 25,812
   Still missing

In [46]:
conn = psycopg2.connect(
    host=config['host'],
    database=config['name'],
    user=config['user'],
    password=config['password'],
    port=int(config['port'])
)
df_failed_api = pd.read_sql("SELECT * FROM failed_api_calls", conn)

  df_failed_api = pd.read_sql("SELECT * FROM failed_api_calls", conn)


In [48]:
df_failed_api.endpoint_prefix.value_counts()

endpoint_prefix
nba_playbyplayv3                       746
nba_boxscoretraditionalv3              150
nba_boxscoremiscv3                     148
nba_boxscorescoringv3                  146
nba_boxscoreusagev3                    129
nba_boxscorefourfactorsv3               57
nba_boxscoreadvancedv3                  57
nba_boxscoreplayertrackv3               56
nba_playerdashboardbyshootingsplits     27
nba_playergamelog                       15
Name: count, dtype: int64