In [24]:
# 📊 COMPREHENSIVE API CALL SUCCESS DIAGNOSTIC - ALL ENDPOINTS
import pandas as pd
import psycopg2
import json
import sys
from datetime import datetime

# Add config path and load endpoint configuration
sys.path.append('../endpoints/config')
import nba_endpoints_config as config

# Report banner, stamped with the local wall-clock time of this run.
print("🔍 NBA API SUCCESS DIAGNOSTIC - ALL ENDPOINTS")
print("=" * 80)
run_stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"⏰ Diagnostic run at: {run_stamp}")
print()

# Load the connection settings from the JSON config file.
with open('../endpoints/config/database_config.json', 'r') as cfg_file:
    db_config = json.load(cfg_file)

# Open the connection; the config stores the database name under 'name',
# while psycopg2 expects it as the 'database' keyword.
connect_kwargs = {
    'host': db_config['host'],
    'database': db_config['name'],
    'user': db_config['user'],
    'password': db_config['password'],
    'port': db_config['port'],
}
conn = psycopg2.connect(**connect_kwargs)
cursor = conn.cursor()

# List every table in the public schema, alphabetically.
cursor.execute("""
    SELECT table_name 
    FROM information_schema.tables 
    WHERE table_schema = 'public' 
    ORDER BY table_name
""")
all_tables = [table_name for (table_name,) in cursor.fetchall()]

# Master tables and the ID column each one is keyed on; 'count' is filled
# in below and stays 0 for a master table that does not exist.
master_tables = {
    name: {'id_column': id_column, 'count': 0}
    for name, id_column in (
        ('nba_games', 'gameid'),
        ('nba_players', 'playerid'),
        ('nba_teams', 'teamid'),
    )
}

# Count the distinct IDs held by each master table that is present.
for name, entry in master_tables.items():
    if name not in all_tables:
        continue
    cursor.execute(f"SELECT COUNT(DISTINCT {entry['id_column']}) FROM {name}")
    entry['count'] = cursor.fetchone()[0]

print("📋 MASTER TABLE COUNTS:")
for name, entry in master_tables.items():
    print(f"   {name}: {entry['count']:,} unique {entry['id_column']}s")

# Get failed API calls by endpoint.
# Reading failed_api_calls is best-effort: if the query fails (for example
# because the table does not exist) the report continues with no failure data.
failed_calls = {}
try:
    cursor.execute("""
        SELECT endpoint_prefix,
               COUNT(*) as total_failed,
               COUNT(CASE WHEN gameid IS NOT NULL THEN 1 END) as failed_games,
               COUNT(CASE WHEN playerid IS NOT NULL THEN 1 END) as failed_players,
               COUNT(CASE WHEN teamid IS NOT NULL THEN 1 END) as failed_teams
        FROM failed_api_calls
        GROUP BY endpoint_prefix
        ORDER BY endpoint_prefix
    """)
    failed_calls = {
        row[0]: {
            'total_failed': row[1],
            'failed_games': row[2],
            'failed_players': row[3],
            'failed_teams': row[4],
        }
        for row in cursor.fetchall()
    }
except psycopg2.Error as e:
    # A failed statement leaves the connection's transaction aborted; without
    # this rollback every later query in the cell would be rejected.
    conn.rollback()
    print(f"⚠️  Could not read failed_api_calls table: {e}")
    failed_calls = {}

print("\n🚫 FAILED API CALLS BY ENDPOINT:")
if failed_calls:
    for endpoint, counts in failed_calls.items():
        total_failed = counts.get('total_failed', 0)
        print(f"   {endpoint}: {total_failed:,} failed calls")
else:
    print("   No failed API calls recorded")

# Analyze all endpoints
print("\n📊 ENDPOINT SUCCESS ANALYSIS:")
print("-" * 80)

# Ordered rules that decide which master table drives an endpoint. Each rule
# is (parameter key, marker looked for in str(params), master table,
# ID column used in the endpoint tables, key into a failed_calls entry).
# The first matching rule wins, so the order game -> player -> team matters.
_MASTER_RULES = (
    ('game_id', 'from_mastergames', 'nba_games', 'gameid', 'failed_games'),
    ('player_id', 'from_masterplayers', 'nba_players', 'playerid', 'failed_players'),
    ('team_id', 'from_masterteams', 'nba_teams', 'teamid', 'failed_teams'),
)

# Other column names to try when the expected ID column cannot be queried.
_ALTERNATE_ID_COLUMNS = {'playerid': ('personid',)}


def _count_processed_ids(cursor, conn, table_name, id_column):
    """Count the IDs stored in one endpoint table.

    Tries COUNT(DISTINCT ...) on ``id_column``, then on each alternative name
    registered for it, and finally falls back to a plain row count.

    Args:
        cursor: Open psycopg2 cursor used to run the queries.
        conn: Connection owning ``cursor``; rolled back after each failed
            query so the next attempt does not run in an aborted transaction.
        table_name: Table to inspect.
        id_column: Expected name of the ID column.

    Returns:
        Tuple ``(count, used_row_count)``; ``used_row_count`` is True when the
        value is a row count rather than a distinct-ID count.

    Raises:
        psycopg2.Error: The error from the first attempt, if every query failed.
    """
    first_error = None
    for column in (id_column, *_ALTERNATE_ID_COLUMNS.get(id_column, ())):
        try:
            cursor.execute(f"SELECT COUNT(DISTINCT {column}) FROM {table_name}")
            return cursor.fetchone()[0], False
        except psycopg2.Error as exc:
            conn.rollback()
            if first_error is None:
                first_error = exc
    try:
        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        return cursor.fetchone()[0], True
    except psycopg2.Error:
        conn.rollback()
        raise first_error


endpoint_stats = []
total_successful_calls = 0
total_possible_calls = 0

for category_name, endpoints in config.ALL_ENDPOINTS.items():
    print(f"\n🎯 {category_name.upper()} ENDPOINTS:")

    for endpoint_config in endpoints:
        endpoint_name = endpoint_config['endpoint']

        # Determine master table and ID column based on endpoint parameters
        params = endpoint_config.get('parameters', {})
        params_text = str(params)
        rule = next(
            (r for r in _MASTER_RULES if r[0] in params or r[1] in params_text),
            None,
        )
        if rule is None:
            # League-level endpoints don't have individual ID tracking
            print(f"   📈 {endpoint_name}: League-level endpoint (no ID tracking)")
            continue
        _, _, master_table, endpoint_id_column, failed_key = rule
        total_ids = master_tables[master_table]['count']

        # Find the tables belonging to this endpoint.
        # NOTE(review): prefix matching would also pick up tables of another
        # endpoint whose name starts with this one's - confirm table naming.
        endpoint_prefix = f"nba_{endpoint_name.lower()}"
        endpoint_tables = [t for t in all_tables if t.startswith(endpoint_prefix)]

        if not endpoint_tables:
            # Still falls through so the endpoint is reported and counted as 0%.
            successful_ids = 0
            print(f"   ❌ {endpoint_name}: No tables found (0 successful calls)")
        else:
            # Use the alphabetically first table to count unique IDs processed
            first_table = min(endpoint_tables)
            try:
                successful_ids, used_row_count = _count_processed_ids(
                    cursor, conn, first_table, endpoint_id_column
                )
            except psycopg2.Error as e:
                # Unreadable table: report it and leave it out of the statistics.
                print(f"   ⚠️  {endpoint_name}: Error reading {first_table} - {str(e)[:50]}...")
                continue
            if used_row_count:
                print(f"   ⚠️  {endpoint_name}: Using row count instead of unique ID count")

        # Get failed calls for this endpoint (0 when none were recorded)
        failed_ids = failed_calls.get(endpoint_prefix, {}).get(failed_key, 0)

        # Calculate success metrics
        processed_ids = successful_ids + failed_ids
        remaining_ids = max(0, total_ids - processed_ids)
        success_rate = (successful_ids / total_ids * 100) if total_ids > 0 else 0

        # Display results
        if success_rate > 90:
            status_icon = "✅"
        elif success_rate > 50:
            status_icon = "🟡"
        elif success_rate > 0:
            status_icon = "🔴"
        else:
            status_icon = "❌"
        print(f"   {status_icon} {endpoint_name}: {successful_ids:,}/{total_ids:,} successful ({success_rate:.1f}%)")

        # Track totals
        endpoint_stats.append({
            'endpoint': endpoint_name,
            'category': category_name,
            'master_table': master_table,
            'total_ids': total_ids,
            'successful_ids': successful_ids,
            'failed_ids': failed_ids,
            'remaining_ids': remaining_ids,
            'success_rate': success_rate
        })

        total_successful_calls += successful_ids
        total_possible_calls += total_ids

# Overall summary across every endpoint that made it into endpoint_stats.
banner = "=" * 80
print(f"\n{banner}")
print("📊 OVERALL SUCCESS SUMMARY")
print(banner)

if not endpoint_stats:
    print("❌ No endpoint statistics available")
else:
    rates = [entry['success_rate'] for entry in endpoint_stats]
    avg_success_rate = sum(rates) / len(rates)
    completed_endpoints = len([rate for rate in rates if rate > 95])
    partial_endpoints = len([rate for rate in rates if 0 < rate <= 95])
    empty_endpoints = len([rate for rate in rates if rate == 0])

    print(f"🎯 Total endpoints analyzed: {len(endpoint_stats)}")
    print(f"✅ Completed endpoints (>95%): {completed_endpoints}")
    print(f"🟡 Partial endpoints (1-95%): {partial_endpoints}")
    print(f"❌ Empty endpoints (0%): {empty_endpoints}")
    print(f"📈 Average success rate: {avg_success_rate:.1f}%")
    print(f"🔢 Total successful API calls: {total_successful_calls:,}")

    # Rank endpoints from highest to lowest success rate.
    sorted_stats = list(endpoint_stats)
    sorted_stats.sort(key=lambda entry: entry['success_rate'], reverse=True)

    print("\n🏆 TOP 5 PERFORMING ENDPOINTS:")
    for entry in sorted_stats[:5]:
        ratio = f"{entry['successful_ids']:,}/{entry['total_ids']:,}"
        print(f"   ✅ {entry['endpoint']}: {entry['success_rate']:.1f}% ({ratio})")

    # The bottom list is only shown when there are more than five endpoints.
    if len(sorted_stats) > 5:
        print("\n⚠️  BOTTOM 5 PERFORMING ENDPOINTS:")
        for entry in sorted_stats[-5:]:
            if entry['success_rate'] == 0:
                marker = '❌'
            else:
                marker = '🔴'
            ratio = f"{entry['successful_ids']:,}/{entry['total_ids']:,}"
            print(f"   {marker} {entry['endpoint']}: {entry['success_rate']:.1f}% ({ratio})")

print("\n🔄 Run this cell again to refresh the diagnostic")
print("⚡ This diagnostic can run while the endpoint processor is actively collecting data")

# Release the database resources opened at the top of the cell.
cursor.close()
conn.close()

🔍 NBA API SUCCESS DIAGNOSTIC - ALL ENDPOINTS
⏰ Diagnostic run at: 2025-08-28 11:11:40

📋 MASTER TABLE COUNTS:
   nba_games: 52,782 unique gameids
   nba_players: 571 unique playerids
   nba_teams: 30 unique teamids
⚠️  Could not read failed_api_calls table: relation "failed_api_calls" does not exist
LINE 7:         FROM failed_api_calls 
                     ^


🚫 FAILED API CALLS BY ENDPOINT:
   No failed API calls recorded

📊 ENDPOINT SUCCESS ANALYSIS:
--------------------------------------------------------------------------------

🎯 GAME_BASED ENDPOINTS:
   ❌ BoxScoreAdvancedV3: No tables found (0 successful calls)
   ❌ BoxScoreAdvancedV3: 0/52,782 successful (0.0%)
   ❌ BoxScoreAdvancedV2: No tables found (0 successful calls)
   ❌ BoxScoreAdvancedV2: 0/52,782 successful (0.0%)
   ❌ BoxScoreDefensiveV2: No tables found (0 successful calls)
   ❌ BoxScoreDefensiveV2: 0/52,782 successful (0.0%)
   ❌ BoxScoreFourFactorsV3: No tables found (0 successful calls)
   ❌ BoxScoreFourFactorsV3

In [23]:
# 🔍 QUICK TABLE STRUCTURE CHECK
import pandas as pd
import psycopg2
import json

# Load the connection settings from the JSON config file.
with open('../endpoints/config/database_config.json', 'r') as cfg_file:
    db_config = json.load(cfg_file)

# Open the connection; the config stores the database name under 'name',
# while psycopg2 expects it as the 'database' keyword.
connect_kwargs = {
    'host': db_config['host'],
    'database': db_config['name'],
    'user': db_config['user'],
    'password': db_config['password'],
    'port': db_config['port'],
}
conn = psycopg2.connect(**connect_kwargs)
cursor = conn.cursor()

# Check master table structures
master_tables = ['nba_games', 'nba_players', 'nba_teams']

print("🔍 MASTER TABLE COLUMN STRUCTURES:")
print("="*50)

# Maps each master table name to its list of (column_name, data_type) rows.
table_structures = {}
for table in master_tables:
    try:
        # Restrict to the public schema so a same-named table in another
        # schema cannot mix its columns into the listing, and pass the table
        # name as a bound parameter instead of formatting it into the SQL.
        cursor.execute(
            """
            SELECT column_name, data_type
            FROM information_schema.columns
            WHERE table_schema = 'public'
              AND table_name = %s
            ORDER BY ordinal_position
            """,
            (table,),
        )
        columns = cursor.fetchall()
    except psycopg2.Error as e:
        # Clear the aborted transaction so the remaining queries still run.
        conn.rollback()
        print(f"❌ Error checking {table}: {e}")
        continue

    table_structures[table] = columns
    print(f"\n📋 {table.upper()}:")
    for col_name, col_type in columns[:8]:  # Show first 8 columns
        print(f"   {col_name} ({col_type})")
    if len(columns) > 8:
        print(f"   ... and {len(columns) - 8} more columns")

# Check if any endpoint tables exist to see their structure.
# '_' is a single-character wildcard in LIKE, so it is escaped to match the
# literal 'nba_' prefix; ORDER BY makes the three sampled tables deterministic.
cursor.execute("""
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'public'
      AND table_name LIKE 'nba!_%' ESCAPE '!'
      AND table_name NOT IN ('nba_games', 'nba_players', 'nba_teams')
    ORDER BY table_name
    LIMIT 3
""")
sample_tables = [row[0] for row in cursor.fetchall()]

if sample_tables:
    print("\n🔍 SAMPLE ENDPOINT TABLE STRUCTURES:")
    print("="*50)

    for table in sample_tables:
        try:
            # Same schema filter as the table listing above; the table name
            # is passed as a bound parameter.
            cursor.execute(
                """
                SELECT column_name, data_type
                FROM information_schema.columns
                WHERE table_schema = 'public'
                  AND table_name = %s
                ORDER BY ordinal_position
                """,
                (table,),
            )
            columns = cursor.fetchall()
        except psycopg2.Error as e:
            # Clear the aborted transaction so the next table can be checked.
            conn.rollback()
            print(f"❌ Error checking {table}: {e}")
            continue

        print(f"\n📋 {table.upper()}:")
        for col_name, col_type in columns[:6]:  # Show first 6 columns
            print(f"   {col_name} ({col_type})")
        if len(columns) > 6:
            print(f"   ... and {len(columns) - 6} more columns")

# Release the database resources opened at the top of the cell.
cursor.close()
conn.close()

🔍 MASTER TABLE COLUMN STRUCTURES:

📋 NBA_GAMES:
   gameid (character varying)
   seasonid (character varying)
   gamedate (date)
   hometeamid (character varying)
   hometeamabbreviation (character varying)
   hometeamname (character varying)
   awayteamid (character varying)
   awayteamabbreviation (character varying)
   ... and 8 more columns

📋 NBA_PLAYERS:
   playerid (character varying)
   playername (character varying)
   teamid (character varying)
   teamabbreviation (character varying)
   season (character varying)
   position (character varying)
   height (character varying)
   weight (character varying)
   ... and 11 more columns

📋 NBA_TEAMS:
   teamid (character varying)
   teamname (character varying)
   teamabbreviation (character varying)
   teamcity (character varying)
   teamstate (character varying)
   yearfounded (integer)
   conference (character varying)
   division (character varying)
   ... and 3 more columns

🔍 SAMPLE ENDPOINT TABLE STRUCTURES:

📋 NBA_ENDPOINT_F