In [8]:
# 📋 SHOW ALL TABLES CURRENTLY IN DATABASE
#
# Prints an inventory of every table in the `public` schema: master tables with
# distinct-ID counts, endpoint tables split by naming convention, and any other
# tables, each with row and column counts.
import psycopg2
import json
from contextlib import closing

from psycopg2 import sql

# Master tables -> (ID column counted with DISTINCT, plural noun used in the report).
MASTER_TABLE_KEYS = {
    'nba_games': ('gameid', 'games'),
    'nba_players': ('playerid', 'players'),
    'nba_teams': ('teamid', 'teams'),
}


def _count_rows(conn, cursor, table_name):
    """Return COUNT(*) for ``table_name``, or None when the table cannot be read.

    The table name is composed with ``sql.Identifier`` so it is always quoted
    correctly. On a database error the transaction is rolled back; otherwise
    PostgreSQL would reject every later statement on this connection with
    "current transaction is aborted".
    """
    try:
        cursor.execute(
            sql.SQL("SELECT COUNT(*) FROM {}").format(sql.Identifier(table_name))
        )
        return cursor.fetchone()[0]
    except psycopg2.Error:
        conn.rollback()
        return None


def _row_summary(conn, cursor, table_name, col_count):
    """Return the '<table>: <rows> rows, <cols> columns' report fragment."""
    row_count = _count_rows(conn, cursor, table_name)
    if row_count is None:
        return f"{table_name}: Error reading, {col_count} columns"
    return f"{table_name}: {row_count:,} rows, {col_count} columns"


def _uses_old_naming(table_name):
    """True when the table name ends in a purely numeric '_<digits>' suffix."""
    return table_name.split('_')[-1].isdigit()


print("📋 ALL TABLES IN DATABASE")
print("="*60)

# Connect to database
with open('../endpoints/config/database_config.json', 'r') as f:
    db_config = json.load(f)

# `closing` guarantees the connection is released even if a query below raises;
# the cursor context manager does the same for the cursor.
with closing(psycopg2.connect(
    host=db_config['host'],
    database=db_config['name'],
    user=db_config['user'],
    password=db_config['password'],
    port=db_config['port']
)) as conn, conn.cursor() as cursor:

    # Get all tables. The column-count subquery is correlated on schema as well
    # as name so same-named tables in other schemas do not inflate the count.
    cursor.execute("""
        SELECT t.table_name,
               (SELECT COUNT(*)
                  FROM information_schema.columns c
                 WHERE c.table_schema = t.table_schema
                   AND c.table_name = t.table_name) AS column_count
        FROM information_schema.tables t
        WHERE t.table_schema = 'public'
        ORDER BY t.table_name
    """)
    all_tables_info = cursor.fetchall()

    # Categorize tables
    master_tables = []
    endpoint_tables = []
    other_tables = []

    for table_name, col_count in all_tables_info:
        if table_name in MASTER_TABLE_KEYS:
            master_tables.append((table_name, col_count))
        elif table_name.startswith('nba_'):
            endpoint_tables.append((table_name, col_count))
        else:
            other_tables.append((table_name, col_count))

    print("📊 SUMMARY:")
    print(f"   🏀 Master tables: {len(master_tables)}")
    print(f"   🎯 Endpoint tables: {len(endpoint_tables)}")
    print(f"   📝 Other tables: {len(other_tables)}")
    print(f"   📋 Total tables: {len(all_tables_info)}")

    # Show master tables with distinct-ID and total row counts
    print("\n🏀 MASTER TABLES:")
    for table_name, col_count in master_tables:
        id_column, noun = MASTER_TABLE_KEYS[table_name]
        try:
            cursor.execute(
                sql.SQL("SELECT COUNT(DISTINCT {}), COUNT(*) FROM {}").format(
                    sql.Identifier(id_column), sql.Identifier(table_name)
                )
            )
            unique_count, total_count = cursor.fetchone()
            print(f"   ✅ {table_name}: {unique_count:,} unique {noun}, {total_count:,} total rows, {col_count} columns")
        except psycopg2.Error as e:
            # Reset the aborted transaction so the remaining tables can still be read.
            conn.rollback()
            print(f"   ❌ {table_name}: Error reading table - {str(e)[:50]}...")

    # Show endpoint tables
    print(f"\n🎯 ENDPOINT TABLES ({len(endpoint_tables)} total):")

    # Check naming convention: old-style names end in a numeric suffix
    old_naming_tables = [t for t in endpoint_tables if _uses_old_naming(t[0])]
    new_naming_tables = [t for t in endpoint_tables if not _uses_old_naming(t[0])]

    print(f"   ✅ New naming convention: {len(new_naming_tables)} tables")
    print(f"   🔴 Old naming convention: {len(old_naming_tables)} tables")

    if old_naming_tables:
        print("\n   🔴 OLD NAMING CONVENTION TABLES:")
        for table_name, col_count in old_naming_tables:
            print(f"      {_row_summary(conn, cursor, table_name, col_count)}")

    # Show ALL new naming tables
    print(f"\n   ✅ NEW NAMING CONVENTION TABLES (all {len(new_naming_tables)} tables):")
    for i, (table_name, col_count) in enumerate(new_naming_tables, 1):
        print(f"   {i:2d}. {_row_summary(conn, cursor, table_name, col_count)}")

    # Show other tables
    if other_tables:
        print("\n📝 OTHER TABLES:")
        for table_name, col_count in other_tables:
            print(f"   📋 {_row_summary(conn, cursor, table_name, col_count)}")

if old_naming_tables:
    database_status = f'⚠️ {len(old_naming_tables)} old naming tables remain'
else:
    database_status = '✅ Clean (all meaningful names)'

print(f"\n{'='*60}")
print(f"🎯 DATABASE STATUS: {database_status}")

📋 ALL TABLES IN DATABASE
📊 SUMMARY:
   🏀 Master tables: 3
   🎯 Endpoint tables: 52
   📝 Other tables: 7
   📋 Total tables: 62

🏀 MASTER TABLES:
📊 SUMMARY:
   🏀 Master tables: 3
   🎯 Endpoint tables: 52
   📝 Other tables: 7
   📋 Total tables: 62

🏀 MASTER TABLES:
   ✅ nba_games: 52,782 unique games, 52,782 total rows, 16 columns
   ✅ nba_players: 571 unique players, 571 total rows, 19 columns
   ✅ nba_teams: 30 unique teams, 30 total rows, 11 columns

🎯 ENDPOINT TABLES (52 total):
   ✅ New naming convention: 52 tables
   🔴 Old naming convention: 0 tables

   ✅ NEW NAMING CONVENTION TABLES (all 52 tables):
   ✅ nba_games: 52,782 unique games, 52,782 total rows, 16 columns
   ✅ nba_players: 571 unique players, 571 total rows, 19 columns
   ✅ nba_teams: 30 unique teams, 30 total rows, 11 columns

🎯 ENDPOINT TABLES (52 total):
   ✅ New naming convention: 52 tables
   🔴 Old naming convention: 0 tables

   ✅ NEW NAMING CONVENTION TABLES (all 52 tables):
    1. nba_boxscoreadvancedv3_playersta

In [9]:
# 📊 COMPREHENSIVE ENDPOINT COVERAGE ANALYSIS
#
# For every endpoint listed in `config.ALL_ENDPOINTS`, finds its tables
# (`nba_<endpoint>_*`), collects the distinct game/player/team IDs stored in
# them and reports what share of the corresponding master table they cover.
import pandas as pd
import psycopg2
import json
import sys
from contextlib import closing
from datetime import datetime

from psycopg2 import sql

# Add config path and load endpoint configuration.
# Guarded so re-running the cell does not keep appending duplicates to sys.path.
CONFIG_DIR = '../endpoints/config'
if CONFIG_DIR not in sys.path:
    sys.path.append(CONFIG_DIR)
import nba_endpoints_config as config

# Column names that may hold each kind of ID, in order of preference.
ID_COLUMN_CANDIDATES = {
    'gameid': ['gameid', 'game_id'],
    'playerid': ['playerid', 'player_id', 'personid', 'person_id'],
    'teamid': ['teamid', 'team_id'],
}


def _new_coverage():
    """Return an empty coverage tracker keyed by ID type."""
    return {
        param_type: {'covered': set(), 'total_tables': 0}
        for param_type in ('gameid', 'playerid', 'teamid')
    }


def _detect_param_type(params):
    """Return 'gameid', 'playerid' or 'teamid' for an endpoint's parameters.

    An endpoint is classified by the first match, checked in the order
    game -> player -> team: either the explicit ``*_id`` key is present or the
    ``from_master*`` marker appears anywhere in the parameters' string form.
    Returns None when nothing matches.
    """
    params_text = str(params)
    if 'game_id' in params or 'from_mastergames' in params_text:
        return 'gameid'
    if 'player_id' in params or 'from_masterplayers' in params_text:
        return 'playerid'
    if 'team_id' in params or 'from_masterteams' in params_text:
        return 'teamid'
    return None


def _find_id_column(columns_by_lower_name, param_type):
    """Return the table's actual column name for ``param_type``, or None.

    ``columns_by_lower_name`` maps lower-cased column names to their real
    spelling, so a match is case-insensitive while the returned name can be
    quoted exactly as stored.
    """
    for candidate in ID_COLUMN_CANDIDATES[param_type]:
        if candidate in columns_by_lower_name:
            return columns_by_lower_name[candidate]
    return None


def _coverage_pct(covered_count, master_total):
    """Percentage of the master total that is covered; 0 when the total is 0."""
    return covered_count / master_total * 100 if master_total > 0 else 0


print("🔍 NBA ENDPOINT COVERAGE ANALYSIS")
print("="*80)
print(f"⏰ Analysis run at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("🎯 Analyzing coverage of unique parameters across all endpoint tables")
print()

# Connect to database
with open('../endpoints/config/database_config.json', 'r') as f:
    db_config = json.load(f)

master_tables = {
    'nba_games': {'param_column': 'gameid', 'total': 0},
    'nba_players': {'param_column': 'playerid', 'total': 0},
    'nba_teams': {'param_column': 'teamid', 'total': 0}
}
# Pre-seed every ID type with 0 so later lookups cannot raise KeyError when a
# master table is missing from the database.
master_totals = {info['param_column']: 0 for info in master_tables.values()}

total_endpoint_coverage = _new_coverage()

# `closing` guarantees the connection is released even if a step below raises.
with closing(psycopg2.connect(
    host=db_config['host'],
    database=db_config['name'],
    user=db_config['user'],
    password=db_config['password'],
    port=db_config['port']
)) as conn, conn.cursor() as cursor:

    # Get all existing tables
    cursor.execute("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public'
        ORDER BY table_name
    """)
    all_tables = [row[0] for row in cursor.fetchall()]

    # Load every table's columns once. Probing candidate columns with failing
    # SELECTs would abort the transaction and make all later probes fail too.
    cursor.execute("""
        SELECT table_name, column_name
        FROM information_schema.columns
        WHERE table_schema = 'public'
        ORDER BY table_name, ordinal_position
    """)
    table_columns = {}
    for tbl, col in cursor.fetchall():
        table_columns.setdefault(tbl, {}).setdefault(col.lower(), col)

    # Get master table totals
    print("📋 MASTER TABLE TOTALS:")
    for master_table, info in master_tables.items():
        if master_table not in all_tables:
            print(f"   ⚠️  {master_table}: table not found")
            continue
        try:
            cursor.execute(
                sql.SQL("SELECT COUNT(DISTINCT {}) FROM {}").format(
                    sql.Identifier(info['param_column']),
                    sql.Identifier(master_table),
                )
            )
            info['total'] = cursor.fetchone()[0]
            master_totals[info['param_column']] = info['total']
            print(f"   {master_table}: {info['total']:,} unique {info['param_column']}s")
        except psycopg2.Error as e:
            # Reset the aborted transaction so later queries still run.
            conn.rollback()
            print(f"   ❌ {master_table}: Error reading - {e}")

    # Analyze coverage by endpoint category
    print("\n🎯 ENDPOINT COVERAGE ANALYSIS BY CATEGORY:")
    print("-" * 80)

    for category_name, endpoints in config.ALL_ENDPOINTS.items():
        print(f"\n🏷️  {category_name.upper()} ENDPOINTS:")

        category_coverage = _new_coverage()

        for endpoint_config in endpoints:
            endpoint_name = endpoint_config['endpoint']

            # Find all tables for this endpoint (new naming convention)
            table_prefix = f'nba_{endpoint_name.lower()}_'
            endpoint_tables = [t for t in all_tables if t.startswith(table_prefix)]

            if not endpoint_tables:
                print(f"   ❌ {endpoint_name}: No tables found")
                continue

            print(f"   📊 {endpoint_name}: {len(endpoint_tables)} table(s)")

            # Determine which parameter this endpoint uses
            param_type = _detect_param_type(endpoint_config.get('parameters', {}))
            if not param_type:
                print("      📈 League-level endpoint (no ID tracking)")
                continue

            # Check coverage for each table of this endpoint
            endpoint_unique_ids = set()
            for table in endpoint_tables:
                id_column = _find_id_column(table_columns.get(table, {}), param_type)
                if id_column is None:
                    print(f"      ⚠️  {table}: No {param_type} column found")
                    continue

                try:
                    cursor.execute(
                        sql.SQL(
                            "SELECT DISTINCT {col} FROM {tbl} WHERE {col} IS NOT NULL"
                        ).format(
                            col=sql.Identifier(id_column),
                            tbl=sql.Identifier(table),
                        )
                    )
                    table_ids = {row[0] for row in cursor.fetchall()}
                except psycopg2.Error as e:
                    conn.rollback()
                    print(f"      ❌ {table}: Error reading - {str(e)[:30]}...")
                    continue

                if not table_ids:
                    # The column exists but holds no usable IDs; as before, such
                    # a table is not counted towards `total_tables`.
                    print(f"      ⚠️  {table}: {id_column} column has no non-NULL values")
                    continue

                endpoint_unique_ids.update(table_ids)
                category_coverage[param_type]['total_tables'] += 1
                total_endpoint_coverage[param_type]['total_tables'] += 1
                print(f"      ✅ {table}: {len(table_ids):,} unique {param_type}s")

            # Add to category and total coverage
            if endpoint_unique_ids:
                category_coverage[param_type]['covered'].update(endpoint_unique_ids)
                total_endpoint_coverage[param_type]['covered'].update(endpoint_unique_ids)

                # Calculate coverage percentage for this endpoint
                if master_totals[param_type] > 0:
                    coverage_pct = _coverage_pct(len(endpoint_unique_ids), master_totals[param_type])
                    print(f"      📈 {endpoint_name} coverage: {len(endpoint_unique_ids):,}/{master_totals[param_type]:,} {param_type}s ({coverage_pct:.1f}%)")

        # Category summary
        print(f"\n   📊 {category_name.upper()} CATEGORY SUMMARY:")
        for param_type, coverage in category_coverage.items():
            if coverage['total_tables'] > 0:
                coverage_pct = _coverage_pct(len(coverage['covered']), master_totals[param_type])
                print(f"      {param_type}: {len(coverage['covered']):,}/{master_totals[param_type]:,} covered ({coverage_pct:.1f}%) across {coverage['total_tables']} tables")

# Overall coverage summary (no database access needed from here on)
print(f"\n{'='*80}")
print("🎯 OVERALL COVERAGE SUMMARY")
print(f"{'='*80}")

# NOTE(review): `covered` holds whatever IDs the endpoint tables contain; IDs that
# are absent from the master table (or stored with a different type) would push
# coverage above 100% and make `remaining` negative — confirm the ID types match.
for param_type, coverage in total_endpoint_coverage.items():
    if coverage['total_tables'] > 0:
        master_total = master_totals[param_type]
        covered_count = len(coverage['covered'])
        coverage_pct = _coverage_pct(covered_count, master_total)
        remaining = master_total - covered_count

        status_icon = "✅" if coverage_pct > 90 else "🟡" if coverage_pct > 50 else "🔴"
        print(f"{status_icon} {param_type.upper()}: {covered_count:,}/{master_total:,} covered ({coverage_pct:.1f}%)")
        print(f"   📊 Tables with {param_type}: {coverage['total_tables']}")
        print(f"   📈 Remaining to process: {remaining:,}")
        print()

# Find tables not following naming convention
print("🔍 TABLE NAMING ANALYSIS:")
endpoint_names = config.list_all_endpoint_names()
expected_prefixes = [f'nba_{name.lower()}_' for name in endpoint_names]

nba_tables = [t for t in all_tables if t.startswith('nba_') and t not in master_tables]
recognized_tables = []
unrecognized_tables = []

# str.startswith accepts a tuple of prefixes; an empty tuple never matches.
prefix_tuple = tuple(expected_prefixes)
for table in nba_tables:
    if table.startswith(prefix_tuple):
        recognized_tables.append(table)
    else:
        unrecognized_tables.append(table)

print(f"   ✅ Tables following naming convention: {len(recognized_tables)}")
print(f"   ❓ Tables not matching endpoints: {len(unrecognized_tables)}")

if unrecognized_tables:
    print("\n   ❓ UNRECOGNIZED TABLES:")
    for table in unrecognized_tables[:10]:
        print(f"      {table}")
    if len(unrecognized_tables) > 10:
        print(f"      ... and {len(unrecognized_tables) - 10} more")

print("\n🔄 Run this analysis again to track coverage progress")
print("📊 This analysis shows how much of your master data has been processed by each endpoint category")

🔍 NBA ENDPOINT COVERAGE ANALYSIS
⏰ Analysis run at: 2025-08-29 09:53:59
🎯 Analyzing coverage of unique parameters across all endpoint tables

📋 MASTER TABLE TOTALS:
📋 MASTER TABLE TOTALS:
   nba_games: 52,782 unique gameids
   nba_players: 571 unique playerids
   nba_teams: 30 unique teamids

🎯 ENDPOINT COVERAGE ANALYSIS BY CATEGORY:
--------------------------------------------------------------------------------

🏷️  GAME_BASED ENDPOINTS:
   📊 BoxScoreAdvancedV3: 2 table(s)
   nba_games: 52,782 unique gameids
   nba_players: 571 unique playerids
   nba_teams: 30 unique teamids

🎯 ENDPOINT COVERAGE ANALYSIS BY CATEGORY:
--------------------------------------------------------------------------------

🏷️  GAME_BASED ENDPOINTS:
   📊 BoxScoreAdvancedV3: 2 table(s)
      ✅ nba_boxscoreadvancedv3_playerstats: 7,392 unique gameids
      ✅ nba_boxscoreadvancedv3_playerstats: 7,392 unique gameids
      ✅ nba_boxscoreadvancedv3_teamstats: 7,392 unique gameids
      📈 BoxScoreAdvancedV3 coverage