# NBA Systematic Data Processing

This notebook demonstrates the new systematic approach to NBA data collection using the endpoint processor.

In [None]:
import sys
import os
import pandas as pd

# Add src directory to path
sys.path.append(os.path.join(os.getcwd(), '..', 'src'))
sys.path.append(os.path.join(os.getcwd(), '..', 'config'))

import allintwo
from nba_endpoint_processor import NBAEndpointProcessor
from nba_endpoints_config import ALL_ENDPOINTS, get_endpoints_by_priority

In [None]:
# Connect to database
# Connection settings are read from environment variables so that no secret is
# stored in the notebook or its version history. A missing variable raises
# KeyError here, before any connection attempt is made.
# NOTE(review): the password that used to be hardcoded in this cell has been
# exposed and should be rotated.
# The positional order matches the original call; presumably
# (database name, user, password, host) -- confirm against allintwo.connect_to_rds.
conn = allintwo.connect_to_rds(
    os.environ['NBA_DB_NAME'],
    os.environ['NBA_DB_USER'],
    os.environ['NBA_DB_PASSWORD'],
    os.environ['NBA_DB_HOST'],
)

# Create processor
# NOTE(review): rate_limit=0.6 is presumably a delay between API requests in
# seconds -- confirm against NBAEndpointProcessor.
processor = NBAEndpointProcessor(conn, rate_limit=0.6)
print("NBA Endpoint Processor initialized!")

In [None]:
# Smoke test: run only the high-priority endpoints of the 'game_based'
# category, capped at 5 via `limit`, before attempting a full run.
print("Testing high-priority game-based endpoints with 5 games...")
results = processor.process_endpoints_by_category(
    'game_based',
    priority='high',
    limit=5,
)

# One line per endpoint: a check mark or cross for the 'success' flag,
# followed by the recorded 'duration' formatted to two decimals.
for endpoint in results:
    result = results[endpoint]
    if result['success']:
        status = "✓"
    else:
        status = "✗"
    print(f"{status} {endpoint}: {result['duration']:.2f}s")

# Overall summary as reported by the processor itself.
summary = processor.get_processing_summary()
print(f"\nSummary: {summary}")

In [None]:
# Check what tables were created
# The query lists every table in the 'public' schema whose name starts with
# 'boxscore', so tables that already existed before this run are included.
# The cursor is closed in a `finally` block so it is released even if the
# query fails; `tables` is fetched before the close and stays available to
# the cells below.
cursor = conn.cursor()
try:
    cursor.execute("""
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = 'public'
        AND table_name LIKE 'boxscore%'
        ORDER BY table_name;
    """)
    tables = cursor.fetchall()
finally:
    cursor.close()

# Each fetched row has the table name as its first element.
print(f"Created {len(tables)} boxscore tables:")
for table in tables:
    print(f"  - {table[0]}")

In [None]:
# Preview the first table returned by the table-listing query, if any.
# Nothing is loaded or displayed when `tables` is empty.
if tables:
    test_table = tables[0][0]  # first row, first element: the table name
    df = allintwo.fetch_table_to_dataframe(conn, test_table)
    # A single print call with a newline separator emits the same two lines.
    print(
        f"Sample data from {test_table}:",
        f"Rows: {len(df)}, Columns: {len(df.columns)}",
        sep="\n",
    )
    display(df.head())

In [None]:
# Print every configured endpoint, grouped under its upper-cased category,
# with the priority and frequency recorded in its configuration entry.
print("Available endpoint categories:")
for category in ALL_ENDPOINTS:
    endpoints = ALL_ENDPOINTS[category]
    print(f"\n{category.upper()}: {len(endpoints)} endpoints")
    for ep in endpoints:
        print(f"  - {ep['endpoint']} ({ep['priority']} priority, {ep['frequency']} frequency)")