In [43]:
import nba_api.stats.endpoints as nbaapi
import pandas as pd
# Commented-out tuple, not executed (looks like a leftover test-case entry — confirm before deleting):
#         ("PlayerDashboardByClutch", {"player_id": 2544, "last_n_games": 30}, True),
# test
# Request the shooting-splits dashboard for player 2544, season 2023-24, and keep
# only the second frame (index 1) of the list returned by get_data_frames().
# NOTE(review): the displayed result of this frame shows column headers but no rows;
# confirm that league_id_nullable='10' is the league intended for this player.
df = nbaapi.playerdashboardbyshootingsplits.PlayerDashboardByShootingSplits(player_id=2544,season='2023-24',league_id_nullable='10').get_data_frames()[1]

In [44]:
# Bare last expression: renders the current notebook-level `df` as the cell output.
df

Unnamed: 0,GROUP_SET,GROUP_VALUE,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,EFG_PCT,BLKA,...,FG3A_RANK,FG3_PCT_RANK,EFG_PCT_RANK,BLKA_RANK,PCT_AST_2PM_RANK,PCT_UAST_2PM_RANK,PCT_AST_3PM_RANK,PCT_UAST_3PM_RANK,PCT_AST_FGM_RANK,PCT_UAST_FGM_RANK


In [23]:
def match_dataframes_to_names_v2(dataframes, endpoint_instance):
    """
    Match each DataFrame to one of the endpoint's dataset names.

    Strategies run in this order; each one only touches DataFrames that are
    still unmatched, and a dataset name is handed out at most once:
    1. Column count matching (only when a count is unique among the expected datasets)
    2. GROUP_VALUE content analysis (for dashboard endpoints)
    3. Row count heuristics
    4. Positional fallback: remaining expected names in dict order, then "dataframe_<i>"

    Strategies 2 and 3 use hard-coded dashboard dataset names; a name is only
    assigned when it is also a key of ``endpoint_instance.expected_data``, so the
    heuristics never label a frame with a dataset the endpoint does not declare.

    Args:
        dataframes: Sequence of pandas DataFrames (entries may be None).
        endpoint_instance: Object that may expose ``expected_data``, a mapping of
            dataset name -> list of expected column names.

    Returns:
        A list with one lowercase name per entry of ``dataframes``. When
        ``endpoint_instance`` has no ``expected_data`` attribute the names are
        simply "dataframe_0", "dataframe_1", ...

    Side effects:
        Prints a trace of every matching decision to stdout.
    """

    if not hasattr(endpoint_instance, 'expected_data'):
        return [f"dataframe_{i}" for i in range(len(dataframes))]

    expected_data = endpoint_instance.expected_data
    matched_names = [None] * len(dataframes)
    used_names = set()

    def is_available(name):
        # A name may be assigned only if the endpoint declares it and it is still free.
        return name in expected_data and name not in used_names

    def assign(index, name):
        matched_names[index] = name
        used_names.add(name)

    print("=== ADVANCED DATAFRAME MATCHING ===")

    # Strategy 1: Exact column count matching (when counts are unique)
    print("\n1️⃣ Trying column count matching...")
    names_by_count = {}
    for name, cols in expected_data.items():
        names_by_count.setdefault(len(cols), []).append(name)

    for i, df in enumerate(dataframes):
        if df is None:
            continue
        actual_count = len(df.columns)
        candidates = names_by_count.get(actual_count, [])
        # Only a count owned by exactly one expected dataset is a confident match.
        if len(candidates) == 1 and candidates[0] not in used_names:
            assign(i, candidates[0])
            print(f"   DataFrame {i} → {candidates[0]} (unique column count: {actual_count})")

    # Strategy 2: Content-based matching (for dashboard endpoints)
    print("\n2️⃣ Trying content-based matching...")
    # Built once (loop-invariant). Checked in this order; the first hit wins.
    # NOTE(review): confirm these literals against the GROUP_VALUE strings the API
    # actually returns; a hint that never occurs silently falls through to the
    # weaker strategies below.
    content_hints = {
        'OverallPlayerDashboard': {'Overall'},
        'ByHalfPlayerDashboard': {'1st Half', '2nd Half'},
        'ByPeriodPlayerDashboard': {'1st Quarter', '2nd Quarter', '3rd Quarter', '4th Quarter'},
        'ByScoreMarginPlayerDashboard': {'Behind', 'Ahead', 'Tied'},
        'ByActualMarginPlayerDashboard': {'Lost', 'Won'}  # These might vary
    }
    for i, df in enumerate(dataframes):
        if matched_names[i] is not None or df is None or df.empty:
            continue

        # For dashboard endpoints, check GROUP_VALUE patterns
        if 'GROUP_VALUE' not in df.columns:
            continue
        group_values = set(df['GROUP_VALUE'].astype(str))

        for name, expected_values in content_hints.items():
            if not is_available(name):
                continue

            # Substring test per individual value. Joining the set into one string
            # would make matches depend on arbitrary set order and allow a hint to
            # match across the boundary of two unrelated values.
            if any(expected in value
                   for value in group_values
                   for expected in expected_values):
                assign(i, name)
                print(f"   DataFrame {i} → {name} (content match: {group_values})")
                break

    # Strategy 3: Row count heuristics
    print("\n3️⃣ Trying row count heuristics...")
    # row count -> (dataset name, explanation shown in the trace)
    row_count_hints = {
        1: ('OverallPlayerDashboard', "1 row = overall stats"),
        2: ('ByHalfPlayerDashboard', "2 rows = halves"),
        4: ('ByPeriodPlayerDashboard', "4 rows = quarters"),
    }
    for i, df in enumerate(dataframes):
        if matched_names[i] is not None or df is None or df.empty:
            continue

        hint = row_count_hints.get(len(df))
        if hint is not None and is_available(hint[0]):
            name, reason = hint
            assign(i, name)
            print(f"   DataFrame {i} → {name} ({reason})")

    # Strategy 4: Fill remaining with available names
    print("\n4️⃣ Assigning remaining names...")
    available_names = [name for name in expected_data.keys() if name not in used_names]

    for i in range(len(dataframes)):
        if matched_names[i] is not None:
            continue
        if available_names:
            name = available_names.pop(0)
            matched_names[i] = name
            print(f"   DataFrame {i} → {name} (remaining assignment)")
        else:
            matched_names[i] = f"dataframe_{i}"
            print(f"   DataFrame {i} → dataframe_{i} (fallback)")

    # Convert to lowercase for table naming (every slot is filled after strategy 4)
    final_names = [name.lower() for name in matched_names]

    print(f"\n✅ Final matching: {final_names}")
    return final_names

# Test the advanced matching
# NOTE(review): `dataframes` and `endpoint` are not created in this cell; they must
# already exist in the kernel namespace — confirm which cell defines them so the
# notebook survives Restart & Run All.
print("Testing advanced matching...")
matched_names = match_dataframes_to_names_v2(dataframes, endpoint)

# Print the table name each non-empty DataFrame would receive. The prefix
# "nba_playerdashboardbygamesplits_" is a literal, independent of `endpoint`.
print(f"\n📊 RESULTING TABLE NAMES:")
# NOTE: the loop variable `df` rebinds the notebook-level name `df`.
for i, (df, name) in enumerate(zip(dataframes, matched_names)):
    if df is not None and not df.empty:
        table_name = f"nba_playerdashboardbygamesplits_{name}"
        print(f"   {table_name} ({df.shape[0]} rows, {df.shape[1]} cols)")

Testing advanced matching...
=== ADVANCED DATAFRAME MATCHING ===

1️⃣ Trying column count matching...

2️⃣ Trying content-based matching...
   DataFrame 3 → ByScoreMarginPlayerDashboard (content match: {'1 - 5 Points', 'more than 20 Points', '6 - 10 Points', 'Tied', '16 - 20 Points', '11 - 15 Points'})

3️⃣ Trying row count heuristics...
   DataFrame 0 → OverallPlayerDashboard (1 row = overall stats)

4️⃣ Assigning remaining names...
   DataFrame 1 → ByActualMarginPlayerDashboard (remaining assignment)
   DataFrame 2 → ByHalfPlayerDashboard (remaining assignment)
   DataFrame 4 → ByPeriodPlayerDashboard (remaining assignment)

✅ Final matching: ['overallplayerdashboard', 'byactualmarginplayerdashboard', 'byhalfplayerdashboard', 'byscoremarginplayerdashboard', 'byperiodplayerdashboard']

📊 RESULTING TABLE NAMES:
   nba_playerdashboardbygamesplits_overallplayerdashboard (1 rows, 63 cols)
   nba_playerdashboardbygamesplits_byactualmarginplayerdashboard (3 rows, 63 cols)
   nba_playerdashb

In [24]:
# Test our production solution
print("🚀 TESTING PRODUCTION SOLUTION")
print("=" * 50)

# Import our new matcher (simulating what the processor will do)
# NOTE(review): the line below only extends sys.path; no module is imported from
# that directory in this cell. The path is absolute and machine-specific, and it
# is appended again each time the cell is re-run.
import sys
import os
sys.path.append(r'c:\Users\ajwin\Projects\Personal\NBA\thebigone\endpoints\collectors')

def _print_table_names(dataframes, names, prefix="nba_playerdashboardbygamesplits_"):
    """Print the prefixed table name for every non-empty DataFrame, paired with `names` by position."""
    for i, (df, name) in enumerate(zip(dataframes, names)):
        if df is not None and not df.empty:
            table_name = f"{prefix}{name}"
            print(f"  DataFrame {i} ({df.shape[0]} rows) → {table_name}")


def simulate_old_vs_new_naming(player_id=2544):
    """Simulate the difference between old and new naming approaches.

    Fetches PlayerDashboardByGameSplits for ``player_id`` (network call), then
    prints the table names produced by two approaches:

    * old: lowercase ``expected_data`` keys paired with the DataFrames by position
    * new: names returned by ``match_dataframes_to_names_v2``

    Args:
        player_id: Player to query. Defaults to 2544, the value this
            notebook used before the parameter existed.

    Returns:
        Tuple ``(old_names, new_names)`` of lowercase name lists.
    """

    endpoint = nbaapi.playerdashboardbygamesplits.PlayerDashboardByGameSplits(player_id=player_id)
    dataframes = endpoint.get_data_frames()

    print("📊 OLD APPROACH (PROBLEMATIC):")
    print("Using expected_data.keys() order...")
    old_names = [name.lower() for name in endpoint.expected_data.keys()]
    print(f"Names from dict keys: {old_names}")

    print("\nOld table names would be:")
    _print_table_names(dataframes, old_names)

    print("\n✅ NEW APPROACH (FIXED):")
    print("Using content-based matching...")

    # Use our advanced matching
    new_names = match_dataframes_to_names_v2(dataframes, endpoint)

    print("\nNew table names:")
    _print_table_names(dataframes, new_names)

    # Show the key differences
    print("\n🔄 COMPARISON:")
    print("Index | Old Name (Wrong Order) | New Name (Correct Match)")
    print("-" * 65)
    for i, (old, new) in enumerate(zip(old_names, new_names)):
        # ✅ marks a position whose name changed; ➡️ marks an unchanged one.
        match_symbol = "✅" if old != new else "➡️"
        print(f"  {i}   | {old:<22} | {new:<22} {match_symbol}")

    return old_names, new_names

# Run the simulation
old_names, new_names = simulate_old_vs_new_naming()

print(f"\n📈 RESULTS:")
# Count positions where the two naming approaches disagree.
# NOTE(review): a differing name only shows the approaches disagree; it does not
# by itself establish that the new name is the correct one — verify against the
# actual content of each DataFrame.
changes = sum(1 for old, new in zip(old_names, new_names) if old != new)
print(f"Changed: {changes}/{len(old_names)} table names will now be correct!")
print(f"Fixed the alphabetical ordering issue! 🎉")

🚀 TESTING PRODUCTION SOLUTION
📊 OLD APPROACH (PROBLEMATIC):
Using expected_data.keys() order...
Names from dict keys: ['byactualmarginplayerdashboard', 'byhalfplayerdashboard', 'byperiodplayerdashboard', 'byscoremarginplayerdashboard', 'overallplayerdashboard']

Old table names would be:
  DataFrame 0 (1 rows) → nba_playerdashboardbygamesplits_byactualmarginplayerdashboard
  DataFrame 1 (3 rows) → nba_playerdashboardbygamesplits_byhalfplayerdashboard
  DataFrame 2 (5 rows) → nba_playerdashboardbygamesplits_byperiodplayerdashboard
  DataFrame 3 (6 rows) → nba_playerdashboardbygamesplits_byscoremarginplayerdashboard
  DataFrame 4 (11 rows) → nba_playerdashboardbygamesplits_overallplayerdashboard

✅ NEW APPROACH (FIXED):
Using content-based matching...
=== ADVANCED DATAFRAME MATCHING ===

1️⃣ Trying column count matching...

2️⃣ Trying content-based matching...
   DataFrame 3 → ByScoreMarginPlayerDashboard (content match: {'1 - 5 Points', 'more than 20 Points', '6 - 10 Points', 'Tied', '1

In [45]:
# Test the new Player Dashboard Enhancement functionality
print("🏀 TESTING PLAYER DASHBOARD ENHANCEMENT")
print("=" * 60)

# Sample request shared by every step below (previously repeated as literals).
TEST_PLAYER_ID = 2544
TEST_SEASON = "2023-24"

# Test 1: Test the enhancement function
import sys

# NOTE(review): machine-specific absolute path; consider making it configurable.
COLLECTORS_DIR = r'c:\Users\ajwin\Projects\Personal\NBA\thebigone\endpoints\collectors'
if COLLECTORS_DIR not in sys.path:  # do not pile up duplicates when the cell is re-run
    sys.path.append(COLLECTORS_DIR)

try:
    from player_dashboard_enhancer import (
        is_player_dashboard_endpoint,
        enhance_player_dashboard_dataframes,
        validate_player_dashboard_data
    )
    print("✅ Successfully imported enhancement functions")
except ImportError as e:
    print(f"❌ Import failed: {e}")
    # Every later step calls these functions. Stop with the real cause instead of
    # a NameError further down (or, worse, silently using stale kernel names).
    raise

# Test 2: Test endpoint detection
test_endpoints = [
    "PlayerDashboardByShootingSplits",
    "BoxScoreTraditionalV2", 
    "PlayerDashboardByGeneralSplits",
    "LeagueDashPlayerStats"
]

print("\n📋 Testing endpoint detection:")
# Loop variable is a plain string; kept distinct from the `endpoint` object below.
for endpoint_name in test_endpoints:
    is_player_dash = is_player_dashboard_endpoint(endpoint_name)
    symbol = "🏀" if is_player_dash else "📊"
    print(f"  {symbol} {endpoint_name}: {'Player Dashboard' if is_player_dash else 'Other'}")

# Test 3: Test with real API data
print("\n🧪 Testing with real API data:")

# Get real player dashboard data (network call)
endpoint = nbaapi.playerdashboardbyshootingsplits.PlayerDashboardByShootingSplits(
    player_id=TEST_PLAYER_ID,
    season=TEST_SEASON
)
original_dataframes = endpoint.get_data_frames()

print(f"Original data: {len(original_dataframes)} dataframes")
for i, df in enumerate(original_dataframes):
    if df is not None and not df.empty:
        print(f"  DataFrame {i}: {df.shape} - columns: {list(df.columns)[:5]}...")

# Test enhancement
print("\n🔧 Testing enhancement:")
# Reset first so a failure below cannot leave the name unbound or pointing at a
# result from an earlier run of this cell.
enhanced_dataframes = None
try:
    enhanced_dataframes = enhance_player_dashboard_dataframes(
        dataframes=original_dataframes,
        player_id=TEST_PLAYER_ID,
        season=TEST_SEASON, 
        endpoint_name="PlayerDashboardByShootingSplits"
    )
    
    print(f"Enhanced data: {len(enhanced_dataframes)} dataframes")
    
    # Show the difference
    for i, (orig_df, enhanced_df) in enumerate(zip(original_dataframes, enhanced_dataframes)):
        if orig_df is None or enhanced_df is None or orig_df.empty or enhanced_df.empty:
            # .iloc[0] below needs at least one row in the enhanced frame
            continue
        print(f"\n  DataFrame {i}:")
        print(f"    Original:  {orig_df.shape} - columns: {list(orig_df.columns)[:5]}...")
        print(f"    Enhanced:  {enhanced_df.shape} - columns: {list(enhanced_df.columns)[:7]}...")
        
        # Show sample of enhanced data
        print(f"    Sample enhanced data:")
        print(f"      player_id: {enhanced_df['player_id'].iloc[0]}")
        print(f"      season: {enhanced_df['season'].iloc[0]}")
        print(f"      collected_at: {enhanced_df['collected_at'].iloc[0]}")
            
    print("✅ Enhancement successful!")
    
except Exception as e:
    # Best-effort demo: report the failure and carry on to the summary.
    print(f"❌ Enhancement failed: {e}")

# Test 4: Validation
print("\n✅ Testing validation:")
if enhanced_dataframes:
    test_df = enhanced_dataframes[0]
    if test_df is not None and not test_df.empty:
        # Test with correct parameters
        is_valid = validate_player_dashboard_data(test_df, TEST_PLAYER_ID, TEST_SEASON)
        print(f"  Correct params validation: {'✅ PASSED' if is_valid else '❌ FAILED'}")
        
        # Test with wrong parameters
        is_valid_wrong = validate_player_dashboard_data(test_df, 1234, "2022-23")
        print(f"  Wrong params validation: {'❌ FAILED (expected)' if not is_valid_wrong else '⚠️ PASSED (unexpected)'}")
else:
    print("  ⚠️ Skipped: no enhanced dataframes available")

print("\n🎉 Player Dashboard Enhancement testing complete!")

🏀 TESTING PLAYER DASHBOARD ENHANCEMENT
✅ Successfully imported enhancement functions

📋 Testing endpoint detection:
  🏀 PlayerDashboardByShootingSplits: Player Dashboard
  📊 BoxScoreTraditionalV2: Other
  🏀 PlayerDashboardByGeneralSplits: Player Dashboard
  📊 LeagueDashPlayerStats: Other

🧪 Testing with real API data:
Original data: 8 dataframes
  DataFrame 0: (1, 30) - columns: ['GROUP_SET', 'GROUP_VALUE', 'FGM', 'FGA', 'FG_PCT']...
  DataFrame 1: (9, 30) - columns: ['GROUP_SET', 'GROUP_VALUE', 'FGM', 'FGA', 'FG_PCT']...
  DataFrame 2: (5, 30) - columns: ['GROUP_SET', 'GROUP_VALUE', 'FGM', 'FGA', 'FG_PCT']...
  DataFrame 3: (7, 30) - columns: ['GROUP_SET', 'GROUP_VALUE', 'FGM', 'FGA', 'FG_PCT']...
  DataFrame 4: (2, 30) - columns: ['GROUP_SET', 'GROUP_VALUE', 'FGM', 'FGA', 'FG_PCT']...
  DataFrame 5: (9, 16) - columns: ['GROUP_SET', 'GROUP_VALUE', 'FGM', 'FGA', 'FG_PCT']...
  DataFrame 6: (48, 30) - columns: ['GROUP_SET', 'GROUP_VALUE', 'FGM', 'FGA', 'FG_PCT']...
  DataFrame 7: (12, 3