In [None]:
%%writefile src/config.py
import os
import sys
from pathlib import Path
import logging
from typing import Dict, List, Optional, Union
from datetime import datetime

class ProjectConfig:
    """
    Comprehensive configuration class for the freethrow_predictions project.
    Handles paths, parameters, and environment setup automatically.
    """
    
    def __init__(self, debug: bool = False):
        """
        Build the configuration by running every setup stage in order.

        Args:
            debug: When True, logging is configured at DEBUG level and a
                configuration summary is printed once setup has finished.
        """
        self.debug = debug
        # Stage order matters: logging comes first so later stages can use
        # self.logger, environment detection provides project_root for the
        # path stage, and the summary reads values set by all stages.
        self._setup_logging()
        self._detect_environment()
        self._setup_paths()
        self._setup_parameters()
        
        if self.debug:
            self._print_config_summary()
    
    def _setup_logging(self):
        """Setup logging configuration"""
        log_level = logging.DEBUG if self.debug else logging.INFO
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.StreamHandler(sys.stdout),
                logging.FileHandler('freethrow_predictions.log', mode='a')
            ]
        )
        self.logger = logging.getLogger(__name__)
    
    def _detect_environment(self):
        """Detect the current execution environment and set base paths"""
        self.current_dir = Path(os.getcwd())
        self.script_dir = Path(__file__).parent if __file__ else self.current_dir
        
        # Environment detection
        self.environments = {
            'workspace': '/workspace' in str(self.current_dir),
            'notebooks': self.current_dir.name == 'notebooks',
            'src': self.current_dir.name == 'src',
            'docker': os.path.exists('/.dockerenv'),
            'streamlit': 'streamlit' in sys.modules,
            'jupyter': 'ipykernel' in sys.modules
        }
        
        # Determine project root
        self.project_root = self._find_project_root()
        
        if self.debug:
            self.logger.debug(f"Current directory: {self.current_dir}")
            self.logger.debug(f"Script directory: {self.script_dir}")
            self.logger.debug(f"Project root: {self.project_root}")
            self.logger.debug(f"Environment flags: {self.environments}")
    
    def _find_project_root(self) -> Path:
        """Find the project root directory by looking for key indicators"""
        # Start from current directory and work up
        check_dir = self.current_dir
        
        # Key indicators of project root
        root_indicators = [
            'src',
            'notebooks', 
            'data',
            'requirements.txt',
            'README.md',
            '.git'
        ]
        
        for _ in range(10):  # Prevent infinite loops
            # Check if this directory contains key indicators
            indicators_found = sum(1 for indicator in root_indicators 
                                 if (check_dir / indicator).exists())
            
            if indicators_found >= 2:  # Found at least 2 indicators
                return check_dir
            
            # Move up one level
            parent = check_dir.parent
            if parent == check_dir:  # Reached filesystem root
                break
            check_dir = parent
        
        # Fallback strategies
        if self.environments['workspace']:
            return Path('/workspace')
        elif self.environments['notebooks']:
            return self.current_dir.parent
        elif self.environments['src']:
            return self.current_dir.parent
        else:
            return self.current_dir
    
    def _add_src_to_sys_path(self):
        """
        Add SRC_DIR to sys.path without clobbering sys.path[0] (script directory).
        Streamlit/watchdog rely on sys.path[0] for reload path resolution.
        """
        try:
            src_str = str(self.SRC_DIR)
            before0 = sys.path[0] if sys.path else None

            if src_str not in sys.path:
                # Insert right after index 0 (script dir), or append as a fallback.
                if sys.path:
                    sys.path.insert(1, src_str)
                else:
                    sys.path.append(src_str)

            after0 = sys.path[0] if sys.path else None
            if self.debug:
                self.logger.debug(f"sys.path[0] before: {before0}")
                self.logger.debug(f"Inserted SRC_DIR: {src_str}")
                self.logger.debug(f"sys.path[0] after: {after0}")
                self.logger.debug(f"SRC_DIR index in sys.path: {sys.path.index(src_str)}")
        except Exception as e:
            self.logger.error(f"Failed to adjust sys.path safely: {e}")

    
    def _setup_paths(self):
        """Setup all directory paths"""
        # Base directories
        self.BASE_DIR = self.project_root
        self.SRC_DIR = self.BASE_DIR / 'src'
        self.DATA_DIR = self.BASE_DIR / 'data'
        self.NOTEBOOKS_DIR = self.BASE_DIR / 'notebooks'
        
        # Data subdirectories
        self.DATA_RAW_DIR = self.DATA_DIR / 'raw'
        self.DATA_PROCESSED_DIR = self.DATA_DIR / 'processed'
        self.DATA_MODELS_DIR = self.DATA_DIR / 'models'
        self.DATA_CACHE_DIR = self.DATA_DIR / 'cache'
        
        # Application specific directories
        self.TRADE_IMPACT_DIR = self.SRC_DIR / 'trade_impact'
        self.SHOT_CHART_DIR = self.SRC_DIR / 'shot_chart'
        self.SALARY_MODEL_DIR = self.SRC_DIR / 'salary_model_training'
        self.SALARY_DATA_DIR = self.SRC_DIR / 'salary_nba_data_pull'
        
        # Cache directories
        self.NBA_API_CACHE_DIR = self.DATA_CACHE_DIR / 'nba_api'
        self.SHOT_CACHE_DIR = self.DATA_CACHE_DIR / 'shot_chart'
        self.MAE_CACHE_DIR = self.DATA_CACHE_DIR / 'mae'
        
        # NEW: Trade specific directories
        self.TRADE_DATA_DIR = self.DATA_PROCESSED_DIR / 'trades'
        self.PRELOADED_TRADES_DIR = self.TRADE_DATA_DIR / 'preloaded'
        self.GRADED_TRADES_DIR = self.TRADE_DATA_DIR / 'graded'
        
        # Log directory
        self.LOG_DIR = self.BASE_DIR / 'logs'
        
        # Ensure critical directories exist
        self._create_directories()
        
        # SAFELY add src to Python path (preserve sys.path[0], which is the script dir)
        self._add_src_to_sys_path()


    def diagnose_runtime(self) -> Dict[str, Union[str, bool, int, Dict]]:
        """
        Return a snapshot of runtime/path environment for debugging.
        No side effects; safe to call from Streamlit UI.
        """
        try:
            script_arg = sys.argv[0] if sys.argv else None
            resolved_script = None
            try:
                resolved_script = str(Path(script_arg).resolve()) if script_arg else None
            except Exception:
                resolved_script = None

            info = {
                "cwd": str(self.current_dir),
                "script_arg": script_arg,
                "resolved_script": resolved_script,
                "sys_path_0": sys.path[0] if sys.path else None,
                "src_dir": str(self.SRC_DIR),
                "src_in_sys_path": str(self.SRC_DIR) in sys.path,
                "src_index_in_sys_path": (sys.path.index(str(self.SRC_DIR)) if str(self.SRC_DIR) in sys.path else None),
                "project_root": str(self.project_root),
                "environments": self.environments,
                "key_dirs": {
                    "BASE_DIR": str(self.BASE_DIR),
                    "DATA_DIR": str(self.DATA_DIR),
                    "MODELS_DIR": str(self.DATA_MODELS_DIR),
                    "CACHE_DIR": str(self.DATA_CACHE_DIR),
                    "LOG_DIR": str(self.LOG_DIR),
                    "TRADE_DATA_DIR": str(self.TRADE_DATA_DIR),
                    "PRELOADED_TRADES_DIR": str(self.PRELOADED_TRADES_DIR),
                    "GRADED_TRADES_DIR": str(self.GRADED_TRADES_DIR),
                }
            }
            return info
        except Exception as e:
            # Last resort: never crash the app because of diagnostics
            return {"diagnostics_error": str(e)}


    
    def _create_directories(self):
        """Create all necessary directories"""
        directories = [
            self.DATA_RAW_DIR,
            self.DATA_PROCESSED_DIR,
            self.DATA_MODELS_DIR,
            self.DATA_CACHE_DIR,
            self.NBA_API_CACHE_DIR,
            self.SHOT_CACHE_DIR,
            self.MAE_CACHE_DIR,
            self.LOG_DIR,
            # NEW: Trade directories
            self.TRADE_DATA_DIR,
            self.PRELOADED_TRADES_DIR,
            self.GRADED_TRADES_DIR
        ]
        
        for directory in directories:
            try:
                directory.mkdir(parents=True, exist_ok=True)
                if self.debug:
                    self.logger.debug(f"Ensured directory exists: {directory}")
            except Exception as e:
                self.logger.error(f"Failed to create directory {directory}: {e}")
    
    def _setup_parameters(self):
        """
        Populate the per-area configuration dictionaries.

        Sets APP_CONFIG, NBA_API_CONFIG, DATA_CONFIG, MODEL_CONFIG,
        TRADE_CONFIG, SHOT_CHART_CONFIG, METRICS_CONFIG, FILE_PATTERNS and
        STREAMLIT_CONFIG.  Every value is a literal except the two debug
        flags, which mirror ``self.debug`` at the time of the call.
        """

        # General application settings
        self.APP_CONFIG = {
            'app_name': 'NBA Freethrow Predictions',
            'version': '1.0.0',
            'debug': self.debug,
            'cache_enabled': True,
            'auto_refresh_data': False,
            # Same literal as DATA_CONFIG['current_season'] below; the two
            # are not linked, so change them together.
            'default_season': '2023-24',
            'timezone': 'America/New_York'
        }

        # NBA API Configuration
        self.NBA_API_CONFIG = {
            'timeout': 90,
            'retries': 3,
            'rate_limit_delay': 0.6,
            'cache_expiry_hours': 12,
            'user_agent': 'NBA-Analysis-App/1.0',
            'endpoints': {
                'base_url': 'https://stats.nba.com/stats/',
                'player_gamelogs': 'playergamelogs',
                'team_roster': 'commonteamroster',
                'shot_chart': 'shotchartdetail'
            }
        }

        # Data Processing Configuration
        self.DATA_CONFIG = {
            'seasons_range': ['2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24'],
            'current_season': '2023-24',
            'salary_cap_2023': 136_021_000,
            'first_tax_apron_2023': 172_346_000,
            'second_tax_apron_2023': 182_794_000,
            'min_games_played': 10,
            'min_minutes_per_game': 5.0,
            'inflation_base_year': 2024
        }

        # Model Configuration
        self.MODEL_CONFIG = {
            'random_state': 42,
            'test_size': 0.2,
            'validation_size': 0.2,
            'cross_val_folds': 5,
            'feature_selection_threshold': 0.01,
            'models': {
                'random_forest': {
                    'n_estimators': 100,
                    'max_depth': 10,
                    'min_samples_split': 5,
                    'min_samples_leaf': 2
                },
                'xgboost': {
                    'n_estimators': 100,
                    'max_depth': 6,
                    'learning_rate': 0.1,
                    'subsample': 0.8
                },
                'ridge': {
                    'alpha': 1.0,
                    # NOTE(review): scikit-learn removed Ridge's `normalize`
                    # argument in 1.2 — confirm how this dict is consumed
                    # before passing it through as keyword arguments.
                    'normalize': True
                }
            }
        }

        # Trade Impact Configuration, including preloaded-trade behaviour
        self.TRADE_CONFIG = {
            'cap_per_team': 999,
            'max_players_side': 3,
            'roster_size_limit': None,
            'opponents_per_team': None,
            'top_n_minutes': 8,
            'min_attempts': 800,
            'max_display_trades': 20,
            'show_debug': self.debug,
            # NOTE(review): '2020-21' is absent from this list — confirm
            # that the gap is intentional.
            'champion_seasons': [
                '2014-15', '2015-16', '2016-17', '2017-18', '2018-19',
                '2019-20', '2021-22', '2022-23', '2023-24'
            ],
            'relevant_stats': [
                'PTS', 'AST', 'REB', 'STL', 'BLK', 'TOV',
                'OREB', 'DREB', 'FGM', 'FG3M', 'FGA'
            ],
            # Preloaded trade configuration
            'preload_trades': True,
            'preload_priority': True,  # Always try preloaded first
            'auto_generate': True,     # Auto-generate if missing
            'cache_trades': True,      # Cache generated trades
            'max_preload_age_days': 7, # Regenerate if older than 7 days
        }

        # Shot Chart Configuration
        self.SHOT_CHART_CONFIG = {
            'court_areas': [
                'Left Corner 3', 'Right Corner 3', 'Above the Break 3',
                'Mid-Range', 'Paint', 'Free Throw Line', 'Backcourt'
            ],
            'min_attempts_for_analysis': 50,
            'cache_shot_data': True,
            'plot_settings': {
                'figure_size': (12, 8),
                'hexbin_gridsize': 25,
                'colormap': 'RdYlBu_r',
                'alpha': 0.8
            }
        }

        # Advanced Metrics Configuration
        self.METRICS_CONFIG = {
            'clustering': {
                'n_clusters': 7,
                'algorithm': 'kmeans',
                'random_state': 42
            },
            'salary_metrics': [
                'Salary_per_WS', 'Salary_per_VORP',
                'Salary_per_OWS', 'Salary_per_DWS'
            ],
            'efficiency_weights': {
                'offense': 0.6,
                'defense': 0.4
            }
        }

        # File naming conventions. The {placeholders} are filled in by
        # get_file_path() via str.format().
        self.FILE_PATTERNS = {
            'player_data': 'nba_player_data_final_inflated.csv',
            'predictions': 'predictions_df.csv',
            'valid_trades': 'valid_trades_{season}.parquet',
            'graded_trades': 'graded_preloaded_trades_{season}.parquet',
            'mae_index': 'mae_index_{season}.parquet',
            'shot_cache': 'shot_data_{player}_{season}.pkl',
            'model_save': 'season_{year}',
            # Trade file patterns
            'preloaded_trades': 'preloaded_trades_{season}.parquet',
            'preloaded_trades_test': 'preloaded_trades_{season}_test{size}.parquet',
            'trade_index': 'trade_index_{season}.json',
            'team_trades': 'team_trades_{team}_{season}.parquet'
        }

        # Streamlit Configuration
        self.STREAMLIT_CONFIG = {
            'page_title': 'NBA Analysis Dashboard',
            'page_icon': '🏀',
            'layout': 'wide',
            'initial_sidebar_state': 'expanded',
            'menu_items': {
                'Get Help': 'https://github.com/your-repo/issues',
                'Report a bug': 'https://github.com/your-repo/issues',
                'About': 'NBA Analysis and Prediction Dashboard'
            }
        }
    
    def get_file_path(self, file_type: str, **kwargs) -> Path:
        """Get file path based on type and parameters"""
        pattern = self.FILE_PATTERNS.get(file_type)
        if not pattern:
            raise ValueError(f"Unknown file type: {file_type}")
        
        # Replace placeholders
        filename = pattern.format(**kwargs)
        
        # Determine directory based on file type
        if file_type in ['player_data', 'predictions']:
            return self.DATA_PROCESSED_DIR / filename
        elif file_type in ['valid_trades', 'graded_trades', 'mae_index']:
            return self.DATA_PROCESSED_DIR / filename
        elif file_type in ['preloaded_trades', 'preloaded_trades_test', 'team_trades']:
            return self.PRELOADED_TRADES_DIR / filename
        elif file_type == 'trade_index':
            return self.TRADE_DATA_DIR / filename
        elif file_type == 'shot_cache':
            return self.SHOT_CACHE_DIR / filename
        elif file_type == 'model_save':
            return self.DATA_MODELS_DIR / filename
        else:
            return self.DATA_PROCESSED_DIR / filename
    
    def get_preloaded_trade_path(self, season: str, test_size: Optional[int] = None) -> Path:
        """Get path to preloaded trades file for a season"""
        if test_size:
            return self.get_file_path('preloaded_trades_test', season=season, size=test_size)
        else:
            return self.get_file_path('preloaded_trades', season=season)
    
    def get_graded_trade_path(self, season: str, test_size: Optional[int] = None) -> Path:
        """Get path to graded trades file for a season"""
        season_clean = season.replace('/', '-')
        suffix = f"_test{test_size}" if test_size else ""
        filename = f"graded_preloaded_trades_{season_clean}{suffix}.parquet"
        return self.GRADED_TRADES_DIR / filename
    
    def get_trade_paths_for_season(self, season: str) -> Dict[str, Path]:
        """Get all trade-related paths for a season"""
        season_clean = season.replace('/', '-')
        return {
            'preloaded_trades': self.get_preloaded_trade_path(season),
            'graded_trades': self.get_graded_trade_path(season),
            'trade_index': self.get_file_path('trade_index', season=season_clean),
            'mae_index': self.get_file_path('mae_index', season=season_clean)
        }
    
    def get_season_year(self, season: Union[str, int]) -> int:
        """
        Convert a season to its starting year.

        Args:
            season: A season label ('2023-24' or '2023/24'), a bare year
                string ('2023'), or anything ``int()`` accepts.

        Returns:
            The starting year as an integer (2023 for the examples above).

        Raises:
            ValueError: if the leading part is not an integer.
        """
        if isinstance(season, str):
            # Accept the '/' separator as well, matching the trade-path
            # helpers that treat '2023/24' and '2023-24' alike.
            return int(season.replace('/', '-').split('-')[0])
        return int(season)
    
    def normalize_season(self, season: Union[str, int]) -> str:
        """
        Normalize a season to the standard 'YYYY-YY' form (e.g. '2023-24').

        Accepted inputs:
            * an int start year (2023),
            * a digit string ('2023'),
            * 'YYYY-YY' or 'YYYY-YYYY', with '-' or '/' as separator.

        Other strings that contain '-' and are at least 7 characters long
        are returned unchanged, as before.

        Raises:
            ValueError: for anything else.
        """
        if isinstance(season, int):
            return f"{season}-{str(season + 1)[-2:]}"
        if isinstance(season, str):
            start, sep, end = season.replace('/', '-').partition('-')
            if (sep and len(start) == 4 and start.isdigit()
                    and len(end) in (2, 4) and end.isdigit()):
                # Covers '2023-24', '2023/24' and the long '2023-2024' form.
                return f"{start}-{end[-2:]}"
            if '-' in season and len(season) >= 7:
                # Legacy pass-through for other dash-separated labels.
                return season
            if season.isdigit():
                year = int(season)
                return f"{year}-{str(year + 1)[-2:]}"
        raise ValueError(f"Invalid season format: {season}")
    
    def update_config(self, section: str, updates: Dict):
        """Update configuration section with new values"""
        if hasattr(self, section):
            config_section = getattr(self, section)
            if isinstance(config_section, dict):
                config_section.update(updates)
                if self.debug:
                    self.logger.debug(f"Updated {section}: {updates}")
            else:
                self.logger.warning(f"Cannot update non-dict section: {section}")
        else:
            self.logger.warning(f"Unknown config section: {section}")
    
    def resolve_model_dir(self, season_year: int) -> Path:
        """
        Return the canonical model directory for a given season: /data/models/season_{year}.
        Does not create anything. No existence check here.
        """
        return self.DATA_MODELS_DIR / f"season_{season_year}"

    def resolve_predictions_path(self, season_year: Union[int, None] = None) -> Path:
        """
        Determine the best predictions.csv to use.
        Priority:
          1) /data/models/season_{YYYY}/predictions_df.csv (if season provided and exists)
          2) /data/processed/predictions_df.csv
        Raises FileNotFoundError with all attempted candidates if none exist.
        """
        candidates = []
        if season_year is not None:
            season_dir = self.resolve_model_dir(int(season_year))
            candidates.append(season_dir / "predictions_df.csv")
        candidates.append(self.DATA_PROCESSED_DIR / "predictions_df.csv")

        tried = []
        for p in candidates:
            tried.append(str(p))
            if p.exists():
                if self.debug:
                    self.logger.debug(f"resolve_predictions_path -> using: {p}")
                return p

        msg = "No predictions file found. Tried:\n  " + "\n  ".join(tried)
        if self.debug:
            self.logger.error(msg)
        raise FileNotFoundError(msg)

    def diagnose_data_files(self, season_year: Union[int, None] = None) -> Dict:
        """
        Non-throwing diagnostic view of important files for the current run.
        """
        out = {
            "season_year": season_year,
            "processed_player_data": str(self.get_file_path("player_data")),
            "processed_player_data_exists": self.get_file_path("player_data").exists(),
            "predictions_candidates": [],
            "chosen_predictions": None,
            "chosen_predictions_exists": False,
        }
        # Predictions candidates
        candidates = []
        if season_year is not None:
            candidates.append(str(self.resolve_model_dir(int(season_year)) / "predictions_df.csv"))
        candidates.append(str(self.DATA_PROCESSED_DIR / "predictions_df.csv"))
        out["predictions_candidates"] = candidates

        # Chosen (if any)
        try:
            chosen = self.resolve_predictions_path(season_year)
            out["chosen_predictions"] = str(chosen)
            out["chosen_predictions_exists"] = chosen.exists()
        except Exception as e:
            out["chosen_predictions_error"] = str(e)

        return out

    def _print_config_summary(self):
        """Print configuration summary for debugging"""
        print("\n" + "="*80)
        print("FREETHROW PREDICTIONS - CONFIGURATION SUMMARY")
        print("="*80)
        print(f"Project Root: {self.project_root}")
        print(f"Current Environment: {[k for k, v in self.environments.items() if v]}")
        print(f"Debug Mode: {self.debug}")
        print(f"Python Path Includes Src: {str(self.SRC_DIR) in sys.path}")
        
        print(f"\nKey Directories:")
        print(f"  Data: {self.DATA_DIR}")
        print(f"  Models: {self.DATA_MODELS_DIR}")
        print(f"  Cache: {self.DATA_CACHE_DIR}")
        print(f"  Logs: {self.LOG_DIR}")
        print(f"  Trade Data: {self.TRADE_DATA_DIR}")
        print(f"  Preloaded Trades: {self.PRELOADED_TRADES_DIR}")
        print(f"  Graded Trades: {self.GRADED_TRADES_DIR}")
        
        print(f"\nCurrent Season: {self.DATA_CONFIG['current_season']}")
        print(f"Available Seasons: {len(self.DATA_CONFIG['seasons_range'])}")
        print(f"NBA API Timeout: {self.NBA_API_CONFIG['timeout']}s")
        print(f"Preload Trades: {self.TRADE_CONFIG['preload_trades']}")
        print("="*80 + "\n")


# Global configuration instance (created lazily by get_config)
config = None

def get_config(debug: bool = False) -> ProjectConfig:
    """
    Get the shared configuration instance, creating it on first use.

    Args:
        debug: Passed to ``ProjectConfig`` when the instance is created.
            If the instance already exists without debug, ``debug=True``
            switches it on; previously the flag was silently ignored once
            the module's import-time setup had created the instance.
            ``debug=False`` never switches debug off again.

    Returns:
        The process-wide ``ProjectConfig``.
    """
    global config
    if config is None:
        config = ProjectConfig(debug=debug)
    elif debug and not config.debug:
        # Upgrade the existing instance in place so every holder of the
        # singleton sees the change.
        config.debug = True
        config.APP_CONFIG['debug'] = True
        config.TRADE_CONFIG['show_debug'] = True
        config.logger.setLevel(logging.DEBUG)
    return config

def setup_environment(debug: bool = False) -> ProjectConfig:
    """
    Set up the complete environment and return the shared config.

    Ensures the configured directories exist and logging is configured.
    With ``debug=True`` the configuration summary is printed exactly once:
    when this call is the one that creates the instance, the constructor
    has already printed it, so it is not repeated here.
    """
    # Must be read before get_config() possibly creates the instance.
    created_here = config is None
    cfg = get_config(debug=debug)

    # Ensure all paths exist
    cfg._create_directories()

    # Setup logging
    cfg._setup_logging()

    already_printed = created_here and cfg.debug
    if debug and not already_printed:
        cfg._print_config_summary()

    return cfg

# Convenience functions for common operations
def get_data_path(filename: str = None) -> Path:
    """Return the processed-data directory, or ``filename`` inside it when given."""
    processed_dir = get_config().DATA_PROCESSED_DIR
    return processed_dir / filename if filename else processed_dir

def get_model_path(season: Union[str, int]) -> Path:
    """Return the model directory (``season_<start year>``) for a season."""
    cfg = get_config()
    start_year = cfg.get_season_year(str(season))
    return cfg.DATA_MODELS_DIR / "season_{}".format(start_year)

def get_cache_path(cache_type: str = 'general') -> Path:
    """
    Return the cache directory for ``cache_type``.

    Known types are 'general', 'nba_api', 'shot_chart' and 'mae'; any
    other value yields the general cache directory.
    """
    cfg = get_config()
    fallback = cfg.DATA_CACHE_DIR
    known = dict(
        general=fallback,
        nba_api=cfg.NBA_API_CACHE_DIR,
        shot_chart=cfg.SHOT_CACHE_DIR,
        mae=cfg.MAE_CACHE_DIR,
    )
    if cache_type in known:
        return known[cache_type]
    return fallback

# NEW: Trade-specific convenience functions
def get_trade_data_path() -> Path:
    """Return the trade data directory of the shared config."""
    return get_config().TRADE_DATA_DIR

def get_preloaded_trades_path(season: str, test_size: Optional[int] = None) -> Path:
    """Return the preloaded trades file path for a season via the shared config."""
    shared = get_config()
    return shared.get_preloaded_trade_path(season, test_size)

def get_graded_trades_path(season: str, test_size: Optional[int] = None) -> Path:
    """Return the graded trades file path for a season via the shared config."""
    shared = get_config()
    return shared.get_graded_trade_path(season, test_size)

# Auto-setup when module is imported.
# Importing this module therefore has side effects: the ProjectConfig
# constructor creates directories, configures logging and adjusts sys.path.
# A failure is reported on stdout and leaves `config` as None, so a later
# get_config() call will try to build the instance again.
try:
    config = setup_environment(debug=False)
except Exception as e:
    print(f"Warning: Could not auto-setup environment: {e}")
    config = None

if __name__ == "__main__":
    # Manual smoke test: print the summary and a sample of resolved paths.
    cfg = setup_environment(debug=True)
    
    # Test path functions
    print("Testing path functions:")
    print(f"Data path: {get_data_path()}")
    print(f"Model path for 2023: {get_model_path('2023-24')}")
    print(f"NBA API cache: {get_cache_path('nba_api')}")
    print(f"Trade data: {get_trade_data_path()}")
    print(f"Preloaded trades: {get_preloaded_trades_path('2023-24')}")
    print(f"Graded trades: {get_graded_trades_path('2023-24')}")
    
    # Test file path generation
    print(f"\nTesting file path generation:")
    try:
        print(f"Player data: {cfg.get_file_path('player_data')}")
        print(f"Valid trades: {cfg.get_file_path('valid_trades', season='2023-24')}")
        print(f"Model save: {cfg.get_file_path('model_save', year=2023)}")
        print(f"Trade paths for 2023-24: {cfg.get_trade_paths_for_season('2023-24')}")
    except Exception as e:
        # Keep the smoke test going; just show what failed.
        print(f"Error: {e}")


Overwriting src/config.py


In [None]:
%%writefile src/app.py

import os
import streamlit as st
import pandas as pd
import seaborn as sns
import numpy as np
import joblib
import matplotlib.pyplot as plt
from nba_api.stats.static import teams, players

#importing model utils
from salary_model_training.data_loader_preprocessor import format_season, engineer_features, label_encode_injury_risk, build_pipeline, filter_seasons
from salary_model_training.util_functions import check_or_train_model, display_feature_importance, display_model_metrics, identify_overpaid_underpaid, plot_feature_importance
# Importing Shot Chart Analysis functions
from shot_chart.nba_helpers import get_team_abbreviation, categorize_shot, get_all_court_areas
from shot_chart.nba_shots import fetch_shots_data, fetch_defensive_shots_data, fetch_shots_for_multiple_players
from shot_chart.nba_plotting import plot_shot_chart_hexbin
from shot_chart.nba_efficiency import create_mae_table, save_mae_table, load_mae_table, get_seasons_range, calculate_compatibility_between_players
from shot_chart.shot_chart_main import run_scenario, preload_mae_tables, create_and_save_mae_table_specific, create_and_save_mae_table_all

# Import functions from the small example app
from advanced_metrics import plot_career_clusters, plot_injury_risk_vs_salary, plot_availability_vs_salary, plot_vorp_vs_salary, table_metric_salary, display_top_10_salary_per_metric, cluster_players_specialized, display_top_10_salary_per_metric_with_ws

# Import New and improved Trade functions
try:
    from updated_trade_simulator import trade_impact_simulator_app
except ImportError:
    from trade_impact_section_st_app import trade_impact_simulator_app

from config import get_config 

def runtime_debug_panel(season_year: int | None = None):
    """
    Render an optional sidebar expander with runtime diagnostics.

    Shows the config's runtime/path snapshot and the data-file status for
    ``season_year``.  If collecting or rendering them fails, an error
    expander with the failure message is shown instead.
    """
    try:
        shared_cfg = get_config()
        runtime_info = shared_cfg.diagnose_runtime()
        file_info = shared_cfg.diagnose_data_files(season_year)

        panel = st.sidebar.expander("🔍 Runtime Debug", expanded=False)
        with panel:
            st.json(runtime_info)
            st.write("**Data Files**")
            st.json(file_info)
    except Exception as e:
        error_panel = st.sidebar.expander("🔍 Runtime Debug (error)", expanded=False)
        with error_panel:
            st.write(f"Diagnostics failed: {e}")




@st.cache_data
def get_teams_list():
    """Return the full name of every NBA team (cached by Streamlit)."""
    all_teams = teams.get_teams()
    return [entry['full_name'] for entry in all_teams]

@st.cache_data
def get_players_list():
    """Return the full name of every NBA player (cached by Streamlit)."""
    all_players = players.get_players()
    return [entry['full_name'] for entry in all_players]

@st.cache_data
def load_team_data():
    """Return a DataFrame of NBA teams with the id, full_name and abbreviation columns."""
    wanted_columns = ['id', 'full_name', 'abbreviation']
    return pd.DataFrame(teams.get_teams())[wanted_columns]

@st.cache_data
def load_player_data(start_year, end_year):
    """
    Load player data for every year from ``start_year`` to ``end_year``.

    Args:
        start_year: First year to fetch (inclusive).
        end_year: Last year to fetch (inclusive).

    Returns:
        One DataFrame with the rows of every year that returned data, with
        a fresh index; an empty DataFrame when no year returned data.
    """
    # NOTE(review): fetch_season_data_by_year is neither imported nor
    # defined in the visible part of this module, so calling this function
    # raises NameError unless it is provided elsewhere. Confirm the module
    # it lives in and import it.
    frames = []
    for year in range(start_year, end_year + 1):
        season_frame = fetch_season_data_by_year(year)
        if season_frame is not None:
            frames.append(season_frame)

    if not frames:
        return pd.DataFrame()
    # Concatenate once; growing a DataFrame inside the loop copies all
    # accumulated rows on every iteration.
    return pd.concat(frames, ignore_index=True)



def advanced_metrics_analysis():
    """
    Render the "Advanced Metrics and Salary Analysis" page.

    Loads the processed player data through the shared config, lets the
    user pick a season, clusters that season's players, and shows one
    selectable plot plus a selectable top-10 salary-per-metric table.  The
    cluster count and the metric choices come from
    ``cfg.METRICS_CONFIG`` so the page and the configuration cannot drift
    apart.  A missing data file is reported on the page instead of raising.
    """
    cfg = get_config()
    metrics_cfg = cfg.METRICS_CONFIG

    st.header("NBA Advanced Metrics and Salary Analysis")

    # Load the data via config (centralized)
    player_data_fp = cfg.get_file_path('player_data')
    try:
        data = pd.read_csv(player_data_fp)
    except FileNotFoundError:
        st.error(f"Player data file not found: {player_data_fp}")
        return

    # Season selector, newest first
    seasons = sorted(data['Season'].unique(), reverse=True)
    selected_season = st.selectbox("Select a Season", seasons)

    # Work on a copy so downstream helpers may add columns without writing
    # into a slice of the full frame.
    data_season = data[data['Season'] == selected_season].copy()

    # Cluster players based on the filtered data
    data_season = cluster_players_specialized(
        data_season, n_clusters=metrics_cfg['clustering']['n_clusters']
    )

    st.header("Plots")

    # Plot label -> function that builds the figure from the season data
    plotters = {
        "Career Clusters: Age vs Salary": plot_career_clusters,
        "Injury Risk vs Salary": plot_injury_risk_vs_salary,
        "Availability vs Salary": plot_availability_vs_salary,
        "VORP vs Salary": plot_vorp_vs_salary,
    }
    plot_choice = st.selectbox("Select a plot to view:", list(plotters))
    plotter = plotters.get(plot_choice)
    if plotter is not None:
        st.pyplot(plotter(data_season))

    st.header("Top 10 Salary per Metric Tables")

    # Calculate metrics table
    metric_salary_table = table_metric_salary(data_season)

    # Dropdown to select the metric table
    metric_choice = st.selectbox("Select a metric to view top 10:",
                                 list(metrics_cfg['salary_metrics']))

    # Display the selected top 10 table with WS included
    top_10_table = display_top_10_salary_per_metric_with_ws(metric_salary_table, metric_choice)
    st.write(f"Top 10 {metric_choice}:")
    st.dataframe(top_10_table)


# Shot Chart Analysis function
def shot_chart_analysis():
    """Render the "Shot Chart Analysis" page and run the requested analysis.

    Collects the analysis options through Streamlit widgets (analysis type,
    team or player(s), season, opponent, court areas, debug flag). When
    "Run Analysis" is pressed the selection is validated and dispatched:

    * team            -> ``run_scenario`` for the selected team;
    * exactly 1 player -> ``run_scenario`` for that player;
    * several players -> one hexbin shot chart plus efficiency output per
      player, followed by the player compatibility table.

    Two explanatory expanders about MAE are always rendered at the bottom,
    whether or not an analysis was run.
    """
    st.header("Shot Chart Analysis")

    # Add guidelines and purpose explanation at the top
    st.markdown("""
    ### Welcome to the NBA Shot Analysis App!
    
    This app allows you to analyze the offensive and defensive efficiency of NBA teams and players. 
    You can compare players or teams to identify the most efficient spots on the court, 
    analyze player compatibility based on shot area efficiency, and much more.
    
    **Options and Guidelines:**
    - **Analysis Type**: Choose between offensive, defensive, or both types of analysis.
    - **Team or Player**: Analyze a team or an individual player.
    - **Court Areas**: Select specific court areas or analyze all areas.
    - **Comparison**: Compare multiple players to see how their offensive efficiencies align or differ.
    """)

    analysis_type = st.selectbox("Select analysis type", options=["offensive", "defensive", "both"])

    entity_type = st.selectbox("Analyze a Team or Player?", options=["team", "player"])

    # Exactly one of these is filled in, depending on the entity type.
    entity_name = None
    player_names = []
    if entity_type == "team":
        st.markdown("_**Team option is able to analyze both offense and defense by looking into the defense by shot detail from other teams' shot charts against the Opposing Team.**_")
        entity_name = st.selectbox("Select a Team", options=get_teams_list())
    else:
        st.markdown("_**Player Option is only able to look at offense.**_")
        player_names = st.multiselect("Select Players to Analyze", options=get_players_list())

    season = st.selectbox("Select the season", options=["2023-24", "2022-23", "2021-22", "2020-21"])

    opponent_type = st.selectbox("Compare against all teams or a specific team?", options=["all", "specific"])

    opponent_name = None
    if opponent_type == "specific":
        opponent_name = st.selectbox("Select an Opponent Team", options=get_teams_list())

    # ``court_areas`` ends up as the string "all" or a list of area names.
    court_areas = st.selectbox("Select court areas to analyze", options=["all", "specific"], index=0)

    if court_areas == "specific":
        court_areas = st.multiselect("Select specific court areas", options=get_all_court_areas())
    else:
        court_areas = "all"

    debug_mode = st.checkbox("Enable Debug Mode", value=False)

    if st.button("Run Analysis"):
        if entity_type == "player" and not player_names:
            st.error("Please select at least one player.")
        elif court_areas != "all" and not court_areas:
            # "specific" was chosen but the multiselect is still empty; an
            # empty area list would give the analysis nothing to work on.
            st.error("Please select at least one court area.")
        elif entity_type != "player":
            # Team analysis
            run_scenario(
                entity_name=entity_name,
                entity_type=entity_type,
                season=season,
                opponent_name=opponent_name,
                analysis_type=analysis_type,
                compare_players=False,
                court_areas=court_areas
            )
        elif len(player_names) == 1:
            # Single player analysis
            run_scenario(
                entity_name=player_names[0],
                entity_type=entity_type,
                season=season,
                opponent_name=opponent_name,
                analysis_type=analysis_type,
                compare_players=False,
                player_names=None,
                court_areas=court_areas
            )
        else:
            # Multiple players comparison
            player_shots = fetch_shots_for_multiple_players(player_names, season, court_areas, opponent_name, debug=debug_mode)

            opponent_label = opponent_name if opponent_name else "all teams"
            for player, shots in player_shots.items():
                st.pyplot(plot_shot_chart_hexbin(shots['shots'], f'{player} Shot Chart', opponent=opponent_label))
                st.write(f"Efficiency for {player}:")
                st.write(shots['efficiency'])

            compatibility_df = calculate_compatibility_between_players(player_shots)
            st.write("Player Shooting Area Compatibility:")
            st.write(compatibility_df)

    # Add explanation for shot chart MAE analysis
    with st.expander("Understanding MAE in Player Analysis with context from their Shooting"):
        st.markdown("""
        **MAE** is a metric that measures the average magnitude of errors between predicted values and actual values, without considering their direction.
        
        In our context, MAE is used to measure the difference between the shooting efficiencies of two players across various areas on the court.
        
        **Steps to Analyze MAE:**
        1. **Define Common Areas**: The court is divided into areas like "Left Corner 3", "Top of Key", "Paint", etc.
        2. **Calculate Individual Efficiencies**: Fetch shot data for each player and calculate their shooting efficiency in these areas.
        3. **Identify Common Areas**: When comparing players, identify the areas where both players have taken shots.
        4. **Calculate MAE**: Compute the absolute difference between efficiencies in each common area and average them.
        5. **Interpret Compatibility**:
            - **High MAE**: Indicates players excel in different areas (more compatible).
            - **Low MAE**: Indicates similar efficiencies in the same areas (less compatible).
        
        **Use this metric to assess player compatibility based on where they excel on the court!**
        """)

    with st.expander("Understanding MAE in Team (offensive or defensive) in comparison to other Teams"):
        st.markdown("""
        **MAE** is a metric that measures the average magnitude of errors between predicted values and actual values, without considering their direction.
        
        In the context of team analysis, MAE is used to measure the difference between the shooting efficiencies of one team's offense and the defensive efficiencies of other teams.
        
        **Steps to Analyze MAE for Team Comparison:**
        1. **Calculate Offensive Efficiency**: Fetch shot data for the team of interest and calculate their shooting efficiency across various areas on the court.
        2. **Calculate Defensive Efficiency of Opponents**: For each opponent team, calculate their defensive efficiency by analyzing how well they defend these same areas on the court.
        3. **Calculate MAE**: Compute the MAE between the offensive efficiency of the team of interest and the defensive efficiencies of each opponent team across the defined court areas.
        4. **Interpret the Results**:
            - **Low MAE**: Indicates that the opponent team is effective at defending the areas where the team of interest typically excels. This suggests that the opponent is a "bad fit" for the team of interest, as they defend well against their strengths.
            - **High MAE**: Indicates that the opponent team struggles to defend the areas where the team of interest typically excels. This suggests that the opponent is a "good fit" for the team of interest, as their defense is less effective against the team's offensive strengths.
        
        **Use this analysis to identify which teams are tough matchups (bad fits) versus easier matchups (good fits) based on how well they can defend your team's key offensive areas!**
        """)

def data_analysis():
    """Render the "Data Analysis" page.

    Loads the player dataset from the path supplied by the project config,
    lets the user pick a season, and shows for that season: summary
    statistics, a distribution plot for a selected column, and a correlation
    heatmap. A final section explains the preprocessing steps and, for the
    step chosen in the dropdown, shows a short illustration.
    """
    from config import get_config
    config = get_config()

    st.header("Data Analysis")

    # Dataset location is resolved centrally by the config object.
    frame = pd.read_csv(config.get_file_path('player_data'))

    # Season picker, newest season first.
    season_options = sorted(frame['Season'].unique(), reverse=True)
    chosen_season = st.selectbox("Select a Season", season_options)
    season_frame = frame[frame['Season'] == chosen_season]

    st.subheader("Basic Statistics")
    st.write(season_frame.describe())

    st.subheader("Feature Distribution")
    chosen_feature = st.selectbox("Select Feature", season_frame.columns)
    distribution_fig = plot_feature_distribution(season_frame, chosen_feature)
    st.pyplot(distribution_fig)

    st.subheader("Correlation Heatmap")
    heatmap_fig = plot_correlation_heatmap(season_frame)
    st.pyplot(heatmap_fig)

    st.subheader("Data Preprocessing")
    st.write("""
    We preprocess the data to ensure it's suitable for modeling. Here are the key steps involved:
    1. **Cleaning Data**: Handle missing values, clean advanced statistics columns, and remove unnecessary columns.
    2. **Feature Engineering**: Create new features like Points Per Game (PPG), Availability, Salary Percentages, and Efficiency.
    3. **Label Encoding**: Encode categorical features like 'Injury Risk' and 'Position'.
    4. **Scaling**: Scale numerical features to normalize the data.
    5. **Season Filtering**: Filter the data by seasons to prepare train and test datasets.
    """)

    def _show_clean_data():
        st.write("""
        In this step, we remove unnecessary columns such as 'Wins', 'Losses', and '2nd Apron', and handle missing data in percentage-based columns (e.g., 3P%, FT%, 2P%). 
        The columns are dropped based on the assumption that they do not contribute significantly to salary prediction.
        """)
        # Lists the columns of the full loaded dataset (all seasons).
        st.write("Cleaned Data Columns: ", frame.columns.tolist())

    def _show_feature_engineering():
        st.write("""
        We derive new features such as:
        - **PPG (Points Per Game)**: Points scored per game.
        - **Availability**: Games played / total games in a season.
        - **SalaryPct**: Salary as a percentage of the inflated salary cap.
        - **Efficiency**: A custom efficiency metric based on offensive and defensive stats.
        """)
        # Only the first element of the returned triple is displayed.
        engineered, _pipeline, _ = engineer_features(season_frame)
        st.write(engineered[['Availability', 'Efficiency']].head())

    def _show_label_encoding():
        st.write("""
        We encode categorical features like 'Injury Risk' into numeric values to feed into the machine learning model. For example:
        - **Low Risk**: 1
        - **Moderate Risk**: 2
        - **High Risk**: 3
        """)
        encoded = label_encode_injury_risk(season_frame)
        st.write(encoded[['Injury_Risk']].head())

    # Dropdown label -> renderer for that preprocessing step.
    step_renderers = {
        "Clean Data": _show_clean_data,
        "Feature Engineering": _show_feature_engineering,
        "Label Encoding": _show_label_encoding,
    }

    chosen_step = st.selectbox("Select a Preprocessing Step", [
        "Clean Data",
        "Feature Engineering",
        "Label Encoding"
    ])

    render_step = step_renderers.get(chosen_step)
    if render_step is not None:
        render_step()



def convert_season_format(season_str):
    """Return the starting year of a season string.

    The season is split on ``'-'`` and the first piece is returned unchanged,
    e.g. ``'2023-24'`` -> ``'2023'``. A string without a dash is returned
    as-is.

    Args:
        season_str: Season label such as ``'2023-24'``.

    Returns:
        The text before the first ``'-'`` (still a string, not an int).

    Raises:
        TypeError: If ``season_str`` is not a ``str``.
    """
    import logging

    if not isinstance(season_str, str):
        raise TypeError(f"Expected a string, but got {type(season_str)}")

    year = season_str.split('-')[0]
    # Diagnostics go through logging instead of unconditional prints so the
    # app's stdout is not flooded on every call.
    logging.getLogger(__name__).debug(
        "Season string %r converted to starting year %r", season_str, year
    )
    return year

# Data visualization functions
def plot_feature_distribution(data, feature):
    """Draw a histogram with a KDE overlay for one column of ``data``.

    Args:
        data: Table-like object indexable by ``feature`` (the caller in this
            file passes a pandas DataFrame).
        feature: Column key to plot; also used for the title and x label.

    Returns:
        The newly created 10x6 matplotlib figure.
    """
    figure, axes = plt.subplots(figsize=(10, 6))
    column_values = data[feature]
    sns.histplot(column_values, kde=True, ax=axes)
    # Title and axis labels set in one call.
    axes.set(
        title=f'Distribution of {feature}',
        xlabel=feature,
        ylabel='Count',
    )
    return figure

def plot_correlation_heatmap(data):
    """Draw a heatmap of the pairwise correlations of the numeric columns.

    Args:
        data: DataFrame; non-numeric columns are dropped before correlating.

    Returns:
        The newly created 12x10 matplotlib figure.
    """
    correlations = data.select_dtypes(include=[np.number]).corr()
    figure, axes = plt.subplots(figsize=(12, 10))
    sns.heatmap(correlations, annot=False, cmap='coolwarm', ax=axes)
    axes.set_title('Correlation Heatmap')
    return figure

def _accepts_keyword(func, keyword):
    """Return True when ``func`` can be called with ``keyword=...``.

    That is the case when the signature declares ``keyword`` as a
    keyword-capable parameter or takes ``**kwargs``. Returns False when the
    signature cannot be introspected.
    """
    import inspect

    try:
        parameters = inspect.signature(func).parameters
    except (TypeError, ValueError):
        return False

    declared = parameters.get(keyword)
    if declared is not None and declared.kind in (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    ):
        return True
    return any(
        param.kind is inspect.Parameter.VAR_KEYWORD
        for param in parameters.values()
    )


def main():
    """Entry point of the Streamlit app.

    Applies the page config, runs the optional network diagnostics, loads the
    player dataset, renders the sidebar navigation and the season picker,
    loads (or trains) the models for the selected season and finally renders
    the page chosen in the sidebar.

    Returns early (after showing an error) when the model files for the
    selected season cannot be found.
    """
    from config import get_config
    cfg = get_config()

    # Set Streamlit page config from centralized STREAMLIT_CONFIG
    try:
        st.set_page_config(**cfg.STREAMLIT_CONFIG)
    except Exception:
        # Deliberate best-effort: set_page_config can only be called once;
        # ignore if rerun.
        pass

    st.title("NBA Salary Prediction, Data Analysis, and Trade Impact Simulator")

    # Network diagnostics for cloud environments
    try:
        from streamlit_app_helpers import check_network_connectivity
        network_status = check_network_connectivity(debug=True)
        if network_status.get("errors"):
            st.warning("⚠️ Network connectivity issues detected. Some features may be limited.")
            if st.checkbox("Show network diagnostics"):
                st.json(network_status)
    except Exception as e:
        # Diagnostics are optional; never let them take the app down.
        st.warning(f"⚠️ Could not perform network diagnostics: {e}")

    # ---- Centralized paths via config ----
    player_data_fp = cfg.get_file_path('player_data')

    # Load the data
    data = pd.read_csv(player_data_fp)

    # Get the unique seasons and exclude the earliest one
    seasons = sorted(data['Season'].unique(), reverse=True)  # Sort in descending order
    if len(seasons) > 1:
        seasons = seasons[:-1]  # Remove the earliest season (last element)

    # Sidebar Navigation
    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Go to", [
        "Introduction",
        "Data Analysis",
        "Model Results",
        "Salary Evaluation",
        "Shot Chart Analysis",
        "Advanced Metrics Analysis",
        "Trade Impact Simulator"
    ])

    # Season Selection (format to integer year)
    selected_season = st.selectbox("Select Season", seasons)
    season_year = int(selected_season.split('-')[0])

    # Optional diagnostics panel in the sidebar (needs season_year).
    runtime_debug_panel(season_year)

    # Model directory for this season (centralized)
    model_dir = cfg.get_file_path('model_save', year=season_year)

    # Load or train model and get predictions
    # Some utilities may expect strings; cast Paths to str defensively.
    predictions_df = check_or_train_model(str(player_data_fp), str(model_dir), season_year)

    # Load models (to be reused across pages)
    rf_model_path = model_dir / 'best_rf_model.pkl'
    xgb_model_path = model_dir / 'best_xgb_model.pkl'
    feature_names_path = model_dir / 'feature_names.pkl'

    try:
        rf_model = joblib.load(rf_model_path)
        xgb_model = joblib.load(xgb_model_path)
        feature_names = joblib.load(feature_names_path)
    except FileNotFoundError:
        st.error("Models or feature names not found for the selected season. Please ensure the models are trained.")
        return

    if page == "Introduction":
        st.title("Enhanced NBA Player Salary Analysis")
        st.write("Welcome to the NBA Salary Analysis and Prediction App! This project aims to provide comprehensive insights into NBA player salaries, advanced metrics, and future salary predictions based on historical data.")

        st.subheader("Original Data")
        # NOTE(review): this re-reads the CSV already loaded into ``data``
        # above; kept in case the file changes during model training - confirm.
        original_df = pd.read_csv(player_data_fp)
        st.dataframe(original_df)

        st.subheader("Predicted Data")
        st.write("Here are the salary predictions generated based on Random Forest and XGBoost models.")
        st.dataframe(predictions_df)

    elif page == "Data Analysis":
        data_analysis()

    elif page == "Model Results":
        st.header("Model Results")

        # 1) Metrics table (loaded from evaluation_results.pkl)
        metrics_df = display_model_metrics(str(model_dir))
        if metrics_df.empty:
            st.warning("No evaluation metrics found for this season. Train the models first.")
        else:
            st.subheader("Model Performance Metrics")
            st.dataframe(metrics_df)

        # 2) Choose model to inspect feature importance
        model_choice = st.selectbox("Select model for feature importance", ["Random Forest", "XGBoost"])
        model = rf_model if model_choice == "Random Forest" else xgb_model

        # 3) Feature count
        from salary_model_training.util_functions import get_feature_count
        n_features = get_feature_count(model)
        if n_features > 0:
            st.write(f"**Number of features in model:** {n_features}")
        else:
            st.write("**Number of features in model:** (not available for this estimator)")

        # 4) Feature importance: filtered DF + chart
        st.subheader(f"{model_choice} Feature Importance")
        feature_importances_df = display_feature_importance(model, feature_names, ['Position_', 'Team_'])

        if feature_importances_df is None or feature_importances_df.empty:
            st.info("This model does not expose feature importances or no importances were computed.")
        else:
            st.dataframe(feature_importances_df)
            fig = plot_feature_importance(feature_importances_df, model_choice)
            st.pyplot(fig)

    elif page == "Salary Evaluation":
        st.header("Salary Evaluation")
        num_players = st.slider("Select number of players to display", min_value=5, max_value=20, value=10)
        overpaid, underpaid = identify_overpaid_underpaid(predictions_df, top_n=num_players)
        st.subheader(f"Top {num_players} Overpaid Players")
        st.dataframe(overpaid[['Player', 'Team', 'Salary', 'Predicted_Salary', 'Salary_Difference']])
        st.subheader(f"Top {num_players} Underpaid Players")
        st.dataframe(underpaid[['Player', 'Team', 'Salary', 'Predicted_Salary', 'Salary_Difference']])

    elif page == "Shot Chart Analysis":
        shot_chart_analysis()

    elif page == "Advanced Metrics Analysis":
        advanced_metrics_analysis()

    elif page == "Trade Impact Simulator":
        st.header("🏀 Trade Impact Simulator with Grading System")

        # We already computed selected_season earlier; get its numeric year
        formatted_season = convert_season_format(selected_season)  # e.g. '2023'
        season_year = int(formatted_season)

        # Resolve the predictions path strictly via config
        try:
            predictions_path = cfg.resolve_predictions_path(season_year)
            st.info(f"Using predictions file: `{predictions_path}`")
        except FileNotFoundError as e:
            st.error(f"Failed to locate predictions data for season {season_year}.\n\n{e}")
            st.stop()

        # OPTIONAL: expose to downstream code via env var for older modules
        os.environ["FREETHROW_PREDICTIONS_CSV"] = str(predictions_path)

        # Pass the path explicitly only when the simulator's signature accepts
        # it. Checking the signature (rather than catching TypeError around the
        # call) means a TypeError raised inside the simulator surfaces instead
        # of silently triggering a second run of the whole simulator.
        if _accepts_keyword(trade_impact_simulator_app, "predictions_path"):
            trade_impact_simulator_app(formatted_season, predictions_path=str(predictions_path))
        else:
            # Older signature: module should read from env var or config
            trade_impact_simulator_app(formatted_season)


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()









Overwriting src/app.py
