Automated NBA Data Aggregator
===============================
This workshop is intended to bridge the gap between instructional Python programming and real-world sports analytics. The tutorial is interactive: students work through it step by step, which makes it ideal for a joint session of statistics students and basketball enthusiasts at a university hackathon or data science club session. The text assumes that students have completed introductory Python courses and understand programming concepts like loops, functions, and basic data structures, but now need to learn how these are used in real-world data processes.
The approach mirrors my experience educating new analysts in sports tech startups, focusing on the subtle realities distinguishing classroom exercises from production settings. We will use real NBA statistics as our case study, which offers tangible examples and intrinsic relevance to students interested in sports. The tutorial prioritizes practical API usage over theory because this is the most urgent need for students who are entering professional employment or more advanced projects.
Good professional practice includes correctly implementing API rate limiting, using defensive programming patterns to sanitize data, dealing with configurations in different deployment environments, and generating output that is ready for analysis. All of these are precisely what set learning code exercises apart from production-level, maintainable systems. The workshop is structured so that students understand how to implement each component and why professional developers make certain architecture choices when processing sports data at scale.


"""
A comprehensive tool to fetch, process, and analyze NBA player statistics from the API-NBA via RapidAPI.

Features:
- Can be configured either interactively or via creds.py
- Fetches season averages for all players on a specified team
- Handles API rate limiting with configurable delays
- Calculates advanced statistics (PPG, RPG, APG, FG%)
- Outputs clean CSV files with proper error handling
- Provides progress tracking and verbose logging

Usage Options:
1. Interactive Mode:
   - Run the script and follow the prompts
   - Enter all configuration when prompted

2. Manual Configuration:
   - Set your RapidAPI key in creds.py (RAPIDAPI_KEY = "your-key-here")
   - Configure TEAM_NAME, SEASON, DELAY, and OUTPUT_DIR constants
   - Run the script
"""


In [7]:
import os
import time
import csv
from datetime import datetime
import requests
from typing import Dict, List, Optional

class NBADataFetcher:
    """Client for the API-NBA service on RapidAPI.

    Resolves a team by name, lists its roster and turns a player's per-game
    log into season averages.  Failures are reported with ``print`` and are
    signalled to callers through ``None`` / empty results, not exceptions.
    """

    def __init__(self, api_key: str):
        """Initialize the fetcher with API credentials.

        Args:
            api_key: RapidAPI key sent in the ``X-RapidAPI-Key`` header.
        """
        self.headers = {
            "X-RapidAPI-Key": api_key,
            "X-RapidAPI-Host": "api-nba-v1.p.rapidapi.com"
        }

    def _make_api_request(
        self,
        endpoint: str,
        params: Optional[Dict] = None,
        max_retries: int = 2,
        backoff_seconds: float = 1.0,
    ) -> Optional[Dict]:
        """GET ``endpoint`` and return the decoded JSON body.

        Transient failures (connection errors, timeouts, HTTP 429 and 5xx)
        are retried with exponential backoff; any other failure gives up
        immediately, because repeating a request with e.g. bad credentials
        cannot succeed.

        Args:
            endpoint: Path appended to the API base URL (e.g. ``"teams"``).
            params: Optional query-string parameters.
            max_retries: Extra attempts made after the first failure.
            backoff_seconds: Wait before the first retry; doubled each time.

        Returns:
            The parsed JSON document, or ``None`` when the request failed.
        """
        url = f"https://api-nba-v1.p.rapidapi.com/{endpoint}"
        total_attempts = max(0, max_retries) + 1

        for attempt in range(1, total_attempts + 1):
            try:
                response = requests.get(url, headers=self.headers, params=params, timeout=15)
                response.raise_for_status()
                return response.json()
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException subclass.
            except (requests.exceptions.RequestException, ValueError) as e:
                status = getattr(getattr(e, "response", None), "status_code", None)
                transient = (
                    isinstance(e, (requests.exceptions.ConnectionError,
                                   requests.exceptions.Timeout))
                    or status in (429, 500, 502, 503, 504)
                )
                if not transient or attempt == total_attempts:
                    print(f"API request failed: {e}")
                    return None
                wait = backoff_seconds * 2 ** (attempt - 1)
                print(f"API request failed: {e}; retrying in {wait:g}s "
                      f"(attempt {attempt} of {total_attempts})")
                time.sleep(wait)

    def get_team_id(self, team_name: str) -> Optional[int]:
        """Find a team ID by case-insensitive substring match on its name.

        Args:
            team_name: Full or partial team name, e.g. ``"Lakers"``.

        Returns:
            The ID of the first team whose name contains ``team_name``, or
            ``None`` when the name is blank, the request failed, or nothing
            matched.
        """
        # A blank query is a substring of every name and would silently
        # select whichever team the API lists first.
        query = (team_name or "").strip().lower()
        if not query:
            return None

        data = self._make_api_request("teams")
        if not data or "response" not in data:
            return None

        for team in data["response"] or []:
            name = team.get("name") or ""
            team_id = team.get("id")
            if team_id is not None and query in name.lower():
                print(f"Found team: {name} (ID: {team_id})")
                return team_id
        return None

    def get_team_players(self, team_id: int, season: str) -> List[Dict]:
        """Retrieve all players for a given team and season.

        Returns:
            The ``response`` list from the ``players`` endpoint, or an empty
            list when the request failed or the field is missing or null.
        """
        params = {"team": team_id, "season": season}
        data = self._make_api_request("players", params)
        return (data.get("response") or []) if data else []

    def get_player_stats(self, player_id: int, player_name: str, season: str) -> Optional[Dict]:
        """Fetch a player's game logs and reduce them to season averages.

        Returns:
            The dict built by ``_calculate_averages``, or ``None`` when the
            request failed or the player has no played games.
        """
        params = {"id": player_id, "season": season}
        data = self._make_api_request("players/statistics", params)
        if not data or "response" not in data:
            return None

        # A null "response" is treated like an empty log.
        game_logs = data["response"] or []
        return self._calculate_averages(game_logs, player_name)

    @staticmethod
    def _did_play(minutes) -> bool:
        """Return True unless ``minutes`` is missing, blank or all zeros.

        Accepts plain minute counts (``"32"``) and clock strings
        (``"32:15"``).  Values in an unrecognised format count as played.
        """
        if minutes is None:
            return False
        text = str(minutes).strip()
        if not text:
            return False
        try:
            return any(float(part) > 0 for part in text.split(":"))
        except ValueError:
            return True

    @staticmethod
    def _stat(game: Dict, key: str) -> float:
        """Read one numeric stat from a game log, treating null/blank as 0."""
        value = game.get(key)
        return 0.0 if value in (None, "") else float(value)

    def _calculate_averages(self, game_logs: List[Dict], player_name: str) -> Optional[Dict]:
        """Calculate season averages from game logs with validation.

        Only games in which the player logged minutes count towards the
        per-game averages.

        Args:
            game_logs: Per-game stat dicts; the keys read are ``min``,
                ``points``, ``totReb``, ``assists``, ``fgm`` and ``fga``.
            player_name: Display name copied into the result and messages.

        Returns:
            A dict with ``name``, ``ppg``, ``rpg``, ``apg``, ``fg_pct``,
            ``games_played``, ``total_games`` and ``last_updated``; ``None``
            when no game was played or a stat is not numeric.
        """
        played_games = [g for g in game_logs if self._did_play(g.get("min"))]
        games_played = len(played_games)

        if games_played == 0:
            print(f"No played games found for {player_name}")
            return None

        try:
            points = sum(self._stat(g, "points") for g in played_games)
            rebounds = sum(self._stat(g, "totReb") for g in played_games)
            assists = sum(self._stat(g, "assists") for g in played_games)
            fgm = sum(self._stat(g, "fgm") for g in played_games)
            fga = sum(self._stat(g, "fga") for g in played_games)

            return {
                "name": player_name,
                "ppg": round(points / games_played, 1),
                "rpg": round(rebounds / games_played, 1),
                "apg": round(assists / games_played, 1),
                # No attempts means the percentage is undefined; report 0.0.
                "fg_pct": round(fgm / fga, 3) if fga > 0 else 0.0,
                "games_played": games_played,
                "total_games": len(game_logs),
                "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        except (TypeError, ValueError) as e:
            print(f"Error calculating stats for {player_name}: {e}")
            return None

class DataExporter:
    """Writes collected player statistics to disk."""

    @staticmethod
    def save_to_csv(data: List[Dict], filename: str, output_dir: str = "output") -> bool:
        """Save player statistics to a CSV file.

        The rows are written to a temporary sibling file that is renamed
        over the destination only after the write succeeded, so a failure
        never leaves a truncated CSV at the final path.

        Args:
            data: One dict per player, keyed by the column names below.
            filename: Name of the CSV file to create inside ``output_dir``.
            output_dir: Target directory; created if it does not exist.

        Returns:
            ``True`` when the file was written; ``False`` when ``data`` is
            empty, a row carries an unknown column, or the write failed.
        """
        if not data:
            print("No data to save")
            return False

        fieldnames = [
            "name", "ppg", "rpg", "apg", "fg_pct",
            "games_played", "total_games", "last_updated"
        ]

        filepath = os.path.join(output_dir, filename)
        temp_path = f"{filepath}.tmp"

        try:
            os.makedirs(output_dir, exist_ok=True)
            with open(temp_path, "w", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(data)
            os.replace(temp_path, filepath)
        # DictWriter raises ValueError for keys that are not in fieldnames.
        except (OSError, csv.Error, ValueError) as e:
            print(f"Failed to save CSV: {e}")
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup: the temp file may never have existed.
                pass
            return False

        print(f"Successfully saved data to {filepath}")
        return True

Introduction: The API-Driven Analytics Pipeline
===============================================

Current basketball analytics have a scope that makes it impossible to collect data manually. Consider the data footprint of a single NBA team during the 2024 season alone—82 regular-season games, with 15 players playing 25+ minutes per game and over 50 statistics gathered per player per game. This translates to 30 teams at the league level, which generates millions of data points across thousands of games each season. Traditional manual data collection methods cannot handle this volume while maintaining the accuracy and timeliness required for meaningful analysis.

Our solution automates this process with three basic technical elements that mimic the infrastructure of professional sports leagues. First, we have professional-grade API integration far beyond HTTP request simplicity. Production-grade API use includes secure authentication via RapidAPI keys, parameter-based endpoint construction for excellent data filtering accuracy, and strict enforcement of rate limits to avoid service interruption. The system should also have network resilience features like automatic retries with exponential backoff and adequate timeouts - real-world APIs tend to experience occasional outages or slowdowns that academic examples typically ignore.

Second, we provide an end-to-end data quality layer to handle the messy aspects of raw sports data. API responses typically include "Did Not Play" records that would distort averages if they are not dropped, truncated data feeds with missing information, and type differences where numbers sometimes appear as strings or null fields. Our validation pipeline addresses these issues by employing active participation filtering, robust type conversion guards, and statistical sanity checks that ensure only valid data proceeds to analysis.

Third, we provide a production-grade output system that generates the clean, normalized formats analysts use in their workflows. These are properly formatted CSV files with consistent schemas, embedded metadata like processing timestamps, and error-protected file operations that gracefully handle permission issues and storage limits. The output system supports configurable storage locations and file naming schemes to satisfy organizational needs.

This structure closely mirrors that used by NBA team analytics personnel, sports media outlets like ESPN, and sites using advanced statistics like FiveThirtyEight. The advantages of automation are compelling—where it can take days of tedium to process manually, our pipeline can construct full-season data in minutes. Above all, automation eliminates whole categories of human transcription errors while maintaining flawless consistency with official NBA calculation metrics.

Our practice goes beyond ordinary academic examples by employing professional development idioms like type hints for readability, dynamic configuration parameters for different deployment profiles, and detailed progress logging for long-running operations. Such idioms are usual in production code but rare in course materials. For instance, whereas a beginning programmer might calculate points per game with a single division, we employ suitably encapsulated functions that validate all inputs, deal well with edge cases, and produce results that adhere to professional standards for formatting and metadata.

In [None]:
def get_config_interactive() -> Dict:
    """Ask the user for every run setting on stdin and return them.

    Each answer is stripped of surrounding whitespace.  The API key and team
    name must be non-empty, the season must be exactly four digits, and the
    delay must parse as a float (values below 0.5 are raised to 0.5).  An
    empty output directory falls back to ``"output"``.

    Returns:
        Dict with the keys ``api_key``, ``team_name``, ``season`` (str),
        ``delay`` (float) and ``output_dir``.
    """
    def ask_required(prompt: str, complaint: str) -> str:
        # Keep asking until the stripped answer is non-empty.
        answer = input(prompt).strip()
        while not answer:
            print(complaint)
            answer = input(prompt).strip()
        return answer

    print("\nNBA Player Statistics Aggregator - Interactive Configuration")
    print("=" * 60)

    settings = {}

    settings['api_key'] = ask_required(
        "\nEnter your RapidAPI key: ",
        "API key cannot be empty. Please try again.",
    )

    settings['team_name'] = ask_required(
        "\nEnter team name (e.g., 'Lakers', 'Warriors'): ",
        "Team name cannot be empty. Please try again.",
    )

    # Season: exactly four digits, kept as a string.
    season_prompt = "\nEnter season year (e.g., 2023 for 2022-23 season): "
    year = input(season_prompt).strip()
    while not (year.isdigit() and len(year) == 4):
        print("Invalid season. Please enter a 4-digit year (e.g., 2023).")
        year = input(season_prompt).strip()
    settings['season'] = year

    # Delay: any float, floored at half a second.
    while 'delay' not in settings:
        raw_delay = input("\nEnter API delay in seconds (recommended 1-5 to avoid rate limiting): ").strip()
        try:
            settings['delay'] = max(0.5, float(raw_delay))
        except ValueError:
            print("Invalid delay. Please enter a number (e.g., 2 for 2 seconds).")

    chosen_dir = input("\nEnter output directory (press Enter for default 'output'): ").strip()
    settings['output_dir'] = chosen_dir if chosen_dir else "output"

    return settings

Core Concepts Explained
=======================
Three fundamental concepts form the foundation of our professional NBA statistics processor, each solving fundamental aspects of production data systems. Knowledge of these concepts will help participants adapt the pipeline to other sports or analytical purposes beyond basketball.

The first pillar is professional API patterns and integration protocols. Working with production APIs is more than sending straightforward HTTP requests - it is all about deliberate thought around authentication, parameterization, and operational constraints. Our system incorporates secure authentication in properly formatted headers containing the RapidAPI key for service entry and focused host data for endpoint routing. These headers must be appended to every request with the strict format constraints. Equally critical is the strategic use of query parameters that enable exact filtering of the data we bring down. Parameters enable filtering by season, year, specific teams or players, ranges of dates, and other dimensions that prevent downloading extraneous data. A well-parameterized design achieves a tremendous performance and reliability improvement by reducing payload sizes and looking only at relevant records.

The second underlying principle is rigorous data validation and quality assurance. Raw sports data from APIs frequently contains artifacts and inconsistencies that will skew analysis unless dealt with properly. Our validation pipeline involves several layers of quality tests, starting with participation filtering to remove records for games where a player did not participate. This includes removing "Did Not Play" designations, games played with zero minutes, and cases where players were on the roster but scratched at a coach's discretion. The system then performs extensive type safety checks, so numeric statistics arrive as proper numbers, not strings or null values. Text fields like player names have uniform formatting. Range checking adds another quality layer by flagging statistically improbable values that might indicate data corruption - a basketball player cannot legitimately score 200 points during a game.

The third pillar is strict compliance with official NBA statistical protocols and reporting standards. Basic statistics like points per game (PPG) may seem simple in theory, but professional usage requires attention to detail, which is usually not emphasized in academic settings. The NBA has specific norms for rounding statistical outcomes (generally to a single decimal point for per-game averages), handling edge cases like division by zero when calculating percentages, and qualifying statistics using minimum activity thresholds. More advanced statistics introduce additional complexity via specialized formulas for metrics like true shooting percentage (accounting for the differential value of three-pointers and free throws) and player efficiency rating (a weighted sum of several statistics). Our implementation mirrors how NBA clubs and media calculate and report these statistics, so our results will be easily comparable to authoritative sources.

In [None]:
def get_config_manual() -> Dict:
    """Load configuration from creds.py and module-level constants.

    The API key comes from ``creds.RAPIDAPI_KEY``.  The remaining settings
    are taken from the optional module-level constants ``TEAM_NAME``,
    ``SEASON``, ``DELAY`` and ``OUTPUT_DIR``; any constant that is not
    defined falls back to the built-in default shown below.

    Returns:
        Dict with the keys ``api_key``, ``team_name``, ``season`` (str),
        ``delay`` and ``output_dir``.

    Raises:
        ValueError: If creds.py is missing, lacks ``RAPIDAPI_KEY``, or the
            key is not a non-empty string.
    """
    try:
        import creds
        api_key = creds.RAPIDAPI_KEY
    except (ImportError, AttributeError) as e:
        raise ValueError("Missing or invalid creds.py file. Please create one with your RapidAPI key.") from e

    # Interactive mode refuses an empty key; apply the same rule here so a
    # blank value fails now instead of on the first API call.
    if not isinstance(api_key, str) or not api_key.strip():
        raise ValueError("RAPIDAPI_KEY in creds.py is empty. Please set it to your RapidAPI key.")

    # Constants defined at the top of the file override the defaults.
    constants = globals()
    config = {
        'api_key': api_key.strip(),
        'team_name': constants.get("TEAM_NAME", "Lakers"),
        # The season is passed around as a string, so normalise an int.
        'season': str(constants.get("SEASON", "2023")),
        'delay': constants.get("DELAY", 6),
        'output_dir': constants.get("OUTPUT_DIR", "output"),
    }

    return config

Building the Solution
======================

We construct the end-to-end data pipeline with three progressively sophisticated components, each demonstrating best-practice software engineering for data systems. With modular components, students can learn each component independently and see how they compose together.
The first essential component is a solid API client for production work. Unlike the standard bare script-based API calls in tutorial examples, our client features several professional-grade aspects required for reliable operation. Connection management includes long-running sessions for improved performance between different requests, customizable timeouts to prevent hanging, and consistent treatment of headers and parameters for all calls. The client employs high-end fault tolerance with an exponential backoff retry mechanism. If a request is failing (which occurs with APIs of the real world), the system will automatically retry with longer and longer delays between attempts. This behavior elegantly handles momentary network issues or API rate limits without intervention. The client also includes complete error handling capable of distinguishing between different failure classes (network, auth, data validation failures) and processing each appropriately.

Data sanitization is the other large piece, which normalizes raw API results into tidy, analysis-ready datasets. The sanitization engine begins with participation validation, only retaining data in cases where players had meaningful play in games. This involves validating minutes played thresholds (excluding zero or meaningless minutes games), verifying game status indicators, and cross-verifying roster information. The system then performs aggressive type conversion and validation, which puts all statistical values in good numeric shape with missing or malformed data processed sensibly. One nice touch of professionalism is support for reporting data quality - rather than silently discarding bad records, the system records validation failures with enough information that analysts can investigate potential problems in the data. This monitoring capability is critical in production environments where data quality issues might indicate larger system problems.

The statistical processing engine is the third core component, computing standard and advanced NBA statistics according to official league procedures. The engine is structured as a sequence of specialist calculation modules, where each module can be individually updated and tested. Basic statistics modules carry out simple calculations like per-game averages for points, rebounds, and assists while following NBA rounding conventions and qualification levels. Advanced metrics modules use more advanced algorithms such as player efficiency rating (a weighted composite statistic), true shooting percentage (with the value of the shot type included), and usage percentage (estimating what proportion of team possessions a player controls). The engine contains a configuration layer that controls what metrics are calculated and in what output format, allowing for customization for a range of analysis needs. Throughout all computation, the engine maintains strict type consistency and handles edge cases gracefully, such as division operations where denominators might be zero.

These components demonstrate several significant professional software engineering practices in production systems. Input validation at multiple levels demonstrates consistent error-handling patterns. The system demonstrates a clean separation of concerns based on modular design such that the parts can be tested and edited independently. Configuration management supports both interactive and file-based configurations for different usage patterns. Most of all, the implementation keeps maintainability in mind, using mindful naming, type hints, and docstrings - knowing that real-world code gets read and edited many times more than originally written.

In [None]:
def run_with_config(config: Dict):
    """Main execution flow using the provided configuration.

    This first part reads ``config['api_key']``, ``config['team_name']`` and
    ``config['season']``, resolves the team ID and fetches the roster.  It
    prints a message and returns early when the team or its players cannot
    be found.

    NOTE: in this notebook export the function body is split across cells;
    the per-player loop and CSV export that use ``fetcher``, ``exporter``
    and ``players`` appear in a later cell.
    """
    print(f"\nNBA Player Statistics Aggregator - {config['team_name']} {config['season']}")
    print(f"Starting at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    
    # Initialize components
    fetcher = NBADataFetcher(config['api_key'])
    # save_to_csv is a static method, so this instance carries no state.
    exporter = DataExporter()
    
    # Get team ID
    print(f"\nSearching for team: {config['team_name']}...")
    team_id = fetcher.get_team_id(config['team_name'])
    # get_team_id returns None when the request fails or nothing matches.
    # NOTE(review): this truthiness test would also reject an ID of 0 —
    # confirm the API never issues one.
    if not team_id:
        print(f"Could not find team: {config['team_name']}")
        print("Please check the team name and try again.")
        return
    
    # Get players
    print(f"\nFetching players for {config['team_name']} {config['season']} season...")
    players = fetcher.get_team_players(team_id, config['season'])
    # An empty list covers both a failed request and an empty roster.
    if not players:
        print("No players found for team")
        return
    
    print(f"\nProcessing {len(players)} players...")

Complete System Walkthrough
===========================

The combined NBA analytics pipeline combines all aspects into an end-to-end system demonstrating professional data engineering techniques. This tour runs through the complete workflow from initialization to final output, emphasizing key design decisions and why they were made.

System configuration is the foundation of operational flexibility. The system offers two configuration modes: interactive prompts for exploratory use and file-based configuration for automated processes. The interactive mode guides users through parameter entry with validation and courteous feedback to ensure proper API keys, team identifiers, and sensible processing parameters. File-based configuration follows a production-style setup, with a credentials file and a settings module, and suits unattended scheduled jobs and analysis applications. The configuration manager handles sensitive data correctly, never logging or displaying API keys in plain text but giving helpful error messages for bad settings. This dual-mode strategy caters to the whole gamut of usage scenarios, from one-off analysis to automated reporting systems.

Data gathering begins with team resolution, converting human-readable team names ("Los Angeles Lakers") into the internal team IDs that the API employs. Team resolution employs fuzzy matching to handle typical name variants ("Lakers" vs "LA Lakers") and provides helpful feedback when matches are ambiguous. Once the team is identified, the system downloads roster information to determine which players to examine and retrieves game logs for all players within the specified season. The fetcher imposes strict rate limiting and throttles requests to stay comfortably under API limits while maintaining the best possible throughput. A progress monitor provides clear feedback during long operations, another nice touch that is often absent from learning samples. After acquisition, the system supports exhaustive logging, facilitating debugging and usage auditing without leaking sensitive credentials.

Data processing converts raw game logs to analysis results through several steps. Early validation rejects incomplete or damaged records and logs quality issues that will be reviewed later. The system computes base statistics such as points per game, rebounds per game, assists per game, rounded NBA-fashion, and qualifying players by playing level. Calculation of advanced metrics proceeds, with each metric embedded in its function to facilitate maintenance ease. The processing engine can easily calculate derived fields like shooting percentages and efficiency metrics while calculating all edge cases, like correctly calculating field goal percentage for players who have not attempted any shots yet. Data lineage information about how each metric was calculated from what source data is retained during processing for analytical integrity during commercial deployment.

Output generation produces analysis-ready output in a directly consumable form. The primary output is neatly formatted CSV files with player statistics that have consistent column names and are in order to industry requirements. Files include metadata headers for generation timestamp, data source, and processing parameters for aiding version control. The exporter has robust file handling with correct permissions, atomic write patterns for preventing partial outputs, and parameterizable destination paths. The system normalizes data into forms for integrated database environments that map well into relational schemas. Output file naming is deterministic with team, season, and date information for easy organization. The exporter provides validation checks that files were written out in their entirety and correctly before reporting successful completion.

The flow of execution orchestrates all components into a single pipeline. Initialization loads and verifies the configuration while establishing necessary resources and connections. The collection phase gathers roster information and game logs with chunking and pacing appropriate for large data sets. Processing transforms raw data into a processed state through validation, statistical computation, and quality check stages. Export generates permanent output while releasing temporary resources. The pipeline uses status monitoring that gives clear progress and issue feedback for every operation. Error handling differentiates between transient errors (like network blips) that can be retried and fatal errors (like bad credentials) that require intervention by the user. The pipeline is designed for interactive use in exploratory analysis and programmatic integration into larger data pipelines.

In [None]:
# Continuation of run_with_config from the earlier cell: process each player.
# Relies on the locals `players`, `fetcher`, `exporter` and `config`.
    player_stats = []
    # Count from 1 so the index doubles as the progress number shown below.
    for i, player in enumerate(players, 1):
        first_name = player.get("firstname", "").strip()
        last_name = player.get("lastname", "").strip()
        # NOTE: when only one name part is present, the f-string leaves a
        # stray leading or trailing space in player_name.
        player_name = f"{first_name} {last_name}" if first_name or last_name else "Unknown Player"
        player_id = player.get("id")
        
        # Without an ID no request is made, so the rate-limit pause below
        # is skipped as well.
        if not player_id:
            print(f"Skipping player with missing ID: {player_name}")
            continue
            
        print(f"\n{i}/{len(players)}: Processing {player_name}...")
        
        # get_player_stats returns None when the request fails or the player
        # has no played games; such players are left out of the results.
        stats = fetcher.get_player_stats(player_id, player_name, config['season'])
        if stats:
            player_stats.append(stats)
            print(f"  → {stats['ppg']} PPG, {stats['rpg']} RPG, {stats['apg']} APG")
            print(f"  → FG%: {stats['fg_pct']:.1%} ({stats['games_played']}/{stats['total_games']} games)")
        
        # Respect rate limits (no pause is needed after the last player)
        if i < len(players):
            time.sleep(config['delay'])
    
    # Export results
    if player_stats:
        # File name pattern: <team name, lowercased, spaces to underscores>_<season>_stats.csv
        filename = f"{config['team_name'].lower().replace(' ', '_')}_{config['season']}_stats.csv"
        # NOTE(review): the boolean result of save_to_csv is not checked, so
        # the success summary is printed even if the write failed.
        exporter.save_to_csv(player_stats, filename, config['output_dir'])
        
        print("\nProcess completed successfully!")
        print(f"\nSummary:")
        print(f"- Team: {config['team_name']}")
        print(f"- Season: {config['season']}")
        print(f"- Players processed: {len(player_stats)}/{len(players)}")
        print(f"- Output file: {os.path.join(config['output_dir'], filename)}")
    else:
        print("\nProcess completed but no valid player statistics were collected.")

Conclusion and Next Steps
=========================

This workshop has equipped participants to build professional-grade sports analytics pipelines beyond classroom exercises. The techniques and themes demonstrated are not limited to basketball but to any data-intensive discipline requiring substantial collection, processing, and analysis of significant statistics. The pipeline's modular structure provides several natural extensions that would be excellent follow-up projects for students wishing to expand their knowledge.

Expanding the statistical arsenal offers one beneficial area of enhancement. The system could include advanced defensive statistics like defensive rating or steal percentage, lineup analysis tracking group performance for given player pairings, or even prognostic model components projecting future performance from historical trends. Each further category of new statistics would support the principles of good statistical use while enhancing the system's value as an analytical tool.

Technical add-ons would convert the pipeline into a more complex platform. Integration with a database would allow for permanent storage of historical statistics to enable longitudinal analysis. A visual layer could generate charts and reports from processed information. Automatic reporting capabilities could issue daily or weekly reports to coaching staff or the media. Web service encapsulation would expose access to the system's features remotely over REST APIs. These enhancements would demonstrate how analytical pipelines integrate into larger organizational contexts.

Professional development techniques offer another space for growth. Facilitating a complete test environment with unit tests for each calculation module and integration tests for the whole pipeline would guarantee software engineering best practices. Performance benchmarking would identify areas for optimization throughout the data processing pipeline. An environment setup with continuous integration would demonstrate state-of-the-art DevOps best practices for analysis systems. Documentation generation would provide professional-grade reference material for system maintainers and end users.

The knowledge acquired through this workshop has direct applications across the whole range of careers in sports technology. Teams' analytics departments value professionals who understand statistical methodologies and their reliable implementation. Media outlets need engineers who can turn league data into engaging visualizations and stories. Fantasy sports and sports betting analysis require robust pipelines that handle fast-evolving data. Talent evaluation relies on real-time, accurate statistics from player rating services. Perhaps most importantly, these techniques are relevant to countless domains beyond sports - any field that deals with vast quantities of fast-changing data can learn from the same core engineering methods.

The complete solution demonstrates how classroom programming concepts mature into industrial data systems by considering reliability, maintainability, and reality constraints. Students gain specific technical skills and the mindset to build production-quality analytical tools. This process of evolving from academic exercise to professional work is a critical milestone in any data-driven career path.

In [None]:
def main():
    """Entry point: pick a configuration mode, then run the pipeline.

    Prompts until the user enters ``1`` (interactive) or ``2`` (manual),
    builds the configuration with the matching loader and hands it to
    ``run_with_config``.  Ctrl-C and any other exception are reported with a
    message instead of a traceback, and a closing message is always printed.
    """
    print("\nNBA Player Statistics Aggregator")
    print("=" * 40)

    mode_prompt = (
        "\nChoose mode:\n"
        "1. Interactive (enter configuration now)\n"
        "2. Manual (use creds.py and constants)\n"
        "Enter choice (1 or 2): "
    )

    try:
        # Re-prompt until one of the two valid choices is given.
        choice = input(mode_prompt).strip()
        while choice not in ('1', '2'):
            print("Invalid choice. Please enter 1 or 2.")
            choice = input(mode_prompt).strip()

        load_config = get_config_interactive if choice == '1' else get_config_manual
        run_with_config(load_config())

    except KeyboardInterrupt:
        print("\n\nProcess interrupted by user. Exiting...")
    except Exception as e:
        # Top-level boundary: report the failure rather than crash.
        print(f"\nAn unexpected error occurred: {e}")
    finally:
        print("\nThank you for using the NBA Player Statistics Aggregator!")

if __name__ == "__main__":
    main()

Full Script
===========

In [None]:
import os
import time
import csv
from datetime import datetime
import requests
from typing import Dict, List, Optional

class NBADataFetcher:
    """Fetch NBA data from API-NBA (via RapidAPI) and derive season averages.

    Every request goes through ``_make_api_request``, which turns transport,
    HTTP-status and JSON-decoding failures into a printed message and a
    ``None`` result, so the public methods degrade to ``None`` / ``[]``
    instead of raising.
    """

    def __init__(self, api_key: str):
        """Store the RapidAPI headers that are sent with every request."""
        self.headers = {
            "X-RapidAPI-Key": api_key,
            "X-RapidAPI-Host": "api-nba-v1.p.rapidapi.com"
        }

    def _make_api_request(self, endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]:
        """Perform a single GET request against ``endpoint``.

        No retries are attempted: one request is made with a 15 second
        timeout.

        Args:
            endpoint: Path appended to the API base URL (e.g. ``"teams"``).
            params: Optional query-string parameters.

        Returns:
            The decoded JSON body, or ``None`` if the request failed, the
            server answered with an error status, or the body was not JSON.
        """
        url = f"https://api-nba-v1.p.rapidapi.com/{endpoint}"

        try:
            response = requests.get(url, headers=self.headers, params=params, timeout=15)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"API request failed: {e}")
        except ValueError as e:
            # Older versions of requests raise a plain ValueError subclass
            # (not a RequestException) when the body is not valid JSON.
            print(f"API returned invalid JSON: {e}")
        return None

    def get_team_id(self, team_name: str) -> Optional[int]:
        """Return the ID of the first team whose name contains ``team_name``.

        Matching is a case-insensitive substring test. Returns ``None`` when
        the request fails or no team matches.
        """
        data = self._make_api_request("teams")
        if not data or "response" not in data:
            return None

        needle = team_name.lower()
        # "response" may be present but null; treat that as "no teams".
        for team in data["response"] or []:
            name = team.get("name") or ""  # tolerate a missing/null name
            if needle in name.lower():
                print(f"Found team: {name} (ID: {team.get('id')})")
                return team.get("id")
        return None

    def get_team_players(self, team_id: int, season: str) -> List[Dict]:
        """Return the player records for a team and season (``[]`` on failure)."""
        params = {"team": team_id, "season": season}
        data = self._make_api_request("players", params)
        if not data:
            return []
        # Guard against "response": null so callers always receive a list.
        return data.get("response") or []

    def get_player_stats(self, player_id: int, player_name: str, season: str) -> Optional[Dict]:
        """Fetch a player's game logs and reduce them to season averages.

        Returns ``None`` when the request fails, the payload has no
        ``"response"`` key, or no averages could be computed.
        """
        params = {"id": player_id, "season": season}
        data = self._make_api_request("players/statistics", params)
        if not data or "response" not in data:
            return None

        game_logs = data["response"] or []
        return self._calculate_averages(game_logs, player_name)

    @staticmethod
    def _to_number(value) -> float:
        """Convert a raw stat value to float, treating ``None``/``""`` as 0.

        Raises:
            TypeError, ValueError: if the value cannot be parsed as a number.
        """
        if value is None or value == "":
            return 0.0
        return float(value)

    def _calculate_averages(self, game_logs: List[Dict], player_name: str) -> Optional[Dict]:
        """Compute per-game averages over the games a player actually played.

        A game counts as played when its ``"min"`` value is not ``None``,
        ``"00:00"`` or ``""``. Missing or null stat values inside a played
        game count as zero rather than invalidating the whole season.

        Returns:
            A dict with ``name``, ``ppg``, ``rpg``, ``apg``, ``fg_pct``,
            ``games_played``, ``total_games`` and ``last_updated``; or
            ``None`` if there are no played games or a stat value cannot be
            parsed as a number.
        """
        played_games = [g for g in game_logs if g.get("min") not in (None, "00:00", "")]
        games_played = len(played_games)

        if games_played == 0:
            print(f"No played games found for {player_name}")
            return None

        try:
            totals = {
                key: sum(self._to_number(g.get(key)) for g in played_games)
                for key in ("points", "totReb", "assists", "fgm", "fga")
            }
        except (TypeError, ValueError) as e:
            print(f"Error calculating stats for {player_name}: {e}")
            return None

        fga = totals["fga"]
        return {
            "name": player_name,
            "ppg": round(totals["points"] / games_played, 1),
            "rpg": round(totals["totReb"] / games_played, 1),
            "apg": round(totals["assists"] / games_played, 1),
            # Avoid division by zero for players without a field-goal attempt.
            "fg_pct": round(totals["fgm"] / fga, 3) if fga > 0 else 0.0,
            "games_played": games_played,
            "total_games": len(game_logs),
            "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

class DataExporter:
    """Write collected player statistics to disk."""

    @staticmethod
    def save_to_csv(data: List[Dict], filename: str, output_dir: str = "output") -> bool:
        """Write player statistics to ``output_dir/filename`` as CSV.

        The output directory is created if it does not exist and an existing
        file of the same name is overwritten.

        Args:
            data: One dict per player; keys must be among the CSV columns.
            filename: Name of the CSV file to create.
            output_dir: Directory that receives the file.

        Returns:
            ``True`` when the file was written; ``False`` when ``data`` is
            empty, the file system operation fails, or a row cannot be
            serialized (for example, it contains an unexpected key).
        """
        if not data:
            print("No data to save")
            return False

        fieldnames = [
            "name", "ppg", "rpg", "apg", "fg_pct",
            "games_played", "total_games", "last_updated"
        ]

        try:
            os.makedirs(output_dir, exist_ok=True)
            filepath = os.path.join(output_dir, filename)
            with open(filepath, "w", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(data)
        except (OSError, csv.Error, ValueError) as e:
            # DictWriter raises ValueError for rows with keys outside
            # fieldnames; report it like any other export failure instead
            # of letting it escape this method.
            print(f"Failed to save CSV: {e}")
            return False

        print(f"Successfully saved data to {filepath}")
        return True

def _prompt_non_empty(prompt: str, error_message: str) -> str:
    """Prompt until the user enters a non-blank value and return it stripped."""
    while True:
        value = input(prompt).strip()
        if value:
            return value
        print(error_message)


def get_config_interactive() -> Dict:
    """Collect and validate every setting from the user on stdin.

    Returns:
        A dict with the keys ``api_key``, ``team_name``, ``season`` (a
        4-digit string), ``delay`` (float seconds, at least 0.5) and
        ``output_dir`` (``"output"`` when left blank).
    """
    import math  # local import: only needed for the delay validation below

    print("\nNBA Player Statistics Aggregator - Interactive Configuration")
    print("="*60)

    config = {}

    # Get API Key
    config['api_key'] = _prompt_non_empty(
        "\nEnter your RapidAPI key: ",
        "API key cannot be empty. Please try again.",
    )

    # Get Team Name
    config['team_name'] = _prompt_non_empty(
        "\nEnter team name (e.g., 'Lakers', 'Warriors'): ",
        "Team name cannot be empty. Please try again.",
    )

    # Get Season Year
    while True:
        season = input("\nEnter season year (e.g., 2023 for 2022-23 season): ").strip()
        # str.isdigit() also accepts non-ASCII digits (e.g. superscripts),
        # so check explicitly for four ASCII digits.
        if len(season) == 4 and all(ch in "0123456789" for ch in season):
            config['season'] = season
            break
        print("Invalid season. Please enter a 4-digit year (e.g., 2023).")

    # Get API Delay
    while True:
        raw_delay = input("\nEnter API delay in seconds (recommended 1-5 to avoid rate limiting): ").strip()
        try:
            delay = float(raw_delay)
        except ValueError:
            delay = math.nan
        # float() happily parses "inf" and "nan"; neither is a usable sleep
        # interval, so they are rejected like any other invalid input.
        if math.isfinite(delay):
            config['delay'] = max(0.5, delay)  # Minimum 0.5 second delay
            break
        print("Invalid delay. Please enter a number (e.g., 2 for 2 seconds).")

    # Get Output Directory
    output_dir = input("\nEnter output directory (press Enter for default 'output'): ").strip()
    config['output_dir'] = output_dir or "output"

    return config

def get_config_manual() -> Dict:
    """Load configuration from creds.py plus hard-coded constants.

    The API key is read from ``creds.RAPIDAPI_KEY``; the remaining settings
    are the fixed values below.

    Returns:
        A dict with the keys ``api_key``, ``team_name``, ``season``,
        ``delay`` and ``output_dir``.

    Raises:
        ValueError: if creds.py cannot be imported, does not define
            ``RAPIDAPI_KEY``, or defines it as an empty/blank or non-string
            value.
    """
    try:
        import creds
        api_key = creds.RAPIDAPI_KEY
    except (ImportError, AttributeError) as e:
        raise ValueError("Missing or invalid creds.py file. Please create one with your RapidAPI key.") from e

    # Interactive mode refuses an empty key; apply the same rule here so a
    # placeholder creds.py fails fast instead of on the first API call.
    if not isinstance(api_key, str) or not api_key.strip():
        raise ValueError("RAPIDAPI_KEY in creds.py is empty or not a string. Please set it to your RapidAPI key.")

    # These would be defined at the top of the file in manual mode
    config = {
        'api_key': api_key.strip(),
        'team_name': "Lakers",  # TEAM_NAME constant
        'season': "2023",       # SEASON constant
        'delay': 6,             # DELAY constant
        'output_dir': "output"  # OUTPUT_DIR constant
    }

    return config

def run_with_config(config: Dict):
    """Run the full fetch -> aggregate -> export pipeline.

    Args:
        config: Dict providing ``api_key``, ``team_name``, ``season``,
            ``delay`` (seconds to sleep between player requests) and
            ``output_dir``.

    Progress and results are printed to stdout. The function returns early
    (after printing a message) when the team or its players cannot be found.
    """
    print(f"\nNBA Player Statistics Aggregator - {config['team_name']} {config['season']}")
    print(f"Starting at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Initialize components
    fetcher = NBADataFetcher(config['api_key'])
    exporter = DataExporter()

    # Get team ID
    print(f"\nSearching for team: {config['team_name']}...")
    team_id = fetcher.get_team_id(config['team_name'])
    if not team_id:
        print(f"Could not find team: {config['team_name']}")
        print("Please check the team name and try again.")
        return

    # Get players
    print(f"\nFetching players for {config['team_name']} {config['season']} season...")
    players = fetcher.get_team_players(team_id, config['season'])
    if not players:
        print("No players found for team")
        return

    total_players = len(players)
    print(f"\nProcessing {total_players} players...")

    # Process each player
    player_stats = []
    for i, player in enumerate(players, 1):
        # The API may send null for a name part; "or" covers both a missing
        # key and an explicit null.
        first_name = (player.get("firstname") or "").strip()
        last_name = (player.get("lastname") or "").strip()
        # Strip again so a single-name player has no stray leading or
        # trailing space.
        player_name = f"{first_name} {last_name}".strip() or "Unknown Player"
        player_id = player.get("id")

        if not player_id:
            print(f"Skipping player with missing ID: {player_name}")
            continue

        print(f"\n{i}/{total_players}: Processing {player_name}...")

        stats = fetcher.get_player_stats(player_id, player_name, config['season'])
        if stats:
            player_stats.append(stats)
            print(f"  → {stats['ppg']} PPG, {stats['rpg']} RPG, {stats['apg']} APG")
            print(f"  → FG%: {stats['fg_pct']:.1%} ({stats['games_played']}/{stats['total_games']} games)")

        # Respect rate limits (no need to wait after the final player)
        if i < total_players:
            time.sleep(config['delay'])

    if not player_stats:
        print("\nProcess completed but no valid player statistics were collected.")
        return

    # Export results
    filename = f"{config['team_name'].lower().replace(' ', '_')}_{config['season']}_stats.csv"
    saved = exporter.save_to_csv(player_stats, filename, config['output_dir'])

    # Only claim success (and advertise an output file) if the CSV was
    # actually written; save_to_csv reports failures through its return value.
    if saved:
        print("\nProcess completed successfully!")
    else:
        print("\nProcess finished, but the results could not be saved to CSV.")

    print("\nSummary:")
    print(f"- Team: {config['team_name']}")
    print(f"- Season: {config['season']}")
    print(f"- Players processed: {len(player_stats)}/{total_players}")
    if saved:
        print(f"- Output file: {os.path.join(config['output_dir'], filename)}")

def main():
    """Entry point: ask which configuration mode to use, then run the aggregator.

    Mode 1 gathers the settings through get_config_interactive(); mode 2
    loads them through get_config_manual(). Ctrl+C and any other exception
    are reported on stdout instead of propagating, and the farewell line is
    printed in every case.
    """
    mode_prompt = (
        "\nChoose mode:\n"
        "1. Interactive (enter configuration now)\n"
        "2. Manual (use creds.py and constants)\n"
        "Enter choice (1 or 2): "
    )
    valid_modes = ('1', '2')

    print("\nNBA Player Statistics Aggregator")
    print("="*40)

    try:
        # Keep asking until one of the two supported modes is entered.
        selection = input(mode_prompt).strip()
        while selection not in valid_modes:
            print("Invalid choice. Please enter 1 or 2.")
            selection = input(mode_prompt).strip()

        settings = get_config_interactive() if selection == '1' else get_config_manual()
        run_with_config(settings)
    except KeyboardInterrupt:
        print("\n\nProcess interrupted by user. Exiting...")
    except Exception as e:
        # Top-level boundary: report the failure rather than show a traceback.
        print(f"\nAn unexpected error occurred: {e}")
    finally:
        print("\nThank you for using the NBA Player Statistics Aggregator!")

# Run the aggregator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()