In [5]:
import pandas as pd
import numpy as np
import os
from typing import Dict, List, Tuple, Optional
from IPython.display import display, HTML

In [17]:
class NFLRoleAnalyzer:
    def __init__(self, rosters_folder: str = "Rosters", 
                 player_logs_folder: str = "Player_Logs",
                 output_folder: str = "Player_Roles"):
        self.rosters_folder = rosters_folder
        self.player_logs_folder = player_logs_folder
        self.output_folder = output_folder
        
        # Create output folder if it doesn't exist
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
            print(f"Created output folder: {output_folder}")
        
        # Position groupings with their relevant statistics
        self.position_stats = {
            'QB': ['Cmp', 'Att', 'Yds', 'TD', 'Int', 'Rate'],
            'RB': ['Att', 'Yds', 'Y/A', 'TD', 'Tgt', 'Rec', 'Yds.1', 'Y/R', 'TD.1', 'Ctch%', 'Fmb', 'FL', 'GS'],
            'WR': ['Tgt', 'Rec', 'Yds', 'Y/R', 'TD', 'Ctch%', 'Fmb', 'FL'],
            'TE': ['Tgt', 'Rec', 'Yds', 'Y/R', 'TD', 'Ctch%', 'Fmb', 'FL'],
            'OL': ['GS', 'G'],
            'DL': ['Sk', 'Solo', 'Ast', 'Comb', 'TFL', 'QBHits', 'FF', 'FR'],
            'LB': ['Sk', 'Solo', 'Ast', 'Comb', 'TFL', 'QBHits', 'FF', 'FR'],
            'DB': ['Int', 'Solo', 'Ast', 'Comb', 'PD', 'FF', 'FR'],
            'K': ['FGA', 'FGM', 'XPA', 'XPM'],
            'P': ['Punts', 'Yds', 'Avg', 'In20']
        }
        
        self.stat_labels = {
            'QB': ['Completions', 'Attempts', 'Passing Yards', 'Passing TDs', 'Interceptions', 'Passer Rating'],
            'RB': ['Rushing Attempts', 'Rushing Yards', 'Yards per Attempt', 'Rushing TDs', 
                  'Targets', 'Receptions', 'Receiving Yards', 'Yards per Reception', 'Receiving TDs', 
                  'Catch Percentage', 'Fumbles', 'Fumbles Lost', 'Games Started'],
            'WR': ['Targets', 'Receptions', 'Receiving Yards', 'Yards per Reception', 
                  'Receiving TDs', 'Catch Percentage', 'Fumbles', 'Fumbles Lost'],
            'TE': ['Targets', 'Receptions', 'Receiving Yards', 'Yards per Reception', 
                  'Receiving TDs', 'Catch Percentage', 'Fumbles', 'Fumbles Lost']
        }
        
        # Status values to treat as inactive
        self.inactive_statuses = ['Inactive', 'Injured Reserve', 'Did Not Play']

    def get_available_positions(self) -> List[str]:
        """Return list of available positions to analyze."""
        return list(self.position_stats.keys())

    def clean_stat_value(self, value) -> float:
        """Clean and convert stat values to float, handling percentages and empty values."""
        if pd.isna(value) or value == '' or value in self.inactive_statuses:
            return 0.0
        if isinstance(value, str):
            # Remove percentage signs and convert to float
            value = value.strip('%')
            try:
                return float(value)
            except ValueError:
                return 0.0
        return float(value)

    def analyze_selected_role(self, position: str, update_existing: bool = True) -> str:
        """Analyze a specific role/position and save to CSV."""
        if position not in self.position_stats:
            print(f"Invalid position selected. Available positions are: {', '.join(self.get_available_positions())}")
            return ""

        print(f"\nAnalyzing {position} players...")
        
        all_player_data = []
        roster_files = [f for f in os.listdir(self.rosters_folder) if f.endswith('_roster.csv')]
        
        for roster_file in roster_files:
            team_name = roster_file.split('_')[0]
            roster_path = os.path.join(self.rosters_folder, roster_file)
            
            try:
                # Read roster
                roster = pd.read_csv(roster_path)
                players = roster[roster['Pos'].str.contains(position, case=False, na=False)]
                
                # Process each player
                for _, player in players.iterrows():
                    player_name = player['Player']
                    gamelog_path = os.path.join(self.player_logs_folder, team_name, f"{player_name}_gamelog.csv")
                    
                    if os.path.exists(gamelog_path):
                        try:
                            # Read gamelog
                            gamelog = pd.read_csv(gamelog_path)
                            
                            # Filter out inactive games
                            active_games = gamelog[~gamelog['Status'].isin(self.inactive_statuses)] if 'Status' in gamelog.columns else gamelog
                            
                            # Calculate stats
                            stats = {}
                            relevant_stats = self.position_stats[position]
                            
                            for stat in relevant_stats:
                                if stat in gamelog.columns:
                                    # Convert to numeric, replacing errors with 0
                                    values = [self.clean_stat_value(v) for v in gamelog[stat]]
                                    values = [v for v in values if v is not None]
                                    
                                    if values:
                                        stats[f"{stat}_total"] = sum(values)
                                        stats[f"{stat}_avg"] = sum(values) / len(values)
                                        stats[f"{stat}_max"] = max(values)
                                    else:
                                        stats[f"{stat}_total"] = 0
                                        stats[f"{stat}_avg"] = 0
                                        stats[f"{stat}_max"] = 0
                            
                            # Add player info
                            player_data = {
                                'Team': team_name,
                                'Player': player_name,
                                'Position': player['Pos'],
                                'Age': player['Age'],
                                'Experience': player['Yrs'],
                                'College': player['College/Univ'],
                                'Games_Played': len(active_games)
                            }
                            player_data.update(stats)
                            
                            all_player_data.append(player_data)
                        
                        except Exception as e:
                            print(f"Error processing {player_name}: {e}")
                            continue
            
            except Exception as e:
                print(f"Error processing {team_name}: {e}")
                continue
        
        if not all_player_data:
            print(f"No data found for position: {position}")
            return ""
        
        # Create DataFrame
        df = pd.DataFrame(all_player_data)
        
        # Sort by most relevant stat for the position
        sort_columns = {
            'QB': 'Yds_total',
            'RB': 'Yds_total',  # Rushing yards
            'WR': 'Rec_total',
            'TE': 'Rec_total',
            'DL': 'Sk_total',
            'LB': 'Comb_total',
            'DB': 'Int_total'
        }
        
        if position in sort_columns and sort_columns[position] in df.columns:
            df = df.sort_values(sort_columns[position], ascending=False)
        
        # Save to CSV
        if update_existing:
            filename = f"{position}_analysis.csv"
        else:
            timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
            filename = f"{position}_analysis_{timestamp}.csv"
        
        filepath = os.path.join(self.output_folder, filename)
        df.to_csv(filepath, index=False)
        
        print(f"\nAnalysis {'updated' if update_existing else 'saved'} to: {filepath}")
        print("\nPreview of the data:")
        display(df.head())
        
        return filepath

In [19]:
def analyze_role(role_name: str, update_existing: bool = True):
    """
    Analyze a specific role directly without interactive prompts.
    
    Args:
        role_name (str): The role/position to analyze (e.g., 'QB', 'WR', 'TE', etc.)
        update_existing (bool): If True, updates existing file instead of creating new one
    """
    analyzer = NFLRoleAnalyzer()
    
    # Convert to uppercase for consistency
    role = role_name.upper()
    
    # Validate the role
    available_positions = analyzer.get_available_positions()
    if role not in available_positions:
        print(f"Invalid role: {role}")
        print(f"Available roles are: {', '.join(available_positions)}")
        return
    
    # Analyze the role
    filepath = analyzer.analyze_selected_role(role, update_existing)
    
    if filepath:
        print(f"\nAnalysis complete! Data {'updated' if update_existing else 'saved'} to: {filepath}")
    else:
        print(f"No data was generated for role: {role}")

In [21]:
def display_role_data(role_name: str, show_all: bool = True):
    """Display the full dataset for a specific role."""
    # Convert to uppercase for consistency
    role = role_name.upper()
    
    # Construct filepath
    filepath = os.path.join("Player_Roles", f"{role}_analysis.csv")
    
    if not os.path.exists(filepath):
        print(f"No analysis file found for {role}. Please run analyze_role({role}) first.")
        return
    
    try:
        # Read the CSV file
        df = pd.read_csv(filepath)
        
        if show_all:
            # Set display options to show all rows and columns
            pd.set_option('display.max_rows', None)
            pd.set_option('display.max_columns', None)
            pd.set_option('display.width', None)
            
            print(f"\nFull dataset for {role}:")
            display(df)
            
            # Reset display options to defaults
            pd.reset_option('display.max_rows')
            pd.reset_option('display.max_columns')
            pd.reset_option('display.width')
        else:
            print(f"\nPreview of dataset for {role}:")
            display(df.head())
        
        # Display dataset info
        print("\nDataset Information:")
        print(f"Total Players: {len(df)}")
        print(f"Teams Represented: {len(df['Team'].unique())}")
        print("\nColumns:")
        for col in df.columns:
            print(f"- {col}")
            
    except Exception as e:
        print(f"Error reading file: {e}")

In [25]:
# Example usage:
if __name__ == "__main__":
    # Can be used in either of these ways:
    role_name = "WR"  # Options: QB, RB, WR, TE, OL, DL, LB, DB, K, P, LS
    analyze_role(role_name)

    # Display the full dataset
    display_role_data(role_name, show_all=True)


Analyzing WR players...

Analysis updated to: Player_Roles\WR_analysis.csv

Preview of the data:


Unnamed: 0,Team,Player,Position,Age,Experience,College,Games_Played,Tgt_total,Tgt_avg,Tgt_max,...,TD_max,Ctch%_total,Ctch%_avg,Ctch%_max,Fmb_total,Fmb_avg,Fmb_max,FL_total,FL_avg,FL_max
0,Bears,Keenan Allen,WR,32.0,11,California,147,1357.0,7.218085,20.0,...,2.0,9927.6,52.806383,100.0,16.0,0.085106,2.0,10.0,0.053191,2.0
7,Commanders,Terry McLaurin,WR,29.0,5,Ohio St.,91,677.0,7.202128,14.0,...,2.0,5691.8,60.551064,100.0,3.0,0.031915,1.0,2.0,0.021277,1.0
5,Commanders,Noah Brown,WR,28.0,7,Ohio St.,85,237.0,2.135135,11.0,...,2.0,3886.2,35.010811,100.0,1.0,0.009009,1.0,1.0,0.009009,1.0
8,Commanders,Olamide Zaccheaus,WR,27.0,5,Virginia,83,205.0,2.180851,9.0,...,2.0,3870.9,41.179787,100.0,4.0,0.042553,2.0,2.0,0.021277,1.0
1,Bears,DeAndre Carter,WR,31.0,6,Sacramento St.,105,171.0,1.628571,10.0,...,1.0,3872.7,36.882857,100.0,17.0,0.161905,2.0,5.0,0.047619,1.0



Analysis complete! Data updated to: Player_Roles\WR_analysis.csv

Full dataset for WR:


Unnamed: 0,Team,Player,Position,Age,Experience,College,Games_Played,Tgt_total,Tgt_avg,Tgt_max,Rec_total,Rec_avg,Rec_max,Yds_total,Yds_avg,Yds_max,Y/R_total,Y/R_avg,Y/R_max,TD_total,TD_avg,TD_max,Ctch%_total,Ctch%_avg,Ctch%_max,Fmb_total,Fmb_avg,Fmb_max,FL_total,FL_avg,FL_max
0,Bears,Keenan Allen,WR,32.0,11,California,147,1357.0,7.218085,20.0,930.0,4.946809,18.0,10771.0,57.292553,215.0,1709.89,9.09516,27.6,61.0,0.324468,2.0,9927.6,52.806383,100.0,16.0,0.085106,2.0,10.0,0.053191,2.0
1,Commanders,Terry McLaurin,WR,29.0,5,Ohio St.,91,677.0,7.202128,14.0,426.0,4.531915,11.0,6004.0,63.87234,141.0,1271.51,13.526702,29.0,31.0,0.329787,2.0,5691.8,60.551064,100.0,3.0,0.031915,1.0,2.0,0.021277,1.0
2,Commanders,Noah Brown,WR,28.0,7,Ohio St.,85,237.0,2.135135,11.0,141.0,1.27027,8.0,1902.0,17.135135,172.0,660.76,5.952793,25.5,6.0,0.054054,2.0,3886.2,35.010811,100.0,1.0,0.009009,1.0,1.0,0.009009,1.0
3,Commanders,Olamide Zaccheaus,WR,27.0,5,Virginia,83,205.0,2.180851,9.0,127.0,1.351064,8.0,1734.0,18.446809,103.0,813.13,8.650319,93.0,10.0,0.106383,2.0,3870.9,41.179787,100.0,4.0,0.042553,2.0,2.0,0.021277,1.0
4,Bears,DeAndre Carter,WR,31.0,6,Sacramento St.,105,171.0,1.628571,10.0,117.0,1.114286,7.0,1331.0,12.67619,73.0,557.6,5.310476,46.0,6.0,0.057143,1.0,3872.7,36.882857,100.0,17.0,0.161905,2.0,5.0,0.047619,1.0
5,Commanders,Dyami Brown,WR,25.0,3,North Carolina,58,82.0,1.322581,6.0,43.0,0.693548,3.0,625.0,10.080645,105.0,385.66,6.220323,52.5,4.0,0.064516,2.0,2033.3,32.795161,100.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Bears,Rome Odunze,WR,22.0,Rook,Washington,9,50.0,5.555556,11.0,28.0,3.111111,6.0,414.0,46.0,112.0,126.31,14.034444,20.8,1.0,0.111111,1.0,507.5,56.388889,100.0,1.0,0.111111,1.0,0.0,0.0,0.0
7,Bears,Tyler Scott,WR,23.0,1,Cincinnati,23,32.0,1.230769,5.0,17.0,0.653846,3.0,168.0,6.461538,49.0,105.33,4.051154,16.33,0.0,0.0,0.0,698.3,26.857692,100.0,1.0,0.038462,1.0,1.0,0.038462,1.0
8,Commanders,Luke McCaffrey,WR,23.0,Rook,NebraskaRice,11,17.0,1.545455,3.0,13.0,1.181818,3.0,134.0,12.181818,44.0,80.0,7.272727,23.0,0.0,0.0,0.0,583.3,53.027273,100.0,,,,,,



Dataset Information:
Total Players: 9
Teams Represented: 2

Columns:
- Team
- Player
- Position
- Age
- Experience
- College
- Games_Played
- Tgt_total
- Tgt_avg
- Tgt_max
- Rec_total
- Rec_avg
- Rec_max
- Yds_total
- Yds_avg
- Yds_max
- Y/R_total
- Y/R_avg
- Y/R_max
- TD_total
- TD_avg
- TD_max
- Ctch%_total
- Ctch%_avg
- Ctch%_max
- Fmb_total
- Fmb_avg
- Fmb_max
- FL_total
- FL_avg
- FL_max
