<a href="https://colab.research.google.com/github/bhaveshasasik/nfl_game_predictor/blob/main/Random_Forest_NFL_Game_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier

# Function to find most impactful running back
def calculate_top_rb_impact(file_path):
    """Find the highest-scoring running back for each team.

    Reads a rushing-stats CSV whose column header sits on the second line
    (``header=1``), keeps running backs with at least 60 attempts, min-max
    normalizes four metrics across that pool, and blends them into a weighted
    impact score (0.4 yards/attempt, 0.3 TDs/attempt, 0.2 success rate,
    0.1 yards/game).

    Args:
        file_path: Path or buffer accepted by ``pandas.read_csv``. The file
            must have exactly 16 columns, because the columns are renamed
            positionally.

    Returns:
        A dict keyed by team (``Tm``); each value is a dict with the keys
        ``'Position'``, ``'Player'`` and ``'Impact_Score'`` for that team's
        top running back. Empty when no row passes the filter.
    """
    # Load data
    data = pd.read_csv(file_path, header=1)

    # Rename columns for easy access
    data.columns = [
        'Rk', 'Player', 'Tm', 'Age', 'Pos', 'G', 'GS', 'Att', 'Yds', 'TD',
        '1D', 'Succ%', 'Lng', 'Y/A', 'Y/G', 'Fmb'
    ]

    # Filter for running backs with minimum attempts. The explicit copy makes
    # the column assignments below write to an independent frame rather than
    # to a slice of the unfiltered data (avoids SettingWithCopyWarning).
    data = data[(data['Pos'] == 'RB') & (data['Att'] >= 60)].copy()

    # The scaler cannot be fitted on zero rows, so bail out early when
    # nobody qualifies.
    if data.empty:
        return {}

    # Calculate additional metrics
    data['Yards_per_Attempt'] = data['Yds'] / data['Att']
    data['Touchdowns_per_Attempt'] = data['TD'] / data['Att']
    data['Success_Rate'] = data['Succ%'] / 100  # Assuming Succ% is already a percentage

    # Select metrics and normalize
    metrics = ['Yards_per_Attempt', 'Touchdowns_per_Attempt', 'Success_Rate', 'Y/G']
    scaler = MinMaxScaler()
    data[metrics] = scaler.fit_transform(data[metrics])

    # Calculate impact score
    data['Impact_Score'] = (
        0.4 * data['Yards_per_Attempt'] +
        0.3 * data['Touchdowns_per_Attempt'] +
        0.2 * data['Success_Rate'] +
        0.1 * data['Y/G']
    )

    # Get top player per team: sort each team's rows by descending score and
    # keep the first row of every group.
    top_players_per_team = (
        data.sort_values(by=['Tm', 'Impact_Score'], ascending=[True, False])
        .groupby('Tm')
        .head(1)
    )

    # Return impactful players as a dictionary with team names
    return {
        team: {
            'Position': position,
            'Player': player,
            'Impact_Score': impact_score,
        }
        for team, position, player, impact_score in zip(
            top_players_per_team['Tm'],
            top_players_per_team['Pos'],
            top_players_per_team['Player'],
            top_players_per_team['Impact_Score'],
        )
    }


# Function to process general team data (standings and win/loss records)
def process_team_standings(file_path):
    """Load team standings and return each team's normalized win percentage.

    Args:
        file_path: Path of a CSV readable by ``pandas.read_csv`` that has
            'Team', 'Wins' and 'Losses' columns.

    Returns:
        A dict keyed by team name; each value is a dict holding the single
        key ``'Win_Percentage_Normalized'`` (win percentage min-max scaled
        across all teams in the file).
    """
    table = pd.read_csv(file_path)

    # Raw win percentage: wins over wins plus losses.
    wins = table['Wins']
    decided_games = wins + table['Losses']
    table['Win_Percentage'] = wins / decided_games

    # Rescale across the teams present in the file.
    normalizer = MinMaxScaler()
    table['Win_Percentage_Normalized'] = normalizer.fit_transform(table[['Win_Percentage']])

    # Index by team so the result can be looked up by team name.
    by_team = table[['Team', 'Win_Percentage_Normalized']].set_index('Team')
    return by_team.to_dict('index')

# Combine impact scores and general team data
def combine_team_data(rb_impact_data, team_data):
    """Join per-team RB impact scores with per-team standings data.

    Only teams present in both mappings are kept; row order follows the
    iteration order of ``rb_impact_data``.

    Args:
        rb_impact_data: Mapping of team -> dict containing an
            ``'Impact_Score'`` entry.
        team_data: Mapping of team -> dict containing a
            ``'Win_Percentage_Normalized'`` entry.

    Returns:
        A DataFrame with the columns 'Team', 'Impact_Score' and
        'Win_Percentage'. The columns are present even when no team matches,
        so downstream column selection does not fail on an empty result.
    """
    columns = ['Team', 'Impact_Score', 'Win_Percentage']
    rows = [
        {
            'Team': team,
            'Impact_Score': rb_info['Impact_Score'],
            'Win_Percentage': team_data[team]['Win_Percentage_Normalized'],
        }
        for team, rb_info in rb_impact_data.items()
        if team in team_data
    ]
    # Passing the columns explicitly fixes the schema for the empty case too.
    return pd.DataFrame(rows, columns=columns)


def calculate_team_impact_scores(players_dict):
    """Total the position-weighted impact scores of each team's players.

    Args:
        players_dict: Mapping of team -> iterable of
            ``(position, player, impact_score)`` triples.

    Returns:
        A dict mapping each team to the sum of ``impact_score * weight`` over
        its players. A team with no players maps to 0.
    """
    # Arbitrary per-position multipliers; unlisted positions count at 1.
    weight_by_position = {
        "QB": 1.5,    # Quarterback
        "WR": 1.2,    # Wide Receiver
        "TE": 1.1,    # Tight End
        "RB": 1.0,    # Running Back
        "SFT": 0.9,   # Safety
        "CB/LB": 0.8  # Cornerback/Linebacker
    }

    return {
        team: sum(
            score * weight_by_position.get(position, 1)
            for position, _name, score in roster
        )
        for team, roster in players_dict.items()
    }



# Random forest model
def train_random_forest(data):
    """Fit a random forest classifier on the combined team features.

    Args:
        data: DataFrame with 'Impact_Score' and 'Win_Percentage' feature
            columns and an 'Outcome' label column.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    feature_columns = ['Impact_Score', 'Win_Percentage']
    features = data[feature_columns]
    labels = data['Outcome']  # Binary outcome: 1 = Win, 0 = Loss

    # A fixed seed is passed so repeated runs build the forest the same way.
    model = RandomForestClassifier(random_state=42)
    model.fit(features, labels)
    return model

# Example Usage
if __name__ == "__main__":
    # Calculate RB impact
    rb_file_path = '2023_rushing_stats.csv'  # Path to rushing stats file
    rb_impact = calculate_top_rb_impact(rb_file_path)
    print("RB Impact:", rb_impact)

    # Process team standings
    standings_file_path = 'team_standings.csv'  # Path to standings file
    team_standings = process_team_standings(standings_file_path)
    print("Team Standings:", team_standings)

    # Combine data
    combined_data = combine_team_data(rb_impact, team_standings)
    print("Combined Data:", combined_data)

    # Train random forest. combine_team_data() only produces the Team,
    # Impact_Score and Win_Percentage columns, so the 'Outcome' label has to
    # be added to combined_data before training; without it the fit would
    # fail with a KeyError, so skip training and say why instead.
    if 'Outcome' in combined_data.columns:
        rf_model = train_random_forest(combined_data)
        print("Random Forest Model Trained.")
    else:
        print("Skipping Random Forest training: 'Outcome' column is missing from combined data.")
