<a href="https://colab.research.google.com/github/bhaveshasasik/nfl_game_predictor/blob/main/Random_Forest_NFL_Game_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier


# Function to find most impactful running back
def rb_func(file_path):
    """
    Find the highest-impact running back for each team in a rushing-stats CSV.

    The CSV is read using its second row as the header, and its columns are
    renamed positionally, so the file must contain exactly the 16 columns
    listed below, in that order. Only rows with ``Pos == 'RB'`` and at least
    60 attempts are scored.

    The impact score is a weighted sum of four min-max-normalized metrics:
    yards per attempt (0.4), touchdowns per attempt (0.3), success rate (0.2)
    and yards per game (0.1).

    Parameters:
        file_path: Location of the rushing-stats CSV, passed to
            ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: For each distinct 'Tm' value, the single row with
        the highest 'Impact_Score'. The metric columns (including 'Y/G') hold
        their normalized values, not the raw ones. The frame is empty when no
        player passes the filter.
    """
    # Load data
    data = pd.read_csv(file_path, header=1)

    # Rename columns for easy access
    data.columns = [
        'Rk', 'Player', 'Tm', 'Age', 'Pos', 'G', 'GS', 'Att', 'Yds', 'TD',
        '1D', 'Succ%', 'Lng', 'Y/A', 'Y/G', 'Fmb'
    ]

    # Filter for running backs with minimum attempts. Take an explicit copy so
    # the column assignments below write to an independent frame instead of a
    # slice of the loaded data (avoids pandas' SettingWithCopyWarning).
    data = data[(data['Pos'] == 'RB') & (data['Att'] >= 60)].copy()

    # Calculate additional metrics
    data['Yards_per_Attempt'] = data['Yds'] / data['Att']
    data['Touchdowns_per_Attempt'] = data['TD'] / data['Att']
    data['Success_Rate'] = data['Succ%'] / 100  # Assuming Succ% is already a percentage

    # Select metrics and normalize. The scaler cannot be fitted on zero rows,
    # so skip it when nobody qualified; the steps below then yield an empty
    # result instead of raising.
    metrics = ['Yards_per_Attempt', 'Touchdowns_per_Attempt', 'Success_Rate', 'Y/G']
    if not data.empty:
        scaler = MinMaxScaler()
        data[metrics] = scaler.fit_transform(data[metrics])

    # Calculate impact score
    data['Impact_Score'] = (
        0.4 * data['Yards_per_Attempt'] +
        0.3 * data['Touchdowns_per_Attempt'] +
        0.2 * data['Success_Rate'] +
        0.1 * data['Y/G']
    )

    # Get top player per team: sort best-first within each team, then keep the
    # first row of every team group.
    top_players_per_team = (
        data.sort_values(by=['Tm', 'Impact_Score'], ascending=[True, False])
        .groupby('Tm')
        .head(1)
    )

    return top_players_per_team


    # NOTE: converting the returned rows into a team-keyed dictionary is done
    # separately by get_impactful().



In [27]:
def get_impactful(top_players):
    """
    Convert a table of top players into a dictionary keyed by team.

    Parameters:
        top_players: DataFrame with 'Tm', 'Pos', 'Player' and 'Impact_Score'
            columns.

    Returns:
        dict: Maps each 'Tm' value to a dictionary with the keys 'Position',
        'Player' and 'Impact_Score'. If a team appears in several rows, the
        last row wins.
    """
    by_team = {}
    for _, player_row in top_players.iterrows():
        summary = {
            'Position': player_row['Pos'],
            'Player': player_row['Player'],
            'Impact_Score': player_row['Impact_Score'],
        }
        by_team[player_row['Tm']] = summary
    return by_team

In [28]:
# Score the running backs in the rushing-stats file and keep the top one per team.
impactful_rb = rb_func('../2023 rushing stats.csv')

# Convert the per-team rows into a dictionary keyed by team and display it.
sort_player = get_impactful(impactful_rb)
print(sort_player)



{'2TM': {'Position': 'RB', 'Player': 'Cam Akers', 'Impact_Score': 0.12730514096185735}, 'ARI': {'Position': 'RB', 'Player': 'James Conner', 'Impact_Score': 0.4835199881295264}, 'ATL': {'Position': 'RB', 'Player': 'Bijan Robinson', 'Impact_Score': 0.32359213802566994}, 'BAL': {'Position': 'RB', 'Player': 'Gus Edwards', 'Impact_Score': 0.5150651843467031}, 'BUF': {'Position': 'RB', 'Player': 'Latavius Murray', 'Impact_Score': 0.42558187338512193}, 'CAR': {'Position': 'RB', 'Player': 'Chuba Hubbard', 'Impact_Score': 0.2940749858651862}, 'CHI': {'Position': 'RB', 'Player': "D'Onta Foreman", 'Impact_Score': 0.39237034173318275}, 'CIN': {'Position': 'RB', 'Player': 'Joe Mixon', 'Impact_Score': 0.385267004982589}, 'CLE': {'Position': 'RB', 'Player': 'Kareem Hunt', 'Impact_Score': 0.3494676170360179}, 'DAL': {'Position': 'RB', 'Player': 'Tony Pollard', 'Impact_Score': 0.34571976049029}, 'DEN': {'Position': 'RB', 'Player': 'Jaleel McLaughlin', 'Impact_Score': 0.4068734918398893}, 'DET': {'Posit

In [None]:
def combine_impact_scores(qb_func, rb_func, wr_func, te_func, sft_func, cb_lb_func):
    """
    Merge the per-position impact dictionaries into one per-team mapping.

    Each argument is called with no arguments and must return a dictionary
    keyed by team. All six are called, in parameter order, before any merging
    takes place.

    Parameters:
        qb_func (function): Produces the QB impact dictionary.
        rb_func (function): Produces the RB impact dictionary.
        wr_func (function): Produces the WR impact dictionary.
        te_func (function): Produces the TE impact dictionary.
        sft_func (function): Produces the SFT impact dictionary.
        cb_lb_func (function): Produces the CB/LB impact dictionary.

    Returns:
        dict: Maps each team to a list of the values found for that team, in
        the order the positions were supplied. Teams missing from a position's
        dictionary simply get no entry for that position.
    """
    providers = (qb_func, rb_func, wr_func, te_func, sft_func, cb_lb_func)

    # Collect every position's dictionary up front, in positional order.
    per_position = [provider() for provider in providers]

    merged = {}
    for position_scores in per_position:
        for team, player_data in position_scores.items():
            # Start a team's list on first sight, then append to it.
            merged.setdefault(team, []).append(player_data)

    return merged


In [None]:

# Function to process general team data (standings and win/loss records)
def process_team_standings(file_path):
    """
    Load team standings and compute a normalized win percentage per team.

    The CSV must provide 'Team', 'Wins' and 'Losses' columns. Win percentage
    is wins divided by wins plus losses, then min-max scaled across all rows.

    Parameters:
        file_path: Location of the standings CSV, passed to
            ``pandas.read_csv``.

    Returns:
        dict: Maps each 'Team' value to
        ``{'Win_Percentage_Normalized': <scaled value>}``.
    """
    table = pd.read_csv(file_path)

    # Raw win percentage over decided games
    games_decided = table['Wins'] + table['Losses']
    table['Win_Percentage'] = table['Wins'] / games_decided

    # Min-max scale the win percentage across all teams
    normalizer = MinMaxScaler()
    scaled = normalizer.fit_transform(table[['Win_Percentage']])
    table['Win_Percentage_Normalized'] = scaled

    # Index by team so each team maps to its own small dictionary
    by_team = table[['Team', 'Win_Percentage_Normalized']].set_index('Team')
    return by_team.to_dict('index')

# Combine impact scores and general team data
def combine_team_data(rb_impact_data, team_data):
    """
    Join RB impact scores with normalized win percentages, team by team.

    Only teams present in both inputs are kept; teams found in just one of
    them are dropped.

    Parameters:
        rb_impact_data (dict): Maps team to a dictionary that has an
            'Impact_Score' entry.
        team_data (dict): Maps team to a dictionary that has a
            'Win_Percentage_Normalized' entry.

    Returns:
        pandas.DataFrame: Columns 'Team', 'Impact_Score' and 'Win_Percentage',
        one row per shared team in the iteration order of ``rb_impact_data``.
        The three columns are present even when no team is shared, so callers
        can select them without a KeyError.
    """
    columns = ['Team', 'Impact_Score', 'Win_Percentage']
    rows = [
        {
            'Team': team,
            'Impact_Score': rb_info['Impact_Score'],
            'Win_Percentage': team_data[team]['Win_Percentage_Normalized'],
        }
        for team, rb_info in rb_impact_data.items()
        if team in team_data
    ]
    # Pin the schema explicitly: an empty row list would otherwise produce a
    # DataFrame with no columns at all.
    return pd.DataFrame(rows, columns=columns)






# Random forest model
def train_random_forest(data):
    """
    Fit a random forest classifier on the combined team data.

    Parameters:
        data: DataFrame with 'Impact_Score' and 'Win_Percentage' feature
            columns and an 'Outcome' label column (binary: 1 = Win, 0 = Loss).

    Returns:
        RandomForestClassifier: The fitted model, built with
        ``random_state=42``.
    """
    feature_columns = ['Impact_Score', 'Win_Percentage']
    features = data[feature_columns]
    labels = data['Outcome']  # Binary outcome: 1 = Win, 0 = Loss

    # Fixed seed, as in every run of this notebook
    model = RandomForestClassifier(random_state=42)
    model.fit(features, labels)
    return model

# Example Usage
if __name__ == "__main__":
    # Calculate RB impact: score the backs with rb_func(), then convert the
    # per-team rows into the team-keyed dictionary combine_team_data() reads.
    rb_file_path = '2023_rushing_stats.csv'  # Path to rushing stats file
    rb_impact = get_impactful(rb_func(rb_file_path))
    print("RB Impact:", rb_impact)

    # Process team standings
    standings_file_path = 'team_standings.csv'  # Path to standings file
    team_standings = process_team_standings(standings_file_path)
    print("Team Standings:", team_standings)

    # Combine data
    combined_data = combine_team_data(rb_impact, team_standings)
    print("Combined Data:", combined_data)

    # train_random_forest() needs an 'Outcome' label column, which
    # combine_team_data() does not produce. Train only when the labels are
    # present and say so otherwise, rather than failing with a KeyError.
    if 'Outcome' in combined_data.columns:
        rf_model = train_random_forest(combined_data)
        print("Random Forest Model Trained.")
    else:
        print("Skipping Random Forest training: combined data has no 'Outcome' column.")
