<a href="https://colab.research.google.com/github/bhaveshasasik/nfl_game_predictor/blob/main/Random_forest_FINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import os

In [29]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import pandas as pd

def preprocess_all_files(file_paths):
    """Load every per-team CSV, clean it, and stack the results into one DataFrame.

    For each file this:
      * drops rows whose ``'Unnamed: 0'`` value is the literal string ``'Week'``;
      * renames the raw ``Score*`` / ``Offense*`` / ``Defense*`` columns to
        descriptive names;
      * coerces the renamed columns to numbers and drops any row where one of
        them is missing or non-numeric;
      * adds a ``'Team'`` column holding the part of the file name that
        precedes the first underscore (``'../ravens_data.csv'`` -> ``'ravens'``).

    Args:
        file_paths: Iterable of CSV paths, given as ``str`` or ``os.PathLike``.

    Returns:
        pandas.DataFrame: All cleaned rows from all files, re-indexed from 0.

    Raises:
        ValueError: If ``file_paths`` yields no paths.
        KeyError: If a file lacks ``'Unnamed: 0'`` or one of the stat columns.
    """
    # Local import so the function does not depend on another notebook cell.
    from pathlib import Path

    column_names = {
        'Score': 'Team_Points',
        'Score.1': 'Opponent_Points',
        'Offense': 'Team_TotalYards',
        'Offense.1': 'Team_PassYards',
        'Offense.2': 'Team_RushYards',
        'Offense.3': 'Team_Turnovers',
        'Defense': 'Opponent_TotalYards',
        'Defense.1': 'Opponent_PassYards',
        'Defense.2': 'Opponent_RushYards',
        'Defense.3': 'Opponent_Turnovers',
    }
    numeric_cols = list(column_names.values())

    frames = []
    for file_path in file_paths:
        raw = pd.read_csv(file_path)

        # rename() returns a new frame, so the column writes below never land
        # on a filtered view of `raw` (which triggers SettingWithCopyWarning).
        data = raw.loc[raw['Unnamed: 0'] != 'Week'].rename(columns=column_names)

        # Anything that is not a number becomes NaN and is dropped next.
        data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric, errors='coerce')
        data = data.dropna(subset=numeric_cols).copy()

        # Path(...).name copes with PathLike inputs and OS-specific separators,
        # unlike splitting the raw string on '/'.
        data['Team'] = Path(file_path).name.split('_')[0]
        frames.append(data)

    if not frames:
        raise ValueError("file_paths is empty: no team data to combine")

    return pd.concat(frames, ignore_index=True)

def train_combined_model(combined_data):
    """Fit a random-forest win/loss classifier on the combined game table.

    An integer ``'Outcome'`` column is written onto ``combined_data``: 1 when
    ``Team_Points`` is strictly greater than ``Opponent_Points``, otherwise 0.
    The eight yardage/turnover columns are standardised and used as features.

    Args:
        combined_data: DataFrame with the point and stat columns named below.

    Returns:
        tuple: ``(model, combined_data, scaler)`` - the fitted classifier, the
        same DataFrame (now carrying ``'Outcome'``), and the fitted scaler.
    """
    team_columns = ['Team_TotalYards', 'Team_PassYards', 'Team_RushYards', 'Team_Turnovers']
    opponent_columns = ['Opponent_TotalYards', 'Opponent_PassYards',
                        'Opponent_RushYards', 'Opponent_Turnovers']
    feature_columns = team_columns + opponent_columns

    # Label each row: did the team outscore its opponent?
    team_won = combined_data['Team_Points'] > combined_data['Opponent_Points']
    combined_data['Outcome'] = team_won.astype(int)

    # Standardise the feature matrix; the fitted scaler is returned for reuse.
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(combined_data[feature_columns])

    # Fixed seed keeps the fitted forest reproducible.
    model = RandomForestClassifier(random_state=42)
    model.fit(scaled_features, combined_data['Outcome'])

    return model, combined_data, scaler

def _first_row_offense_stats(combined_data, team):
    """Return the four ``Team_*`` stat values from the first row stored for *team*."""
    stat_columns = ['Team_TotalYards', 'Team_PassYards', 'Team_RushYards', 'Team_Turnovers']
    team_rows = combined_data[combined_data['Team'] == team]
    if team_rows.empty:
        # Same exception type that .iloc[0] raised here, but naming the team.
        raise IndexError(f"No rows found for team {team!r} in combined_data")
    return team_rows.iloc[0][stat_columns].tolist()


def predict_matchup(model, combined_data, scaler, team_a, team_b):
    """Predict the winner of ``team_a`` vs ``team_b`` and print the result.

    The feature row is built from the first row stored for each team:
    ``team_a``'s ``Team_*`` stats fill the ``Team_*`` slots and ``team_b``'s
    ``Team_*`` stats fill the ``Opponent_*`` slots.
    NOTE(review): only the first row per team is used, not an aggregate over
    all of its rows - confirm that this is the intended representation.

    Args:
        model: Fitted classifier exposing ``predict``, ``predict_proba`` and
            ``classes_``, where label 1 means "team_a wins".
        combined_data: DataFrame with a ``'Team'`` column and ``Team_*`` stats.
        scaler: Fitted transformer exposing ``transform``.
        team_a: Team name treated as the "Team" side of the matchup.
        team_b: Team name treated as the "Opponent" side of the matchup.

    Returns:
        tuple: ``(outcome, probability)`` - the predicted winner's name and the
        model's probability for that predicted winner.

    Raises:
        IndexError: If either team has no rows in ``combined_data``.
    """
    feature_columns = [
        'Team_TotalYards', 'Team_PassYards', 'Team_RushYards', 'Team_Turnovers',
        'Opponent_TotalYards', 'Opponent_PassYards', 'Opponent_RushYards', 'Opponent_Turnovers'
    ]
    team_a_stats = _first_row_offense_stats(combined_data, team_a)
    team_b_stats = _first_row_offense_stats(combined_data, team_b)

    # Keep the column names so the row matches what the scaler is transformed with.
    matchup_stats = pd.DataFrame([team_a_stats + team_b_stats], columns=feature_columns)
    matchup_stats = scaler.transform(matchup_stats)

    prediction = model.predict(matchup_stats)
    class_probabilities = model.predict_proba(matchup_stats)[0]

    predicted_label = prediction[0]
    outcome = team_a if predicted_label == 1 else team_b

    # predict_proba columns are ordered like model.classes_. Report the
    # probability of the class that was actually predicted, so the printed
    # confidence always refers to the printed winner (it used to be
    # P(team_a wins) even when team_b was the predicted winner).
    probability = class_probabilities[list(model.classes_).index(predicted_label)]

    print(f"Predicted Winner: {outcome}")
    print(f"Confidence: {probability:.2f}")
    return outcome, probability

# One CSV per team, all in the parent directory and named '<team>_data.csv'.
file_paths = [
    f'../{team_name}_data.csv'
    for team_name in (
        'bears', 'bengals', 'bills', 'broncos',
        'browns', 'buccaneers', 'cardinals', 'chargers',
        'chiefs', 'colts', 'commanders', 'cowboys',
        'dolphins', 'eagles', 'falcons', 'giants',
        'jaguars', 'jets', 'lions', 'packers',
        'panthers', 'patriots', 'raiders', 'rams',
        'ravens', 'saints', 'seahawks', 'steelers',
        'texans', 'titans', 'vikings', '49ers',
    )
]

# Build the combined game table from every team file, then fit the classifier.
combined_data = preprocess_all_files(file_paths)
model, combined_data, scaler = train_combined_model(combined_data)

# Pick the two sides of the matchup and run the prediction; the call stays the
# last expression so the notebook also displays the returned tuple.
team_a, team_b = "ravens", "panthers"
predict_matchup(
    model=model,
    combined_data=combined_data,
    scaler=scaler,
    team_a=team_a,
    team_b=team_b,
)


Predicted Winner: ravens
Confidence: 0.61


('ravens', 0.61)