In [1]:
import pandas as pd

# Location of the NFL game statistics CSV, relative to the notebook's working directory.
NFL_GAME_STATS_CSV = "../data/nfl_game_stats.csv"

# Raw table used by every later cell (training and projection both read from `data`).
data = pd.read_csv(NFL_GAME_STATS_CSV)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Regression target: combined final score of both sides for each row of `data`.
data['total_points'] = data['score_away'].add(data['score_home'])

# NOTE(review): 'score_away' and 'score_home' are inputs AND their sum is the
# target, so the forest only has to learn an addition. The MAE printed below
# therefore says nothing about predictive skill on games that have not been
# played yet. The columns are kept here because get_projected_total_points
# feeds the same eight columns (in this order) to `model`; dropping them must
# be done in both places at once.
feature_columns = [
    'score_away', 'score_home',
    'redzone_att_away', 'redzone_att_home',
    'fumbles_away', 'fumbles_home',
    'interceptions_away', 'interceptions_home',
]
features = data[feature_columns]
target = data['total_points']

# Fixed 80/20 hold-out split; random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the hold-out rows and report the mean absolute error.
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
print(f"Mean Absolute Error: {mae}")



Mean Absolute Error: 0.19687183811129852


In [3]:
def get_projected_total_points(away_team, home_team, games=None, estimator=None):
    """Project the combined score for a matchup from historical per-game averages.

    Every row of ``games`` in which either team appears (as the away OR the
    home side, against any opponent) is selected. The eight model input
    columns are averaged over those rows and the resulting single row is
    passed to ``estimator.predict``.

    Parameters
    ----------
    away_team, home_team : str
        Team names; matched case-insensitively against the ``away`` and
        ``home`` columns.
    games : pandas.DataFrame, optional
        Table of historical games. Defaults to the notebook-level ``data``.
    estimator : object with ``predict``, optional
        Fitted regressor. Defaults to the notebook-level ``model``.

    Returns
    -------
    float or str
        The predicted total, or a "No data found ..." message string when no
        row involves either team (kept as a string for backward compatibility,
        so callers must check the type before formatting it as a number).
    """
    # Must match the columns, and their order, that the model was fitted on.
    feature_columns = ['score_away', 'score_home', 'redzone_att_away', 'redzone_att_home',
                       'fumbles_away', 'fumbles_home', 'interceptions_away', 'interceptions_home']

    # Resolve the defaults at call time so the function follows re-runs of the
    # loading/training cells instead of capturing stale objects.
    games = data if games is None else games
    estimator = model if estimator is None else estimator

    away_team = away_team.lower()
    home_team = home_team.lower()
    teams = [away_team, home_team]

    # A row qualifies when either team shows up on either side of the game.
    involves_team = games['away'].str.lower().isin(teams) | games['home'].str.lower().isin(teams)
    filtered_data = games[involves_team]

    if filtered_data.empty:
        return f"No data found for the teams: {away_team} or {home_team}."

    # Keep the averages as a one-row DataFrame with named columns: a model
    # fitted on a DataFrame warns about missing feature names (and cannot
    # validate column order) when it is handed a bare ndarray.
    input_features = filtered_data[feature_columns].mean().to_frame().T

    projected_total_points = estimator.predict(input_features)[0]
    return projected_total_points
# Interactive driver: ask for the two teams and report the projected total.
# Surrounding whitespace is stripped so that stray spaces in the typed name
# do not prevent a match; capitalize() only affects how the name is displayed.
away_team = input("Enter the away team: ").strip().capitalize()
home_team = input("Enter the home team: ").strip().capitalize()

result = get_projected_total_points(away_team, home_team)

if isinstance(result, str):
    # get_projected_total_points returns a message string when no game matches;
    # applying the ':.2f' format to a str would raise ValueError, so print it as-is.
    print(result)
else:
    print(f"The total of {away_team} vs. {home_team} would be: {result:.2f}")

The total of Lions vs. Eagles would be: 46.03


