In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import csv

nfl_data_path = "output_data/nfl.csv"

nfl_data = pd.read_csv(nfl_data_path, encoding="latin1")
nfl_data

Unnamed: 0.1,Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,raw_score_difference (home-away),team_away,...,favourite_score_difference,spread_favorite,win_loss,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
0,10809,9/5/2013,2013,1,False,DEN,49,27,22,BAL,...,-22,-7.5,True,49.5,Sports Authority Field at Mile High,False,83.0,7.0,30.0,
1,10810,9/8/2013,2013,1,False,BUF,21,23,-2,NE,...,-2,-10.5,False,51.5,Ralph Wilson Stadium,False,65.0,6.0,55.0,
2,10811,9/8/2013,2013,1,False,CAR,7,12,-5,SEA,...,-5,-3.5,True,45.0,Bank of America Stadium,False,87.0,7.0,48.0,
3,10812,9/8/2013,2013,1,False,CHI,24,21,3,CIN,...,-3,-3.0,False,42.0,Soldier Field,False,77.0,13.0,79.0,
4,10813,9/8/2013,2013,1,False,CLE,10,23,-13,MIA,...,13,-2.5,False,40.5,FirstEnergy Stadium,False,72.0,16.0,80.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2692,13511,1/22/2023,2022,Division,True,BUF,10,27,-17,CIN,...,17,-6.0,False,48.5,Highmark Stadium,False,32.0,4.0,100.0,snow
2693,13512,1/22/2023,2022,Division,True,SF,19,12,7,DAL,...,-7,-3.5,True,46.5,Levi's Stadium,False,55.0,19.0,47.0,
2694,13513,1/29/2023,2022,Conference,True,KC,23,20,3,CIN,...,-3,-1.5,True,48.0,GEHA Field at Arrowhead Stadium,False,22.0,13.0,55.0,
2695,13514,1/29/2023,2022,Conference,True,PHI,31,7,24,SF,...,-24,-2.5,True,45.5,Lincoln Financial Field,False,52.0,14.0,48.0,rain


In [2]:
# Separate columns needed
nfl_data = nfl_data[["schedule_season","team_home", "score_home", "score_away", "raw_score_difference (home-away)", "team_away", "team_favorite_id", "favourite_direction", "favourite_score_difference", "spread_favorite", "win_loss","weather_temperature", "weather_wind_mph"]].copy()

# Rename multiple columns
nfl_data_clean = nfl_data.rename(columns={'schedule_season': 'Season', 'team_home': 'Home Team', 'score_home': 'Home Team Score','score_away': 'Away Team Score', 'raw_score_difference (home-away)': 'Score Difference', 'team_away': 'Away Team ID', 'team_favorite_id': 'Team Favourite ID', 
                                          'favourite_direction': 'Home/Away Direction', 'favourite_score_difference':'Home/Away Direction Diff','spread_favorite':'Spread Favourite', 'win_loss':'Win/Loss','weather_temperature' : 'Temperature (°F)', 'weather_wind_mph' : 'Wind (MPH)'})

nfl_data_clean

Unnamed: 0,Season,Home Team,Home Team Score,Away Team Score,Score Difference,Away Team ID,Team Favourite ID,Home/Away Direction,Home/Away Direction Diff,Spread Favourite,Win/Loss,Temperature (°F),Wind (MPH)
0,2013,DEN,49,27,22,BAL,DEN,Home,-22,-7.5,True,83.0,7.0
1,2013,BUF,21,23,-2,NE,NE,Away,-2,-10.5,False,65.0,6.0
2,2013,CAR,7,12,-5,SEA,SEA,Away,-5,-3.5,True,87.0,7.0
3,2013,CHI,24,21,3,CIN,CHI,Home,-3,-3.0,False,77.0,13.0
4,2013,CLE,10,23,-13,MIA,CLE,Home,13,-2.5,False,72.0,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2692,2022,BUF,10,27,-17,CIN,BUF,Home,17,-6.0,False,32.0,4.0
2693,2022,SF,19,12,7,DAL,SF,Home,-7,-3.5,True,55.0,19.0
2694,2022,KC,23,20,3,CIN,KC,Home,-3,-1.5,True,22.0,13.0
2695,2022,PHI,31,7,24,SF,PHI,Home,-24,-2.5,True,52.0,14.0


In [3]:
# Determine whether it was a win or loss for the Spread Favourite
nfl_data_clean["Win/Loss"] = nfl_data_clean["Win/Loss"].apply(lambda x: 'Win' if x==True else 'Loss')
nfl_data_clean

Unnamed: 0,Season,Home Team,Home Team Score,Away Team Score,Score Difference,Away Team ID,Team Favourite ID,Home/Away Direction,Home/Away Direction Diff,Spread Favourite,Win/Loss,Temperature (°F),Wind (MPH)
0,2013,DEN,49,27,22,BAL,DEN,Home,-22,-7.5,Win,83.0,7.0
1,2013,BUF,21,23,-2,NE,NE,Away,-2,-10.5,Loss,65.0,6.0
2,2013,CAR,7,12,-5,SEA,SEA,Away,-5,-3.5,Win,87.0,7.0
3,2013,CHI,24,21,3,CIN,CHI,Home,-3,-3.0,Loss,77.0,13.0
4,2013,CLE,10,23,-13,MIA,CLE,Home,13,-2.5,Loss,72.0,16.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2692,2022,BUF,10,27,-17,CIN,BUF,Home,17,-6.0,Loss,32.0,4.0
2693,2022,SF,19,12,7,DAL,SF,Home,-7,-3.5,Win,55.0,19.0
2694,2022,KC,23,20,3,CIN,KC,Home,-3,-1.5,Win,22.0,13.0
2695,2022,PHI,31,7,24,SF,PHI,Home,-24,-2.5,Win,52.0,14.0


In [4]:
# Separate columns needed
nfl_data_clean1 = nfl_data_clean[["Season","Win/Loss","Temperature (°F)", "Wind (MPH)"]].copy()
nfl_data_clean1

Unnamed: 0,Season,Win/Loss,Temperature (°F),Wind (MPH)
0,2013,Win,83.0,7.0
1,2013,Loss,65.0,6.0
2,2013,Win,87.0,7.0
3,2013,Loss,77.0,13.0
4,2013,Loss,72.0,16.0
...,...,...,...,...
2692,2022,Loss,32.0,4.0
2693,2022,Win,55.0,19.0
2694,2022,Win,22.0,13.0
2695,2022,Win,52.0,14.0


In [5]:
# Calculate how many games were a Win or a Loss for each NFL Season
win_loss_weather_count = nfl_data_clean1.groupby(["Season", "Win/Loss", "Temperature (°F)", "Wind (MPH)"]).count()
win_loss_weather_count

Season,Win/Loss,Temperature (°F),Wind (MPH)
2013,Loss,9.0,9.0
2013,Loss,20.0,10.0
2013,Loss,23.0,5.0
2013,Loss,23.0,22.0
2013,Loss,25.0,8.0
...,...,...,...
2022,Win,52.0,14.0
2022,Win,55.0,19.0
2022,Win,57.0,11.0
2022,Win,58.0,2.0


In [6]:
# Analysis to see if Temperature (°F) affects Win/Loss
# Summary statistics table for weather metrics of Favourite Spread 
weather_agg = win_loss_count.groupby(["Win/Loss"])[["Temperature (°F)", "Wind (MPH)"]].agg(["mean", "median", "var", "std", "sem"], skipna=False)
weather_agg

# Average Temp = 62.91(°F) (17.17 Celsius), Min Temp = -6.00(°F) (-21.11 Celsius), Max Temp = 97.00(°F) (36.11 Celsius) 

KeyError: "Columns not found: 'Wind (MPH)', 'Temperature (°F)'"

In [None]:
# Import seaborn
import seaborn as sns

# Create a Bar Chart
plt.figure(figsize=(10.5,5))
sns.set_palette("Set1")
ax = sns.barplot(data= win_loss_count, x= "Season", y= "Spread Favourite", hue="Win/Loss")
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
plt.savefig("winlossbar.png")

In [None]:
win_loss_team = nfl_data_clean.groupby(["Home Team", "Win/Loss"]).count()
win_loss_team = nfl_data_clean.groupby(["Home Team", "Win/Loss"]).count()["Spread Favourite"].reset_index()
win_loss_team

In [None]:
# Import seaborn
import seaborn as sns

# Create a Bar Chart
plt.figure(figsize=(14,5))

plt.xticks(rotation=75)

sns.set_palette("Set3")

sns.barplot(data= win_loss_team, x="Home Team", y="Spread Favourite", hue="Win/Loss")
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
plt.savefig("hometeam.png")

In [None]:
# Track the max and min values
max_value = float('-inf')
min_value = float('inf')

# Iterate over the values in the column
for value in win_loss_team['Spread Favourite']:
    if value > max_value:
        max_value = value
    if value < min_value:
        min_value = value

# Print the max and min values
print("Maximum value:", max_value)
print("Minimum value:", min_value)

In [None]:
max_team = win_loss_team.groupby(["Home Team"]).max()["Spread Favourite"]
max_team

In [None]:
min_team = win_loss_team.groupby(["Home Team"]).min()["Spread Favourite"]
min_team

In [None]:
win_loss_team = nfl_data_clean.groupby(["Away Team ID", "Win/Loss"]).count()
win_loss_team = nfl_data_clean.groupby(["Away Team ID", "Win/Loss"]).count()["Spread Favourite"].reset_index()
win_loss_team

In [None]:
# Import seaborn
import seaborn as sns

# Create a Bar Chart
plt.figure(figsize=(15,5))

plt.xticks(rotation=75)
sns.set_palette("Set1")

sns.barplot(data= win_loss_team, x="Away Team ID", y="Spread Favourite", hue="Win/Loss")
sns.move_legend(ax, "upper right", bbox_to_anchor=(1, 1))
plt.savefig("awayteam.png")

In [None]:
# Track the max and min values
max_value = float('-inf')
min_value = float('inf')

# Iterate over the values in the column
for value in win_loss_team['Spread Favourite']:
    if value > max_value:
        max_value = value
    if value < min_value:
        min_value = value

# Print the max and min values
print("Maximum value:", max_value)
print("Minimum value:", min_value)

In [None]:
max_team = win_loss_team.groupby(["Away Team ID"]).max()["Spread Favourite"]
max_team

In [None]:
min_team = win_loss_team.groupby(["Away Team ID"]).min()["Spread Favourite"]
min_team

In [None]:
nfl_data.describe()

In [None]:
nfl_data.head()

In [None]:
nfl_data.tail()

In [None]:
nfl_data.dropna (inplace=True)
nfl_data