## Data Cleaning

In [67]:
##ADD YOUR IMPORTS HERE...
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime as dt
import os
import seaborn as sns
import re
import folium
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objs as go
from jupyter_dash import JupyterDash

In [68]:
spreadspoke_df = pd.read_csv('spreadspoke_scores.csv')
teams_df = pd.read_csv('nfl_teams.csv')
stadiums_df = pd.read_csv('nfl_stadiums.csv', encoding='ISO-8859-1')

In [69]:
#need to make sure that data has non null values for the betting data
spreadspoke_df = spreadspoke_df.dropna(subset=['team_favorite_id','spread_favorite','over_under_line'])

#converting the date column to datetimes
spreadspoke_df['schedule_date'] = pd.to_datetime(spreadspoke_df['schedule_date'], format='%m/%d/%Y')

#dropping games before 2002
spreadspoke_df = spreadspoke_df[spreadspoke_df['schedule_season'] >= 2002]

#spreadspoke_df.head()

In [70]:
#Walker
#Clean up teams_df to remove useless information
teams_df.drop(['team_name_short', 'team_id_pfr', 'team_conference_pre2002', 'team_division_pre2002'], axis=1, inplace=True)
teams_df

Unnamed: 0,team_name,team_id,team_conference,team_division
0,Arizona Cardinals,ARI,NFC,NFC West
1,Atlanta Falcons,ATL,NFC,NFC South
2,Baltimore Colts,IND,AFC,
3,Baltimore Ravens,BAL,AFC,AFC North
4,Boston Patriots,NE,AFC,
5,Buffalo Bills,BUF,AFC,AFC East
6,Carolina Panthers,CAR,NFC,NFC South
7,Chicago Bears,CHI,NFC,NFC North
8,Cincinnati Bengals,CIN,AFC,AFC North
9,Cleveland Browns,CLE,AFC,AFC North


In [71]:
#Create a dictionary mapping team names to IDs
team_id_mapping = dict(zip(teams_df['team_name'], teams_df['team_id']))

#Add columns with team ID tags
spreadspoke_df['team_home_id'] = spreadspoke_df['team_home'].replace(team_id_mapping)
spreadspoke_df['team_away_id'] = spreadspoke_df['team_away'].replace(team_id_mapping)
spreadspoke_df = spreadspoke_df[spreadspoke_df['team_favorite_id'] != 'PICK']
spreadspoke_df.head(5)

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,team_home_id,team_away_id
7872,2002-09-05,2002,1,False,New York Giants,13.0,16.0,San Francisco 49ers,SF,-4.0,39,Giants Stadium,False,75.0,12.0,58.0,,NYG,SF
7873,2002-09-08,2002,1,False,Buffalo Bills,31.0,37.0,New York Jets,NYJ,-3.0,43,Ralph Wilson Stadium,False,75.0,7.0,50.0,,BUF,NYJ
7875,2002-09-08,2002,1,False,Chicago Bears,27.0,23.0,Minnesota Vikings,CHI,-4.5,41,Memorial Stadium (Champaign),False,76.0,5.0,75.0,,CHI,MIN
7876,2002-09-08,2002,1,False,Cincinnati Bengals,6.0,34.0,San Diego Chargers,CIN,-3.0,37,Paul Brown Stadium,False,81.0,5.0,50.0,,CIN,LAC
7877,2002-09-08,2002,1,False,Cleveland Browns,39.0,40.0,Kansas City Chiefs,CLE,-2.0,36,FirstEnergy Stadium,False,78.0,7.0,54.0,,CLE,KC


In [72]:
# Step 1: Determine the winning team
spreadspoke_df['winning_team'] = spreadspoke_df.apply(lambda row: row['team_home_id'] if row['score_home'] > row['score_away'] else row['team_away_id'], axis=1)

# Step 2: Create the new column
spreadspoke_df['favorite_won'] = spreadspoke_df['team_favorite_id'] == spreadspoke_df['winning_team']

# Prefer to have 'Yes'/'No' instead of True/False:
spreadspoke_df['favorite_won'] = (spreadspoke_df['team_favorite_id'] == spreadspoke_df['winning_team']).map({True: 'Yes', False: 'No'})

spreadspoke_df.head(10)

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,team_home_id,team_away_id,winning_team,favorite_won
7872,2002-09-05,2002,1,False,New York Giants,13.0,16.0,San Francisco 49ers,SF,-4.0,39.0,Giants Stadium,False,75.0,12.0,58.0,,NYG,SF,SF,Yes
7873,2002-09-08,2002,1,False,Buffalo Bills,31.0,37.0,New York Jets,NYJ,-3.0,43.0,Ralph Wilson Stadium,False,75.0,7.0,50.0,,BUF,NYJ,NYJ,Yes
7875,2002-09-08,2002,1,False,Chicago Bears,27.0,23.0,Minnesota Vikings,CHI,-4.5,41.0,Memorial Stadium (Champaign),False,76.0,5.0,75.0,,CHI,MIN,CHI,Yes
7876,2002-09-08,2002,1,False,Cincinnati Bengals,6.0,34.0,San Diego Chargers,CIN,-3.0,37.0,Paul Brown Stadium,False,81.0,5.0,50.0,,CIN,LAC,LAC,No
7877,2002-09-08,2002,1,False,Cleveland Browns,39.0,40.0,Kansas City Chiefs,CLE,-2.0,36.0,FirstEnergy Stadium,False,78.0,7.0,54.0,,CLE,KC,KC,No
7878,2002-09-08,2002,1,False,Denver Broncos,23.0,16.0,St. Louis Rams,LAR,-3.0,51.0,Sports Authority Field at Mile High,False,73.0,13.0,45.0,,DEN,LAR,DEN,No
7879,2002-09-08,2002,1,False,Green Bay Packers,37.0,34.0,Atlanta Falcons,GB,-7.0,42.5,Lambeau Field,False,72.0,6.0,78.0,,GB,ATL,GB,Yes
7880,2002-09-08,2002,1,False,Houston Texans,19.0,10.0,Dallas Cowboys,DAL,-8.5,33.5,Reliant Stadium,False,72.0,0.0,,indoor,HOU,DAL,HOU,No
7881,2002-09-08,2002,1,False,Jacksonville Jaguars,25.0,28.0,Indianapolis Colts,IND,-3.5,44.0,EverBank Field,False,82.0,14.0,77.0,,JAX,IND,IND,Yes
7882,2002-09-08,2002,1,False,Miami Dolphins,49.0,21.0,Detroit Lions,MIA,-9.5,35.5,Sun Life Stadium,False,83.0,9.0,80.0,,MIA,DET,MIA,Yes


In [73]:
#Convert the values in the three required columns into floats to be compared to one another
spreadspoke_df['score_home'] = spreadspoke_df['score_home'].astype(float)
spreadspoke_df['score_away'] = spreadspoke_df['score_away'].astype(float)
spreadspoke_df['over_under_line'] = spreadspoke_df['over_under_line'].astype(float)

spreadspoke_df['game_score_sum'] = spreadspoke_df['score_home'] + spreadspoke_df['score_away']

spreadspoke_df['over_under_delta'] = spreadspoke_df['game_score_sum'] - spreadspoke_df['over_under_line']

spreadspoke_df['over_under_result'] = spreadspoke_df.apply(lambda row: 
    'Over' if row['game_score_sum'] > row['over_under_line'] 
    else ('Under' if row['game_score_sum'] < row['over_under_line'] 
          else 'Push'), axis=1)
spreadspoke_df.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,team_home_id,team_away_id,winning_team,favorite_won,game_score_sum,over_under_delta,over_under_result
7872,2002-09-05,2002,1,False,New York Giants,13.0,16.0,San Francisco 49ers,SF,-4.0,39.0,Giants Stadium,False,75.0,12.0,58.0,,NYG,SF,SF,Yes,29.0,-10.0,Under
7873,2002-09-08,2002,1,False,Buffalo Bills,31.0,37.0,New York Jets,NYJ,-3.0,43.0,Ralph Wilson Stadium,False,75.0,7.0,50.0,,BUF,NYJ,NYJ,Yes,68.0,25.0,Over
7875,2002-09-08,2002,1,False,Chicago Bears,27.0,23.0,Minnesota Vikings,CHI,-4.5,41.0,Memorial Stadium (Champaign),False,76.0,5.0,75.0,,CHI,MIN,CHI,Yes,50.0,9.0,Over
7876,2002-09-08,2002,1,False,Cincinnati Bengals,6.0,34.0,San Diego Chargers,CIN,-3.0,37.0,Paul Brown Stadium,False,81.0,5.0,50.0,,CIN,LAC,LAC,No,40.0,3.0,Over
7877,2002-09-08,2002,1,False,Cleveland Browns,39.0,40.0,Kansas City Chiefs,CLE,-2.0,36.0,FirstEnergy Stadium,False,78.0,7.0,54.0,,CLE,KC,KC,No,79.0,43.0,Over


In [74]:
#Define the new order of columns
new_order = ["schedule_date","team_home","score_home","score_away","team_away","team_favorite_id","favorite_won",
             "spread_favorite","over_under_line","over_under_result","over_under_delta","game_score_sum","schedule_season","schedule_playoff","schedule_week","stadium","stadium_neutral",
             "weather_temperature","weather_wind_mph","weather_humidity", "team_home_id", "team_away_id"]

#Rearrange the DataFrame
spreadspoke_df = spreadspoke_df[new_order]
spreadspoke_df = spreadspoke_df.set_index('schedule_date')

pd.set_option('display.max_columns', None)

spreadspoke_df.head()

Unnamed: 0_level_0,team_home,score_home,score_away,team_away,team_favorite_id,favorite_won,spread_favorite,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_playoff,schedule_week,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,team_home_id,team_away_id
schedule_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-09-05,New York Giants,13.0,16.0,San Francisco 49ers,SF,Yes,-4.0,39.0,Under,-10.0,29.0,2002,False,1,Giants Stadium,False,75.0,12.0,58.0,NYG,SF
2002-09-08,Buffalo Bills,31.0,37.0,New York Jets,NYJ,Yes,-3.0,43.0,Over,25.0,68.0,2002,False,1,Ralph Wilson Stadium,False,75.0,7.0,50.0,BUF,NYJ
2002-09-08,Chicago Bears,27.0,23.0,Minnesota Vikings,CHI,Yes,-4.5,41.0,Over,9.0,50.0,2002,False,1,Memorial Stadium (Champaign),False,76.0,5.0,75.0,CHI,MIN
2002-09-08,Cincinnati Bengals,6.0,34.0,San Diego Chargers,CIN,No,-3.0,37.0,Over,3.0,40.0,2002,False,1,Paul Brown Stadium,False,81.0,5.0,50.0,CIN,LAC
2002-09-08,Cleveland Browns,39.0,40.0,Kansas City Chiefs,CLE,No,-2.0,36.0,Over,43.0,79.0,2002,False,1,FirstEnergy Stadium,False,78.0,7.0,54.0,CLE,KC


In [75]:
#Adding Stadium Info

In [76]:
# # Values to replace
# values_to_replace = ['Washington Redskins', 'Washington Football Team']

# # Replace specific values with 'New Team Name'
# spreadspoke_df['team_home'] = spreadspoke_df['team_home'].replace(values_to_replace, 'Washington Commanders')  
# spreadspoke_df['team_away'] = spreadspoke_df['team_away'].replace(values_to_replace, 'Washington Commanders')  

# #spreadspoke_df.head(20) Commanders correctly replaced

In [77]:
nfl_teams = [
    "Arizona Cardinals",  "Atlanta Falcons",  "Baltimore Ravens",  "Buffalo Bills",  "Carolina Panthers",  "Chicago Bears",
    "Cincinnati Bengals", "Cleveland Browns", "Dallas Cowboys", "Denver Broncos", "Detroit Lions", "Green Bay Packers",
    "Houston Texans", "Indianapolis Colts",  "Jacksonville Jaguars",  "Kansas City Chiefs",  "Las Vegas Raiders",
    "Los Angeles Chargers",  "Los Angeles Rams",  "Miami Dolphins",  "Minnesota Vikings",  "New England Patriots",
    "New Orleans Saints",  "New York Giants",  "New York Jets",  "Philadelphia Eagles",  "Pittsburgh Steelers",
    "San Francisco 49ers",  "Seattle Seahawks",  "Tampa Bay Buccaneers",  "Tennessee Titans",  "Washington Commanders"
]

# Create regex pattern to match any of the NFL team names
pattern = r'\b(' + '|'.join(nfl_teams) + r')\b'

# Filter the DataFrame to only keep rows that contain an NFL team name
filtered_df = spreadspoke_df[spreadspoke_df['team_home'].str.contains(pattern, case=False, regex=True)]
spreadspoke_df2 = filtered_df[filtered_df['team_away'].str.contains(pattern, case=False, regex=True)]

# Display the filtered DataFrame
spreadspoke_df2.head()

#we don't lose washington commanders as the home team here
len(list(spreadspoke_df2['team_home'].unique())) 


This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.


This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.



32

In [78]:
values_to_replace = ['Washington Redskins', 'Washington Football Team']

# Replace washington redskins and washington football team with washington commanders
spreadspoke_df2.loc[spreadspoke_df2['team_home'].isin(values_to_replace), 'team_home'] = 'Washington Commanders'
spreadspoke_df2.loc[spreadspoke_df2['team_away'].isin(values_to_replace), 'team_away'] = 'Washington Commanders'

#prove that it was replaced correctly
# spreadspoke_df2.query("team_away == 'Washington Commanders'")
# spreadspoke_df2.query("team_home == 'Washington Commanders'")

In [79]:
#Matching teams to stadiums:

#list of current stadiums that are open (including stadiums that have had multiple names since 2002)
stadiums = [
    "Acrisure Stadium", "Allegiant Stadium", "GEHA Field at Arrowhead Stadium", "AT&T Stadium", "Bank of America Stadium",
    "Caesars Superdome", "Empower Field at Mile High", "FedEx Field","FirstEnergy Stadium", "Ford Field", "Gillette Stadium",
    "Hard Rock Stadium", "Highmark Stadium", "Lambeau Field", "Levi's Stadium", "Lincoln Financial Field","Lumen Field",
    "M&T Bank Stadium", "Mercedes-Benz Stadium", "MetLife Stadium", "NRG Stadium", "Nissan Stadium", "Paycor Stadium",
    "Raymond James Stadium", "SoFi Stadium", "Soldier Field", "State Farm Stadium", "TIAA Bank Field", "U.S. Bank Stadium",
    "Lucas Oil Stadium",
    "Cowboys Stadium","Sports Authority Field at Mile High","Mile High Stadium","University of Phoenix Stadium",
    "Reliant Stadium", "EverBank Field", "Arrowhead Stadium", "LP Stadium", "Louisiana Superdome",
    "Mercedes-Benz Superdome","Bills Stadium", "New Era Field","Ralph Wilson Stadium", "Heinz Field"]

pattern = r'\b(' + '|'.join(stadiums) + r')\b'
stadiums_filtered_df = stadiums_df[stadiums_df['stadium_name'].str.contains(pattern, case=False, regex=True)]
stadiums_filtered_df = stadiums_filtered_df.drop(columns=['stadium_close', 'stadium_address', 'stadium_weather_station_zipcode', 'stadium_weather_station',	'stadium_weather_station_name', 'stadium_azimuthangle', 'stadium_elevation']).reset_index().drop(columns=['index'])
#stadiums_filtered_df


This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.



In [80]:
#add a column to the spreadspoke_df to match the hometown to the home team
stadium_locations = {'Pittsburgh Steelers':'Pittsburgh, PA', 'Las Vegas Raiders':'Paradise, NV', 'Dallas Cowboys':'Arlington, TX',
                     'Carolina Panthers':'Charlotte, NC', 'New Orleans Saints':'New Orleans, LA', 'Denver Broncos':'Denver, CO',
                     'Washington Commanders':'Landover, MD', 'Cleveland Browns':'Cleveland, OH', 'Detroit Lions':'Detroit, MI',
                     'Kansas City Chiefs':'Kansas City, MO', 'New England Patriots':'Foxborough, MA', 
                     'Miami Dolphins':'Miami Gardens, FL', 'Buffalo Bills':'Orchard Park, NY', 'Green Bay Packers':'Green Bay, WI',
                     'San Francisco 49ers':'Santa Clara, CA', 'Philadelphia Eagles':'Philadelphia, PA', 
                     'Indianapolis Colts':'Indianapolis, IN', 'Seattle Seahawks':'Seattle, WA', 'Baltimore Ravens':'Baltimore, MD',
                     'Atlanta Falcons':'Atlanta, GA', 'New York Giants':'East Rutherford, NJ', 'New York Jets':'East Rutherford, NJ',
                     'Tennessee Titans':'Nashville, TN', 'Houston Texans':'Houston, TX', 'Cincinnati Bengals':'Cincinnati, OH',
                     'Tampa Bay Buccaneers':'Tampa, FL', 'Los Angeles Chargers':'Inglewood, CA', 'Los Angeles Rams':'Inglewood, CA',
                     'Chicago Bears':'Chicago, IL', 'Arizona Cardinals':'Glendale, AZ', 'Jacksonville Jaguars':'Jacksonville, FL',
                     'Minnesota Vikings':'Minneapolis, MN'}

spreadspoke_df2['stadium_location'] = spreadspoke_df2['team_home'].map(stadium_locations) #this is where you lose the commanders
spreadspoke_df2.reset_index()
spreadspoke_df2.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,team_home,score_home,score_away,team_away,team_favorite_id,favorite_won,spread_favorite,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_playoff,schedule_week,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,team_home_id,team_away_id,stadium_location
schedule_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2002-09-05,New York Giants,13.0,16.0,San Francisco 49ers,SF,Yes,-4.0,39.0,Under,-10.0,29.0,2002,False,1,Giants Stadium,False,75.0,12.0,58.0,NYG,SF,"East Rutherford, NJ"
2002-09-08,Buffalo Bills,31.0,37.0,New York Jets,NYJ,Yes,-3.0,43.0,Over,25.0,68.0,2002,False,1,Ralph Wilson Stadium,False,75.0,7.0,50.0,BUF,NYJ,"Orchard Park, NY"
2002-09-08,Chicago Bears,27.0,23.0,Minnesota Vikings,CHI,Yes,-4.5,41.0,Over,9.0,50.0,2002,False,1,Memorial Stadium (Champaign),False,76.0,5.0,75.0,CHI,MIN,"Chicago, IL"
2002-09-08,Cleveland Browns,39.0,40.0,Kansas City Chiefs,CLE,No,-2.0,36.0,Over,43.0,79.0,2002,False,1,FirstEnergy Stadium,False,78.0,7.0,54.0,CLE,KC,"Cleveland, OH"
2002-09-08,Green Bay Packers,37.0,34.0,Atlanta Falcons,GB,Yes,-7.0,42.5,Over,28.5,71.0,2002,False,1,Lambeau Field,False,72.0,6.0,78.0,GB,ATL,"Green Bay, WI"


In [81]:
#merge the stadium dataframe to the spreadspoke dataframe to get the stadium information in the same df as the game information
spreadspoke_df3 = pd.merge(spreadspoke_df2, stadiums_filtered_df, on='stadium_location', how='outer')

#create a mask to filter rows for games played in stadiums that no longer exist
mask = spreadspoke_df3['schedule_season'] >= spreadspoke_df3['stadium_open']
spreadspoke_df3 = spreadspoke_df3[mask]

#need to see all of the columns
pd.set_option('display.max_columns', None)

#check for the Los Angeles Chargers
#check for the Los Angeles Rams
#check for the Las Vegas Raiders

# spreadspoke_df3.query("team_home == 'Los Angeles Chargers'").head(5)
# spreadspoke_df3.query("team_home == 'Los Angeles Rams'").head(5)
# spreadspoke_df3.query("team_home == 'Las Vegas Raiders'").head(5)

In [82]:
#check to make sure we didn't lose any stadiums during the merge
unique_stadiums = spreadspoke_df3['stadium_name'].unique()

#the number of stadiums stays the same (we didn't lose any)
print(len(stadiums)) #44
print(len(unique_stadiums)) #44

44
44


In [83]:
final_df = spreadspoke_df3.reset_index().drop(columns=['index'])
final_df
#final_df contains a combination of the stadium data and the game data

Unnamed: 0,team_home,score_home,score_away,team_away,team_favorite_id,favorite_won,spread_favorite,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_playoff,schedule_week,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,team_home_id,team_away_id,stadium_location,stadium_name,stadium_open,stadium_type,stadium_weather_type,stadium_capacity,stadium_surface,stadium_latitude,stadium_longitude
0,Dallas Cowboys,31.0,33.0,New York Giants,DAL,No,-3.0,45.5,Over,18.5,64.0,2009,False,2,Cowboys Stadium,False,88.0,6.0,,DAL,NYG,"Arlington, TX",AT&T Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778
1,Dallas Cowboys,31.0,33.0,New York Giants,DAL,No,-3.0,45.5,Over,18.5,64.0,2009,False,2,Cowboys Stadium,False,88.0,6.0,,DAL,NYG,"Arlington, TX",Cowboys Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.757200,-97.073600
2,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,Yes,-8.5,48.5,Under,-20.5,28.0,2009,False,3,Cowboys Stadium,False,81.0,14.0,,DAL,CAR,"Arlington, TX",AT&T Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778
3,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,Yes,-8.5,48.5,Under,-20.5,28.0,2009,False,3,Cowboys Stadium,False,81.0,14.0,,DAL,CAR,"Arlington, TX",Cowboys Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.757200,-97.073600
4,Dallas Cowboys,37.0,21.0,Atlanta Falcons,DAL,Yes,-5.0,48.0,Over,10.0,58.0,2009,False,7,Cowboys Stadium,False,73.0,13.0,,DAL,ATL,"Arlington, TX",AT&T Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6496,Tampa Bay Buccaneers,20.0,6.0,Tennessee Titans,TB,Yes,-2.5,39.0,Under,-13.0,26.0,2023,False,10,Raymond James Stadium,False,,,,TB,TEN,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333
6497,Tampa Bay Buccaneers,14.0,10.0,Carolina Panthers,TB,Yes,-3.5,36.5,Under,-12.5,24.0,2023,False,13,Raymond James Stadium,False,,,,TB,CAR,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333
6498,Tampa Bay Buccaneers,30.0,12.0,Jacksonville Jaguars,JAX,No,-2.0,43.0,Under,-1.0,42.0,2023,False,16,Raymond James Stadium,False,,,,TB,JAX,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333
6499,Tampa Bay Buccaneers,13.0,23.0,New Orleans Saints,TB,No,-2.5,42.5,Under,-6.5,36.0,2023,False,17,Raymond James Stadium,False,,,,TB,NO,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333


In [84]:
def spread_cover(row):
    # Determine the non-favorite team
    if row['team_favorite_id'] == row['team_home']:
        row['team_not_favorite_id'] = row['team_away_id']
    else:
        row['team_not_favorite_id'] = row['team_home_id']

    # Determine if the favorite won or not
    if row['favorite_won'] == 'No':
        row['spread_cover'] = row['team_not_favorite_id']
        row['favorite_covered'] = False  # Favorite did not cover
    else:
        if row['team_favorite_id'] == row['team_home']:
            if row['score_home'] + row['spread_favorite'] > row['score_away']:
                row['spread_cover'] = row['team_home']
                row['favorite_covered'] = True  # Favorite covered
            else:
                row['spread_cover'] = row['team_away']
                row['favorite_covered'] = False  # Favorite did not cover
        else:
            if row['score_away'] + row['spread_favorite'] > row['score_home']:
                row['spread_cover'] = row['team_away_id']
                row['favorite_covered'] = True  # Favorite covered
            else:
                row['spread_cover'] = row['team_home_id']
                row['favorite_covered'] = False  # Favorite did not cover

    return row

# Apply the function to each row in the DataFrame
final_df = final_df.apply(spread_cover, axis=1)
final_df

Unnamed: 0,team_home,score_home,score_away,team_away,team_favorite_id,favorite_won,spread_favorite,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_playoff,schedule_week,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,team_home_id,team_away_id,stadium_location,stadium_name,stadium_open,stadium_type,stadium_weather_type,stadium_capacity,stadium_surface,stadium_latitude,stadium_longitude,team_not_favorite_id,spread_cover,favorite_covered
0,Dallas Cowboys,31.0,33.0,New York Giants,DAL,No,-3.0,45.5,Over,18.5,64.0,2009,False,2,Cowboys Stadium,False,88.0,6.0,,DAL,NYG,"Arlington, TX",AT&T Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,DAL,DAL,False
1,Dallas Cowboys,31.0,33.0,New York Giants,DAL,No,-3.0,45.5,Over,18.5,64.0,2009,False,2,Cowboys Stadium,False,88.0,6.0,,DAL,NYG,"Arlington, TX",Cowboys Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.757200,-97.073600,DAL,DAL,False
2,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,Yes,-8.5,48.5,Under,-20.5,28.0,2009,False,3,Cowboys Stadium,False,81.0,14.0,,DAL,CAR,"Arlington, TX",AT&T Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,DAL,DAL,False
3,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,Yes,-8.5,48.5,Under,-20.5,28.0,2009,False,3,Cowboys Stadium,False,81.0,14.0,,DAL,CAR,"Arlington, TX",Cowboys Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.757200,-97.073600,DAL,DAL,False
4,Dallas Cowboys,37.0,21.0,Atlanta Falcons,DAL,Yes,-5.0,48.0,Over,10.0,58.0,2009,False,7,Cowboys Stadium,False,73.0,13.0,,DAL,ATL,"Arlington, TX",AT&T Stadium,2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,DAL,DAL,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6496,Tampa Bay Buccaneers,20.0,6.0,Tennessee Titans,TB,Yes,-2.5,39.0,Under,-13.0,26.0,2023,False,10,Raymond James Stadium,False,,,,TB,TEN,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,TB,TB,False
6497,Tampa Bay Buccaneers,14.0,10.0,Carolina Panthers,TB,Yes,-3.5,36.5,Under,-12.5,24.0,2023,False,13,Raymond James Stadium,False,,,,TB,CAR,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,TB,TB,False
6498,Tampa Bay Buccaneers,30.0,12.0,Jacksonville Jaguars,JAX,No,-2.0,43.0,Under,-1.0,42.0,2023,False,16,Raymond James Stadium,False,,,,TB,JAX,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,TB,TB,False
6499,Tampa Bay Buccaneers,13.0,23.0,New Orleans Saints,TB,No,-2.5,42.5,Under,-6.5,36.0,2023,False,17,Raymond James Stadium,False,,,,TB,NO,"Tampa, FL",Raymond James Stadium,1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,TB,TB,False


In [85]:
new_order = [
    'team_home', 'score_home' , 'score_away', 'team_away', 'team_home_id', 'team_away_id', 'team_favorite_id', 'team_not_favorite_id',
    'spread_favorite', 'spread_cover', 'favorite_covered',
    'over_under_line', 'over_under_result', 'over_under_delta', 'game_score_sum',
    'schedule_season', 'schedule_week', 'schedule_playoff',
    'stadium', 'stadium_name', 'stadium_location', 'stadium_open', 'stadium_type', 'stadium_weather_type',
    'stadium_capacity', 'stadium_surface', 'stadium_latitude', 'stadium_longitude',
    'weather_temperature', 'weather_wind_mph', 'weather_humidity', 'stadium_neutral'
]

# Reorder the DataFrame columns
final_df = final_df[new_order]

# Print the reordered DataFrame
final_df.head()

Unnamed: 0,team_home,score_home,score_away,team_away,team_home_id,team_away_id,team_favorite_id,team_not_favorite_id,spread_favorite,spread_cover,favorite_covered,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_week,schedule_playoff,stadium,stadium_name,stadium_location,stadium_open,stadium_type,stadium_weather_type,stadium_capacity,stadium_surface,stadium_latitude,stadium_longitude,weather_temperature,weather_wind_mph,weather_humidity,stadium_neutral
0,Dallas Cowboys,31.0,33.0,New York Giants,DAL,NYG,DAL,DAL,-3.0,DAL,False,45.5,Over,18.5,64.0,2009,2,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,88.0,6.0,,False
1,Dallas Cowboys,31.0,33.0,New York Giants,DAL,NYG,DAL,DAL,-3.0,DAL,False,45.5,Over,18.5,64.0,2009,2,False,Cowboys Stadium,Cowboys Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.7572,-97.0736,88.0,6.0,,False
2,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,CAR,DAL,DAL,-8.5,DAL,False,48.5,Under,-20.5,28.0,2009,3,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,81.0,14.0,,False
3,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,CAR,DAL,DAL,-8.5,DAL,False,48.5,Under,-20.5,28.0,2009,3,False,Cowboys Stadium,Cowboys Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.7572,-97.0736,81.0,14.0,,False
4,Dallas Cowboys,37.0,21.0,Atlanta Falcons,DAL,ATL,DAL,DAL,-5.0,DAL,False,48.0,Over,10.0,58.0,2009,7,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,73.0,13.0,,False


In [86]:
# Step 1: Determine the winning team
final_df['winning_team_id'] = final_df.apply(lambda row: row['team_home_id'] if row['score_home'] > row['score_away'] else row['team_away_id'], axis=1)

# Step 2: Create the new column
final_df['favorite_won'] = final_df['team_favorite_id'] == final_df['winning_team_id']

final_df.head()

new_order_2 = [
    'team_home', 'score_home' , 'score_away', 'team_away', 'team_home_id', 'team_away_id', 'team_favorite_id', 'team_not_favorite_id',
    'winning_team_id', 'favorite_won', 'spread_favorite', 'spread_cover', 'favorite_covered',
    'over_under_line', 'over_under_result', 'over_under_delta', 'game_score_sum',
    'schedule_season', 'schedule_week', 'schedule_playoff',
    'stadium', 'stadium_name', 'stadium_location', 'stadium_open', 'stadium_type', 'stadium_weather_type',
    'stadium_capacity', 'stadium_surface', 'stadium_latitude', 'stadium_longitude',
    'weather_temperature', 'weather_wind_mph', 'weather_humidity', 'stadium_neutral'
]

# Reorder the DataFrame columns
final_df = final_df[new_order_2]

# Print the reordered DataFrame
final_df

Unnamed: 0,team_home,score_home,score_away,team_away,team_home_id,team_away_id,team_favorite_id,team_not_favorite_id,winning_team_id,favorite_won,spread_favorite,spread_cover,favorite_covered,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_week,schedule_playoff,stadium,stadium_name,stadium_location,stadium_open,stadium_type,stadium_weather_type,stadium_capacity,stadium_surface,stadium_latitude,stadium_longitude,weather_temperature,weather_wind_mph,weather_humidity,stadium_neutral
0,Dallas Cowboys,31.0,33.0,New York Giants,DAL,NYG,DAL,DAL,NYG,False,-3.0,DAL,False,45.5,Over,18.5,64.0,2009,2,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,88.0,6.0,,False
1,Dallas Cowboys,31.0,33.0,New York Giants,DAL,NYG,DAL,DAL,NYG,False,-3.0,DAL,False,45.5,Over,18.5,64.0,2009,2,False,Cowboys Stadium,Cowboys Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.757200,-97.073600,88.0,6.0,,False
2,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,CAR,DAL,DAL,DAL,True,-8.5,DAL,False,48.5,Under,-20.5,28.0,2009,3,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,81.0,14.0,,False
3,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,CAR,DAL,DAL,DAL,True,-8.5,DAL,False,48.5,Under,-20.5,28.0,2009,3,False,Cowboys Stadium,Cowboys Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.757200,-97.073600,81.0,14.0,,False
4,Dallas Cowboys,37.0,21.0,Atlanta Falcons,DAL,ATL,DAL,DAL,DAL,True,-5.0,DAL,False,48.0,Over,10.0,58.0,2009,7,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,73.0,13.0,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6496,Tampa Bay Buccaneers,20.0,6.0,Tennessee Titans,TB,TEN,TB,TB,TB,True,-2.5,TB,False,39.0,Under,-13.0,26.0,2023,10,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False
6497,Tampa Bay Buccaneers,14.0,10.0,Carolina Panthers,TB,CAR,TB,TB,TB,True,-3.5,TB,False,36.5,Under,-12.5,24.0,2023,13,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False
6498,Tampa Bay Buccaneers,30.0,12.0,Jacksonville Jaguars,TB,JAX,JAX,TB,TB,False,-2.0,TB,False,43.0,Under,-1.0,42.0,2023,16,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False
6499,Tampa Bay Buccaneers,13.0,23.0,New Orleans Saints,TB,NO,TB,TB,NO,False,-2.5,TB,False,42.5,Under,-6.5,36.0,2023,17,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False


In [87]:
# Create a boolean mask for duplicates
duplicate_mask = final_df.duplicated(subset=['team_home', 'score_home' , 'score_away', 'team_away', 'team_home_id', 'team_away_id', 'team_favorite_id', 'team_not_favorite_id',
    'winning_team_id', 'favorite_won', 'spread_favorite', 'spread_cover', 'favorite_covered',
    'over_under_line', 'over_under_result', 'over_under_delta', 'game_score_sum',
    'schedule_season', 'schedule_week', 'schedule_playoff',
    'stadium', 'stadium_location', 'stadium_open', 'stadium_type', 'stadium_weather_type',
    'stadium_capacity', 'stadium_surface',
    'weather_temperature', 'weather_wind_mph', 'weather_humidity', 'stadium_neutral'], keep=False)  # keep=False means mark all duplicates

# Display the duplicates
#duplicates = final_df[duplicate_mask]
#print("Duplicates:")
#display(duplicates)
final_df = final_df.drop_duplicates(subset=['team_home', 'score_home' , 'score_away', 'team_away', 'team_home_id', 'team_away_id', 'team_favorite_id', 'team_not_favorite_id',
    'winning_team_id', 'favorite_won', 'spread_favorite', 'spread_cover', 'favorite_covered',
    'over_under_line', 'over_under_result', 'over_under_delta', 'game_score_sum',
    'schedule_season', 'schedule_week', 'schedule_playoff',
    'stadium', 'stadium_location', 'stadium_open', 'stadium_type', 'stadium_weather_type',
    'stadium_capacity', 'stadium_surface',
    'weather_temperature', 'weather_wind_mph', 'weather_humidity', 'stadium_neutral'])

In [88]:
display(final_df)

Unnamed: 0,team_home,score_home,score_away,team_away,team_home_id,team_away_id,team_favorite_id,team_not_favorite_id,winning_team_id,favorite_won,spread_favorite,spread_cover,favorite_covered,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_week,schedule_playoff,stadium,stadium_name,stadium_location,stadium_open,stadium_type,stadium_weather_type,stadium_capacity,stadium_surface,stadium_latitude,stadium_longitude,weather_temperature,weather_wind_mph,weather_humidity,stadium_neutral
0,Dallas Cowboys,31.0,33.0,New York Giants,DAL,NYG,DAL,DAL,NYG,False,-3.0,DAL,False,45.5,Over,18.5,64.0,2009,2,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,88.0,6.0,,False
2,Dallas Cowboys,21.0,7.0,Carolina Panthers,DAL,CAR,DAL,DAL,DAL,True,-8.5,DAL,False,48.5,Under,-20.5,28.0,2009,3,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,81.0,14.0,,False
4,Dallas Cowboys,37.0,21.0,Atlanta Falcons,DAL,ATL,DAL,DAL,DAL,True,-5.0,DAL,False,48.0,Over,10.0,58.0,2009,7,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,73.0,13.0,,False
6,Dallas Cowboys,38.0,17.0,Seattle Seahawks,DAL,SEA,DAL,DAL,DAL,True,-9.5,DAL,False,46.0,Over,9.0,55.0,2009,8,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,72.0,7.0,,False
8,Dallas Cowboys,24.0,0.0,Philadelphia Eagles,DAL,PHI,DAL,DAL,DAL,True,-3.0,DAL,False,47.0,Under,-23.0,24.0,2009,17,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,48.0,8.0,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6496,Tampa Bay Buccaneers,20.0,6.0,Tennessee Titans,TB,TEN,TB,TB,TB,True,-2.5,TB,False,39.0,Under,-13.0,26.0,2023,10,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False
6497,Tampa Bay Buccaneers,14.0,10.0,Carolina Panthers,TB,CAR,TB,TB,TB,True,-3.5,TB,False,36.5,Under,-12.5,24.0,2023,13,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False
6498,Tampa Bay Buccaneers,30.0,12.0,Jacksonville Jaguars,TB,JAX,JAX,TB,TB,False,-2.0,TB,False,43.0,Under,-1.0,42.0,2023,16,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False
6499,Tampa Bay Buccaneers,13.0,23.0,New Orleans Saints,TB,NO,TB,TB,NO,False,-2.5,TB,False,42.5,Under,-6.5,36.0,2023,17,False,Raymond James Stadium,Raymond James Stadium,"Tampa, FL",1998.0,outdoor,warm,65890,Grass,27.975833,-82.503333,,,,False


In [89]:
final_df.over_under_result.unique()

array(['Over', 'Under', 'Push'], dtype=object)

In [90]:
final_df[final_df['over_under_result'] == 'Push'] #Push is exact over/under result, w/74 instances, not significant enough to represent in my opinion 

Unnamed: 0,team_home,score_home,score_away,team_away,team_home_id,team_away_id,team_favorite_id,team_not_favorite_id,winning_team_id,favorite_won,spread_favorite,spread_cover,favorite_covered,over_under_line,over_under_result,over_under_delta,game_score_sum,schedule_season,schedule_week,schedule_playoff,stadium,stadium_name,stadium_location,stadium_open,stadium_type,stadium_weather_type,stadium_capacity,stadium_surface,stadium_latitude,stadium_longitude,weather_temperature,weather_wind_mph,weather_humidity,stadium_neutral
70,Dallas Cowboys,17.0,28.0,Arizona Cardinals,DAL,ARI,DAL,DAL,ARI,False,-1.5,DAL,False,45.0,Push,0.0,45.0,2014,9,False,Cowboys Stadium,AT&T Stadium,"Arlington, TX",2009.0,retractable,indoor,80000,FieldTurf,32.747778,-97.092778,64.0,0.0,,False
312,Baltimore Ravens,20.0,13.0,New York Jets,BAL,NYJ,BAL,BAL,BAL,True,-10.0,BAL,False,33.0,Push,0.0,33.0,2007,2,False,M&T Bank Stadium,M&T Bank Stadium,"Baltimore, MD",1998.0,outdoor,moderate,71008,FieldTurf,39.278056,-76.622778,58.0,6.0,52.0,False
356,Baltimore Ravens,33.0,14.0,New York Giants,BAL,NYG,NYG,BAL,BAL,False,-2.5,BAL,False,47.0,Push,0.0,47.0,2012,16,False,M&T Bank Stadium,M&T Bank Stadium,"Baltimore, MD",1998.0,outdoor,moderate,71008,FieldTurf,39.278056,-76.622778,51.0,7.0,33.0,False
411,Baltimore Ravens,20.0,34.0,Kansas City Chiefs,BAL,KC,BAL,BAL,KC,False,-3.0,BAL,False,54.0,Push,0.0,54.0,2020,3,False,M&T Bank Stadium,M&T Bank Stadium,"Baltimore, MD",1998.0,outdoor,moderate,71008,FieldTurf,39.278056,-76.622778,,,,False
444,Baltimore Ravens,34.0,10.0,Houston Texans,BAL,HOU,BAL,BAL,BAL,True,-10.0,BAL,False,44.0,Push,0.0,44.0,2023,Division,True,M&T Bank Stadium,M&T Bank Stadium,"Baltimore, MD",1998.0,outdoor,moderate,71008,FieldTurf,39.278056,-76.622778,27.0,16.0,40.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6201,Seattle Seahawks,14.0,24.0,San Francisco 49ers,SEA,SF,SEA,SEA,SF,False,-10.0,SEA,False,38.0,Push,0.0,38.0,2006,15,False,CenturyLink Field,Lumen Field,"Seattle, WA",2002.0,outdoor,moderate,67000,FieldTurf,47.652300,-122.409500,44.0,13.0,85.0,False
6223,Seattle Seahawks,31.0,6.0,San Francisco 49ers,SEA,SF,SF,SEA,SEA,False,-3.0,SEA,False,37.0,Push,0.0,37.0,2010,1,False,CenturyLink Field,Lumen Field,"Seattle, WA",2002.0,outdoor,moderate,67000,FieldTurf,47.652300,-122.409500,64.0,11.0,,False
6233,Seattle Seahawks,22.0,17.0,Baltimore Ravens,SEA,BAL,BAL,SEA,SEA,False,-7.0,SEA,False,39.0,Push,0.0,39.0,2011,10,False,CenturyLink Field,Lumen Field,"Seattle, WA",2002.0,outdoor,moderate,67000,FieldTurf,47.652300,-122.409500,50.0,10.0,,False
6299,Seattle Seahawks,21.0,26.0,San Francisco 49ers,SEA,SF,SF,SEA,SF,True,-3.5,SF,True,47.0,Push,0.0,47.0,2019,17,False,CenturyLink Field,Lumen Field,"Seattle, WA",2002.0,outdoor,moderate,67000,FieldTurf,47.652300,-122.409500,,,,False


In [91]:
final_df = final_df[final_df['over_under_result'] != 'Push']
print(final_df.over_under_result.unique())

['Over' 'Under']


## Visualizations

In [92]:
#Other Ideas 
#Home team when underdog? Home team when favored? (checking to see if teams have a significant home field advantage)

#Fix team not favorite id column
#favorite_covered column is not coded properly

## TEAMS ON OUTCOMES

In [101]:
# Initialize the Dash app
app = dash.Dash(__name__)

# Layout of the app
app.layout = html.Div([
    html.H1("NFL Analysis Dashboard"),
    
    dcc.Dropdown(
        id='visualization-dropdown',
        options=[
            {'label': 'Over/Under', 'value': 'Over/Under'},
            {'label': 'Favorite', 'value': 'Favorite'},
            {'label': 'Spread Coverage', 'value': 'Spread Coverage'}
        ],
        value='Over/Under',
        clearable=False
    ),
    
    dcc.Dropdown(
        id='team-dropdown',
        options=[
            {'label': 'Overall NFL', 'value': 'Overall NFL'}
        ] + [{'label': team, 'value': team}
             for team in sorted(pd.concat([final_df['team_home'], final_df['team_away']]).unique())],
        value='Overall NFL',
        clearable=False
    ),
    
    dcc.RadioItems(
        id='playoff-toggle',
        options=[
            {'label': 'Regular Season', 'value': 'False'},
            {'label': 'Playoffs', 'value': 'True'}
        ],
        value='False',
        labelStyle={'display': 'inline-block', 'marginRight': 20}
    ),
    
    dcc.Graph(id='bar-chart')
])

@app.callback(
    Output('bar-chart', 'figure'),
    [Input('visualization-dropdown', 'value'),
     Input('team-dropdown', 'value'),
     Input('playoff-toggle', 'value')]
)
def update_graph(selected_visualization, selected_team, is_playoff):
    # Filter the DataFrame based on playoff status
    filtered_df = final_df[final_df['schedule_playoff'].astype(str) == is_playoff]
    
    game_type = "Playoff" if is_playoff == 'True' else "Regular Season"
    
    if selected_visualization == 'Over/Under':
        if selected_team == 'Overall NFL':
            total_games = len(filtered_df)
            if total_games > 0:
                over_games = len(filtered_df[filtered_df['over_under_result'] == 'Over'])
                under_games = total_games - over_games
                
                over_pct = over_games / total_games * 100
                under_pct = under_games / total_games * 100
                
                fig = go.Figure(data=[
                    go.Bar(name='Under', x=['Overall NFL'], y=[under_pct],
                           marker_color='red', text=f'Under: {under_games}/{total_games} ({under_pct:.1f}%)'),
                    go.Bar(name='Over', x=['Overall NFL'], y=[over_pct],
                           marker_color='green', text=f'Over: {over_games}/{total_games} ({over_pct:.1f}%)')
                ])
                
                fig.update_layout(
                    title=f'{game_type} Over/Under Distribution for Overall NFL',
                    xaxis_title='Category',
                    yaxis_title='Percentage (%)',
                    barmode='group',
                    showlegend=True,
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
            else:
                fig = go.Figure()
                fig.update_layout(
                    title=f'No Data Available for {game_type}',
                    xaxis_title='Category',
                    yaxis_title='Percentage (%)',
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
        
        else:
            all_teams = pd.concat([filtered_df['team_home'], filtered_df['team_away']]).unique()
            all_teams = [team for team in sorted(all_teams) if team != selected_team]

            game_stats = []
            for opponent in all_teams:
                team_games = filtered_df[
                    ((filtered_df['team_home'] == selected_team) & (filtered_df['team_away'] == opponent)) |
                    ((filtered_df['team_home'] == opponent) & (filtered_df['team_away'] == selected_team))
                ]
                
                total_games = len(team_games)
                if total_games > 0:
                    over_games = len(team_games[team_games['over_under_result'] == 'Over'])
                    under_games = total_games - over_games
                    
                    over_pct = over_games / total_games * 100
                    under_pct = under_games / total_games * 100
                    
                    game_stats.append({
                        'team': opponent,
                        'total_games': total_games,
                        'over_pct': over_pct,
                        'under_pct': under_pct,
                        'over_games': over_games,
                        'under_games': under_games
                    })
            
            if not game_stats:
                fig = go.Figure()
                fig.update_layout(
                    title=f'No Data Available for {game_type}',
                    xaxis_title='Percentage (%)',
                    yaxis_title='Opponent Teams',
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
            else:
                game_stats_sorted = sorted(game_stats, key=lambda x: x['total_games'], reverse=True)
                
                all_teams_sorted = [stats['team'] for stats in game_stats_sorted]
                over_percentages = [stats['over_pct'] for stats in game_stats_sorted]
                under_percentages = [stats['under_pct'] for stats in game_stats_sorted]
                total_games_list = [stats['total_games'] for stats in game_stats_sorted]
                over_games_list = [stats['over_games'] for stats in game_stats_sorted]
                under_games_list = [stats['under_games'] for stats in game_stats_sorted]
                
                under_hover_text = [f"Team: {selected_team} vs {team}<br>Under: {under}/{total} ({under_pct:.1f}%)<br>Total Games: {total}" 
                                    for team, under, total, under_pct in zip(all_teams_sorted, under_games_list, total_games_list, under_percentages)]
                over_hover_text = [f"Team: {selected_team} vs {team}<br>Over: {over}/{total} ({over_pct:.1f}%)<br>Total Games: {total}" 
                                   for team, over, total, over_pct in zip(all_teams_sorted, over_games_list, total_games_list, over_percentages)]
                
                fig = go.Figure(data=[
                    go.Bar(name='Under', y=all_teams_sorted, x=under_percentages,
                           marker_color='red', hoverinfo='text', hovertext=under_hover_text, orientation='h'),
                    go.Bar(name='Over', y=all_teams_sorted, x=over_percentages,
                           marker_color='green', hoverinfo='text', hovertext=over_hover_text, orientation='h')
                ])
                
                annotations = []
                for i, (team, total) in enumerate(zip(all_teams_sorted, total_games_list)):
                    annotations.append(dict(
                        x=1.02,
                        y=team,
                        xref='paper',
                        yref='y',
                        text=str(total),
                        showarrow=False,
                        font=dict(size=10, color='black'),
                        align='left',
                        xanchor='left',
                        yanchor='middle'
                    ))
                
                annotations.append(dict(
                    x=1.06,
                    y=0.5,
                    xref='paper',
                    yref='paper',
                    text='# of Games',
                    showarrow=False,
                    font=dict(size=12, color='black', weight='bold'),
                    textangle=-90,
                    align='center',
                    xanchor='center',
                    yanchor='middle'
                ))
                
                axis_title_style = dict(
                    size=12,
                    color='black',
                    weight='bold'
                )
                
                fig.update_layout(
                    title=f'{game_type} Over/Under Distribution for {selected_team} vs All Other Teams',
                    xaxis_title=dict(text='Percentage(%)', font=axis_title_style),
                    yaxis_title=dict(text='Opponent Teams', font=axis_title_style),
                    barmode='relative',
                    xaxis={'range': [0, 100]},
                    yaxis={'autorange': 'reversed'},
                    showlegend=True,
                    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
                    hovermode='closest',
                    height=800,
                    margin=dict(l=200, r=100),
                    shapes=[dict(
                        type='line',
                        x0=50,
                        x1=50,
                        y0=0,
                        y1=1,
                        xref='x',
                        yref='paper',
                        line=dict(color='black', width=4)
                    )],
                    annotations=annotations,
                    font=dict(family="Arial, sans-serif")
                )
    
    elif selected_visualization == 'Favorite':
        if selected_team == 'Overall NFL':
            total_games = len(filtered_df)
            if total_games > 0:
                favorites_wins = len(filtered_df[filtered_df['favorite_won'] == True])
                favorites_losses = total_games - favorites_wins
                
                favorite_pct = favorites_wins / total_games * 100
                non_favorite_pct = favorites_losses / total_games * 100
                
                fig = go.Figure(data=[
                    go.Bar(name='Non-Favorite', x=['Overall NFL'], y=[non_favorite_pct],
                           marker_color='red', text=f'Non-Favorite Wins: {favorites_losses}/{total_games} ({non_favorite_pct:.1f}%)'),
                    go.Bar(name='Favorite', x=['Overall NFL'], y=[favorite_pct],
                           marker_color='green', text=f'Favorite Wins: {favorites_wins}/{total_games} ({favorite_pct:.1f}%)')
                ])
                
                fig.update_layout(
                    title=f'{game_type} Favorite Wins Distribution for Overall NFL',
                    xaxis_title='Category',
                    yaxis_title='Percentage (%)',
                    barmode='group',
                    showlegend=True,
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
            else:
                fig = go.Figure()
                fig.update_layout(
                    title=f'No Data Available for {game_type}',
                    xaxis_title='Category',
                    yaxis_title='Percentage (%)',
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
        
        else:
            all_teams = pd.concat([filtered_df['team_home'], filtered_df['team_away']]).unique()
            all_teams = [team for team in sorted(all_teams) if team != selected_team]

            game_stats = []
            for opponent in all_teams:
                team_games = filtered_df[
                    ((filtered_df['team_home'] == selected_team) & (filtered_df['team_away'] == opponent)) |
                    ((filtered_df['team_home'] == opponent) & (filtered_df['team_away'] == selected_team))
                ]
                
                total_games = len(team_games)
                if total_games > 0:
                    favorites_wins = len(team_games[team_games['favorite_won'] == True])
                    non_favorites_wins = total_games - favorites_wins
                    
                    favorite_pct = favorites_wins / total_games * 100
                    non_favorite_pct = non_favorites_wins / total_games * 100
                    
                    game_stats.append({
                        'team': opponent,
                        'total_games': total_games,
                        'favorite_pct': favorite_pct,
                        'non_favorite_pct': non_favorite_pct,
                        'favorites_wins': favorites_wins,
                        'non_favorites_wins': non_favorites_wins
                    })
            
            if not game_stats:
                fig = go.Figure()
                fig.update_layout(
                    title=f'No Data Available for {game_type}',
                    xaxis_title='Percentage (%)',
                    yaxis_title='Opponent Teams',
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
            else:
                game_stats_sorted = sorted(game_stats, key=lambda x: x['total_games'], reverse=True)
                
                all_teams_sorted = [stats['team'] for stats in game_stats_sorted]
                favorite_percentages = [stats['favorite_pct'] for stats in game_stats_sorted]
                non_favorite_percentages = [stats['non_favorite_pct'] for stats in game_stats_sorted]
                total_games_list = [stats['total_games'] for stats in game_stats_sorted]
                favorites_wins_list = [stats['favorites_wins'] for stats in game_stats_sorted]
                non_favorites_wins_list = [stats['non_favorites_wins'] for stats in game_stats_sorted]
                
                non_favorite_hover_text = [f"Team: {selected_team} vs {team}<br>Non-Favorite Wins: {non_wins}/{total} ({non_pct:.1f}%)<br>Total Games: {total}" 
                                           for team, non_wins, total, non_pct in zip(all_teams_sorted, non_favorites_wins_list, total_games_list, non_favorite_percentages)]
                favorite_hover_text = [f"Team: {selected_team} vs {team}<br>Favorite Wins: {fav_wins}/{total} ({fav_pct:.1f}%)<br>Total Games: {total}" 
                                       for team, fav_wins, total, fav_pct in zip(all_teams_sorted, favorites_wins_list, total_games_list, favorite_percentages)]
                
                fig = go.Figure(data=[
                    go.Bar(name='Non-Favorite', y=all_teams_sorted, x=non_favorite_percentages,
                           marker_color='red', hoverinfo='text', hovertext=non_favorite_hover_text, orientation='h'),
                    go.Bar(name='Favorite', y=all_teams_sorted, x=favorite_percentages,
                           marker_color='green', hoverinfo='text', hovertext=favorite_hover_text, orientation='h')
                ])
                
                annotations = []
                for i, (team, total) in enumerate(zip(all_teams_sorted, total_games_list)):
                    annotations.append(dict(
                        x=1.02,
                        y=team,
                        xref='paper',
                        yref='y',
                        text=str(total),
                        showarrow=False,
                        font=dict(size=10, color='black'),
                        align='left',
                        xanchor='left',
                        yanchor='middle'
                    ))
                
                annotations.append(dict(
                    x=1.06,
                    y=0.5,
                    xref='paper',
                    yref='paper',
                    text='# of Games',
                    showarrow=False,
                    font=dict(size=12, color='black', weight='bold'),
                    textangle=-90,
                    align='center',
                    xanchor='center',
                    yanchor='middle'
                ))
                
                axis_title_style = dict(
                    size=12,
                    color='black',
                    weight='bold'
                )
                
                fig.update_layout(
                    title=f'{game_type} Favorite Wins Distribution for {selected_team} vs All Other Teams',
                    xaxis_title=dict(text='Percentage(%)', font=axis_title_style),
                    yaxis_title=dict(text='Opponent Teams', font=axis_title_style),
                    barmode='relative',
                    xaxis={'range': [0, 100]},
                    yaxis={'autorange': 'reversed'},
                    showlegend=True,
                    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
                    hovermode='closest',
                    height=800,
                    margin=dict(l=200, r=100),
                    shapes=[dict(
                        type='line',
                        x0=50,
                        x1=50,
                        y0=0,
                        y1=1,
                        xref='x',
                        yref='paper',
                        line=dict(color='black', width=4)
                    )],
                    annotations=annotations,
                    font=dict(family="Arial, sans-serif")
                )
    
    elif selected_visualization == 'Spread Coverage':
        if selected_team == 'Overall NFL':
            total_games = len(filtered_df)
            if total_games > 0:
                spread_coverages = len(filtered_df[filtered_df['favorite_covered'] == True])
                spread_not_covered = total_games - spread_coverages
                
                coverage_pct = spread_coverages / total_games * 100
                no_coverage_pct = spread_not_covered / total_games * 100
                
                fig = go.Figure(data=[
                    go.Bar(name='Not Covered', x=['Overall NFL'], y=[no_coverage_pct],
                           marker_color='red', text=f'Not Covered: {spread_not_covered}/{total_games} ({no_coverage_pct:.1f}%)'),
                    go.Bar(name='Covered', x=['Overall NFL'], y=[coverage_pct],
                           marker_color='green', text=f'Covered: {spread_coverages}/{total_games} ({coverage_pct:.1f}%)')
                ])
                
                fig.update_layout(
                    title=f'{game_type} Spread Coverage Distribution for Overall NFL',
                    xaxis_title='Category',
                    yaxis_title='Percentage (%)',
                    barmode='group',
                    showlegend=True,
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
            else:
                fig = go.Figure()
                fig.update_layout(
                    title=f'No Data Available for {game_type}',
                    xaxis_title='Category',
                    yaxis_title='Percentage (%)',
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
        
        else:
            all_teams = pd.concat([filtered_df['team_home'], filtered_df['team_away']]).unique()
            all_teams = [team for team in sorted(all_teams) if team != selected_team]

            game_stats = []
            for opponent in all_teams:
                team_games = filtered_df[
                    ((filtered_df['team_home'] == selected_team) & (filtered_df['team_away'] == opponent)) |
                    ((filtered_df['team_home'] == opponent) & (filtered_df['team_away'] == selected_team))
                ]
                
                total_games = len(team_games)
                if total_games > 0:
                    spread_coverages = len(team_games[team_games['favorite_covered'] == True])
                    no_coverage = total_games - spread_coverages
                    
                    coverage_pct = spread_coverages / total_games * 100
                    no_coverage_pct = no_coverage / total_games * 100
                    
                    game_stats.append({
                        'team': opponent,
                        'total_games': total_games,
                        'coverage_pct': coverage_pct,
                        'no_coverage_pct': no_coverage_pct,
                        'spread_coverages': spread_coverages,
                        'no_coverage': no_coverage
                    })
            
            if not game_stats:
                fig = go.Figure()
                fig.update_layout(
                    title=f'No Data Available for {game_type}',
                    xaxis_title='Percentage (%)',
                    yaxis_title='Opponent Teams',
                    height=400,
                    margin=dict(l=100, r=50, t=50, b=50)
                )
            else:
                game_stats_sorted = sorted(game_stats, key=lambda x: x['total_games'], reverse=True)
                
                all_teams_sorted = [stats['team'] for stats in game_stats_sorted]
                coverage_percentages = [stats['coverage_pct'] for stats in game_stats_sorted]
                no_coverage_percentages = [stats['no_coverage_pct'] for stats in game_stats_sorted]
                total_games_list = [stats['total_games'] for stats in game_stats_sorted]
                spread_coverages_list = [stats['spread_coverages'] for stats in game_stats_sorted]
                no_coverage_list = [stats['no_coverage'] for stats in game_stats_sorted]
                
                no_coverage_hover_text = [f"Team: {selected_team} vs {team}<br>Not Covered: {no_cov}/{total} ({no_cov_pct:.1f}%)<br>Total Games: {total}" 
                                          for team, no_cov, total, no_cov_pct in zip(all_teams_sorted, no_coverage_list, total_games_list, no_coverage_percentages)]
                coverage_hover_text = [f"Team: {selected_team} vs {team}<br>Covered: {cov}/{total} ({cov_pct:.1f}%)<br>Total Games: {total}" 
                                       for team, cov, total, cov_pct in zip(all_teams_sorted, spread_coverages_list, total_games_list, coverage_percentages)]
                
                fig = go.Figure(data=[
                    go.Bar(name='Not Covered', y=all_teams_sorted, x=no_coverage_percentages,
                           marker_color='red', hoverinfo='text', hovertext=no_coverage_hover_text, orientation='h'),
                    go.Bar(name='Covered', y=all_teams_sorted, x=coverage_percentages,
                           marker_color='green', hoverinfo='text', hovertext=coverage_hover_text, orientation='h')
                ])
                
                annotations = []
                for i, (team, total) in enumerate(zip(all_teams_sorted, total_games_list)):
                    annotations.append(dict(
                        x=1.02,
                        y=team,
                        xref='paper',
                        yref='y',
                        text=str(total),
                        showarrow=False,
                        font=dict(size=10, color='black'),
                        align='left',
                        xanchor='left',
                        yanchor='middle'
                    ))
                
                annotations.append(dict(
                    x=1.06,
                    y=0.5,
                    xref='paper',
                    yref='paper',
                    text='# of Games',
                    showarrow=False,
                    font=dict(size=12, color='black', weight='bold'),
                    textangle=-90,
                    align='center',
                    xanchor='center',
                    yanchor='middle'
                ))
                
                axis_title_style = dict(
                    size=12,
                    color='black',
                    weight='bold'
                )
                
                fig.update_layout(
                    title=f'{game_type} Spread Coverage Distribution for {selected_team} vs All Other Teams',
                    xaxis_title=dict(text='Percentage(%)', font=axis_title_style),
                    yaxis_title=dict(text='Opponent Teams', font=axis_title_style),
                    barmode='relative',
                    xaxis={'range': [0, 100]},
                    yaxis={'autorange': 'reversed'},
                    showlegend=True,
                    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
                    hovermode='closest',
                    height=800,
                    margin=dict(l=200, r=100),
                    shapes=[dict(
                        type='line',
                        x0=50,
                        x1=50,
                        y0=0,
                        y1=1,
                        xref='x',
                        yref='paper',
                        line=dict(color='black', width=4)
                    )],
                    annotations=annotations,
                    font=dict(family="Arial, sans-serif")
                )

    return fig

if __name__ == '__main__':
    app.run_server(debug=True, port=8058)


## TYPES ON OUTCOMES

In [100]:
# Initialize the Dash app
app = dash.Dash(__name__)


# Layout of the app
app.layout = html.Div([
    html.H1("NFL Analysis Dashboard"),
    
    dcc.Dropdown(
        id='visualization-dropdown',
        options=[
            {'label': 'Over/Under', 'value': 'Over/Under'},
            {'label': 'Favorite', 'value': 'Favorite'},
            {'label': 'Spread Coverage', 'value': 'Spread Coverage'}
        ],
        value='Over/Under',
        clearable=False
    ),
    
    dcc.Dropdown(
        id='type-dropdown',
        options=[
            {'label': 'Weather Type', 'value': 'Weather Type'},
            {'label': 'Stadium Type', 'value': 'Stadium Type'}
        ],
        value='Weather Type',
        clearable=False
    ),
    
    dcc.RadioItems(
        id='playoff-toggle',
        options=[
            {'label': 'Regular Season', 'value': 'False'},
            {'label': 'Playoffs', 'value': 'True'}
        ],
        value='False',
        labelStyle={'display': 'inline-block', 'marginRight': 20}
    ),
    
    dcc.Graph(id='bar-chart')
])

@app.callback(
    Output('bar-chart', 'figure'),
    [Input('visualization-dropdown', 'value'),
     Input('type-dropdown', 'value'),
     Input('playoff-toggle', 'value')]
)
def update_graph(selected_visualization, selected_type, is_playoff):
    # Filter the DataFrame based on playoff status
    filtered_df = final_df[final_df['schedule_playoff'].astype(str) == is_playoff]
    
    game_type = "Playoff" if is_playoff == 'True' else "Regular Season"
    
    # Determine which column to use for the dropdown
    if selected_type == 'Weather Type':
        column = 'stadium_weather_type'
    else:
        column = 'stadium_type'
    
    type_options = filtered_df[column].unique()
    
    fig = go.Figure()

    if selected_visualization == 'Over/Under':
        type_counts = filtered_df.groupby([column, 'over_under_result']).size().unstack(fill_value=0)
        type_percentages = (type_counts.T / type_counts.sum(axis=1)).T * 100
        
        # Adding green (Over) on the right and red (Under) on the left
        if 'Over' in type_percentages.columns:
            fig.add_trace(go.Bar(
                y=type_percentages.index,
                x=type_percentages['Over'],
                name='Over',
                marker_color='green',
                orientation='h',
                text=[f'{x:,}' for x in type_counts['Over']],
                textposition='outside',
                hovertemplate=[
                    f'Over: {x:,}<br>Total: {type_counts.sum(axis=1).loc[y]:,}<extra></extra>' 
                    for x, y in zip(type_counts['Over'], type_percentages.index)
                ]
            ))
        if 'Under' in type_percentages.columns:
            fig.add_trace(go.Bar(
                y=type_percentages.index,
                x=type_percentages['Under'],
                name='Under',
                marker_color='red',
                orientation='h',
                text=[f'{x:,}' for x in type_counts['Under']],
                textposition='outside',
                hovertemplate=[
                    f'Under: {x:,}<br>Total: {type_counts.sum(axis=1).loc[y]:,}<extra></extra>' 
                    for x, y in zip(type_counts['Under'], type_percentages.index)
                ]
            ))
        
        fig.update_layout(
            title=f'{game_type} Over/Under Distribution by {selected_type}',
            xaxis_title='Percentage (%)',
            yaxis_title=selected_type,
            barmode='stack',
            showlegend=True,
            height=400,
            margin=dict(l=100, r=200, t=50, b=50),  # Adjusted space for the total number of games
            shapes=[
                dict(
                    type='line',
                    x0=50,
                    x1=50,
                    y0=0,
                    y1=1,
                    xref='x',
                    yref='paper',
                    line=dict(color='black', width=4)
                )
            ],
            annotations=[
                dict(
                    x=1.05,  # Position the annotation slightly to the right of the chart
                    y=0.5,
                    xref='paper',
                    yref='paper',
                    text=f'# of Games: {filtered_df.shape[0]}',
                    showarrow=False,
                    font=dict(size=14, color='black'),
                    align='left',
                    xanchor='left',
                    yanchor='middle'
                )
            ]
        )
    
    elif selected_visualization == 'Favorite':
        type_counts = filtered_df.groupby([column, 'favorite_won']).size().unstack(fill_value=0)
        type_percentages = (type_counts.T / type_counts.sum(axis=1)).T * 100
        
        # Adding green (Favorite Won) on the right and red (Favorite Lost) on the left
        if True in type_percentages.columns:
            fig.add_trace(go.Bar(
                y=type_percentages.index,
                x=type_percentages[True],
                name='Favorite Won',
                marker_color='green',
                orientation='h',
                text=[f'{x:,}' for x in type_counts[True]],
                textposition='outside',
                hovertemplate=[
                    f'Favorite Won: {x:,}<br>Total: {type_counts.sum(axis=1).loc[y]:,}<extra></extra>' 
                    for x, y in zip(type_counts[True], type_percentages.index)
                ]
            ))
        if False in type_percentages.columns:
            fig.add_trace(go.Bar(
                y=type_percentages.index,
                x=type_percentages[False],
                name='Favorite Lost',
                marker_color='red',
                orientation='h',
                text=[f'{x:,}' for x in type_counts[False]],
                textposition='outside',
                hovertemplate=[
                    f'Favorite Lost: {x:,}<br>Total: {type_counts.sum(axis=1).loc[y]:,}<extra></extra>' 
                    for x, y in zip(type_counts[False], type_percentages.index)
                ]
            ))
        
        fig.update_layout(
            title=f'{game_type} Favorite Wins Distribution by {selected_type}',
            xaxis_title='Percentage (%)',
            yaxis_title=selected_type,
            barmode='stack',
            showlegend=True,
            height=400,
            margin=dict(l=100, r=200, t=50, b=50),  # Adjusted space for the total number of games
            shapes=[
                dict(
                    type='line',
                    x0=50,
                    x1=50,
                    y0=0,
                    y1=1,
                    xref='x',
                    yref='paper',
                    line=dict(color='black', width=4)
                )
            ],
            annotations=[
                dict(
                    x=1.05,  # Position the annotation slightly to the right of the chart
                    y=0.5,
                    xref='paper',
                    yref='paper',
                    text=f'# of Games: {filtered_df.shape[0]}',
                    showarrow=False,
                    font=dict(size=14, color='black'),
                    align='left',
                    xanchor='left',
                    yanchor='middle'
                )
            ]
        )
    
    elif selected_visualization == 'Spread Coverage':
        type_counts = filtered_df.groupby([column, 'favorite_covered']).size().unstack(fill_value=0)
        type_percentages = (type_counts.T / type_counts.sum(axis=1)).T * 100
        
        # Adding green (Covered) on the right and red (Not Covered) on the left
        if True in type_percentages.columns:
            fig.add_trace(go.Bar(
                y=type_percentages.index,
                x=type_percentages[True],
                name='Covered',
                marker_color='green',
                orientation='h',
                text=[f'{x:,}' for x in type_counts[True]],
                textposition='outside',
                hovertemplate=[
                    f'Covered: {x:,}<br>Total: {type_counts.sum(axis=1).loc[y]:,}<extra></extra>' 
                    for x, y in zip(type_counts[True], type_percentages.index)
                ]
            ))
        if False in type_percentages.columns:
            fig.add_trace(go.Bar(
                y=type_percentages.index,
                x=type_percentages[False],
                name='Not Covered',
                marker_color='red',
                orientation='h',
                text=[f'{x:,}' for x in type_counts[False]],
                textposition='outside',
                hovertemplate=[
                    f'Not Covered: {x:,}<br>Total: {type_counts.sum(axis=1).loc[y]:,}<extra></extra>' 
                    for x, y in zip(type_counts[False], type_percentages.index)
                ]
            ))
        
        fig.update_layout(
            title=f'{game_type} Spread Coverage Distribution by {selected_type}',
            xaxis_title='Percentage (%)',
            yaxis_title=selected_type,
            barmode='stack',
            showlegend=True,
            height=400,
            margin=dict(l=100, r=200, t=50, b=50),  # Adjusted space for the total number of games
            shapes=[
                dict(
                    type='line',
                    x0=50,
                    x1=50,
                    y0=0,
                    y1=1,
                    xref='x',
                    yref='paper',
                    line=dict(color='black', width=4)
                )
            ],
            annotations=[
                dict(
                    x=1.05,  # Position the annotation slightly to the right of the chart
                    y=0.5,
                    xref='paper',
                    yref='paper',
                    text=f'# of Games: {filtered_df.shape[0]}',
                    showarrow=False,
                    font=dict(size=14, color='black'),
                    align='left',
                    xanchor='left',
                    yanchor='middle'
                )
            ]
        )

    return fig

if __name__ == '__main__':
    app.run_server(debug=True, port=8058)
