In [348]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import random
import seaborn as sns
import math
import re
%matplotlib inline
pd.set_option('display.max_columns', 100)

#### Import Gambling Statistics from ...

In [349]:
# Load the game-level table (scores, spread, over/under line, stadium, schedule) from the local CSV.
df = pd.read_csv('data/spreadspoke_scores.csv')

# General Cleaning

#### Drop all rows that do not contain an Over/Under value

In [350]:
# Keep only the games for which an over/under line was recorded.
df = df[df['over_under_line'].notna()]

#### Convert the "Game Spread" to a positive number

In [351]:
# Store the spread as a magnitude (sign removed).
df['spread_favorite'] = df['spread_favorite'].abs()

#### Add "Total" column for the Total Amount of Points Scored in a given game. Using Away Points Score  + Home Points Score

In [352]:
# Combined points scored by both teams, stored as an integer column.
df = df.assign(total=(df['score_home'] + df['score_away']).astype('int'))

#### Drop all rows whose Over/Under line is blank (a single space), then convert the line to a number

In [353]:
# Discard rows whose line is the single-space placeholder, then parse the rest as floats.
df = df.loc[df['over_under_line'].ne(' ')].astype({'over_under_line': 'float'})

#### Create the Target Variable, Over/Under Result Column
* ##### 'Over' if the Total is greater than the Over/Under Line 
* ##### 'Under' if the Total is less than the Over/Under Line

In [354]:
# Target variable: compare the points actually scored with the betting line.
# The comparison is strictly-greater, so a game that lands exactly on the line
# (total == over_under_line) is labelled 'Under'.
# NOTE(review): confirm whether such ties should be labelled separately or dropped.
df['over_under_result'] = np.where(df['total'] > df['over_under_line'], 'Over', 'Under')

#### Update the 'Washington Redskins' team name to 'Washington Football Team'

In [355]:
# Apply the franchise's current name on both sides of the matchup.
for team_column in ('team_home', 'team_away'):
    df[team_column] = df[team_column].map(
        lambda team: 'Washington Football Team' if team == 'Washington Redskins' else team
    )

#### Correct all Alternative Names of Stadiums

In [356]:
# Carry the 'stadium' column forward as 'stadium_name' (the new column lands at the end of the frame).
df = df.assign(stadium_name=df['stadium']).drop(columns=['stadium'])

# Names that are all mapped to 'Hard Rock Stadium'
hard_rock_stadium_alternates = ['Joe Robbie Stadium','Pro Player Stadium','Dolphin Stadium']

# Single lookup table: name found in the data -> name used from here on
stadium_renames = {old_name: 'Hard Rock Stadium' for old_name in hard_rock_stadium_alternates}
stadium_renames.update({
    'Tampa Stadium': 'Raymond James Stadium',
    'Alltel Stadium': 'TIAA Bank Field',
    'Jack Murphy Stadium': 'Qualcomm Stadium',
})

# Names that are not in the table are left untouched.
df['stadium_name'] = df['stadium_name'].map(lambda name: stadium_renames.get(name, name))

#### Import Stadium Data from .....

In [357]:
# Load the per-stadium attributes.
stadium_df = pd.read_csv('data/nfl_stadiums.csv')

# Attach them to every game by stadium name (games without a match keep NaN),
# then discard the two columns that are not used afterwards.
df = (
    df.merge(stadium_df, how='left', on='stadium_name')
      .drop(columns=['stadium_close', 'NAME'])
)

#### Get Stadium Locations

In [358]:
# Manually fill in the location of two stadiums by name.
df['stadium_location'] = np.where(df['stadium_name'] == 'FedEx Field', 'Landover, MD',df['stadium_location'])
df['stadium_location'] = np.where(df['stadium_name'] == 'TIAA Bank Field', 'Jacksonville, FL',df['stadium_location'])

# Drop every game whose stadium location is still unknown.
df = df.dropna(subset=['stadium_location'])

# Drop Stadiums outside the U.S.
# These are hard-coded row labels of the frame produced by the stadium merge; `drop`
# raises KeyError if any label is no longer present.
# NOTE(review): the labels depend on the exact contents/order of the input CSVs — consider
# selecting these games by stadium name or location instead.
df = df.drop([6168,9219,9484,10021,7118,7366,7576,7828,8203,8437,9158,9424,9435])

#### Drop Games Played in 1978 or Earlier (keep the 1979 season onward)

In [359]:
# Strictly greater than 1978: the 1978 season itself is excluded.
df = df.loc[df['schedule_season'].gt(1978)]

#### Correct the Year Each Stadium Opened

In [363]:
df[df['stadium_open'].isna()].stadium_name.value_counts()

Series([], Name: stadium_name, dtype: int64)

In [364]:
# Opening years set by hand, keyed by stadium name; they take precedence over the merged value.
stadium_open_overrides = {
    'FedEx Field': 1997,
    'TIAA Bank Field': 1994,
    'Rose Bowl': 1921,
    'Alamo Dome': 1993,
    'Stanford Stadium': 1921,
}
patched_open_year = df['stadium_name'].map(stadium_open_overrides).fillna(df['stadium_open'])

# Write the column back as integers.
df = df.assign(stadium_open=patched_open_year).astype({'stadium_open': 'int'})

#### Fix the Type of Stadium

In [365]:
# Stadiums whose type is set to 'outdoor' by hand.
outdoor_stadiums = ['FedEx Field', 'TIAA Bank Field', 'Stanford Stadium']
df.loc[df['stadium_name'].isin(outdoor_stadiums), 'stadium_type'] = 'outdoor'

#### Get the Zip Codes for all Stadiums

In [366]:
# Zip code = last five characters of the stadium address (a string).
df['zipcode'] = df['stadium_address'].str[-5:]

# Zip codes supplied by hand, keyed by stadium name. They are written as strings, like the
# values parsed from the address, so the column holds a single type: the notebook later
# compares this column against string literals, and a string keeps any leading zero.
zipcode_overrides = {
    'Rose Bowl': '91103',
    'Stanford Stadium': '94305',
    'FedEx Field': '20785',
    'TIAA Bank Field': '32202',
    'Mercedes-Benz Stadium': '30313',
    'SoFi Stadium': '90301',
    'Allegiant Stadium': '89118',
}
# A manual value wins over the parsed one; every other stadium keeps the parsed value.
df['zipcode'] = df['stadium_name'].map(zipcode_overrides).fillna(df['zipcode'])

# Drop Stadiums outside the U.S.
df = df[df.stadium_name != 'Wembley Stadium']

#### Fix the Surface Type for every Stadium

In [368]:
# Surface labels that count as turf. Every other label (including
# 'Grass, Turf (1971-1974)' and missing values) ends up as 'Grass'.
turf_surface_labels = ['FieldTurf', 'Hellas Matrix Turf']

# Stadiums that are treated as turf regardless of the label in the data
turf_stadiums = ['Giants Stadium','Texas Stadium','Hubert H. Humphrey Metrodome','RCA Dome','Veterans Stadium',
                'Foxboro Stadium','Pontiac Silverdome','Three Rivers Stadium','Edward Jones Dome','Cinergy Field',
                'Seattle Kingdome','Houston Astrodome','Busch Memorial Stadium','Mall of America Field',
                'Husky Stadium',]

# A game is on turf if either its surface label or its stadium says so.
played_on_turf = (
    df['stadium_surface'].isin(turf_surface_labels)
    | df['stadium_name'].isin(turf_stadiums)
)

# Two-valued result: 'FieldTurf' or 'Grass'
df['stadium_surface'] = np.where(played_on_turf, 'FieldTurf', 'Grass')

In [369]:
df[df['stadium_surface'] == 'FieldTurf']['stadium_name'].unique()

array(['Ralph Wilson Stadium', 'Louisiana Superdome', 'Giants Stadium',
       'Veterans Stadium', 'Seattle Kingdome', 'Busch Memorial Stadium',
       'Foxboro Stadium', 'Pontiac Silverdome', 'Three Rivers Stadium',
       'Cinergy Field', 'Texas Stadium', 'Houston Astrodome',
       'Hubert H. Humphrey Metrodome', 'RCA Dome', 'Georgia Dome',
       'Husky Stadium', 'Edward Jones Dome', 'M&T Bank Stadium',
       'Paul Brown Stadium', 'Gillette Stadium', 'CenturyLink Field',
       'Ford Field', 'Alamo Dome', 'Lucas Oil Stadium', 'Cowboys Stadium',
       'Mall of America Field', 'MetLife Stadium',
       'Mercedes-Benz Superdome', 'New Era Field', 'U.S. Bank Stadium',
       'AT&T Stadium', 'Mercedes-Benz Stadium', 'SoFi Stadium'],
      dtype=object)

#### Import Stadium Capacities

In [385]:
# Seating capacities entered by hand, keyed by stadium name.
# A value listed here takes precedence over whatever the stadium CSV provided.
stadium_capacity_overrides = {
    'Giants Stadium': 80242,
    'Candlestick Park': 69732,
    'Sun Life Stadium': 64767,
    'Texas Stadium': 65675,
    'Hubert H. Humphrey Metrodome': 64121,
    'RCA Dome': 60567,
    'Veterans Stadium': 65352,
    'FedEx Field': 82000,
    'Foxboro Stadium': 60292,
    'Pontiac Silverdome': 80311,
    'Mile High Stadium': 75000,
    'Three Rivers Stadium': 59000,
    'Edward Jones Dome': 67277,
    'Cinergy Field': 59754,
    'Seattle Kingdome': 66000,
    "Houlihan's Stadium": 50000,
    'Houston Astrodome': 65000,
    'RFK Memorial Stadium': 45596,
    'Cleveland Municipal Stadium': 81000,
    'Anaheim Stadium': 69008,
    'Atlanta-Fulton County Stadium': 60606,
    'Busch Memorial Stadium': 60000,
    'Orange Bowl': 75000,
    'Memorial Stadium (Baltimore)': 50000,
    'Sun Devil Stadium': 53599,
    'Mall of America Field': 64121,
    'Metropolitan Stadium': 41200,
    'Wembley Stadium': 86000,
    'Husky Stadium': 70000,
    'TCF Bank Stadium': 50805,
    'TIAA Bank Field': 67814,
    'Memorial Stadium (Champaign)': 60670,
    'Memorial Stadium (Clemson)': 74000,
    'Liberty Bowl Memorial Stadium': 58325,
    'Vanderbilt Stadium': 40550,
    'Rose Bowl': 92542,
    'Tiger Stadium (LSU)': 100000,
    'Tulane Stadium': 70000,
    'Stanford Stadium': 50000,
    'Rice Stadium': 47000,
}

# Capacities that came from the CSV may carry thousands separators ("73,967"):
# strip the commas and parse them as numbers. Unparseable or missing values become NaN.
capacity_from_csv = pd.to_numeric(
    df['stadium_capacity'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)

# Manual value first, CSV value otherwise.
stadium_capacity = df['stadium_name'].map(stadium_capacity_overrides).fillna(capacity_from_csv)

# The integer conversion below cannot represent a missing value; fail with the list of
# stadiums that still need a capacity instead of an opaque casting error.
stadiums_without_capacity = df.loc[stadium_capacity.isna(), 'stadium_name'].unique()
if len(stadiums_without_capacity) > 0:
    raise ValueError(
        'No capacity available for: ' + ', '.join(str(name) for name in stadiums_without_capacity)
    )

# Stadium Capacity to Integer
df['stadium_capacity'] = stadium_capacity.astype('int')

In [386]:
#df[df['stadium_capacity'].isna()].stadium_name.value_counts()

#### Complete Column for which Week of a Season a given game was played

In [378]:
# Weeks 1-17 are regular-season games; playoff rounds are numbered from 18 upward.
# Both spellings found in the data are listed for the wildcard round and the Super Bowl.
playoff_week_numbers = {
    'Wildcard': 18,    # wildcard round
    'WildCard': 18,
    'Division': 19,    # divisional round
    'Conference': 20,  # conference championship
    'Superbowl': 21,   # Super Bowl
    'SuperBowl': 21,
}

# Swap the playoff labels for their week number, leave every other value as it is,
# then convert the whole column to integers.
df['schedule_week'] = df['schedule_week'].map(
    lambda week: playoff_week_numbers.get(week, week)
)
df = df.astype({'schedule_week': 'int'})

#### Get the Day of the Week a game was played 

In [382]:
import datetime

# Parse the schedule date column into pandas timestamps.
game_dates = pd.to_datetime(df['schedule_date'])
df['schedule_date'] = game_dates

# Day of the week the game was played, as an integer: Monday = 0 ... Sunday = 6
df['weekday'] = game_dates.dt.weekday

### Getting Distance Between Home & Away Team

#### Start with creating a column for Away Team Zipcodes

In [383]:
# A team's "home" zip code is read from games played in weeks 1-3 at a non-neutral stadium.
early = df[(df.schedule_week < 4)&(df.stadium_neutral == False)]

# Team -> zip code lookup, relabelled so it can be joined onto the away side of each game.
# drop_duplicates keeps the first row per team in the frame's current order, so every team
# gets exactly ONE zip code for the whole dataset (not one per season).
zip_df = (
    early[['team_home', 'zipcode']]
    .rename(columns={'team_home': 'team_away', 'zipcode': 'zipcode_away'})
    .drop_duplicates(subset='team_away')
)

## Merge Away Zip Codes to Original Dataset ##
# The join key is named explicitly instead of being inferred from shared column names.
df = df.merge(zip_df, on='team_away', how='left')


#### Create a Function to get distance between Two Zip Codes

In [384]:
## Install The Needed Packages ##
#!pip install uszipcode
#!pip install mpu

from uszipcode import SearchEngine
import mpu

# According to the original note, the zip code reader does not return zip code 85287
# (Tempe, AZ), so 85054 (described there as Scottsdale, AZ) is substituted in both the
# home and the away column. Only entries equal to the string '85287' are replaced.
df['zipcode'] = np.where(df['zipcode'] == '85287','85054',df['zipcode'])
df['zipcode_away'] = np.where(df['zipcode_away'] == '85287','85054',df['zipcode_away'])

# Instantiate Zip Code Reader
# NOTE(review): the `simple_zipcode` keyword depends on the installed uszipcode version —
# confirm it is still accepted before upgrading the package.
search = SearchEngine(simple_zipcode=True)

# Coordinates that have already been looked up, keyed by the zip code exactly as passed in.
# The same few dozen zip codes recur on every row, so each one is queried only once.
_zip_coord_cache = {}


def _zip_coords(zipcode):
    """Return the (latitude, longitude) pair stored for `zipcode` by the zip code reader.

    Raises:
        ValueError: if the reader has no record, or no coordinates, for `zipcode`.
    """
    if zipcode not in _zip_coord_cache:
        record = search.by_zipcode(zipcode)
        latitude = getattr(record, 'lat', None)
        longitude = getattr(record, 'lng', None)
        # An unknown zip code yields no usable coordinates; stop here with a clear
        # message rather than letting the distance computation fail on None.
        if latitude is None or longitude is None:
            raise ValueError('No coordinates found for zip code %r' % (zipcode,))
        _zip_coord_cache[zipcode] = (latitude, longitude)
    return _zip_coord_cache[zipcode]


# Define Function that Takes two Zip Codes and Returns Distance between the Zip Codes
def get_dist(zipcode_1,zipcode_2):
    """Return the haversine distance between two zip codes.

    Args:
        zipcode_1: first zip code, passed unchanged to `search.by_zipcode`.
        zipcode_2: second zip code, passed unchanged to `search.by_zipcode`.

    Returns:
        The value of `mpu.haversine_distance` for the two (lat, lng) pairs
        (kilometres, per mpu's documentation).

    Raises:
        ValueError: if either zip code has no coordinates.
    """
    return mpu.haversine_distance(_zip_coords(zipcode_1), _zip_coords(zipcode_2))

# Distance between the stadium's zip code and the away team's zip code, one value per game.
df['dist_diff'] = [
    get_dist(home_zip, away_zip)
    for home_zip, away_zip in zip(df['zipcode'], df['zipcode_away'])
]

### Add Team Data

#### Import Data from ....

In [28]:
# Load the per-team, per-season table; the cells below read its 'Tm', 'Season_Year',
# 'W', 'L', 'T', 'PF' and 'PA' columns.
team_df = pd.read_csv('data/teams_df.csv')

#### Change 'Washington Redskins' to 'Washington Football Team' in the team data

In [None]:
# Use the same franchise name as the games table so the later joins match.
team_df['Tm'] = team_df['Tm'].map(
    lambda team: 'Washington Football Team' if team == 'Washington Redskins' else team
)

In [29]:
# Independent copies: plain assignment would make both names refer to the same object as
# `team_df`, so columns added for one side would silently appear on the other side and on
# `team_df` itself.
home_team_df = team_df.copy()
away_team_df = team_df.copy()

In [30]:
# Get Home Team Season Data & Merge to Final Dataframe
# Stats recorded for `Season_Year` are keyed to `schedule_season = Season_Year + 1`, so
# every game is joined to the team's numbers from the season before it was played.
# All columns are derived on new frames, so the frame `home_team_df` started from is left
# unmodified.

# Games played = wins + losses + ties (a missing tie count is treated as 0).
home_games_played = home_team_df['W'] + home_team_df['L'] + home_team_df['T'].fillna(0)

home_team_df = (
    home_team_df
    .assign(
        schedule_season=home_team_df['Season_Year'] + 1,
        PF_per_game=home_team_df['PF'] / home_games_played,
        PA_per_game=home_team_df['PA'] / home_games_played,
    )
    .drop(columns=['Season_Year', 'W', 'L', 'T', 'PF', 'PA'])
    # Prefix every stat with 'home_', then restore the names of the two join keys.
    .add_prefix('home_')
    .rename(columns={'home_Tm': 'team_home', 'home_schedule_season': 'schedule_season'})
)

# Join keys are spelled out instead of being inferred from shared column names.
final_df = df.merge(home_team_df, on=['team_home', 'schedule_season'], how='left')

In [31]:
# Get Away Team Season Data & Merge to Final Dataframe
# Stats recorded for `Season_Year` are keyed to `schedule_season = Season_Year + 1`, so
# every game is joined to the team's numbers from the season before it was played.
# All columns are derived on new frames, so the frame `away_team_df` started from is left
# unmodified.

# Games played = wins + losses + ties (a missing tie count is treated as 0).
away_games_played = away_team_df['W'] + away_team_df['L'] + away_team_df['T'].fillna(0)

away_team_df = (
    away_team_df
    .assign(
        schedule_season=away_team_df['Season_Year'] + 1,
        PF_per_game=away_team_df['PF'] / away_games_played,
        PA_per_game=away_team_df['PA'] / away_games_played,
    )
    .drop(columns=['Season_Year', 'W', 'L', 'T', 'PF', 'PA'])
    # Prefix every stat with 'away_', then restore the names of the two join keys.
    .add_prefix('away_')
    .rename(columns={'away_Tm': 'team_away', 'away_schedule_season': 'schedule_season'})
)

# Join keys are spelled out instead of being inferred from shared column names.
final_df = final_df.merge(away_team_df, on=['team_away', 'schedule_season'], how='left')

In [32]:
#### Will Amend if there is Time #### Fix Outdate Team Names

#final_df[final_df['home_W-L%'].isna()].team_home.value_counts()

# Keep only the games where team stats were matched for both the home and the away side.
final_df = final_df.dropna(subset=['home_W-L%', 'away_W-L%'])

# Add Superbowl Odds

In [33]:
# Load the Super Bowl odds table.
superbowl_odds_df = pd.read_csv('data/superbowl_odds.csv')

# Fix Outdated Team Names so they match the games table
superbowl_odds_df['Tm'] = superbowl_odds_df['Tm'].map(
    lambda team: 'Washington Football Team' if team == 'Washington Redskins' else team
)



In [34]:
# Get Superbowls Odds for Home and Away Teams
# Independent copies: the cells below relabel the columns of each frame, and with plain
# assignment both names (and `superbowl_odds_df`) would refer to one shared object.

home_superbowl_odds_df = superbowl_odds_df.copy()
away_superbowl_odds_df = superbowl_odds_df.copy()

In [35]:
# Get Home Superbowl Odds Data & Merge to Final Dataframe

# Relabel a private copy so no other name bound to the same frame is affected.
# The new labels are applied by position: the frame must have exactly three columns
# (team, odds, season — NOTE(review): confirm this matches the CSV's column order).
home_superbowl_odds_df = home_superbowl_odds_df.copy()
home_superbowl_odds_df.columns = ['team_home', 'home_superbowl_odds','schedule_season']

# Join keys are spelled out instead of being inferred from shared column names.
final_df = final_df.merge(home_superbowl_odds_df, on=['team_home', 'schedule_season'], how='left')

In [36]:
# Get Away Superbowl Odds Data & Merge to Final Dataframe

# Relabel a private copy so no other name bound to the same frame is affected.
# The new labels are applied by position: the frame must have exactly three columns
# (team, odds, season — NOTE(review): confirm this matches the CSV's column order).
away_superbowl_odds_df = away_superbowl_odds_df.copy()
away_superbowl_odds_df.columns = ['team_away', 'away_superbowl_odds','schedule_season']

# Join keys are spelled out instead of being inferred from shared column names.
final_df = final_df.merge(away_superbowl_odds_df, on=['team_away', 'schedule_season'], how='left')

In [37]:
#### Will Amend if there is Time, For now drop 2020 data

# Keep a game only when BOTH teams have Super Bowl odds, matching the way unmatched
# 'home_W-L%' / 'away_W-L%' rows are removed. Checking the home column alone would let
# rows with a missing 'away_superbowl_odds' through to the saved dataset.
final_df = final_df.dropna(subset=['home_superbowl_odds', 'away_superbowl_odds'])

In [38]:
final_df

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total,over_under_result,stadium_name,stadium_location,stadium_open,stadium_type,stadium_address,stadium_weather_station_code,stadium_weather_type,stadium_capacity,stadium_surface,STATION,LATITUDE,LONGITUDE,ELEVATION,zipcode,zipcode_away,dist_diff,weekday,home_W-L%,home_PD,home_MoV,home_SoS,home_SRS,home_OSRS,home_DSRS,home_PF_per_game,home_PA_per_game,away_W-L%,away_PD,away_MoV,away_SoS,away_SRS,away_OSRS,away_DSRS,away_PF_per_game,away_PA_per_game,home_superbowl_odds,away_superbowl_odds
0,1979-09-01,1979,1,False,Tampa Bay Buccaneers,31.0,16.0,Detroit Lions,TB,3.0,30.0,False,79.0,9.0,87,,47,Over,Houlihan's Stadium,"Tampa, FL",1969,outdoor,"4201 North Dale Mabry Highway, Tampa, Florida ...",33607,warm,50000,Grass,USW00012842,27.96194,-82.5403,5.8,33607,48342,1633.703354,5,0.313,-18.0,-1.1,-2.2,-3.3,-4.1,0.8,15.0625,16.1875,0.438,-10.0,-0.6,-1.2,-1.8,-0.1,-1.7,18.1250,18.7500,25000.0,3500.0
1,1979-09-02,1979,1,False,Buffalo Bills,7.0,9.0,Miami Dolphins,MIA,5.0,39.0,False,74.0,15.0,74,,16,Under,Ralph Wilson Stadium,"Orchard Park, NY",1973,outdoor,"1 Bills Dr, Orchard Park, NY 14127",14127,cold,73967,FieldTurf,US1NYER0093,42.88900,-78.8901,178.0,14127,33125,1886.643589,6,0.313,-52.0,-3.3,0.0,-3.3,-0.1,-3.2,18.8750,22.1250,0.688,118.0,7.4,0.2,7.6,4.2,3.3,23.2500,15.8750,30000.0,900.0
2,1979-09-02,1979,1,False,Chicago Bears,6.0,3.0,Green Bay Packers,CHI,3.0,31.0,False,78.0,11.0,68,,9,Under,Soldier Field,"Chicago, IL",1926,outdoor,"1410 Museum Campus Dr, Chicago, IL 60605",60605,cold,61500,Grass,USC00111550,41.85580,-87.6094,177.7,60605,54304,293.605345,6,0.438,-21.0,-1.3,-1.1,-2.4,-2.7,0.3,15.8125,17.1250,0.531,-20.0,-1.3,0.7,-0.6,-2.2,1.6,15.5625,16.8125,4000.0,10000.0
3,1979-09-02,1979,1,False,Denver Broncos,10.0,0.0,Cincinnati Bengals,DEN,3.0,31.5,False,69.0,6.0,38,,10,Under,Mile High Stadium,"Denver, CO",1960,outdoor,"1701 Bryant St, Denver, CO 80204",80204,cold,75000,Grass,USW00023062,39.76330,-104.8694,1611.2,80204,45202,1760.923990,6,0.625,84.0,5.3,-0.2,5.0,-1.9,6.9,17.6250,12.3750,0.250,-32.0,-2.0,0.7,-1.3,-2.7,1.3,15.7500,17.7500,1200.0,2500.0
4,1979-09-02,1979,1,False,Kansas City Chiefs,14.0,0.0,Baltimore Colts,KC,1.0,37.0,False,76.0,8.0,71,,14,Under,Arrowhead Stadium,"Kansas City, MO",1972,outdoor,"1 Arrowhead Dr, Kansas City, MO 64129",64129,cold,76416,Grass,US1MOJC0028,39.06920,-94.4871,264.9,64129,21218,1540.452209,6,0.250,-84.0,-5.3,0.9,-4.4,-3.0,-1.4,15.1875,20.4375,0.313,-182.0,-11.4,2.6,-8.8,-2.8,-6.0,14.9375,26.3125,75000.0,3500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9840,2020-01-12,2019,19,True,Green Bay Packers,28.0,23.0,Seattle Seahawks,GB,4.5,45.5,False,,,,,51,Over,Lambeau Field,"Green Bay, WI",1957,outdoor,"1265 Lombardi Ave, Green Bay, WI 54304",54304,cold,80735,Grass,USW00014898,44.47940,-88.1366,209.4,54304,98104,2644.782031,6,0.406,-24.0,-1.5,-1.2,-2.7,0.0,-2.7,23.5000,25.0000,0.625,81.0,5.1,-0.6,4.5,3.0,1.5,26.7500,21.6875,2000.0,2000.0
9841,2020-01-12,2019,19,True,Kansas City Chiefs,51.0,31.0,Houston Texans,KC,10.0,50.5,False,,,,,82,Over,Arrowhead Stadium,"Kansas City, MO",1972,outdoor,"1 Arrowhead Dr, Kansas City, MO 64129",64129,cold,76416,Grass,US1MOJC0028,39.06920,-94.4871,264.9,64129,77054,1045.221204,6,0.750,144.0,9.0,-0.1,8.9,12.6,-3.8,35.3125,26.3125,0.688,86.0,5.4,-1.5,3.8,2.4,1.4,25.1250,19.7500,600.0,5000.0
9842,2020-01-19,2019,20,True,Kansas City Chiefs,35.0,24.0,Tennessee Titans,KC,7.0,51.0,False,,,,,59,Over,Arrowhead Stadium,"Kansas City, MO",1972,outdoor,"1 Arrowhead Dr, Kansas City, MO 64129",64129,cold,76416,Grass,US1MOJC0028,39.06920,-94.4871,264.9,64129,37213,752.899963,6,0.750,144.0,9.0,-0.1,8.9,12.6,-3.8,35.3125,26.3125,0.563,7.0,0.4,-0.2,0.2,-3.2,3.5,19.3750,18.9375,600.0,8000.0
9843,2020-01-19,2019,20,True,San Francisco 49ers,37.0,20.0,Green Bay Packers,SF,8.0,46.5,False,,,,,57,Over,Levi's Stadium,"Santa Clara, CA",2014,outdoor,"4900 Marie P. DeBartolo Way, Santa Clara, CA 9...",95054,moderate,68500,Grass,USW00023234,37.61970,-122.3647,2.4,95054,54304,2930.347417,6,0.250,-93.0,-5.8,0.3,-5.5,-2.5,-3.1,21.3750,27.1875,0.406,-24.0,-1.5,-1.2,-2.7,0.0,-2.7,23.5000,25.0000,4000.0,2000.0


# Get Differences of Team Stats

In [39]:
# Absolute home-vs-away gap for each team statistic, stored as 'diff_<stat>'.
# 'DSRS' is not in this list, so no 'diff_DSRS' column is created.
compared_stats = ['W-L%', 'PD', 'MoV', 'SoS', 'SRS', 'OSRS',
                  'PF_per_game', 'PA_per_game', 'superbowl_odds']

for stat in compared_stats:
    home_minus_away = final_df['home_' + stat] - final_df['away_' + stat]
    final_df['diff_' + stat] = home_minus_away.abs()

In [40]:
final_df

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,total,over_under_result,stadium_name,stadium_location,stadium_open,stadium_type,stadium_address,stadium_weather_station_code,stadium_weather_type,stadium_capacity,stadium_surface,STATION,LATITUDE,LONGITUDE,ELEVATION,zipcode,zipcode_away,dist_diff,weekday,home_W-L%,home_PD,home_MoV,home_SoS,home_SRS,home_OSRS,home_DSRS,home_PF_per_game,home_PA_per_game,away_W-L%,away_PD,away_MoV,away_SoS,away_SRS,away_OSRS,away_DSRS,away_PF_per_game,away_PA_per_game,home_superbowl_odds,away_superbowl_odds,diff_W-L%,diff_PD,diff_MoV,diff_SoS,diff_SRS,diff_OSRS,diff_PF_per_game,diff_PA_per_game,diff_superbowl_odds
0,1979-09-01,1979,1,False,Tampa Bay Buccaneers,31.0,16.0,Detroit Lions,TB,3.0,30.0,False,79.0,9.0,87,,47,Over,Houlihan's Stadium,"Tampa, FL",1969,outdoor,"4201 North Dale Mabry Highway, Tampa, Florida ...",33607,warm,50000,Grass,USW00012842,27.96194,-82.5403,5.8,33607,48342,1633.703354,5,0.313,-18.0,-1.1,-2.2,-3.3,-4.1,0.8,15.0625,16.1875,0.438,-10.0,-0.6,-1.2,-1.8,-0.1,-1.7,18.1250,18.7500,25000.0,3500.0,0.125,8.0,0.5,1.0,1.5,4.0,3.0625,2.5625,21500.0
1,1979-09-02,1979,1,False,Buffalo Bills,7.0,9.0,Miami Dolphins,MIA,5.0,39.0,False,74.0,15.0,74,,16,Under,Ralph Wilson Stadium,"Orchard Park, NY",1973,outdoor,"1 Bills Dr, Orchard Park, NY 14127",14127,cold,73967,FieldTurf,US1NYER0093,42.88900,-78.8901,178.0,14127,33125,1886.643589,6,0.313,-52.0,-3.3,0.0,-3.3,-0.1,-3.2,18.8750,22.1250,0.688,118.0,7.4,0.2,7.6,4.2,3.3,23.2500,15.8750,30000.0,900.0,0.375,170.0,10.7,0.2,10.9,4.3,4.3750,6.2500,29100.0
2,1979-09-02,1979,1,False,Chicago Bears,6.0,3.0,Green Bay Packers,CHI,3.0,31.0,False,78.0,11.0,68,,9,Under,Soldier Field,"Chicago, IL",1926,outdoor,"1410 Museum Campus Dr, Chicago, IL 60605",60605,cold,61500,Grass,USC00111550,41.85580,-87.6094,177.7,60605,54304,293.605345,6,0.438,-21.0,-1.3,-1.1,-2.4,-2.7,0.3,15.8125,17.1250,0.531,-20.0,-1.3,0.7,-0.6,-2.2,1.6,15.5625,16.8125,4000.0,10000.0,0.093,1.0,0.0,1.8,1.8,0.5,0.2500,0.3125,6000.0
3,1979-09-02,1979,1,False,Denver Broncos,10.0,0.0,Cincinnati Bengals,DEN,3.0,31.5,False,69.0,6.0,38,,10,Under,Mile High Stadium,"Denver, CO",1960,outdoor,"1701 Bryant St, Denver, CO 80204",80204,cold,75000,Grass,USW00023062,39.76330,-104.8694,1611.2,80204,45202,1760.923990,6,0.625,84.0,5.3,-0.2,5.0,-1.9,6.9,17.6250,12.3750,0.250,-32.0,-2.0,0.7,-1.3,-2.7,1.3,15.7500,17.7500,1200.0,2500.0,0.375,116.0,7.3,0.9,6.3,0.8,1.8750,5.3750,1300.0
4,1979-09-02,1979,1,False,Kansas City Chiefs,14.0,0.0,Baltimore Colts,KC,1.0,37.0,False,76.0,8.0,71,,14,Under,Arrowhead Stadium,"Kansas City, MO",1972,outdoor,"1 Arrowhead Dr, Kansas City, MO 64129",64129,cold,76416,Grass,US1MOJC0028,39.06920,-94.4871,264.9,64129,21218,1540.452209,6,0.250,-84.0,-5.3,0.9,-4.4,-3.0,-1.4,15.1875,20.4375,0.313,-182.0,-11.4,2.6,-8.8,-2.8,-6.0,14.9375,26.3125,75000.0,3500.0,0.063,98.0,6.1,1.7,4.4,0.2,0.2500,5.8750,71500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9840,2020-01-12,2019,19,True,Green Bay Packers,28.0,23.0,Seattle Seahawks,GB,4.5,45.5,False,,,,,51,Over,Lambeau Field,"Green Bay, WI",1957,outdoor,"1265 Lombardi Ave, Green Bay, WI 54304",54304,cold,80735,Grass,USW00014898,44.47940,-88.1366,209.4,54304,98104,2644.782031,6,0.406,-24.0,-1.5,-1.2,-2.7,0.0,-2.7,23.5000,25.0000,0.625,81.0,5.1,-0.6,4.5,3.0,1.5,26.7500,21.6875,2000.0,2000.0,0.219,105.0,6.6,0.6,7.2,3.0,3.2500,3.3125,0.0
9841,2020-01-12,2019,19,True,Kansas City Chiefs,51.0,31.0,Houston Texans,KC,10.0,50.5,False,,,,,82,Over,Arrowhead Stadium,"Kansas City, MO",1972,outdoor,"1 Arrowhead Dr, Kansas City, MO 64129",64129,cold,76416,Grass,US1MOJC0028,39.06920,-94.4871,264.9,64129,77054,1045.221204,6,0.750,144.0,9.0,-0.1,8.9,12.6,-3.8,35.3125,26.3125,0.688,86.0,5.4,-1.5,3.8,2.4,1.4,25.1250,19.7500,600.0,5000.0,0.062,58.0,3.6,1.4,5.1,10.2,10.1875,6.5625,4400.0
9842,2020-01-19,2019,20,True,Kansas City Chiefs,35.0,24.0,Tennessee Titans,KC,7.0,51.0,False,,,,,59,Over,Arrowhead Stadium,"Kansas City, MO",1972,outdoor,"1 Arrowhead Dr, Kansas City, MO 64129",64129,cold,76416,Grass,US1MOJC0028,39.06920,-94.4871,264.9,64129,37213,752.899963,6,0.750,144.0,9.0,-0.1,8.9,12.6,-3.8,35.3125,26.3125,0.563,7.0,0.4,-0.2,0.2,-3.2,3.5,19.3750,18.9375,600.0,8000.0,0.187,137.0,8.6,0.1,8.7,15.8,15.9375,7.3750,7400.0
9843,2020-01-19,2019,20,True,San Francisco 49ers,37.0,20.0,Green Bay Packers,SF,8.0,46.5,False,,,,,57,Over,Levi's Stadium,"Santa Clara, CA",2014,outdoor,"4900 Marie P. DeBartolo Way, Santa Clara, CA 9...",95054,moderate,68500,Grass,USW00023234,37.61970,-122.3647,2.4,95054,54304,2930.347417,6,0.250,-93.0,-5.8,0.3,-5.5,-2.5,-3.1,21.3750,27.1875,0.406,-24.0,-1.5,-1.2,-2.7,0.0,-2.7,23.5000,25.0000,4000.0,2000.0,0.156,69.0,4.3,1.5,2.8,2.5,2.1250,2.1875,2000.0


In [41]:
# Write the finished dataset to disk with a header row and without the row index.
final_df.to_csv('data/final_df.csv', index = False, header=True)