In [3]:
import pandas as pd
from pandas.core.computation.check import NUMEXPR_INSTALLED
import numpy as np 
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import re

In [4]:
final_data = []
year = 2019
weeks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]

for week in weeks:
    url = f'https://www.espn.com/nfl/scoreboard/_/week/{week}/year/{year}/seasontype/2'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    week_data = []
    for board in soup.select('.ScoreboardScoreCell'):
        title = board.find_previous(class_='Card__Header__Title').text
        teams = [t.text for t in board.select('.ScoreCell__TeamName')]
        scores = [s.text for s in board.select('.ScoreCell__Score')] or ['-', '-']

        week_data.append((week, title, teams[0], scores[0], teams[1], scores[1]))

    final_data.extend(week_data)

df_scores_2019 = pd.DataFrame(final_data, columns=['Week', 'Date', 'Team 1', 'Score 1', 'Team 2', 'Score 2'])
df_scores_2019 = df_scores_2019.rename(columns={'Team 1': 'Away Team', 'Score 1': 'Away Score', 'Team 2': 'Home Team', 'Score 2':'Home Score'})
df_scores_2019['Final Score'] = df_scores_2019['Away Score'] + ' - ' + df_scores_2019['Home Score']
#print(df.to_markdown(index=False))
# Replace missing scores with NaN and dtype convert object to int 
df_scores_2019['Away Score'] = df_scores_2019['Away Score'].replace('-', np.nan)
df_scores_2019['Home Score'] = df_scores_2019['Home Score'].replace('-', np.nan)
df_scores_2019['Away Score'] = pd.to_numeric(df_scores_2019['Away Score'])
df_scores_2019['Home Score'] =  pd.to_numeric(df_scores_2019['Home Score'])

# Calculate total points
df_scores_2019['Total Points'] = df_scores_2019['Away Score'] + df_scores_2019['Home Score']

# Calculate winner
df_scores_2019['Winner'] = df_scores_2019.apply(lambda row: row['Away Team'] if row['Away Score'] > row['Home Score'] else row['Home Team'], axis=1)

# Calculate score differential
df_scores_2019['Away Score Differential'] = df_scores_2019['Home Score'] - df_scores_2019['Away Score']
df_scores_2019['Home Score Differential'] = df_scores_2019['Away Score'] - df_scores_2019['Home Score']
df_scores_2019['Matchup'] = df_scores_2019['Away Team'] + ' @ ' + df_scores_2019['Home Team']
df_scores_2019['Date'] = pd.to_datetime(df_scores_2019['Date'], format='%A, %B %d, %Y').dt.strftime('%Y-%m-%d')
# Use a lambda function to create the 'Home_Away_Winner' column
df_scores_2019['Home_Away_Winner'] = df_scores_2019.apply(lambda row: 'Home' 
                                                          if row['Winner'] == row['Home Team'] 
                                                          else 'Away' if row['Winner'] == row['Away Team'] else 'No Winner', axis=1)
df_scores_2019.tail()

Unnamed: 0,Week,Date,Away Team,Away Score,Home Team,Home Score,Final Score,Total Points,Winner,Away Score Differential,Home Score Differential,Matchup,Home_Away_Winner
251,17,2019-12-29,Eagles,34,Giants,17,34 - 17,51,Eagles,-17,17,Eagles @ Giants,Away
252,17,2019-12-29,Colts,20,Jaguars,38,20 - 38,58,Jaguars,18,-18,Colts @ Jaguars,Home
253,17,2019-12-29,Steelers,10,Ravens,28,10 - 28,38,Ravens,18,-18,Steelers @ Ravens,Home
254,17,2019-12-29,Titans,35,Texans,14,35 - 14,49,Titans,-21,21,Titans @ Texans,Away
255,17,2019-12-29,49ers,26,Seahawks,21,26 - 21,47,49ers,-5,5,49ers @ Seahawks,Away


In [5]:
#import consensus data for spreads 
game_dates = list(df_scores_2019['Date'].unique())
uri_list = []
uri = "https://contests.covers.com/consensus/topconsensus/nfl/overall/"

for date in game_dates:
    uri_list.append(uri + date)

spreads = []
for item in uri_list:
    try:
        res = requests.get(item)
        soup = BeautifulSoup(res.content, 'lxml')
        table = soup.find_all('table')[0]
        temp_df = pd.read_html(str(table))[0]  # Obtain the first table from the list
        temp_df['Date'] = re.search(r'\d{4}-\d{2}-\d{2}', item).group()  # Add the corresponding date column

        spreads.append(temp_df)
    except IndexError:
        print(f"IndexError occurred for URL: {item}")
        continue
    except Exception as e:
        print(f"An error occurred for URL: {item}")
        print(f"Error details: {str(e)}")
        continue

df_spreads_2019 = pd.concat(spreads, ignore_index=True)

In [6]:
#clean matchup column 
df_spreads_2019['Matchup'] = df_spreads_2019['Matchup'].str[4:].str.upper().str.replace("  ",' @ ')
# clean add home and away column 
df_spreads_2019[['Away Team', 'Home Team']] = df_spreads_2019['Matchup'].str.split(' @ ', expand=True)
# Remove leading/trailing whitespaces
df_spreads_2019['Away Team'] = df_spreads_2019['Away Team'].str.strip()
df_spreads_2019['Home Team'] = df_spreads_2019['Home Team'].str.strip()
#split to create away and home spread cols 
df_spreads_2019[['Away Spread', 'Home Spread']] = df_spreads_2019['Sides'].str.split(expand=True)
#split to create away and home consensus cols 
df_spreads_2019[['Away Consensus', 'Home Consensus']] = df_spreads_2019['Consensus'].str.split(expand=True)
# convert to dtype = float for consensus cols 
df_spreads_2019['Away Consensus'] =  df_spreads_2019['Away Consensus'].str.rstrip('%').astype(int)/100
df_spreads_2019['Home Consensus'] = df_spreads_2019['Home Consensus'].str.rstrip('%').astype(int)/100
#create consensus favorite vs. underdog cols 
df_spreads_2019['Con. Spread Favorite'] = df_spreads_2019.apply(lambda x: x['Away Team'] if x['Away Consensus'] > x['Home Consensus'] else x['Home Team'], axis=1)
df_spreads_2019['Con. Spread Underdog'] = df_spreads_2019.apply(lambda x: x['Away Team'] if x['Away Consensus'] < x['Home Consensus'] else x['Home Team'], axis=1)

# Define the dictionary mapping for replacements
replacement_dict = {'LV': 'Raiders',
                    'KC': 'Chiefs',
                    'NE': 'Patriots',
                    'GB': 'Packers',
                    'NO': 'Saints',
                    'BAL': 'Ravens',
                    'SF': '49ers',
                    'NYG': 'Giants',
                    'JAC': 'Jaguars',
                    'PIT': 'Steelers',
                    'TB': 'Buccaneers',
                    'CLE': 'Browns',
                    'IND': 'Colts',
                    'PHI': 'Eagles',
                    'ARI': 'Cardinals',
                    'WAS': 'Commanders',
                    'ATL': 'Falcons',
                    'NYJ': 'Jets',
                    'SEA': 'Seahawks',
                    'MIA': 'Dolphins',
                    'HOU': 'Texans',
                    'CAR': 'Panthers',
                    'CHI': 'Bears',
                    'CIN': 'Bengals',
                    'LAR': 'Rams',
                    'BUF': 'Bills',
                    'DET': 'Lions',
                    'DEN': 'Broncos',
                    'TEN': 'Titans',
                    'MIN': 'Vikings',
                    'LAC': 'Chargers',
                    'DAL': 'Cowboys'}
# Replace the values in the DataFrame column using map()
df_spreads_2019['Away Team'] = df_spreads_2019['Away Team'].map(replacement_dict)
df_spreads_2019['Home Team'] = df_spreads_2019['Home Team'].map(replacement_dict)
df_spreads_2019['Con. Spread Favorite'] = df_spreads_2019['Con. Spread Favorite'].map(replacement_dict)
df_spreads_2019['Con. Spread Underdog'] = df_spreads_2019['Con. Spread Underdog'].map(replacement_dict)
df_spreads_2019['Matchup'] = df_spreads_2019['Away Team'] + ' @ ' + df_spreads_2019['Home Team']
df_spreads_2019.tail()

Unnamed: 0,Matchup,Date,Consensus,Sides,Picks,Indepth,Away Team,Home Team,Away Spread,Home Spread,Away Consensus,Home Consensus,Con. Spread Favorite,Con. Spread Underdog
251,Packers @ Lions,2019-12-29,45% 55%,0 0,700 869,Details,Packers,Lions,0.0,0.0,0.45,0.55,Lions,Packers
252,49ers @ Seahawks,2019-12-29,46% 54%,0 0,823 975,Details,49ers,Seahawks,0.0,0.0,0.46,0.54,Seahawks,49ers
253,Falcons @ Buccaneers,2019-12-29,53% 47%,+5.5 -5.5,880 769,Details,Falcons,Buccaneers,5.5,-5.5,0.53,0.47,Falcons,Buccaneers
254,Raiders @ Broncos,2019-12-29,53% 47%,+9.5 -9.5,901 790,Details,Raiders,Broncos,9.5,-9.5,0.53,0.47,Raiders,Broncos
255,Eagles @ Giants,2019-12-29,50% 50%,-16.5 +16.5,838 837,Details,Eagles,Giants,-16.5,16.5,0.5,0.5,Giants,Giants


In [7]:
#import consensus data for spreads 
game_dates = list(df_scores_2019['Date'].unique())
uri_list = []
uri = "https://contests.covers.com/consensus/topoverunderconsensus/nfl/overall/"

for date in game_dates:
    uri_list.append(uri + date)

totals = []
for item in uri_list:
    try:
        res = requests.get(item)
        soup = BeautifulSoup(res.content, 'lxml')
        table = soup.find_all('table')[0]
        temp_df = pd.read_html(str(table))[0]  # Obtain the first table from the list
        temp_df['Date'] = re.search(r'\d{4}-\d{2}-\d{2}', item).group()  # Add the corresponding date column

        totals.append(temp_df)
    except IndexError:
        print(f"IndexError occurred for URL: {item}")
        continue
    except Exception as e:
        print(f"An error occurred for URL: {item}")
        print(f"Error details: {str(e)}")
        continue

df_totals_2019 = pd.concat(totals, ignore_index=True)
df_totals_2019.tail()

Unnamed: 0,Matchup,Date,Consensus,Total,Picks,Indepth
251,NFL Nyj Buf,2019-12-29,53 % Under 47 % Over,37.5,579 515,Details
252,NFL Ten Hou,2019-12-29,53 % Over 47 % Under,44.5,564 509,Details
253,NFL Chi Min,2019-12-29,52 % Under 48 % Over,36.5,562 514,Details
254,NFL Mia Ne,2019-12-29,52 % Over 48 % Under,45.0,567 524,Details
255,NFL Ind Jac,2019-12-29,51 % Over 49 % Under,42.0,571 539,Details


In [8]:
#clean matchup column 
df_totals_2019['Matchup'] = df_totals_2019['Matchup'].str[4:].str.upper().str.replace("  ",' @ ')
# clean add home and away column 
df_totals_2019[['Away Team', 'Home Team']] = df_totals_2019['Matchup'].str.split(' @ ', expand=True)
# Remove leading/trailing whitespaces
df_totals_2019['Away Team'] = df_totals_2019['Away Team'].str.strip()
df_totals_2019['Home Team'] = df_totals_2019['Home Team'].str.strip()
# create consensus columns 
df_totals_2019['Over Consensus'] = df_totals_2019['Consensus'].str.extract(r'(\d+) % Over').astype(int)/100
df_totals_2019['Under Consensus'] = df_totals_2019['Consensus'].str.extract(r'(\d+) % Under').astype(int)/100
#create consensus favorites vs underdogs 
df_totals_2019['Con. Total Favorite'] = df_totals_2019[['Over Consensus', 'Under Consensus']].apply(lambda x: 'Over' 
                                                                                                    if x['Over Consensus'] > x['Under Consensus'] else 'Under', axis=1)
df_totals_2019['Con. Total Underdog'] = df_totals_2019[['Over Consensus', 'Under Consensus']].apply(lambda x: 'Over' 
                                                                                                    if x['Over Consensus'] < x['Under Consensus'] else 'Under', axis=1)
# Replace the values in the DataFrame column using map() from our previously defined replacement_dict
df_totals_2019['Away Team'] = df_totals_2019['Away Team'].map(replacement_dict)
df_totals_2019['Home Team'] = df_totals_2019['Home Team'].map(replacement_dict)
df_totals_2019['Matchup'] = df_totals_2019['Away Team'] + ' @ ' + df_totals_2019['Home Team']
df_totals_2019.tail()

Unnamed: 0,Matchup,Date,Consensus,Total,Picks,Indepth,Away Team,Home Team,Over Consensus,Under Consensus,Con. Total Favorite,Con. Total Underdog
251,Jets @ Bills,2019-12-29,53 % Under 47 % Over,37.5,579 515,Details,Jets,Bills,0.47,0.53,Under,Over
252,Titans @ Texans,2019-12-29,53 % Over 47 % Under,44.5,564 509,Details,Titans,Texans,0.53,0.47,Over,Under
253,Bears @ Vikings,2019-12-29,52 % Under 48 % Over,36.5,562 514,Details,Bears,Vikings,0.48,0.52,Under,Over
254,Dolphins @ Patriots,2019-12-29,52 % Over 48 % Under,45.0,567 524,Details,Dolphins,Patriots,0.52,0.48,Over,Under
255,Colts @ Jaguars,2019-12-29,51 % Over 49 % Under,42.0,571 539,Details,Colts,Jaguars,0.51,0.49,Over,Under


In [9]:
print(df_scores_2019.columns)
print(df_spreads_2019.columns)
print(df_totals_2019.columns)

Index(['Week', 'Date', 'Away Team', 'Away Score', 'Home Team', 'Home Score',
       'Final Score', 'Total Points', 'Winner', 'Away Score Differential',
       'Home Score Differential', 'Matchup', 'Home_Away_Winner'],
      dtype='object')
Index(['Matchup', 'Date', 'Consensus', 'Sides', 'Picks', 'Indepth',
       'Away Team', 'Home Team', 'Away Spread', 'Home Spread',
       'Away Consensus', 'Home Consensus', 'Con. Spread Favorite',
       'Con. Spread Underdog'],
      dtype='object')
Index(['Matchup', 'Date', 'Consensus', 'Total', 'Picks', 'Indepth',
       'Away Team', 'Home Team', 'Over Consensus', 'Under Consensus',
       'Con. Total Favorite', 'Con. Total Underdog'],
      dtype='object')


In [10]:
# pd.merge(df1, df2, on=['Key_Column_1', 'Key_Column_2'], how='inner')
combined_df = pd.merge(df_scores_2019[['Week', 'Date', 'Matchup', 'Final Score', 'Total Points', 
                                       'Winner', 'Home_Away_Winner', 'Away Team', 'Away Score', 
                                       'Away Score Differential', 'Home Team', 'Home Score', 'Home Score Differential']], 
                       df_spreads_2019[['Matchup', 'Date', 'Away Team','Away Spread', 'Home Team', 
                                        'Home Spread', 'Away Consensus', 'Home Consensus', 
                                        'Con. Spread Favorite','Con. Spread Underdog']],
                       on=['Matchup', 'Date', 'Away Team', 'Home Team'], 
                       how='inner')

In [11]:
final_df_nfl_2019 = pd.merge(combined_df[['Week', 'Date', 'Matchup', 'Final Score', 'Total Points', 'Winner',
                                           'Home_Away_Winner', 'Away Team', 'Away Score',
                                           'Away Score Differential', 'Home Team', 'Home Score',
                                           'Home Score Differential', 'Away Spread', 'Home Spread',
                                           'Away Consensus', 'Home Consensus', 'Con. Spread Favorite',
                                           'Con. Spread Underdog']],
                            df_totals_2019[['Matchup', 'Date', 'Away Team', 'Home Team', 'Total',
                                            'Over Consensus', 'Under Consensus',
                                            'Con. Total Favorite', 'Con. Total Underdog']],
                             on=['Matchup', 'Date', 'Away Team', 'Home Team'], 
                             how='inner')

final_df_nfl_2019['Win Marginal'] = np.abs(final_df_nfl_2019['Away Score'] - final_df_nfl_2019['Home Score'])

In [12]:
# 1) did away vs home team cover? -- Done 
# Convert 'Away Spread' and 'Home Spread' columns to numeric values
final_df_nfl_2019['Away Spread'] = pd.to_numeric(final_df_nfl_2019['Away Spread'])
final_df_nfl_2019['Home Spread'] = pd.to_numeric(final_df_nfl_2019['Home Spread'])

final_df_nfl_2019['Spread Covering Team'] = final_df_nfl_2019.apply(lambda row: row['Home Team'] 
                                                              if row['Home Score'] + row['Home Spread'] >= row['Away Score'] 
                                                              else row['Away Team'], axis=1)
final_df_nfl_2019[['Matchup', 'Final Score','Home Team', 'Home Spread', 'Home Score', 'Home Spread','Away Team', 'Away Spread', 'Spread Covering Team']].head()

Unnamed: 0,Matchup,Final Score,Home Team,Home Spread,Home Score,Home Spread.1,Away Team,Away Spread,Spread Covering Team
0,Packers @ Bears,10 - 3,Bears,-3.0,3,-3.0,Packers,3.0,Packers
1,Titans @ Browns,43 - 13,Browns,-5.5,13,-5.5,Titans,5.5,Titans
2,Ravens @ Dolphins,59 - 10,Dolphins,7.0,10,7.0,Ravens,-7.0,Ravens
3,Falcons @ Vikings,12 - 28,Vikings,-3.5,28,-3.5,Falcons,3.5,Vikings
4,Bills @ Jets,17 - 16,Jets,-2.5,16,-2.5,Bills,2.5,Bills


In [13]:
# 2) Did over/under hit? -- Done

# Create "Totals Covered" column
final_df_nfl_2019['Totals Covering Line'] = final_df_nfl_2019.apply(lambda row: 'Over' 
                                          if row['Total Points'] > row['Total'] 
                                          else ('Under' if row['Total Points'] < row['Total'] else 'Push'), axis=1)

# Print the updated DataFrame
final_df_nfl_2019[['Matchup', 'Total Points', 'Total', 'Totals Covering Line']].head()

Unnamed: 0,Matchup,Total Points,Total,Totals Covering Line
0,Packers @ Bears,13,46.5,Under
1,Titans @ Browns,56,44.0,Over
2,Ravens @ Dolphins,69,40.0,Over
3,Falcons @ Vikings,40,46.5,Under
4,Bills @ Jets,33,39.0,Under


In [14]:
# 3) Did consensus fav vs underdog win? [Spread] -- Done

# Create "Consensus Spread Winner" column
final_df_nfl_2019['Consensus Spread Winner'] = final_df_nfl_2019.apply(lambda row: 'Majority' 
                                            if row['Spread Covering Team'] in [row['Away Team'], row['Home Team']] and
                                                          ((row['Spread Covering Team'] == row['Away Team'] and row['Away Consensus'] > 0.5) or
                                                           (row['Spread Covering Team'] == row['Home Team'] and row['Home Consensus'] > 0.5))
                                            else 'Minority' if row['Spread Covering Team'] in [row['Away Team'], row['Home Team']] and
                                                          ((row['Spread Covering Team'] == row['Away Team'] and row['Away Consensus'] < 0.5) or
                                                           (row['Spread Covering Team'] == row['Home Team'] and row['Home Consensus'] < 0.5))
                                            else 'Push' if row['Spread Covering Team'] in [row['Away Team'], row['Home Team']] and
                                                          ((row['Spread Covering Team'] == row['Away Team'] and row['Away Consensus'] == 0.5) or
                                                           (row['Spread Covering Team'] == row['Home Team'] and row['Home Consensus'] == 0.5))
                                            else '', axis=1)

# Print the updated DataFrame
final_df_nfl_2019[['Away Team', 'Home Team', 'Away Consensus', 'Home Consensus', 'Consensus Spread Winner']].head()

Unnamed: 0,Away Team,Home Team,Away Consensus,Home Consensus,Consensus Spread Winner
0,Packers,Bears,0.56,0.44,Majority
1,Titans,Browns,0.44,0.56,Minority
2,Ravens,Dolphins,0.67,0.33,Majority
3,Falcons,Vikings,0.53,0.47,Minority
4,Bills,Jets,0.46,0.54,Minority


In [15]:
# 4) Did consensus fav vs underdog win? [Totals] -- Done
# Create "Total Consensus Winner" column
final_df_nfl_2019['Total Consensus Winner'] = final_df_nfl_2019.apply(lambda row: 'Majority' 
                                                  if ((row['Totals Covering Line'] == 'Under' and row['Under Consensus'] > 0.5) or
                                                                   (row['Totals Covering Line'] == 'Over' and row['Over Consensus'] > 0.5))
                                                  else 'Minority' if ((row['Totals Covering Line'] == 'Under' and row['Under Consensus'] < 0.5) or
                                                                   (row['Totals Covering Line'] == 'Over' and row['Over Consensus'] < 0.5))
                                                  else 'Push' if (row['Under Consensus'] == 0.5) or (row['Over Consensus'] == 0.5)
                                                  else '', axis=1)

# Print the updated DataFrame
#final_df_nfl_2019[['Matchup', 'Total Points', 'Total', 'Totals Covering Line', 'Over Consensus', 'Away Consensus', 'Total Consensus Winner']]

In [16]:
final_df_nfl_2019.columns

Index(['Week', 'Date', 'Matchup', 'Final Score', 'Total Points', 'Winner',
       'Home_Away_Winner', 'Away Team', 'Away Score',
       'Away Score Differential', 'Home Team', 'Home Score',
       'Home Score Differential', 'Away Spread', 'Home Spread',
       'Away Consensus', 'Home Consensus', 'Con. Spread Favorite',
       'Con. Spread Underdog', 'Total', 'Over Consensus', 'Under Consensus',
       'Con. Total Favorite', 'Con. Total Underdog', 'Win Marginal',
       'Spread Covering Team', 'Totals Covering Line',
       'Consensus Spread Winner', 'Total Consensus Winner'],
      dtype='object')

In [17]:
final_df_nfl_2019

Unnamed: 0,Week,Date,Matchup,Final Score,Total Points,Winner,Home_Away_Winner,Away Team,Away Score,Away Score Differential,...,Total,Over Consensus,Under Consensus,Con. Total Favorite,Con. Total Underdog,Win Marginal,Spread Covering Team,Totals Covering Line,Consensus Spread Winner,Total Consensus Winner
0,1,2019-09-05,Packers @ Bears,10 - 3,13,Packers,Away,Packers,10,-7,...,46.5,0.41,0.59,Under,Over,7,Packers,Under,Majority,Majority
1,1,2019-09-08,Titans @ Browns,43 - 13,56,Titans,Away,Titans,43,-30,...,44.0,0.45,0.55,Under,Over,30,Titans,Over,Minority,Minority
2,1,2019-09-08,Ravens @ Dolphins,59 - 10,69,Ravens,Away,Ravens,59,-49,...,40.0,0.46,0.54,Under,Over,49,Ravens,Over,Majority,Minority
3,1,2019-09-08,Falcons @ Vikings,12 - 28,40,Vikings,Home,Falcons,12,16,...,46.5,0.53,0.47,Over,Under,16,Vikings,Under,Minority,Minority
4,1,2019-09-08,Bills @ Jets,17 - 16,33,Bills,Away,Bills,17,-1,...,39.0,0.55,0.45,Over,Under,1,Bills,Under,Minority,Minority
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,17,2019-12-29,Eagles @ Giants,34 - 17,51,Eagles,Away,Eagles,34,-17,...,45.0,0.64,0.36,Over,Under,17,Eagles,Over,Push,Majority
236,17,2019-12-29,Colts @ Jaguars,20 - 38,58,Jaguars,Home,Colts,20,18,...,42.0,0.51,0.49,Over,Under,18,Jaguars,Over,Minority,Majority
237,17,2019-12-29,Steelers @ Ravens,10 - 28,38,Ravens,Home,Steelers,10,18,...,36.5,0.41,0.59,Under,Over,18,Ravens,Over,Majority,Minority
238,17,2019-12-29,Titans @ Texans,35 - 14,49,Titans,Away,Titans,35,-21,...,44.5,0.53,0.47,Over,Under,21,Titans,Over,Minority,Majority


In [18]:
final_df_nfl_2019.to_csv('final_df_nfl_2019.csv')