In [3]:
import pandas as pd
from pandas.core.computation.check import NUMEXPR_INSTALLED
import numpy as np 
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import re

In [4]:
final_data = []
year = 2022
weeks = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]

for week in weeks:
    url = f'https://www.espn.com/nfl/scoreboard/_/week/{week}/year/{year}/seasontype/2'
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    week_data = []
    for board in soup.select('.ScoreboardScoreCell'):
        title = board.find_previous(class_='Card__Header__Title').text
        teams = [t.text for t in board.select('.ScoreCell__TeamName')]
        scores = [s.text for s in board.select('.ScoreCell__Score')] or ['-', '-']

        week_data.append((week, title, teams[0], scores[0], teams[1], scores[1]))

    final_data.extend(week_data)

df_scores_2022 = pd.DataFrame(final_data, columns=['Week', 'Date', 'Team 1', 'Score 1', 'Team 2', 'Score 2'])
df_scores_2022 = df_scores_2022.rename(columns={'Team 1': 'Away Team', 'Score 1': 'Away Score', 'Team 2': 'Home Team', 'Score 2':'Home Score'})
df_scores_2022['Final Score'] = df_scores_2022['Away Score'] + ' - ' + df_scores_2022['Home Score']
#print(df.to_markdown(index=False))
# Replace missing scores with NaN and dtype convert object to int 
df_scores_2022['Away Score'] = df_scores_2022['Away Score'].replace('-', np.nan)
df_scores_2022['Home Score'] = df_scores_2022['Home Score'].replace('-', np.nan)
df_scores_2022['Away Score'] = pd.to_numeric(df_scores_2022['Away Score'])
df_scores_2022['Home Score'] =  pd.to_numeric(df_scores_2022['Home Score'])

# Calculate total points
df_scores_2022['Total Points'] = df_scores_2022['Away Score'] + df_scores_2022['Home Score']

# Calculate winner
df_scores_2022['Winner'] = df_scores_2022.apply(lambda row: row['Away Team'] if row['Away Score'] > row['Home Score'] else row['Home Team'], axis=1)

# Calculate score differential
df_scores_2022['Away Score Differential'] = df_scores_2022['Home Score'] - df_scores_2022['Away Score']
df_scores_2022['Home Score Differential'] = df_scores_2022['Away Score'] - df_scores_2022['Home Score']
df_scores_2022['Matchup'] = df_scores_2022['Away Team'] + ' @ ' + df_scores_2022['Home Team']
df_scores_2022['Date'] = pd.to_datetime(df_scores_2022['Date'], format='%A, %B %d, %Y').dt.strftime('%Y-%m-%d')
# Use a lambda function to create the 'Home_Away_Winner' column
df_scores_2022['Home_Away_Winner'] = df_scores_2022.apply(lambda row: 'Home' 
                                                          if row['Winner'] == row['Home Team'] 
                                                          else 'Away' if row['Winner'] == row['Away Team'] else 'No Winner', axis=1)
df_scores_2022.tail()

Unnamed: 0,Week,Date,Away Team,Away Score,Home Team,Home Score,Final Score,Total Points,Winner,Away Score Differential,Home Score Differential,Matchup,Home_Away_Winner
267,18,2023-01-08,Giants,16.0,Eagles,22.0,16 - 22,38.0,Eagles,6.0,-6.0,Giants @ Eagles,Home
268,18,2023-01-08,Cardinals,13.0,49ers,38.0,13 - 38,51.0,49ers,25.0,-25.0,Cardinals @ 49ers,Home
269,18,2023-01-08,Rams,16.0,Seahawks,19.0,16 - 19,35.0,Seahawks,3.0,-3.0,Rams @ Seahawks,Home
270,18,2023-01-08,Cowboys,6.0,Commanders,26.0,6 - 26,32.0,Commanders,20.0,-20.0,Cowboys @ Commanders,Home
271,18,2023-01-08,Lions,20.0,Packers,16.0,20 - 16,36.0,Lions,-4.0,4.0,Lions @ Packers,Away


In [5]:
#import consensus data for spreads 
game_dates = list(df_scores_2022['Date'].unique())
uri_list = []
uri = "https://contests.covers.com/consensus/topconsensus/nfl/overall/"

for date in game_dates:
    uri_list.append(uri + date)

spreads = []
for item in uri_list:
    try:
        res = requests.get(item)
        soup = BeautifulSoup(res.content, 'lxml')
        table = soup.find_all('table')[0]
        temp_df = pd.read_html(str(table))[0]  # Obtain the first table from the list
        temp_df['Date'] = re.search(r'\d{4}-\d{2}-\d{2}', item).group()  # Add the corresponding date column

        spreads.append(temp_df)
    except IndexError:
        print(f"IndexError occurred for URL: {item}")
        continue
    except Exception as e:
        print(f"An error occurred for URL: {item}")
        print(f"Error details: {str(e)}")
        continue

df_spreads_2022 = pd.concat(spreads, ignore_index=True)

IndexError occurred for URL: https://contests.covers.com/consensus/topconsensus/nfl/overall/2022-11-17


In [6]:
#clean matchup column 
df_spreads_2022['Matchup'] = df_spreads_2022['Matchup'].str[4:].str.upper().str.replace("  ",' @ ')
# clean add home and away column 
df_spreads_2022[['Away Team', 'Home Team']] = df_spreads_2022['Matchup'].str.split(' @ ', expand=True)
# Remove leading/trailing whitespaces
df_spreads_2022['Away Team'] = df_spreads_2022['Away Team'].str.strip()
df_spreads_2022['Home Team'] = df_spreads_2022['Home Team'].str.strip()
#split to create away and home spread cols 
df_spreads_2022[['Away Spread', 'Home Spread']] = df_spreads_2022['Sides'].str.split(expand=True)
#split to create away and home consensus cols 
df_spreads_2022[['Away Consensus', 'Home Consensus']] = df_spreads_2022['Consensus'].str.split(expand=True)
# convert to dtype = float for consensus cols 
df_spreads_2022['Away Consensus'] =  df_spreads_2022['Away Consensus'].str.rstrip('%').astype(int)/100
df_spreads_2022['Home Consensus'] = df_spreads_2022['Home Consensus'].str.rstrip('%').astype(int)/100
#create consensus favorite vs. underdog cols 
df_spreads_2022['Con. Spread Favorite'] = df_spreads_2022.apply(lambda x: x['Away Team'] if x['Away Consensus'] > x['Home Consensus'] else x['Home Team'], axis=1)
df_spreads_2022['Con. Spread Underdog'] = df_spreads_2022.apply(lambda x: x['Away Team'] if x['Away Consensus'] < x['Home Consensus'] else x['Home Team'], axis=1)

# Define the dictionary mapping for replacements
replacement_dict = {'LV': 'Raiders',
                    'KC': 'Chiefs',
                    'NE': 'Patriots',
                    'GB': 'Packers',
                    'NO': 'Saints',
                    'BAL': 'Ravens',
                    'SF': '49ers',
                    'NYG': 'Giants',
                    'JAC': 'Jaguars',
                    'PIT': 'Steelers',
                    'TB': 'Buccaneers',
                    'CLE': 'Browns',
                    'IND': 'Colts',
                    'PHI': 'Eagles',
                    'ARI': 'Cardinals',
                    'WAS': 'Commanders',
                    'ATL': 'Falcons',
                    'NYJ': 'Jets',
                    'SEA': 'Seahawks',
                    'MIA': 'Dolphins',
                    'HOU': 'Texans',
                    'CAR': 'Panthers',
                    'CHI': 'Bears',
                    'CIN': 'Bengals',
                    'LAR': 'Rams',
                    'BUF': 'Bills',
                    'DET': 'Lions',
                    'DEN': 'Broncos',
                    'TEN': 'Titans',
                    'MIN': 'Vikings',
                    'LAC': 'Chargers',
                    'DAL': 'Cowboys'}
# Replace the values in the DataFrame column using map()
df_spreads_2022['Away Team'] = df_spreads_2022['Away Team'].map(replacement_dict)
df_spreads_2022['Home Team'] = df_spreads_2022['Home Team'].map(replacement_dict)
df_spreads_2022['Con. Spread Favorite'] = df_spreads_2022['Con. Spread Favorite'].map(replacement_dict)
df_spreads_2022['Con. Spread Underdog'] = df_spreads_2022['Con. Spread Underdog'].map(replacement_dict)
df_spreads_2022['Matchup'] = df_spreads_2022['Away Team'] + ' @ ' + df_spreads_2022['Home Team']
df_spreads_2022.tail()

Unnamed: 0,Matchup,Date,Consensus,Sides,Picks,Indepth,Away Team,Home Team,Away Spread,Home Spread,Away Consensus,Home Consensus,Con. Spread Favorite,Con. Spread Underdog
266,Patriots @ Bills,2023-01-08,45% 55%,+8.5 -8.5,615 764,Details,Patriots,Bills,8.5,-8.5,0.45,0.55,Bills,Patriots
267,Ravens @ Bengals,2023-01-08,45% 55%,+12.5 -12.5,632 777,Details,Ravens,Bengals,12.5,-12.5,0.45,0.55,Bengals,Ravens
268,Panthers @ Saints,2023-01-08,54% 46%,+3.5 -3.5,774 649,Details,Panthers,Saints,3.5,-3.5,0.54,0.46,Panthers,Saints
269,Rams @ Seahawks,2023-01-08,48% 52%,+4.5 -4.5,670 733,Details,Rams,Seahawks,4.5,-4.5,0.48,0.52,Seahawks,Rams
270,Lions @ Packers,2023-01-08,50% 50%,+3.5 -3.5,736 728,Details,Lions,Packers,3.5,-3.5,0.5,0.5,Packers,Packers


In [7]:
#import consensus data for spreads 
game_dates = list(df_scores_2022['Date'].unique())
uri_list = []
uri = "https://contests.covers.com/consensus/topoverunderconsensus/nfl/overall/"

for date in game_dates:
    uri_list.append(uri + date)

totals = []
for item in uri_list:
    try:
        res = requests.get(item)
        soup = BeautifulSoup(res.content, 'lxml')
        table = soup.find_all('table')[0]
        temp_df = pd.read_html(str(table))[0]  # Obtain the first table from the list
        temp_df['Date'] = re.search(r'\d{4}-\d{2}-\d{2}', item).group()  # Add the corresponding date column

        totals.append(temp_df)
    except IndexError:
        print(f"IndexError occurred for URL: {item}")
        continue
    except Exception as e:
        print(f"An error occurred for URL: {item}")
        print(f"Error details: {str(e)}")
        continue

df_totals_2022 = pd.concat(totals, ignore_index=True)
df_totals_2022.tail()

Unnamed: 0,Matchup,Date,Consensus,Total,Picks,Indepth
267,NFL Ari Sf,2023-01-08,55 % Over 45 % Under,39.5,513 428,Details
268,NFL Lac Den,2023-01-08,54 % Under 46 % Over,39.5,499 431,Details
269,NFL Bal Cin,2023-01-08,54 % Over 46 % Under,39.5,515 445,Details
270,NFL Nyj Mia,2023-01-08,54 % Under 46 % Over,37.0,533 461,Details
271,NFL Car No,2023-01-08,50 % Under 50 % Over,41.5,479 474,Details


In [8]:
#clean matchup column 
df_totals_2022['Matchup'] = df_totals_2022['Matchup'].str[4:].str.upper().str.replace("  ",' @ ')
# clean add home and away column 
df_totals_2022[['Away Team', 'Home Team']] = df_totals_2022['Matchup'].str.split(' @ ', expand=True)
# Remove leading/trailing whitespaces
df_totals_2022['Away Team'] = df_totals_2022['Away Team'].str.strip()
df_totals_2022['Home Team'] = df_totals_2022['Home Team'].str.strip()
# create consensus columns 
df_totals_2022['Over Consensus'] = df_totals_2022['Consensus'].str.extract(r'(\d+) % Over').astype(int)/100
df_totals_2022['Under Consensus'] = df_totals_2022['Consensus'].str.extract(r'(\d+) % Under').astype(int)/100
#create consensus favorites vs underdogs 
df_totals_2022['Con. Total Favorite'] = df_totals_2022[['Over Consensus', 'Under Consensus']].apply(lambda x: 'Over' 
                                                                                                    if x['Over Consensus'] > x['Under Consensus'] else 'Under', axis=1)
df_totals_2022['Con. Total Underdog'] = df_totals_2022[['Over Consensus', 'Under Consensus']].apply(lambda x: 'Over' 
                                                                                                    if x['Over Consensus'] < x['Under Consensus'] else 'Under', axis=1)
# Replace the values in the DataFrame column using map() from our previously defined replacement_dict
df_totals_2022['Away Team'] = df_totals_2022['Away Team'].map(replacement_dict)
df_totals_2022['Home Team'] = df_totals_2022['Home Team'].map(replacement_dict)
df_totals_2022['Matchup'] = df_totals_2022['Away Team'] + ' @ ' + df_totals_2022['Home Team']
df_totals_2022.tail()

Unnamed: 0,Matchup,Date,Consensus,Total,Picks,Indepth,Away Team,Home Team,Over Consensus,Under Consensus,Con. Total Favorite,Con. Total Underdog
267,Cardinals @ 49ers,2023-01-08,55 % Over 45 % Under,39.5,513 428,Details,Cardinals,49ers,0.55,0.45,Over,Under
268,Chargers @ Broncos,2023-01-08,54 % Under 46 % Over,39.5,499 431,Details,Chargers,Broncos,0.46,0.54,Under,Over
269,Ravens @ Bengals,2023-01-08,54 % Over 46 % Under,39.5,515 445,Details,Ravens,Bengals,0.54,0.46,Over,Under
270,Jets @ Dolphins,2023-01-08,54 % Under 46 % Over,37.0,533 461,Details,Jets,Dolphins,0.46,0.54,Under,Over
271,Panthers @ Saints,2023-01-08,50 % Under 50 % Over,41.5,479 474,Details,Panthers,Saints,0.5,0.5,Under,Under


In [9]:
print(df_scores_2022.columns)
print(df_spreads_2022.columns)
print(df_totals_2022.columns)

Index(['Week', 'Date', 'Away Team', 'Away Score', 'Home Team', 'Home Score',
       'Final Score', 'Total Points', 'Winner', 'Away Score Differential',
       'Home Score Differential', 'Matchup', 'Home_Away_Winner'],
      dtype='object')
Index(['Matchup', 'Date', 'Consensus', 'Sides', 'Picks', 'Indepth',
       'Away Team', 'Home Team', 'Away Spread', 'Home Spread',
       'Away Consensus', 'Home Consensus', 'Con. Spread Favorite',
       'Con. Spread Underdog'],
      dtype='object')
Index(['Matchup', 'Date', 'Consensus', 'Total', 'Picks', 'Indepth',
       'Away Team', 'Home Team', 'Over Consensus', 'Under Consensus',
       'Con. Total Favorite', 'Con. Total Underdog'],
      dtype='object')


In [10]:
# pd.merge(df1, df2, on=['Key_Column_1', 'Key_Column_2'], how='inner')
combined_df = pd.merge(df_scores_2022[['Week', 'Date', 'Matchup', 'Final Score', 'Total Points', 
                                       'Winner', 'Home_Away_Winner', 'Away Team', 'Away Score', 
                                       'Away Score Differential', 'Home Team', 'Home Score', 'Home Score Differential']], 
                       df_spreads_2022[['Matchup', 'Date', 'Away Team','Away Spread', 'Home Team', 
                                        'Home Spread', 'Away Consensus', 'Home Consensus', 
                                        'Con. Spread Favorite','Con. Spread Underdog']],
                       on=['Matchup', 'Date', 'Away Team', 'Home Team'], 
                       how='inner')

In [11]:
final_df_nfl_2022 = pd.merge(combined_df[['Week', 'Date', 'Matchup', 'Final Score', 'Total Points', 'Winner',
                                           'Home_Away_Winner', 'Away Team', 'Away Score',
                                           'Away Score Differential', 'Home Team', 'Home Score',
                                           'Home Score Differential', 'Away Spread', 'Home Spread',
                                           'Away Consensus', 'Home Consensus', 'Con. Spread Favorite',
                                           'Con. Spread Underdog']],
                            df_totals_2022[['Matchup', 'Date', 'Away Team', 'Home Team', 'Total',
                                            'Over Consensus', 'Under Consensus',
                                            'Con. Total Favorite', 'Con. Total Underdog']],
                             on=['Matchup', 'Date', 'Away Team', 'Home Team'], 
                             how='inner')

final_df_nfl_2022['Win Marginal'] = np.abs(final_df_nfl_2022['Away Score'] - final_df_nfl_2022['Home Score'])

In [12]:
# 1) did away vs home team cover? -- Done 
# Convert 'Away Spread' and 'Home Spread' columns to numeric values
final_df_nfl_2022['Away Spread'] = pd.to_numeric(final_df_nfl_2022['Away Spread'])
final_df_nfl_2022['Home Spread'] = pd.to_numeric(final_df_nfl_2022['Home Spread'])

final_df_nfl_2022['Spread Covering Team'] = final_df_nfl_2022.apply(lambda row: row['Home Team'] 
                                                              if row['Home Score'] + row['Home Spread'] >= row['Away Score'] 
                                                              else row['Away Team'], axis=1)
final_df_nfl_2022[['Matchup', 'Final Score','Home Team', 'Home Spread', 'Home Score', 'Home Spread','Away Team', 'Away Spread', 'Spread Covering Team']].head()

Unnamed: 0,Matchup,Final Score,Home Team,Home Spread,Home Score,Home Spread.1,Away Team,Away Spread,Spread Covering Team
0,Bills @ Rams,31 - 10,Rams,1.0,10.0,1.0,Bills,-1.0,Bills
1,Saints @ Falcons,27 - 26,Falcons,5.0,26.0,5.0,Saints,-5.0,Falcons
2,49ers @ Bears,10 - 19,Bears,6.0,19.0,6.0,49ers,-6.0,Bears
3,Steelers @ Bengals,23 - 20,Bengals,-6.5,20.0,-6.5,Steelers,6.5,Steelers
4,Eagles @ Lions,38 - 35,Lions,4.0,35.0,4.0,Eagles,-4.0,Lions


In [13]:
# 2) Did over/under hit? -- Done

# Create "Totals Covered" column
final_df_nfl_2022['Totals Covering Line'] = final_df_nfl_2022.apply(lambda row: 'Over' 
                                          if row['Total Points'] > row['Total'] 
                                          else ('Under' if row['Total Points'] < row['Total'] else 'Push'), axis=1)

# Print the updated DataFrame
final_df_nfl_2022[['Matchup', 'Total Points', 'Total', 'Totals Covering Line']].head()

Unnamed: 0,Matchup,Total Points,Total,Totals Covering Line
0,Bills @ Rams,41.0,52.0,Under
1,Saints @ Falcons,53.0,43.5,Over
2,49ers @ Bears,29.0,38.0,Under
3,Steelers @ Bengals,43.0,44.5,Under
4,Eagles @ Lions,73.0,48.5,Over


In [14]:
# 3) Did consensus fav vs underdog win? [Spread] -- Done

# Create "Consensus Spread Winner" column
final_df_nfl_2022['Consensus Spread Winner'] = final_df_nfl_2022.apply(lambda row: 'Majority' 
                                            if row['Spread Covering Team'] in [row['Away Team'], row['Home Team']] and
                                                          ((row['Spread Covering Team'] == row['Away Team'] and row['Away Consensus'] > 0.5) or
                                                           (row['Spread Covering Team'] == row['Home Team'] and row['Home Consensus'] > 0.5))
                                            else 'Minority' if row['Spread Covering Team'] in [row['Away Team'], row['Home Team']] and
                                                          ((row['Spread Covering Team'] == row['Away Team'] and row['Away Consensus'] < 0.5) or
                                                           (row['Spread Covering Team'] == row['Home Team'] and row['Home Consensus'] < 0.5))
                                            else 'Push' if row['Spread Covering Team'] in [row['Away Team'], row['Home Team']] and
                                                          ((row['Spread Covering Team'] == row['Away Team'] and row['Away Consensus'] == 0.5) or
                                                           (row['Spread Covering Team'] == row['Home Team'] and row['Home Consensus'] == 0.5))
                                            else '', axis=1)

# Print the updated DataFrame
final_df_nfl_2022[['Away Team', 'Home Team', 'Away Consensus', 'Home Consensus', 'Consensus Spread Winner']].head()

Unnamed: 0,Away Team,Home Team,Away Consensus,Home Consensus,Consensus Spread Winner
0,Bills,Rams,0.55,0.45,Majority
1,Saints,Falcons,0.59,0.41,Minority
2,49ers,Bears,0.54,0.46,Minority
3,Steelers,Bengals,0.48,0.52,Minority
4,Eagles,Lions,0.59,0.41,Minority


In [15]:
# 4) Did consensus fav vs underdog win? [Totals] -- Done
# Create "Total Consensus Winner" column
final_df_nfl_2022['Total Consensus Winner'] = final_df_nfl_2022.apply(lambda row: 'Majority' 
                                                  if ((row['Totals Covering Line'] == 'Under' and row['Under Consensus'] > 0.5) or
                                                                   (row['Totals Covering Line'] == 'Over' and row['Over Consensus'] > 0.5))
                                                  else 'Minority' if ((row['Totals Covering Line'] == 'Under' and row['Under Consensus'] < 0.5) or
                                                                   (row['Totals Covering Line'] == 'Over' and row['Over Consensus'] < 0.5))
                                                  else 'Push' if (row['Under Consensus'] == 0.5) or (row['Over Consensus'] == 0.5)
                                                  else '', axis=1)

# Print the updated DataFrame
#final_df_nfl_2022[['Matchup', 'Total Points', 'Total', 'Totals Covering Line', 'Over Consensus', 'Away Consensus', 'Total Consensus Winner']]

In [16]:
final_df_nfl_2022.columns

Index(['Week', 'Date', 'Matchup', 'Final Score', 'Total Points', 'Winner',
       'Home_Away_Winner', 'Away Team', 'Away Score',
       'Away Score Differential', 'Home Team', 'Home Score',
       'Home Score Differential', 'Away Spread', 'Home Spread',
       'Away Consensus', 'Home Consensus', 'Con. Spread Favorite',
       'Con. Spread Underdog', 'Total', 'Over Consensus', 'Under Consensus',
       'Con. Total Favorite', 'Con. Total Underdog', 'Win Marginal',
       'Spread Covering Team', 'Totals Covering Line',
       'Consensus Spread Winner', 'Total Consensus Winner'],
      dtype='object')

In [19]:
final_df_nfl_2022.dtypes

Week                         int64
Date                        object
Matchup                     object
Final Score                 object
Total Points               float64
Winner                      object
Home_Away_Winner            object
Away Team                   object
Away Score                 float64
Away Score Differential    float64
Home Team                   object
Home Score                 float64
Home Score Differential    float64
Away Spread                float64
Home Spread                float64
Away Consensus             float64
Home Consensus             float64
Con. Spread Favorite        object
Con. Spread Underdog        object
Total                      float64
Over Consensus             float64
Under Consensus            float64
Con. Total Favorite         object
Con. Total Underdog         object
Win Marginal               float64
Spread Covering Team        object
Totals Covering Line        object
Consensus Spread Winner     object
Total Consensus Winn

In [18]:
final_df_nfl_2022.to_csv('final_df_nfl_2022.csv')