In [163]:
# Libraries
from bs4 import BeautifulSoup, Comment
import numpy as np
import pandas as pd
import urllib.request
import requests
import random
import datetime
import math
import gspread
from df2gspread import df2gspread as d2g

## Helper Functions

In [164]:
# gets weekday of game given the game-status html element
def get_day(wk_day_span):
    """Return the first space-delimited token of an element's text.

    :param wk_day_span: object exposing a ``text`` attribute (the caller in
        this notebook passes the 'game-status' span of a score box)
    :return: the stripped text up to, but not including, its first space;
        the recorded notebook output shows values such as 'THU' and 'SUN'
    """
    status_text = wk_day_span.text.strip()
    # everything before the first space; the whole string when there is none
    first_token, _, _ = status_text.partition(' ')
    return first_token

In [165]:
# gets opposite of home line formatted correctly
def get_away_line(home_line):
    """Return the away team's line: the home line negated, as a signed string.

    :param home_line: the home line as a numeric string or a number,
        e.g. '-6', '+1.5', '-6.0' or -3.5
    :return: '+6' for a home line of -6, '-1.5' for +1.5, and '0' for a
        home line of zero; whole-number lines are rendered without a
        decimal point
    :raises ValueError: if ``home_line`` cannot be parsed by ``float``
    """
    # Parse once as a float and only then narrow to int. Calling int() on the
    # raw string fails for integral values written with a decimal point,
    # because int('-6.0') raises ValueError.
    line_value = float(home_line)
    if line_value.is_integer():
        line_value = int(line_value)

    # '+' in the format spec prints an explicit sign for both positive and
    # negative numbers, so a single format call covers both directions.
    # (A NaN input satisfies neither comparison and falls through to '0'.)
    if line_value < 0 or line_value > 0:
        return "{0:+}".format(-line_value)
    return '0'

In [166]:
# unused in this iteration of code
# function for returning the week of the NFL season
def get_week_season(dt, season_start_yday=252):
    """Return the calendar year of ``dt`` and the season week it falls in.

    Week 1 is the seven days starting on ``season_start_yday``; each later
    block of seven days is the next week.

    :param dt: ``datetime.date`` or ``datetime.datetime`` (anything with a
        ``timetuple()`` method and a ``year`` attribute)
    :param season_start_yday: day of the year on which week 1 begins; the
        default of 252 is the value this notebook used for the 2021 season
    :return: tuple of (year, week number), where year is ``dt.year`` (the
        calendar year of the date, not the year the season started in)
    """
    # the day before kickoff acts as "day zero" of the season
    week_zero_yday = season_start_yday - 1
    day_of_year = dt.timetuple().tm_yday
    if day_of_year < week_zero_yday:
        # Date falls in the calendar year after kickoff (final weeks of the
        # season): keep counting from the previous year, using that year's
        # real length so a leap year does not shift the week boundary.
        day_of_year += datetime.date(dt.year - 1, 12, 31).timetuple().tm_yday
    week_num = math.ceil((day_of_year - week_zero_yday) / 7)
    return (dt.year, week_num)

## Scraping Test Code

In [150]:
# Exploratory scrape: fetch the week-4 scoreboard page and pull the fields
# of the first game box by hand.
link = 'https://www.cbssports.com/nfl/scoreboard/all/2021/regular/{}/'.format(4)
with urllib.request.urlopen(link) as url:
    page = url.read()
soup = BeautifulSoup(page, "html.parser")

# list of game info boxes for all games; only the first one is inspected here
score_boxes = soup.find_all('div', {'class': 'live-update'})
score_box = score_boxes[0]

# odds cells: [0] is the over/under (its first character is dropped),
# [1] is the home line; the away line is the home line negated, as a float
odds = score_box.find_all('td', {'class': 'in-progress-odds'})
over_under = odds[0].text.strip()[1:]
home_line = str(odds[1].text.strip())
away_line = -float(home_line)

# team cells: away team first, home team second
teams = score_box.find_all('td', {'class': 'team'})
wk_day_span = score_box.find('span', {'class': 'game-status'})
wk_day = get_day(wk_day_span)
away_team = teams[0]
home_team = teams[1]

# the second link in a team cell holds the team name; the abbreviation is
# the sixth '/'-separated piece of the first link's href
away_team_name = away_team.find_all('a')[1].text
away_team_abbr = away_team.find('a')['href'].split('/')[5]
home_team_name = home_team.find_all('a')[1].text
home_team_abbr = home_team.find('a')['href'].split('/')[5]

game_info_array = [
    home_team_name, home_team_abbr,
    away_team_name, away_team_abbr,
    home_line, away_line, over_under, wk_day,
]
print(game_info_array)

['Bengals', 'CIN', 'Jaguars', 'JAC', '-6', 6.0, '47', 'THU']


## Scraping Functions

In [153]:
def get_game_info(score_box):
    """Extract one game's teams, betting lines and weekday from a score box.

    :param score_box: parsed element for a single game; must support
        ``find`` and ``find_all`` (get_game_info_list passes the
        'live-update' divs of the scoreboard page)
    :return: list of [home_team_name, home_team_abbr, away_team_name,
        away_team_abbr, home_line, away_line, over_under, wk_day]
    """
    def _name_and_abbr(team_cell):
        # name: text of the second link in the cell;
        # abbreviation: sixth '/'-separated piece of the first link's href
        name = team_cell.find_all('a')[1].text
        abbr = team_cell.find('a')['href'].split('/')[5]
        return name, abbr

    # first odds cell is the over/under with its leading character dropped
    # (presumably an 'o'/'u' marker -- confirm against the page markup);
    # second odds cell is the home line
    odds_cells = score_box.find_all('td', {'class': 'in-progress-odds'})
    over_under = odds_cells[0].text.strip()[1:]
    home_line = odds_cells[1].text.strip()
    away_line = get_away_line(home_line)

    team_cells = score_box.find_all('td', {'class': 'team'})
    wk_day = get_day(score_box.find('span', {'class': 'game-status'}))

    # away team is listed first, home team second
    away_cell = team_cells[0]
    home_cell = team_cells[1]
    away_team_name, away_team_abbr = _name_and_abbr(away_cell)
    home_team_name, home_team_abbr = _name_and_abbr(home_cell)

    return [
        home_team_name, home_team_abbr,
        away_team_name, away_team_abbr,
        home_line, away_line, over_under, wk_day,
    ]

In [154]:
def get_game_info_list(week_num, season=2021):
    """Scrape the CBS Sports NFL scoreboard for one regular-season week.

    :param week_num: week of the regular season, inserted into the URL
    :param season: season year inserted into the URL; defaults to 2021,
        the value that was previously hard-coded
        (NOTE(review): other seasons are assumed to use the same URL and
        page layout -- confirm before relying on them)
    :return: list with one get_game_info(...) result per 'live-update' div
        found on the page, in page order
    """
    link = 'https://www.cbssports.com/nfl/scoreboard/all/{}/regular/{}/'.format(
        season, week_num)
    with urllib.request.urlopen(link) as url:
        page = url.read()
    soup = BeautifulSoup(page, "html.parser")
    # list of game info boxes for all games
    score_boxes = soup.find_all('div', {'class': 'live-update'})
    return [get_game_info(score_box) for score_box in score_boxes]

## Scrape CBS & convert to df

In [186]:
# week to scrape; also used further down to name the exported worksheet
week_num = 4
# one list per game, with fields in the order returned by get_game_info
array = get_game_info_list(week_num)
array

[['Bengals', 'CIN', 'Jaguars', 'JAC', '-6', '+6', '47', 'THU'],
 ['Cowboys', 'DAL', 'Panthers', 'CAR', '-3.5', '+3.5', '50.5', 'SUN'],
 ['Vikings', 'MIN', 'Browns', 'CLE', '+1.5', '-1.5', '51', 'SUN'],
 ['Bears', 'CHI', 'Lions', 'DET', '-6', '+6', '46', 'SUN'],
 ['Bills', 'BUF', 'Texans', 'HOU', '-16.5', '+16.5', '49', 'SUN'],
 ['Dolphins', 'MIA', 'Colts', 'IND', '-2.5', '+2.5', '45.5', 'SUN'],
 ['Eagles', 'PHI', 'Chiefs', 'KC', '+6.5', '-6.5', '54', 'SUN'],
 ['Saints', 'NO', 'Giants', 'NYG', '-6.5', '+6.5', '46.5', 'SUN'],
 ['Jets', 'NYJ', 'Titans', 'TEN', '+6.5', '-6.5', '48', 'SUN'],
 ['Falcons', 'ATL', 'Football Team', 'WAS', '+1.5', '-1.5', '48.5', 'SUN'],
 ['Rams', 'LAR', 'Cardinals', 'ARI', '-6', '+6', '53', 'SUN'],
 ['49ers', 'SF', 'Seahawks', 'SEA', '-3.5', '+3.5', '50', 'SUN'],
 ['Broncos', 'DEN', 'Ravens', 'BAL', '+1', '-1', '46.5', 'SUN'],
 ['Packers', 'GB', 'Steelers', 'PIT', '-6.5', '+6.5', '47.5', 'SUN'],
 ['Patriots', 'NE', 'Buccaneers', 'TB', '+5.5', '-5.5', '51', 'SUN

In [187]:
# column names, in the same order as the fields returned by get_game_info
cols = ['home_team_name', 'home_team_abbr', 'away_team_name', 
        'away_team_abbr', 'home_line', 'away_line', 
        'over_under', 'wk_day']
# one row per scraped game
game_info = pd.DataFrame(array, columns=cols)
game_info

Unnamed: 0,home_team_name,home_team_abbr,away_team_name,away_team_abbr,home_line,away_line,over_under,wk_day
0,Bengals,CIN,Jaguars,JAC,-6.0,6.0,47.0,THU
1,Cowboys,DAL,Panthers,CAR,-3.5,3.5,50.5,SUN
2,Vikings,MIN,Browns,CLE,1.5,-1.5,51.0,SUN
3,Bears,CHI,Lions,DET,-6.0,6.0,46.0,SUN
4,Bills,BUF,Texans,HOU,-16.5,16.5,49.0,SUN
5,Dolphins,MIA,Colts,IND,-2.5,2.5,45.5,SUN
6,Eagles,PHI,Chiefs,KC,6.5,-6.5,54.0,SUN
7,Saints,NO,Giants,NYG,-6.5,6.5,46.5,SUN
8,Jets,NYJ,Titans,TEN,6.5,-6.5,48.0,SUN
9,Falcons,ATL,Football Team,WAS,1.5,-1.5,48.5,SUN


## Export to Google Sheets

In [191]:
# open connection to gsheets
gc = gspread.oauth()
sh = gc.open("Pickem Test Sheet")

# create new worksheet for the given week, sized for the header row plus one
# row per game; add_worksheet returns the new worksheet, so no second lookup
# by name is needed
# NOTE(review): re-running this cell for the same week presumably fails
# because a worksheet with that title already exists -- confirm
worksheet_name = 'week' + str(week_num)
worksheet = sh.add_worksheet(
    worksheet_name,
    rows=game_info.shape[0] + 1,
    cols=game_info.shape[1],
)

# push df to google sheet: column names first, then one row per game
worksheet.update([game_info.columns.values.tolist()] + game_info.values.tolist())

{'spreadsheetId': '1mZeUuGEyT8K3v4TK9foLP85Ts0Z6Qe323EYeI6KUmIY',
 'updatedRange': 'week4!A1:H17',
 'updatedRows': 17,
 'updatedColumns': 8,
 'updatedCells': 136}