# Schedule scraper


In [3]:
# Imports
import numpy as np
import pandas as pd
from bs4  import BeautifulSoup
import requests
import time
from datetime import datetime 
import warnings
warnings.filterwarnings("ignore")
import sys
import json
from json import loads, dumps
import lxml
from requests import ConnectionError, ReadTimeout, ConnectTimeout, HTTPError, Timeout
import xml
import re
from natsort import natsorted
import xml.etree.ElementTree as ET
import xmltodict
from xml.parsers.expat import ExpatError
from requests.exceptions import ChunkedEncodingError

In [4]:
# function
def _expand_dict_column(column):
    """Turn a Series of dicts into a DataFrame with one column per key, keeping the Series' index."""
    return pd.DataFrame(column.values.tolist(), index = column.index)


def scrape_schedule(start_date, end_date):
    
    """
    Scrape the NHL's API and get a schedule back.

    Parameters
    ----------
    start_date, end_date : str
        Sent as the ``startDate`` / ``endDate`` query parameters of the
        schedule endpoint (the notebook calls this with 'YYYY-MM-DD' strings).

    Returns
    -------
    pandas.DataFrame
        One row per game with the columns ID, link, type, season, date,
        homeid, homename, homescore, awayid, awayname, awayscore, state, venue.
        ``date`` is ``gameDate`` parsed by pandas and converted to the 'EST' zone.
        Rows keep their position within each day's game list as index label,
        so index labels repeat from one day to the next.
        If the response contains no games, an empty frame with the same
        columns is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Side effects
    ------------
    Rebinds the module-level name ``team_df`` to the expanded ``teams`` column.
    """
    # NOTE(review): kept only for backward compatibility -- other cells may read
    # team_df after calling this function. Prefer returning it if nothing does.
    global team_df

    keep = ['gamePk', 'link', 'gameType', 'season', 'gameDate', 'homeid', 'homename', 'homescore',
            'awayid', 'awayname', 'awayscore', 'state', 'venue']
    renames = {'gamePk':'ID', 'gameType':'type', 'gameDate':'date'}

    # NOTE(review): the statsapi.web.nhl.com host is reported to have been retired
    # in late 2023 -- confirm the endpoint still responds before relying on it.
    page = requests.get(
        'https://statsapi.web.nhl.com/api/v1/schedule',
        params = {'startDate': start_date, 'endDate': end_date},  # let requests build/encode the query string
        timeout = 500,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as a schedule.
    page.raise_for_status()
    date_list = json.loads(page.content)['dates']

    # One frame per day, concatenated once (DataFrame.append was removed in pandas 2.0
    # and re-copied the whole frame on every iteration).
    day_frames = [pd.DataFrame(day['games']) for day in date_list]
    day_frames = [frame for frame in day_frames if not frame.empty]
    if not day_frames:
        return pd.DataFrame(columns = keep).rename(columns = renames)
    gamedf = pd.concat(day_frames)

    # The nested dict columns are flattened level by level. Every helper frame reuses
    # gamedf's (non-unique) index, which is what lets the assign() below line up rows.
    team_df = _expand_dict_column(gamedf['teams'])
    away_df = _expand_dict_column(team_df['away'])
    home_df = _expand_dict_column(team_df['home'])
    away_team_df = _expand_dict_column(away_df['team'])
    home_team_df = _expand_dict_column(home_df['team'])

    gamedf = gamedf.assign(
        state = _expand_dict_column(gamedf['status'])['detailedState'],
        homename = home_team_df['name'],
        homeid = home_team_df['id'],
        homescore = home_df['score'],
        awayname = away_team_df['name'],
        awayid = away_team_df['id'],
        awayscore = away_df['score'],
        venue = _expand_dict_column(gamedf['venue'])['name'],
        # NOTE(review): 'EST' yields a -05:00 offset all season; confirm that a
        # DST-aware zone is not what was intended.
        gameDate = pd.to_datetime(gamedf['gameDate']).dt.tz_convert('EST')
    )

    return gamedf.loc[:, keep].rename(columns = renames)

In [11]:
# Fetch every game the schedule endpoint returns between the two dates (inclusive range as sent to the API).
ytd_schedule = scrape_schedule(start_date = "2022-09-01", end_date = "2023-04-04")

In [14]:
# Display the scraped schedule frame as this cell's output.
ytd_schedule

Unnamed: 0,ID,link,type,season,date,homeid,homename,homescore,awayid,awayname,awayscore,state,venue
0,2022010001,/api/v1/game/2022010001/feed/live,PR,20222023,2022-09-24 12:00:00-05:00,10,Toronto Maple Leafs,4,9,Ottawa Senators,1,Final,Scotiabank Arena
1,2022010002,/api/v1/game/2022010002/feed/live,PR,20222023,2022-09-24 18:00:00-05:00,4,Philadelphia Flyers,2,6,Boston Bruins,1,Final,Wells Fargo Center
2,2022010003,/api/v1/game/2022010003/feed/live,PR,20222023,2022-09-24 18:00:00-05:00,10,Toronto Maple Leafs,2,9,Ottawa Senators,4,Final,Scotiabank Arena
3,2022010004,/api/v1/game/2022010004/feed/live,PR,20222023,2022-09-24 19:00:00-05:00,53,Arizona Coyotes,4,19,St. Louis Blues,5,Final,Intrust Bank Arena
0,2022010005,/api/v1/game/2022010005/feed/live,PR,20222023,2022-09-25 12:00:00-05:00,5,Pittsburgh Penguins,3,29,Columbus Blue Jackets,2,Final,PPG Paints Arena
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,2022021235,/api/v1/game/2022021235/feed/live,R,20222023,2023-04-04 19:00:00-05:00,18,Nashville Predators,3,54,Vegas Golden Knights,2,Final,Bridgestone Arena
7,2022021236,/api/v1/game/2022021236/feed/live,R,20222023,2023-04-04 20:00:00-05:00,20,Calgary Flames,3,16,Chicago Blackhawks,4,Final,Scotiabank Saddledome
8,2022021238,/api/v1/game/2022021238/feed/live,R,20222023,2023-04-04 21:00:00-05:00,23,Vancouver Canucks,2,55,Seattle Kraken,5,Final,Rogers Arena
9,2022021237,/api/v1/game/2022021237/feed/live,R,20222023,2023-04-04 21:30:00-05:00,26,Los Angeles Kings,1,22,Edmonton Oilers,3,Final,Crypto.com Arena


In [23]:
# Keep only the rows whose game type is 'R' and collect their game IDs.
# NOTE(review): the "_2021" suffix does not match the date range requested for
# ytd_schedule (2022-09-01 to 2023-04-04); the names are left unchanged in case
# other cells refer to them.
schedule_2021 = ytd_schedule.loc[ytd_schedule['type'] == 'R']
game_list_2021 = list(schedule_2021['ID'])

game_list_2021

[2022020001,
 2022020002,
 2022020003,
 2022020004,
 2022020005,
 2022020006,
 2022020007,
 2022020008,
 2022020009,
 2022020010,
 2022020011,
 2022020012,
 2022020013,
 2022020014,
 2022020015,
 2022020017,
 2022020016,
 2022020018,
 2022020019,
 2022020020,
 2022020021,
 2022020022,
 2022020023,
 2022020024,
 2022020025,
 2022020026,
 2022020027,
 2022020028,
 2022020029,
 2022020030,
 2022020031,
 2022020032,
 2022020033,
 2022020034,
 2022020035,
 2022020037,
 2022020038,
 2022020036,
 2022020039,
 2022020040,
 2022020041,
 2022020042,
 2022020043,
 2022020044,
 2022020045,
 2022020046,
 2022020047,
 2022020048,
 2022020049,
 2022020050,
 2022020053,
 2022020051,
 2022020054,
 2022020055,
 2022020052,
 2022020056,
 2022020057,
 2022020058,
 2022020059,
 2022020060,
 2022020061,
 2022020062,
 2022020063,
 2022020064,
 2022020065,
 2022020066,
 2022020067,
 2022020069,
 2022020068,
 2022020070,
 2022020071,
 2022020072,
 2022020073,
 2022020074,
 2022020075,
 2022020077,
 2022020076,

In [21]:
# Count the rows of ytd_schedule in which each team appears as the home side.
# NOTE(review): ytd_schedule is not filtered by game type here, so every game
# type returned by the scraper is counted -- confirm that is intended.
home_games = ytd_schedule.groupby(by = 'homename')['homename'].count()
home_games

homename
Anaheim Ducks            40
Arizona Coyotes          40
Boston Bruins            41
Buffalo Sabres           42
Calgary Flames           43
Carolina Hurricanes      43
Chicago Blackhawks       42
Colorado Avalanche       42
Columbus Blue Jackets    42
Dallas Stars             42
Detroit Red Wings        42
Edmonton Oilers          44
Eisbaren Berlin           1
Florida Panthers         40
Los Angeles Kings        43
Minnesota Wild           42
Montréal Canadiens       44
Nashville Predators      41
New Jersey Devils        42
New York Islanders       41
New York Rangers         41
Ottawa Senators          42
Philadelphia Flyers      42
Pittsburgh Penguins      42
SC Bern                   1
San Jose Sharks          41
Seattle Kraken           41
St. Louis Blues          43
Tampa Bay Lightning      42
Team Atlantic             2
Team Central              1
Toronto Maple Leafs      44
Vancouver Canucks        43
Vegas Golden Knights     44
Washington Capitals      41
Winnipeg Je