In [1]:
# imports
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd


In [2]:
# set target year var
target_year = '2025'

# create url vars
url_past = f'https://www.flashscore.com/nhl-{int(target_year)}-{int(target_year) + 1}/results/'
url_current = 'https://www.flashscore.com/nhl/results/'

# pick url based on target year
if target_year != '2025':
    master_url = url_past
else:
    master_url = url_current

# print year
print('target year:', target_year)

target year: 2025


In [3]:
# event better code to extract the game date plus time as well
driver = webdriver.Chrome()
driver.get(master_url)
time.sleep(5)

# click reject cookies button when you first visit the site
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait for the OneTrust cookie banner to appear
try:
    reject_all_btn = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.ID, "onetrust-reject-all-handler"))
    )
    driver.execute_script("arguments[0].click();", reject_all_btn)  # avoids iframe overlay issues
    print("✅ Reject All clicked")
except:
    print("⚠️ Reject All button not found or not clickable")

# new code to click show more matches button
while True:
    try:
        btn = driver.find_element(By.LINK_TEXT, "Show more matches")
        driver.execute_script("arguments[0].scrollIntoView({block:'center'}); arguments[0].click();", btn)
        time.sleep(2)
    except NoSuchElementException:
        break

# Get all game divs
games = driver.find_elements(By.CSS_SELECTOR, "div.event__match")

# get len of games
len_games = len(games)

game_data = []

# set default zero var for OT periods if no OT
def_zero = 0

for idx, game in enumerate(games):
    if idx % 100 == 0:
        print(f'{idx} of {len_games}')
    try:
        # get game time
        game_time = game.find_element(By.CSS_SELECTOR, "div.event__time").text
        game_link = game.find_element(By.TAG_NAME, "a").get_attribute("href")
    
        # get home and away teams
        home_team = game.find_element(By.CSS_SELECTOR, "div.event__participant.event__participant--home").text
        away_team = game.find_element(By.CSS_SELECTOR, "div.event__participant.event__participant--away").text
        
        # get total scores home and away
        home_score = game.find_element(By.CSS_SELECTOR, "span.event__score.event__score--home").text
        away_score = game.find_element(By.CSS_SELECTOR, "span.event__score.event__score--away").text

        # get goals by period regular time
        p1_home = game.find_element(By.CSS_SELECTOR, "div.event__part--home.event__part--1").text
        p1_away = game.find_element(By.CSS_SELECTOR, "div.event__part--away.event__part--1").text

        p2_home = game.find_element(By.CSS_SELECTOR, "div.event__part--home.event__part--2").text
        p2_away = game.find_element(By.CSS_SELECTOR, "div.event__part--away.event__part--2").text

        p3_home = game.find_element(By.CSS_SELECTOR, "div.event__part--home.event__part--3").text
        p3_away = game.find_element(By.CSS_SELECTOR, "div.event__part--away.event__part--3").text

        # get if game went to AOT or PEN if any
        try:
            stage = game.find_element(By.CSS_SELECTOR, "div.event__stage--block").text
        except NoSuchElementException:
            stage = "Reg" 

        try:
            p4_home = game.find_element(By.CSS_SELECTOR, "div.event__part--home.event__part--4").text
        except NoSuchElementException:
            p4_home = def_zero

        try:
            p4_away = game.find_element(By.CSS_SELECTOR, "div.event__part--away.event__part--4").text
        except NoSuchElementException:
            p4_away = def_zero

        # --- Period 5 ---
        try:
            p5_home = game.find_element(By.CSS_SELECTOR, "div.event__part--home.event__part--5").text
        except NoSuchElementException:
            p5_home = def_zero

        try:
            p5_away = game.find_element(By.CSS_SELECTOR, "div.event__part--away.event__part--5").text
        except NoSuchElementException:
            p5_away = def_zero

        # --- Period 6 ---
        try:
            p6_home = game.find_element(By.CSS_SELECTOR, "div.event__part--home.event__part--6").text
        except NoSuchElementException:
            p6_home = def_zero

        try:
            p6_away = game.find_element(By.CSS_SELECTOR, "div.event__part--away.event__part--6").text
        except NoSuchElementException:
            p6_away = def_zero

        # --- Period 7 ---
        try:
            p7_home = game.find_element(By.CSS_SELECTOR, "div.event__part--home.event__part--7").text
        except NoSuchElementException:
            p7_home = def_zero

        try:
            p7_away = game.find_element(By.CSS_SELECTOR, "div.event__part--away.event__part--7").text
        except NoSuchElementException:
            p7_away = def_zero

    except NoSuchElementException:
        home_team = away_team = home_score = away_score = game_time = None

    # thanks to the @ home is the bottom team even though the site labeling is off
    # this is somewhat concerning for other leagues need to deeper dive to ensure home vs away is correct
    # or is it good bc i did check NPB (will check again) but @ seems like a lazy fix to accomodate MLB
    game_data.append({
        "Game Link": game_link,
        "Game Date Time": game_time,
        "Extra Time": stage,
        "Home Team": home_team, 
        "Away Team": away_team,
        "Home Score": home_score, 
        "Away Score": away_score,
        "P1 Home Score": p1_home,
        "P1 Away Score": p1_away,
        "P2 Home Score": p2_home,
        "P2 Away Score": p2_away,
        "P3 Home Score": p3_home,
        "P3 Away Score": p3_away,
        "P4 Home Score": p4_home,
        "P4 Away Score": p4_away,
        "P5 Home Score": p5_home,
        "P5 Away Score": p5_away,
        "P6 Home Score": p6_home,
        "P6 Away Score": p6_away,
        "P7 Home Score": p7_home,
        "P7 Away Score": p7_away,
    })

# Convert to DataFrame
df = pd.DataFrame(game_data)

# close webpage
driver.quit()

# inspect df
df.info()
df.head()


✅ Reject All clicked
0 of 336
100 of 336
200 of 336
300 of 336
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 336 entries, 0 to 335
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Game Link       336 non-null    object
 1   Game Date Time  336 non-null    object
 2   Extra Time      336 non-null    object
 3   Home Team       336 non-null    object
 4   Away Team       336 non-null    object
 5   Home Score      336 non-null    object
 6   Away Score      336 non-null    object
 7   P1 Home Score   336 non-null    object
 8   P1 Away Score   336 non-null    object
 9   P2 Home Score   336 non-null    object
 10  P2 Away Score   336 non-null    object
 11  P3 Home Score   336 non-null    object
 12  P3 Away Score   336 non-null    object
 13  P4 Home Score   336 non-null    object
 14  P4 Away Score   336 non-null    object
 15  P5 Home Score   336 non-null    object
 16  P5 Away Score   336 non-null    obj

Unnamed: 0,Game Link,Game Date Time,Extra Time,Home Team,Away Team,Home Score,Away Score,P1 Home Score,P1 Away Score,P2 Home Score,...,P3 Home Score,P3 Away Score,P4 Home Score,P4 Away Score,P5 Home Score,P5 Away Score,P6 Home Score,P6 Away Score,P7 Home Score,P7 Away Score
0,https://www.flashscore.com/match/hockey/ottawa...,08.11. 13:00\nAOT,AOT,Philadelphia Flyers,Ottawa Senators,2,3,0,2,1,...,1,0,0,1,0,0,0,0,0,0
1,https://www.flashscore.com/match/hockey/new-je...,08.11. 12:30\nPen,Pen,New Jersey Devils,Pittsburgh Penguins,2,1,1,0,0,...,0,0,0,0,2,0,0,0,0,0
2,https://www.flashscore.com/match/hockey/san-jo...,07.11. 22:00,Reg,San Jose Sharks,Winnipeg Jets,2,1,1,1,0,...,1,0,0,0,0,0,0,0,0,0
3,https://www.flashscore.com/match/hockey/calgar...,07.11. 21:00,Reg,Calgary Flames,Chicago Blackhawks,0,4,0,1,0,...,0,3,0,0,0,0,0,0,0,0
4,https://www.flashscore.com/match/hockey/detroi...,07.11. 19:00,Reg,Detroit Red Wings,New York Rangers,1,4,1,1,0,...,0,2,0,0,0,0,0,0,0,0


In [4]:
# filter out any rows with key missing data in this col subset
required_cols = ["Game Date Time", "Home Team", "Away Team", "Home Score", "Away Score"]

df = df.dropna(subset=required_cols)             # remove None / NaN
df = df[~df[required_cols].isin([""]).any(axis=1)]  # remove empty strings

# inspect
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 336 entries, 0 to 335
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Game Link       336 non-null    object
 1   Game Date Time  336 non-null    object
 2   Extra Time      336 non-null    object
 3   Home Team       336 non-null    object
 4   Away Team       336 non-null    object
 5   Home Score      336 non-null    object
 6   Away Score      336 non-null    object
 7   P1 Home Score   336 non-null    object
 8   P1 Away Score   336 non-null    object
 9   P2 Home Score   336 non-null    object
 10  P2 Away Score   336 non-null    object
 11  P3 Home Score   336 non-null    object
 12  P3 Away Score   336 non-null    object
 13  P4 Home Score   336 non-null    object
 14  P4 Away Score   336 non-null    object
 15  P5 Home Score   336 non-null    object
 16  P5 Away Score   336 non-null    object
 17  P6 Home Score   336 non-null    int64 
 18  P6 Away Sc

In [None]:
# split date and time
df[['Date', 'Time']] = df['Game Date Time'].str.split(' ', n=1, expand=True)
df

# # change feb 29 to 28; leap year is dumb
# df['Date'] = df['Date'].str.replace('29.02.', '28.02.')
# df

# clean date into real date col
df['Date'] = pd.to_datetime(df['Date'].str.replace('.', '/') + str(target_year), dayfirst=True)

# clean Time col
df['Time'] = df['Time'].str.split('\n').str[0]

# add year col; TODO - actually from how this code block is Year is actually season
df['Year'] = df['Date'].dt.year

# add total score col
df['Total Score'] = df['Home Score'].astype(int) + df['Away Score'].astype(int)

# drop orig date + time col
df = df.drop('Game Date Time', axis=1)

df.head()


  df['Date'] = pd.to_datetime(df['Date'].str.replace('.', '/') + str(target_year), dayfirst=True)


Unnamed: 0,Game Link,Extra Time,Home Team,Away Team,Home Score,Away Score,P1 Home Score,P1 Away Score,P2 Home Score,P2 Away Score,...,P5 Home Score,P5 Away Score,P6 Home Score,P6 Away Score,P7 Home Score,P7 Away Score,Date,Time,Year,Total Score
0,https://www.flashscore.com/match/hockey/ottawa...,AOT,Philadelphia Flyers,Ottawa Senators,2,3,0,2,1,0,...,0,0,0,0,0,0,2025-11-08,13:00,2025,5
1,https://www.flashscore.com/match/hockey/new-je...,Pen,New Jersey Devils,Pittsburgh Penguins,2,1,1,0,0,1,...,2,0,0,0,0,0,2025-11-08,12:30,2025,3
2,https://www.flashscore.com/match/hockey/san-jo...,Reg,San Jose Sharks,Winnipeg Jets,2,1,1,1,0,0,...,0,0,0,0,0,0,2025-11-07,22:00,2025,3
3,https://www.flashscore.com/match/hockey/calgar...,Reg,Calgary Flames,Chicago Blackhawks,0,4,0,1,0,0,...,0,0,0,0,0,0,2025-11-07,21:00,2025,4
4,https://www.flashscore.com/match/hockey/detroi...,Reg,Detroit Red Wings,New York Rangers,1,4,1,1,0,1,...,0,0,0,0,0,0,2025-11-07,19:00,2025,5


In [6]:
# convert all cols with Score in the name to int
score_cols = [col for col in df.columns if 'Score' in col]

# convert score cols to ints
for col in score_cols:
    df[col] = df[col].astype(int)

# inspect
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 336 entries, 0 to 335
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Game Link      336 non-null    object        
 1   Extra Time     336 non-null    object        
 2   Home Team      336 non-null    object        
 3   Away Team      336 non-null    object        
 4   Home Score     336 non-null    int32         
 5   Away Score     336 non-null    int32         
 6   P1 Home Score  336 non-null    int32         
 7   P1 Away Score  336 non-null    int32         
 8   P2 Home Score  336 non-null    int32         
 9   P2 Away Score  336 non-null    int32         
 10  P3 Home Score  336 non-null    int32         
 11  P3 Away Score  336 non-null    int32         
 12  P4 Home Score  336 non-null    int32         
 13  P4 Away Score  336 non-null    int32         
 14  P5 Home Score  336 non-null    int32         
 15  P5 Away Score  336 non-

Unnamed: 0,Game Link,Extra Time,Home Team,Away Team,Home Score,Away Score,P1 Home Score,P1 Away Score,P2 Home Score,P2 Away Score,...,P5 Home Score,P5 Away Score,P6 Home Score,P6 Away Score,P7 Home Score,P7 Away Score,Date,Time,Year,Total Score
0,https://www.flashscore.com/match/hockey/ottawa...,AOT,Philadelphia Flyers,Ottawa Senators,2,3,0,2,1,0,...,0,0,0,0,0,0,2025-11-08,13:00,2025,5
1,https://www.flashscore.com/match/hockey/new-je...,Pen,New Jersey Devils,Pittsburgh Penguins,2,1,1,0,0,1,...,2,0,0,0,0,0,2025-11-08,12:30,2025,3
2,https://www.flashscore.com/match/hockey/san-jo...,Reg,San Jose Sharks,Winnipeg Jets,2,1,1,1,0,0,...,0,0,0,0,0,0,2025-11-07,22:00,2025,3
3,https://www.flashscore.com/match/hockey/calgar...,Reg,Calgary Flames,Chicago Blackhawks,0,4,0,1,0,0,...,0,0,0,0,0,0,2025-11-07,21:00,2025,4
4,https://www.flashscore.com/match/hockey/detroi...,Reg,Detroit Red Wings,New York Rangers,1,4,1,1,0,1,...,0,0,0,0,0,0,2025-11-07,19:00,2025,5


In [7]:
# save to excel
df.to_excel(fr'data\nhl_game_results_{target_year}.xlsx', index=False)