In [1]:
# pull the 1x2 odds for all games

In [2]:
# imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urljoin
import numpy as np
import pandas as pd


In [3]:
# read in all games data
all_games = pd.read_excel('data/master_games_data.xlsx', header=0)

# inspect
all_games.info()
all_games.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3381 entries, 0 to 3380
Data columns (total 44 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Game_Link                 3381 non-null   object        
 1   Extra_Time                3381 non-null   object        
 2   Home_Team                 3381 non-null   object        
 3   Away_Team                 3381 non-null   object        
 4   Home_Score                3381 non-null   int64         
 5   Away_Score                3381 non-null   int64         
 6   P1_Home_Score             3381 non-null   int64         
 7   P1_Away_Score             3381 non-null   int64         
 8   P2_Home_Score             3381 non-null   int64         
 9   P2_Away_Score             3381 non-null   int64         
 10  P3_Home_Score             3381 non-null   int64         
 11  P3_Away_Score             3381 non-null   int64         
 12  P4_Home_Score       

Unnamed: 0,Game_Link,Extra_Time,Home_Team,Away_Team,Home_Score,Away_Score,P1_Home_Score,P1_Away_Score,P2_Home_Score,P2_Away_Score,...,Away_Conf,Home_Div,Away_Div,Conf_Matchup,Div_Matchup,Conf_Pair,Div_Pair,Team_Pair,Game_Start_Hour,Start_Hour_Group
0,https://www.flashscore.com/match/hockey/edmont...,Reg,Florida Panthers,Edmonton Oilers,2,1,1,1,1,0,...,Western,Atlantic,Pacific,False,False,Eastern-Western,Atlantic-Pacific,Florida Panthers vs Edmonton Oilers,20,Late
1,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,5,1,1,0,2,0,...,Eastern,Pacific,Atlantic,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late
2,https://www.flashscore.com/match/hockey/edmont...,Reg,Florida Panthers,Edmonton Oilers,3,5,0,1,2,3,...,Western,Atlantic,Pacific,False,False,Eastern-Western,Atlantic-Pacific,Florida Panthers vs Edmonton Oilers,20,Late
3,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,8,1,3,1,3,0,...,Eastern,Pacific,Atlantic,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late
4,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,3,4,0,1,1,3,...,Eastern,Pacific,Atlantic,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late


In [5]:
# filter all_games to just 2025 data
all_games = all_games[all_games['Season'] == 2025]

In [6]:
# isolate the game links
game_urls = all_games['Game_Link']

# store len of game_urls
len_games = len(game_urls)

# inspect first url
game_urls[0]

KeyError: 0

In [7]:
from urllib.parse import urlsplit, urlunsplit

# build odds urls from game url
odds_urls = []

for x in game_urls:
    # split url into parts
    parts = urlsplit(x)
    path = parts.path.rstrip("/")  # remove trailing slash

    # insert odds/1x2-odds/full-time/ before the last segment
    new_path = path + "/odds/1x2-odds/full-time/"

    # reconstruct URL with query string preserved (?mid=...)
    final_url = urlunsplit((parts.scheme, parts.netloc, new_path, parts.query, parts.fragment))

    # add to list
    odds_urls.append(final_url)

# inspect final odds urls
print(len(odds_urls))
odds_urls[:5]


368


['https://www.flashscore.com/match/hockey/columbus-blue-jackets-0vCzmbQ5/seattle-kraken-MibYQ5nU/odds/1x2-odds/full-time/?mid=fDroX6xH',
 'https://www.flashscore.com/match/hockey/vancouver-canucks-YiOrnTV9/winnipeg-jets-r3L8pAaJ/odds/1x2-odds/full-time/?mid=j9ywxETq',
 'https://www.flashscore.com/match/hockey/anaheim-ducks-4CM4ojpD/colorado-avalanche-hACAnvBa/odds/1x2-odds/full-time/?mid=MapwZS74',
 'https://www.flashscore.com/match/hockey/minnesota-wild-j9Ck7BHi/san-jose-sharks-E588Co9j/odds/1x2-odds/full-time/?mid=QXjUzkwh',
 'https://www.flashscore.com/match/hockey/calgary-flames-EZekktRt/st-louis-blues-KSAayVoi/odds/1x2-odds/full-time/?mid=fwj7sz6M']

In [8]:
# directly access the odds pages and scrape the 1x2 odds
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urljoin
import numpy as np
import pandas as pd

# ---------------------------
# Chrome speed-oriented setup
# ---------------------------
opts = Options()
opts.add_argument("--headless=new")
opts.add_argument("--disable-gpu")
opts.add_argument("--no-sandbox")
opts.add_argument("--disable-dev-shm-usage")
opts.add_argument("--disable-extensions")
opts.add_argument("--disable-notifications")
opts.add_argument("--blink-settings=imagesEnabled=false")  # block images

# also block heavy content types
prefs = {
    "profile.managed_default_content_settings.images": 2,
    "profile.managed_default_content_settings.stylesheets": 2,
    "profile.managed_default_content_settings.fonts": 2
}
opts.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome(options=opts)
wait = WebDriverWait(driver, 8)

master_odds_1x2 = []

for idx, url in enumerate(odds_urls):
    # print every 100 urls
    if idx % 150 == 0:
        print(f'{idx} of {len_games}')

    try:    
        # navigate to url
        driver.get(url)

        # Wait until at least one ODD_CELL appears
        wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "[data-analytics-element^='ODDS_COMP'][data-analytics-element*='ODD_CELL_']")
            )
        )
    
        # ✅ Get ALL elements for cells ending in ODD_CELL_1, 2, and 3
        cell1_elems = driver.find_elements(
            By.CSS_SELECTOR, "[data-analytics-element^='ODDS_COMP'][data-analytics-element$='CELL_1'] span"
        )
        cell2_elems = driver.find_elements(
            By.CSS_SELECTOR, "[data-analytics-element^='ODDS_COMP'][data-analytics-element$='CELL_2'] span"
        )
        cell3_elems = driver.find_elements(
            By.CSS_SELECTOR, "[data-analytics-element^='ODDS_COMP'][data-analytics-element$='CELL_3'] span"
        )

        # get all the vals and replace + with ''
        cell1_vals = [float(e.text.replace("+", "")) if e.text.strip() else np.nan for e in cell1_elems]
        cell2_vals = [float(e.text.replace("+", "")) if e.text.strip() else np.nan for e in cell2_elems]
        cell3_vals = [float(e.text.replace("+", "")) if e.text.strip() else np.nan for e in cell3_elems]

        # get means of values
        cell1_mean = np.mean(cell1_vals) if cell1_vals else np.nan
        cell2_mean = np.mean(cell2_vals) if cell2_vals else np.nan
        cell3_mean = np.mean(cell3_vals) if cell3_vals else np.nan

    except:
        print("Could not find odds for URL:", url)
        cell1_mean = np.nan
        cell2_mean = np.nan
        cell3_mean = np.nan

    if idx % 25 == 0:
        print(cell1_mean, cell2_mean, cell3_mean)

    # store final means in master list
    master_odds_1x2.append([cell1_mean, cell2_mean, cell3_mean])

driver.quit()

print("\n✅ FINAL RESULT (master_odds_1x2):")
print(master_odds_1x2)

0 of 368
2.275 4.225 2.55
2.025 4.325 2.9
150 of 368
2.15 4.125 2.775
3.3 4.375 1.86
300 of 368
1.5750000000000002 4.75 4.0
Could not find odds for URL: https://www.flashscore.com/match/hockey/new-jersey-devils-Kh0O591A/washington-capitals-UND9nmoG/odds/1x2-odds/full-time/?mid=GYwA0BK8
Could not find odds for URL: https://www.flashscore.com/match/hockey/new-jersey-devils-Kh0O591A/ottawa-senators-noTCHmWM/odds/1x2-odds/full-time/?mid=2yVAC6V8
Could not find odds for URL: https://www.flashscore.com/match/hockey/colorado-avalanche-hACAnvBa/utah-mammoth-hnwxhtVp/odds/1x2-odds/full-time/?mid=YcWIpiph
Could not find odds for URL: https://www.flashscore.com/match/hockey/calgary-flames-EZekktRt/edmonton-oilers-Mg0qoxeI/odds/1x2-odds/full-time/?mid=8GxRDAFU
Could not find odds for URL: https://www.flashscore.com/match/hockey/calgary-flames-EZekktRt/edmonton-oilers-Mg0qoxeI/odds/1x2-odds/full-time/?mid=ns8yYD0n
Could not find odds for URL: https://www.flashscore.com/match/hockey/florida-panthers

In [11]:
# Convert master list to DataFrame
df_odds = pd.DataFrame(master_odds_1x2, columns=['Odds_1', 'Odds_X', 'Odds_2'])

# reset index on all games
all_games = all_games.reset_index(drop=True)

# Concatenate side by side for final results
final_data = pd.concat([all_games, df_odds], axis=1) 
final_data

Unnamed: 0,Game_Link,Extra_Time,Home_Team,Away_Team,Home_Score,Away_Score,P1_Home_Score,P1_Away_Score,P2_Home_Score,P2_Away_Score,...,Conf_Matchup,Div_Matchup,Conf_Pair,Div_Pair,Team_Pair,Game_Start_Hour,Start_Hour_Group,Odds_1,Odds_X,Odds_2
0,https://www.flashscore.com/match/hockey/columb...,Pen,Seattle Kraken,Columbus Blue Jackets,1,2,1,0,0,1,...,False,False,Western-Eastern,Pacific-Metropolitan,Seattle Kraken vs Columbus Blue Jackets,22,Late,2.275,4.225,2.550
1,https://www.flashscore.com/match/hockey/vancou...,Reg,Vancouver Canucks,Winnipeg Jets,3,5,2,3,0,0,...,True,False,Western-Western,Pacific-Central,Vancouver Canucks vs Winnipeg Jets,22,Late,2.600,4.125,2.250
2,https://www.flashscore.com/match/hockey/anahei...,Reg,Colorado Avalanche,Anaheim Ducks,4,1,1,1,1,0,...,True,False,Western-Western,Central-Pacific,Colorado Avalanche vs Anaheim Ducks,21,Late,1.590,4.875,4.200
3,https://www.flashscore.com/match/hockey/minnes...,AOT,Minnesota Wild,San Jose Sharks,1,2,0,0,1,0,...,True,False,Western-Western,Central-Pacific,Minnesota Wild vs San Jose Sharks,20,Late,1.770,4.500,3.525
4,https://www.flashscore.com/match/hockey/calgar...,Reg,St. Louis Blues,Calgary Flames,3,2,2,0,1,2,...,True,False,Western-Western,Central-Pacific,St. Louis Blues vs Calgary Flames,20,Late,2.075,4.100,2.900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
363,https://www.flashscore.com/match/hockey/minnes...,AOT,Winnipeg Jets,Minnesota Wild,2,3,2,0,0,1,...,True,True,Western-Western,Central-Central,Winnipeg Jets vs Minnesota Wild,17,Mid,2.200,4.000,2.450
364,https://www.flashscore.com/match/hockey/florid...,Reg,Nashville Predators,Florida Panthers,5,0,1,0,2,0,...,False,False,Western-Eastern,Central-Atlantic,Nashville Predators vs Florida Panthers,15,Mid,,,
365,https://www.flashscore.com/match/hockey/ottawa...,Reg,Ottawa Senators,Toronto Maple Leafs,3,4,0,3,2,1,...,True,True,Eastern-Eastern,Atlantic-Atlantic,Ottawa Senators vs Toronto Maple Leafs,15,Mid,1.440,5.250,4.500
366,https://www.flashscore.com/match/hockey/new-je...,Reg,New Jersey Devils,New York Rangers,3,5,1,1,1,4,...,True,True,Eastern-Eastern,Metropolitan-Metropolitan,New Jersey Devils vs New York Rangers,13,Early,2.200,4.000,2.450


In [12]:
# inspect distinct values of Odds_X
final_data['Odds_X'].isna().sum()

np.int64(8)

In [13]:
# write data to excel
final_data.to_excel(r'data/2025_1x2_data.xlsx', index=False)