In [1]:
# pull the 1x2 odds for all games

In [2]:
# imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urljoin
import numpy as np
import pandas as pd


In [3]:
# load the master games table; the first sheet row supplies the column names
all_games = pd.read_excel(io='data/master_games_data.xlsx', header=0)

# print the column summary, then display the first rows as the cell output
all_games.info()
all_games.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3349 entries, 0 to 3348
Data columns (total 46 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Game_Link                 3349 non-null   object        
 1   Extra_Time                3349 non-null   object        
 2   Home_Team                 3349 non-null   object        
 3   Away_Team                 3349 non-null   object        
 4   Home_Score                3349 non-null   int64         
 5   Away_Score                3349 non-null   int64         
 6   P1_Home_Score             3349 non-null   int64         
 7   P1_Away_Score             3349 non-null   int64         
 8   P2_Home_Score             3349 non-null   int64         
 9   P2_Away_Score             3349 non-null   int64         
 10  P3_Home_Score             3349 non-null   int64         
 11  P3_Away_Score             3349 non-null   int64         
 12  P4_Home_Score       

Unnamed: 0,Game_Link,Extra_Time,Home_Team,Away_Team,Home_Score,Away_Score,P1_Home_Score,P1_Away_Score,P2_Home_Score,P2_Away_Score,...,Away_Conf,Home_Div,Away_Div,Conf_Matchup,Div_Matchup,Conf_Pair,Div_Pair,Team_Pair,Game_Start_Hour,Start_Hour_Group
0,https://www.flashscore.com/match/hockey/edmont...,Reg,Florida Panthers,Edmonton Oilers,2,1,1,1,1,0,...,Western,Atlantic,Pacific,False,False,Eastern-Western,Atlantic-Pacific,Florida Panthers vs Edmonton Oilers,20,Late
1,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,5,1,1,0,2,0,...,Eastern,Pacific,Atlantic,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late
2,https://www.flashscore.com/match/hockey/edmont...,Reg,Florida Panthers,Edmonton Oilers,3,5,0,1,2,3,...,Western,Atlantic,Pacific,False,False,Eastern-Western,Atlantic-Pacific,Florida Panthers vs Edmonton Oilers,20,Late
3,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,8,1,3,1,3,0,...,Eastern,Pacific,Atlantic,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late
4,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,3,4,0,1,1,3,...,Eastern,Pacific,Atlantic,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late


In [None]:
# isolate the game links (the 'Game_Link' column of all_games)
game_urls = all_games['Game_Link']

# store len of game_urls
len_games = len(game_urls)

# inspect first url by position: .iloc does not depend on the index labels,
# whereas game_urls[0] is a label lookup that fails when no row is labelled 0
game_urls.iloc[0]

In [5]:
from urllib.parse import urlsplit, urlunsplit

def _to_odds_url(game_url):
    """Return the 1x2 full-time odds page URL for a single game URL.

    The odds route is appended to the END of the game's path (after any
    trailing slash has been stripped); scheme, host, query string
    (?mid=...) and fragment are carried over unchanged.
    """
    pieces = urlsplit(game_url)
    odds_path = pieces.path.rstrip("/") + "/odds/1x2-odds/full-time/"
    return urlunsplit((pieces.scheme, pieces.netloc, odds_path, pieces.query, pieces.fragment))


# one odds url per game, in the same order as game_urls
odds_urls = [_to_odds_url(link) for link in game_urls]

# inspect final odds urls
print(len(odds_urls))
odds_urls[:5]


1511


['https://www.flashscore.com/match/hockey/edmonton-oilers-Mg0qoxeI/florida-panthers-fc1eq8Pp/odds/1x2-odds/full-time/?mid=ro9gcXlC',
 'https://www.flashscore.com/match/hockey/edmonton-oilers-Mg0qoxeI/florida-panthers-fc1eq8Pp/odds/1x2-odds/full-time/?mid=h4AMJeNH',
 'https://www.flashscore.com/match/hockey/edmonton-oilers-Mg0qoxeI/florida-panthers-fc1eq8Pp/odds/1x2-odds/full-time/?mid=Qo6jnIyh',
 'https://www.flashscore.com/match/hockey/edmonton-oilers-Mg0qoxeI/florida-panthers-fc1eq8Pp/odds/1x2-odds/full-time/?mid=lhmVlZPk',
 'https://www.flashscore.com/match/hockey/edmonton-oilers-Mg0qoxeI/florida-panthers-fc1eq8Pp/odds/1x2-odds/full-time/?mid=I9dafe2S']

In [None]:
# directly access the odds pages and scrape the 1x2 odds
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urljoin
import numpy as np
import pandas as pd

def _mean_cell_odds(driver, cell_no):
    """Return the mean of the odds shown in one 1x2 column of the current page.

    Reads the text of every <span> inside elements whose
    data-analytics-element attribute starts with 'ODDS_COMP' and ends with
    'CELL_<cell_no>', removes any '+' and converts the rest to float.

    Blank spans are counted as NaN (so a single blank makes the mean NaN);
    if no span matches at all, NaN is returned. A non-blank text that is
    not numeric raises ValueError.
    """
    spans = driver.find_elements(
        By.CSS_SELECTOR,
        f"[data-analytics-element^='ODDS_COMP'][data-analytics-element$='CELL_{cell_no}'] span",
    )
    # fetch each element's text once instead of twice per element
    texts = [span.text for span in spans]
    vals = [float(t.replace("+", "")) if t.strip() else np.nan for t in texts]
    return np.mean(vals) if vals else np.nan


# ---------------------------
# Chrome speed-oriented setup
# ---------------------------
opts = Options()
opts.add_argument("--headless=new")
opts.add_argument("--disable-gpu")
opts.add_argument("--no-sandbox")
opts.add_argument("--disable-dev-shm-usage")
opts.add_argument("--disable-extensions")
opts.add_argument("--disable-notifications")
opts.add_argument("--blink-settings=imagesEnabled=false")  # block images

# also block heavy content types
prefs = {
    "profile.managed_default_content_settings.images": 2,
    "profile.managed_default_content_settings.stylesheets": 2,
    "profile.managed_default_content_settings.fonts": 2
}
opts.add_experimental_option("prefs", prefs)

# define base url
base_url = 'https://www.flashscore.com/'

# one [home, draw, away] row of mean odds per url, in odds_urls order
master_odds_1x2 = []

# progress total comes from the list that is actually iterated
total_urls = len(odds_urls)

driver = webdriver.Chrome(options=opts)

try:
    wait = WebDriverWait(driver, 8)

    for idx, url in enumerate(odds_urls):
        # print progress every 150 urls
        if idx % 150 == 0:
            print(f'{idx} of {total_urls}')

        try:
            # navigate to url
            driver.get(url)

            # Wait until at least one ODD_CELL appears
            wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "[data-analytics-element^='ODDS_COMP'][data-analytics-element*='ODD_CELL_']")
                )
            )

            # mean odds over all cells ending in CELL_1, CELL_2 and CELL_3
            cell1_mean = _mean_cell_odds(driver, 1)
            cell2_mean = _mean_cell_odds(driver, 2)
            cell3_mean = _mean_cell_odds(driver, 3)

        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C / kernel interrupt still
            # stops the scrape; the exception type is shown because a timeout
            # and an unparsable odds text otherwise look identical
            print("Could not find odds for URL:", url, f"({type(exc).__name__})")
            cell1_mean = np.nan
            cell2_mean = np.nan
            cell3_mean = np.nan

        # print a sample of the scraped means every 75 urls
        if idx % 75 == 0:
            print(cell1_mean, cell2_mean, cell3_mean)

        # store final means in master list
        master_odds_1x2.append([cell1_mean, cell2_mean, cell3_mean])

finally:
    # always release the browser, even when the loop is interrupted or fails
    driver.quit()

# show a short preview rather than dumping every row into the cell output
print("\n✅ FINAL RESULT (master_odds_1x2):")
print(f"{len(master_odds_1x2)} rows; first 5:")
print(master_odds_1x2[:5])

0 of 1511
2.45 4.0 2.45
Could not find odds for URL: https://www.flashscore.com/match/hockey/boston-bruins-jeans23J/toronto-maple-leafs-ShE7Z8G3/odds/1x2-odds/full-time/?mid=phOcbFfS
2.5 4.2 2.35
150 of 1511
4.75 4.5 1.59
3.1 4.25 1.95
300 of 1511
2.45 4.0 2.4
3.4 4.2 1.87
450 of 1511
2.15 4.0 2.8
1.4 5.0 6.5
600 of 1511
2.75 4.0 2.25
2.15 3.8 2.9
750 of 1511
2.6 4.0 2.3
4.0 4.5 1.71
900 of 1511
2.4 4.0 2.5
3.5 4.2 1.83
1050 of 1511
2.6 4.0 2.3
2.4 4.0 2.55
1200 of 1511
2.4 4.2 2.45
2.45 4.2 2.4
1350 of 1511
2.5 4.25 2.3
Could not find odds for URL: https://www.flashscore.com/match/hockey/chicago-blackhawks-A1kbmKeg/st-louis-blues-KSAayVoi/odds/1x2-odds/full-time/?mid=vqanlpMm
Could not find odds for URL: https://www.flashscore.com/match/hockey/detroit-red-wings-dnJdmCF0/toronto-maple-leafs-ShE7Z8G3/odds/1x2-odds/full-time/?mid=dGDUZSoa
Could not find odds for URL: https://www.flashscore.com/match/hockey/montreal-canadiens-nu2G7VWc/ottawa-senators-noTCHmWM/odds/1x2-odds/full-time/?mid=

In [None]:
# The odds rows are matched to games purely by position, so the two must have
# the same length; fail loudly instead of letting concat pad with NaN
if len(master_odds_1x2) != len(all_games):
    raise ValueError(
        f"scraped {len(master_odds_1x2)} odds rows for {len(all_games)} games; "
        "re-run the scrape on the current game list before merging"
    )

# Convert master list to DataFrame, reusing all_games' index so that the
# side-by-side concat below (which aligns on index labels) pairs row i of the
# odds with row i of the games whatever the index labels are
df_odds = pd.DataFrame(
    master_odds_1x2,
    columns=['Odds_1', 'Odds_X', 'Odds_2'],
    index=all_games.index,
)

# Concatenate side by side for final results
final_data = pd.concat([all_games, df_odds], axis=1)
final_data

Unnamed: 0,Game_Link,Extra_Time,Home_Team,Away_Team,Home_Score,Away_Score,P1_Home_Score,P1_Away_Score,P2_Home_Score,P2_Away_Score,...,Conf_Matchup,Div_Matchup,Conf_Pair,Div_Pair,Team_Pair,Game_Start_Hour,Start_Hour_Group,Odds_1,Odds_X,Odds_2
0,https://www.flashscore.com/match/hockey/edmont...,Reg,Florida Panthers,Edmonton Oilers,2,1,1,1,1,0,...,False,False,Eastern-Western,Atlantic-Pacific,Florida Panthers vs Edmonton Oilers,20,Late,2.45,4.0,2.45
1,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,5,1,1,0,2,0,...,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late,2.35,4.0,2.55
2,https://www.flashscore.com/match/hockey/edmont...,Reg,Florida Panthers,Edmonton Oilers,3,5,0,1,2,3,...,False,False,Eastern-Western,Atlantic-Pacific,Florida Panthers vs Edmonton Oilers,20,Late,2.00,4.0,3.10
3,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,8,1,3,1,3,0,...,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late,2.30,4.0,2.55
4,https://www.flashscore.com/match/hockey/edmont...,Reg,Edmonton Oilers,Florida Panthers,3,4,0,1,1,3,...,False,False,Western-Eastern,Pacific-Atlantic,Edmonton Oilers vs Florida Panthers,20,Late,2.15,4.0,2.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1506,https://www.flashscore.com/match/hockey/columb...,Pen,Pittsburgh Penguins,Columbus Blue Jackets,3,2,2,0,0,0,...,True,True,Eastern-Eastern,Metropolitan-Metropolitan,Pittsburgh Penguins vs Columbus Blue Jackets,13,Early,,,
1507,https://www.flashscore.com/match/hockey/arizon...,Reg,Los Angeles Kings,Arizona Coyotes,3,2,0,1,0,0,...,False,False,,,Los Angeles Kings vs Arizona Coyotes,0,Late,,,
1508,https://www.flashscore.com/match/hockey/arizon...,Reg,Arizona Coyotes,St. Louis Blues,5,1,1,0,2,0,...,False,False,,,Arizona Coyotes vs St. Louis Blues,20,Late,,,
1509,https://www.flashscore.com/match/hockey/arizon...,Reg,St. Louis Blues,Arizona Coyotes,3,2,1,0,2,0,...,False,False,,,St. Louis Blues vs Arizona Coyotes,15,Mid,,,


In [9]:
# count how many games have no draw odds (missing Odds_X)
final_data['Odds_X'].isnull().sum()

112

In [None]:
# save the combined games + odds table to excel, leaving out the row index
final_data.to_excel(excel_writer=r'data/master_1x2_data.xlsx', index=False)