In [None]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import datetime
import csv
import os
import re

In [None]:
### NFL ###
# Scrape the VSiN NFL line-tracker table with headless Chrome and save the
# extracted rows as a timestamped JSON snapshot under data/.

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode

# Initialize the Chrome WebDriver with the options
driver = webdriver.Chrome(options=chrome_options)

url = 'https://data.vsin.com/nfl/vegas-odds-linetracker/'

# Both lists are created *before* the try block so the save step below can
# always inspect `data`, even when the page load or the wait raises.
data = []           # one dict per table row, keyed by column name
column_names = []   # header names of the most recently seen <thead>

try:
    # Navigation lives inside the try so the browser is always quit on failure.
    driver.get(url)

    # Wait for the table to be present.
    # NOTE(review): absolute XPath; presumably breaks whenever the page layout
    # shifts — confirm against the live page if the wait starts timing out.
    table_xpath = '/html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table'
    table = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, table_xpath))
    )

    # Immediate children of the table: the code handles <thead> and <tbody>
    # and ignores anything else.
    table_children = table.find_elements(By.XPATH, './*')

    for child in table_children:
        tag = child.tag_name.lower()

        if tag == 'thead':
            # A header section replaces the column names used for the
            # <tbody> sections that follow it.
            header_cells = child.find_elements(By.XPATH, './tr/th')
            column_names = [cell.text.strip() for cell in header_cells]
            # Give blank headers a positional placeholder name.
            column_names = [
                name if name else f"Column{index+1}"
                for index, name in enumerate(column_names)
            ]

        elif tag == 'tbody':
            for row in child.find_elements(By.TAG_NAME, "tr"):
                cell_data = [
                    cell.text.strip()
                    for cell in row.find_elements(By.TAG_NAME, "td")
                ]
                # Rows without any <td> carry no data.
                if not cell_data:
                    continue

                # Pad whichever side is shorter so both have equal length:
                # missing cells become None, missing headers get ExtraColumnN.
                if len(cell_data) != len(column_names):
                    max_length = max(len(cell_data), len(column_names))
                    cell_data.extend([None] * (max_length - len(cell_data)))
                    column_names.extend(
                        f"ExtraColumn{index+1}"
                        for index in range(len(column_names), max_length)
                    )

                # Lengths are equal here, so zip pairs every cell with a name.
                data.append(dict(zip(column_names, cell_data)))

except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # Close the WebDriver
    driver.quit()

if data:
    # Minute-resolution timestamp; the filename format is what the
    # movement-detection cell's filename regex expects.
    current_time = datetime.datetime.now()
    timestamp = current_time.strftime("%Y%m%d_%H%M")

    # Make sure the output directory exists before opening the file.
    os.makedirs('data', exist_ok=True)

    # Write the data to a JSON file with timestamp in the filename
    filename = f'data/nfl_odds_vsin_{timestamp}.json'
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)

    print(f"Data has been saved to {filename}")
else:
    # Writing nothing is deliberate: an empty or truncated snapshot would
    # break the later cells that json.load every snapshot in data/.
    print("No odds rows were scraped; snapshot file not written.")

In [30]:
# Detect odds movement

# Function to dynamically load JSON files based on timestamps
def load_files(directory):
    """Return the odds snapshot filenames in ``directory``, oldest first.

    Only names of the exact form ``nfl_odds_vsin_YYYYMMDD_HHMM.json`` are
    returned. The whole name must match, so leftovers such as
    ``nfl_odds_vsin_20240926_1456.json.bak`` are ignored instead of being
    handed to the JSON loader later on.

    Args:
        directory: Path of the directory to list.

    Returns:
        A list of bare filenames (no directory prefix) sorted by the
        timestamp embedded in each name; empty if nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    snapshot_pattern = re.compile(r'nfl_odds_vsin_(\d{8}_\d{4})\.json')

    stamped = []
    for name in os.listdir(directory):
        match = snapshot_pattern.fullmatch(name)
        if match:
            stamped.append((match.group(1), name))

    # The zero-padded YYYYMMDD_HHMM stamp sorts chronologically as text.
    stamped.sort()
    return [name for _, name in stamped]

# Function to format odds (to deal with multiline strings for favorite/underdog odds)
def format_odds(odds):
    """Render a multi-line odds cell on a single line.

    Args:
        odds: Odds text with lines separated by ``"\\n"``, or ``None``.
            The scraping cell pads short rows with ``None``, which the
            JSON snapshot stores as ``null``.

    Returns:
        The text with every newline replaced by ``" | "``, or ``"N/A"``
        when ``odds`` is ``None``.
    """
    if odds is None:
        return "N/A"
    return odds.replace("\n", " | ")

# Function to compare odds between two datasets and include the date in the output
def _game_identity(game):
    """Return a hashable (time, date column name, matchup text) key, or None if the row lacks them."""
    keys = list(game.keys())
    if 'Time' not in game or len(keys) < 2:
        return None
    date_column_name = keys[1]  # the second column's *name* is the game date
    return (game['Time'], date_column_name, game[date_column_name])


def detect_odds_movement(odds_before, odds_after):
    """List every sportsbook whose odds differ between two snapshots.

    Games are paired by identity — the ``'Time'`` value, the name of the
    second column (the game date) and that column's value (the matchup
    text) — rather than by list position. A game that is added, dropped or
    reordered between snapshots therefore cannot be compared against a
    different game that merely shares its kickoff time.

    Args:
        odds_before: List of game dicts from the earlier snapshot.
        odds_after: List of game dicts from the later snapshot.

    Returns:
        A list of dicts, in ``odds_before`` order, each with the keys
        ``game_time``, ``game_date_column_name``, ``game_date_value``,
        ``sportsbook``, ``odds_before`` and ``odds_after``. Games present
        in only one snapshot, and rows without a ``'Time'`` key or a
        second column, produce no entries.
    """
    # Index the later snapshot once; the first occurrence of an identity wins.
    after_by_identity = {}
    for game in odds_after:
        identity = _game_identity(game)
        if identity is not None:
            after_by_identity.setdefault(identity, game)

    movements = []
    for game_before in odds_before:
        identity = _game_identity(game_before)
        if identity is None:
            continue
        game_after = after_by_identity.get(identity)
        if game_after is None:
            continue  # game not present in the later snapshot

        game_time, game_date_column_name, game_date_value = identity

        # Compare odds from different sportsbooks
        for key, value_before in game_before.items():
            if key in ("Time", game_date_column_name) or key not in game_after:
                continue
            value_after = game_after[key]
            if value_before != value_after:  # odds have changed
                movements.append({
                    'game_time': game_time,
                    'game_date_column_name': game_date_column_name,  # Save the actual column name
                    'game_date_value': game_date_value,  # Save the value of the date column
                    'sportsbook': key,
                    'odds_before': format_odds(value_before),
                    'odds_after': format_odds(value_after)
                })
    return movements

# Directory containing the odds files
directory = 'data/'

# Load and sort files
files = load_files(directory)

# Compare each snapshot with the one that follows it chronologically.
for file1, file2 in zip(files, files[1:]):
    # The snapshots are written as UTF-8 (ensure_ascii=False), so read them
    # as UTF-8 too instead of relying on the platform's default encoding.
    with open(os.path.join(directory, file1), encoding='utf-8') as f1, \
            open(os.path.join(directory, file2), encoding='utf-8') as f2:
        odds_before = json.load(f1)
        odds_after = json.load(f2)

    # Detect movements between consecutive files
    odds_movements = detect_odds_movement(odds_before, odds_after)

    # Output movements
    if odds_movements:
        print(f"\nODDS MOVEMENT DETECTED {file1} and {file2}:\n")
        for movement in odds_movements:
            # Built outside the f-string: a backslash inside an f-string
            # expression is a syntax error before Python 3.12.
            matchup = movement['game_date_value'].replace('\n', ' vs').strip()
            print(f"Game Date: {movement['game_date_column_name']}")
            print(f"Matchup: {matchup}")
            print(f"Sportsbook: {movement['sportsbook']}")
            print(f"Odds before: {movement['odds_before']}")
            print(f"Odds after: {movement['odds_after']}")
            print("")
    else:
        print(f"No odds movement detected between {file1} and {file2}.")


ODDS MOVEMENT DETECTED nfl_odds_vsin_20240926_1456.json and nfl_odds_vsin_20240926_1458.json:

Game Date: Thu,September 26th
Matchup: Dallas Cowboys vs  New York Giants
Sportsbook: DK
Odds before: -5 -108 | +5 -112
Odds after: -5 -110 | +5 -110

No odds movement detected between nfl_odds_vsin_20240926_1458.json and nfl_odds_vsin_20240926_1500.json.

ODDS MOVEMENT DETECTED nfl_odds_vsin_20240926_1500.json and nfl_odds_vsin_20240926_1502.json:

Game Date: Sun,September 29th
Matchup: Cincinnati Bengals vs  Carolina Panthers
Sportsbook: DK
Odds before: -4.5 -110 | +4.5 -110
Odds after: -4.5 -112 | +4.5 -108


ODDS MOVEMENT DETECTED nfl_odds_vsin_20240926_1502.json and nfl_odds_vsin_20240926_1504.json:

Game Date: Thu,September 26th
Matchup: Dallas Cowboys vs  New York Giants
Sportsbook: GLD Nugget
Odds before: -5.5 -110 | +5.5 -110
Odds after: -5 -110 | +5 -110

Game Date: Sun,September 29th
Matchup: Cincinnati Bengals vs  Carolina Panthers
Sportsbook: DK
Odds before: -4.5 -112 | +4.5 -10

In [32]:
# Print all games in 1 file

# Load the JSON file from the data directory.
# Snapshots are written as UTF-8, so decode them as UTF-8 rather than with
# the platform's default encoding.
with open('data/nfl_odds_vsin_20240926_1456.json', encoding='utf-8') as f:
    games = json.load(f)

# Function to print detailed game information in one line for each sportsbook
def print_game_info(game):
    """Print one game's header followed by one line per sportsbook.

    Args:
        game: Dict for a single game. It must contain a ``'Time'`` key, and
            its second key is used as the game-day label whose value holds
            the matchup text. Every other key is treated as a sportsbook
            name mapped to its odds text (or ``None`` when the row had no
            cell for that column).

    Returns:
        None. Output goes to stdout; sportsbook names are wrapped in ANSI
        bold escape codes and the block ends with a 50-dash separator.
    """
    # The second column's *name* carries the game day; its value is the matchup.
    day_and_matchup = list(game.keys())[1]
    teams = game[day_and_matchup].replace('\n', ' ').replace('Splits', '').strip()  # Remove "Splits" from the teams
    game_time = game['Time'].replace('Splits', '').replace('\n', '').strip()
    print(f"Game Day: {day_and_matchup} | Game Time: {game_time}")
    print(f"Matchup: {teams}\n")

    for sportsbook, line in game.items():
        if sportsbook in ("Time", day_and_matchup):  # Exclude time and matchup keys
            continue
        # Flatten outside the f-string (a backslash inside an f-string
        # expression is a syntax error before Python 3.12) and tolerate
        # missing odds instead of crashing on None.
        line_text = "N/A" if line is None else line.replace('\n', ' ')
        print(f"\033[1m{sportsbook}:\033[0m {line_text}")
    print('-' * 50)

# Print every game in the loaded snapshot, in file order. The list is not
# modified while iterating, so no defensive copy is needed.
for game in games:
    print_game_info(game)

# # Print the first 3 games
# print("First 3 Games:")
# for game in games[:3]:
#     print_game_info(game)

# # Print the last 3 games
# print("\nLast 3 Games:")
# for game in games[-3:]:
#     print_game_info(game)

Game Day: Thu,September 26th | Game Time: 8:15 PM
Matchup: Dallas Cowboys   New York Giants

[1mDK Open:[0m -7 -110 +7 -110
[1mDK:[0m -5 -108 +5 -112
[1mCirca:[0m -5 -110 +5 -110
[1mSouth Point:[0m -5.5 -110 +5.5 -110
[1mGLD Nugget:[0m -5.5 -110 +5.5 -110
[1mWestgate:[0m -5 -105 +5 -105
[1mWynn:[0m -5.5 -110 +5.5 -110
[1mStations:[0m -5.5 -110 +5.5 -110
[1mCaesars:[0m - -
[1mBetMGM:[0m -5.5 -110 +5.5 -110
--------------------------------------------------
Game Day: Sun,September 29th | Game Time: 1:00 PM
Matchup: New Orleans Saints   Atlanta Falcons

[1mDK Open:[0m +1 -115 -1 -105
[1mDK:[0m +2.5 -110 -2.5 -110
[1mCirca:[0m +2.5 -110 -2.5 -110
[1mSouth Point:[0m +1.5 -110 -1.5 -110
[1mGLD Nugget:[0m +2.5 -110 -2.5 -110
[1mWestgate:[0m +2.5 -108 -2.5 -108
[1mWynn:[0m +2.5 -110 -2.5 -110
[1mStations:[0m +2.5 -110 -2.5 -110
[1mCaesars:[0m - -
[1mBetMGM:[0m +2.5 -110 -2.5 -110
--------------------------------------------------
Game Day: Sun,September 