In [None]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import datetime
import csv

In [None]:
### NFL ###
# Scrape the odds table from the VSiN NFL line-tracker page with headless
# Chrome and save the rows as a timestamped JSON file under data/.
import os

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode

# Initialize the Chrome WebDriver with the options
driver = webdriver.Chrome(options=chrome_options)

# Page to scrape
url = 'https://data.vsin.com/nfl/vegas-odds-linetracker/'

# Defined before the try block so both names always exist afterwards, even if
# navigation or the wait fails (otherwise the save step below would raise a
# NameError or silently reuse values left over from an earlier cell).
data = []          # one dict per table row
column_names = []  # header names of the most recent <thead>

try:
    # Navigate inside the try block so the driver is still closed if this fails
    driver.get(url)

    # Wait for the table to be present.
    # NOTE(review): absolute XPath; it breaks whenever the page layout changes.
    table_xpath = '/html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table'
    table = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, table_xpath))
    )

    # Get all immediate child elements of the table (both thead and tbody)
    table_children = table.find_elements(By.XPATH, './*')

    # Walk the children in document order: each <thead> replaces the current
    # column names, each <tbody> contributes rows keyed by those names.
    for child in table_children:
        tag = child.tag_name.lower()
        if tag == 'thead':
            header_cells = child.find_elements(By.XPATH, './tr/th')
            column_names = [cell.text.strip() for cell in header_cells]
            # Give empty headers a positional placeholder name
            column_names = [name if name else f"Column{index+1}" for index, name in enumerate(column_names)]
        elif tag == 'tbody':
            for row in child.find_elements(By.TAG_NAME, "tr"):
                cells = row.find_elements(By.TAG_NAME, "td")
                cell_data = [cell.text.strip() for cell in cells]
                # Rows without <td> cells carry no data
                if not cell_data:
                    continue
                # Reconcile row width with header width: pad short rows with
                # None and name any surplus cells "ExtraColumnN".
                if len(cell_data) != len(column_names):
                    max_length = max(len(cell_data), len(column_names))
                    cell_data.extend([None] * (max_length - len(cell_data)))
                    column_names.extend([f"ExtraColumn{index+1}" for index in range(len(column_names), max_length)])
                # Create a dictionary using column names as keys
                row_data = {column_names[index]: value for index, value in enumerate(cell_data)}
                data.append(row_data)
        # Any other child element type is ignored

except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # Close the WebDriver whether or not the scrape succeeded
    driver.quit()

# Generate a timestamp (minute resolution: two runs within the same minute
# write to the same file; use "%Y%m%d_%H%M%S" for per-second names)
current_time = datetime.datetime.now()
timestamp = current_time.strftime("%Y%m%d_%H%M")

# Write the data to a JSON file with timestamp in the filename
filename = f'data/nfl_odds_vsin_{timestamp}.json'
if data:
    # Make sure the output directory exists before opening the file
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)
    print(f"Data has been saved to {filename}")
else:
    # Do not claim success (or write an empty snapshot) when nothing was scraped
    print("No rows were scraped; nothing was written.")

In [None]:
import json

# Load a previously saved scrape from the data directory.
# The scraper writes these files as UTF-8 with ensure_ascii=False, so read
# them back with the same encoding instead of the platform default.
# NOTE(review): the filename is a hard-coded snapshot; point it at a file that
# actually exists in data/ before running.
with open('data/nfl_odds_vsin_20240926_1357.json', encoding='utf-8') as f:
    games = json.load(f)

# Function to print detailed game information in one line for each sportsbook
def print_game_info(game):
    """Print one scraped game: a header, then one line per remaining column.

    Args:
        game: dict for a single table row as saved by the scraper. The key
            "Time" holds the game time and the *second* key (by insertion
            order) is the header that carries the game day; its value is the
            matchup text. Every other key is printed as "<key>: <value>".
            Values may be None (the scraper pads short rows with None, which
            JSON round-trips as null).

    Output goes to stdout; nothing is returned. Column names are wrapped in
    ANSI bold escape codes.
    """
    def _flat(value, newline_replacement=' '):
        # Turn None into '' and collapse embedded newlines so each value
        # prints on one line.
        if value is None:
            return ''
        return str(value).replace('\n', newline_replacement)

    keys = list(game.keys())
    # The second column's header is the game day; guard against rows that are
    # too short to have one.
    day_and_matchup = keys[1] if len(keys) > 1 else ''

    # Strip the "Splits" link text that the page embeds in these cells
    teams = _flat(game.get(day_and_matchup)).replace('Splits', '').strip()
    game_time = _flat(game.get('Time'), '').replace('Splits', '').strip()
    print(f"Game Day: {day_and_matchup} | Game Time: {game_time}")
    print(f"Matchup: {teams}\n")

    for sportsbook, line in game.items():
        # Time and matchup were already printed in the header
        if sportsbook in ("Time", day_and_matchup):
            continue
        # Build the text outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        line_text = _flat(line)
        print(f"\033[1m{sportsbook}:\033[0m {line_text}")
    print('-' * 50)

# Print every loaded game; the slice walks a shallow copy of the list
# (narrow it, e.g. games[:3] or games[-3:], to preview a subset).
for game in games[0:]:
    print_game_info(game)

# # Print the first 3 games
# print("First 3 Games:")
# for game in games[:3]:
#     print_game_info(game)

# # Print the last 3 games
# print("\nLast 3 Games:")
# for game in games[-3:]:
#     print_game_info(game)

In [None]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import json
import datetime

# Dump the rows of ONE <tbody> of the VSiN NFL odds table to a timestamped
# JSON file, using generic "ColumnN" keys.

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode

# Initialize the Chrome WebDriver with the options
driver = webdriver.Chrome(options=chrome_options)

# Page to scrape
# url = 'https://data.vsin.com/vegas-odds-linetracker/'
url = 'https://data.vsin.com/nfl/vegas-odds-linetracker/'

# Which <tbody> of the table to read (1-based XPath index). Earlier versions of
# this cell assigned tbody[1] .. tbody[5] in sequence, so only 5 took effect.
tbody_index = 5
tbody_xpath = f'/html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table/tbody[{tbody_index}]'

# Initialize an empty list to hold all rows of data
data = []

try:
    driver.get(url)

    # Fixed pause to let the page render before looking up the table body
    time.sleep(5)

    # Locate the table body using XPath
    tbody = driver.find_element(By.XPATH, tbody_xpath)

    # Extract data from each row
    rows = tbody.find_elements(By.TAG_NAME, "tr")
    for row in rows:
        cells = row.find_elements(By.TAG_NAME, "td")
        cell_data = [cell.text for cell in cells]
        row_data = {'Column{}'.format(index+1): value for index, value in enumerate(cell_data)}
        data.append(row_data)
finally:
    # Always close the browser, even if navigation or the lookup raised;
    # the exception still propagates so no partial file is written.
    driver.quit()

# Generate a timestamp
current_time = datetime.datetime.now()
timestamp = current_time.strftime("%Y%m%d_%H%M%S")

# Write the data to a JSON file with timestamp in the filename
filename = f'output_data_{timestamp}.json'
with open(filename, 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file, ensure_ascii=False, indent=4)

In [None]:
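# XPath notes for the odds table (reference only; kept as comments so this cell does not raise a SyntaxError):
# /html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]
# /html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table/tbody[1]
# /html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table/tbody[4]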

In [None]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import json
import datetime

# Dump the rows of EVERY <tbody> of the VSiN NFL odds table to a timestamped
# JSON file, using generic "ColumnN" keys.

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode

# Initialize the Chrome WebDriver with the options
driver = webdriver.Chrome(options=chrome_options)

# Page to scrape
url = 'https://data.vsin.com/nfl/vegas-odds-linetracker/'

# Initialize an empty list to hold all rows of data
data = []

try:
    driver.get(url)

    # Fixed pause to let the page render before looking up the table bodies
    time.sleep(5)

    # Locate all tbody elements using XPath
    tbody_list = driver.find_elements(By.XPATH, '/html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table/tbody')

    # Iterate over each tbody element
    for tbody in tbody_list:
        # Extract data from each row in the current tbody
        rows = tbody.find_elements(By.TAG_NAME, "tr")
        for row in rows:
            cells = row.find_elements(By.TAG_NAME, "td")
            cell_data = [cell.text for cell in cells]
            # Rows without <td> cells yield an empty dict here
            row_data = {'Column{}'.format(index+1): value for index, value in enumerate(cell_data)}
            data.append(row_data)
finally:
    # Always close the browser, even if navigation or extraction raised;
    # the exception still propagates so no partial file is written.
    driver.quit()

# Generate a timestamp
current_time = datetime.datetime.now()
timestamp = current_time.strftime("%Y%m%d_%H%M%S")

# Write the data to a JSON file with timestamp in the filename
filename = f'output_data_{timestamp}.json'
with open(filename, 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file, ensure_ascii=False, indent=4)


In [None]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import datetime

# Scrape the VSiN NFL odds table using the first <thead> row for column names
# and save all <tbody> rows to a timestamped JSON file.

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode

# Initialize the Chrome WebDriver with the options
driver = webdriver.Chrome(options=chrome_options)

# Page to scrape
url = 'https://data.vsin.com/nfl/vegas-odds-linetracker/'

# Initialize an empty list to hold all rows of data
data = []

try:
    driver.get(url)

    # Wait for the table to be present
    table_xpath = '/html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table'
    table = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, table_xpath))
    )

    # Extract column names from the header row of the first <thead> only
    header_xpath = table_xpath + '/thead[1]/tr'
    header_row = driver.find_element(By.XPATH, header_xpath)
    header_cells = header_row.find_elements(By.TAG_NAME, 'th')
    column_names = [cell.text.strip() for cell in header_cells]

    # Give empty headers a positional placeholder name
    column_names = [name if name else f"Column{index+1}" for index, name in enumerate(column_names)]

    # Find all tbody elements within the table
    tbodies = table.find_elements(By.TAG_NAME, 'tbody')

    # Iterate over each tbody and extract data
    for tbody in tbodies:
        rows = tbody.find_elements(By.TAG_NAME, "tr")
        for row in rows:
            cells = row.find_elements(By.TAG_NAME, "td")
            cell_data = [cell.text.strip() for cell in cells]
            # Only add row if there is data
            if cell_data:
                # Reconcile row width with header width: pad short rows with
                # None and name any surplus cells "ExtraColumnN".
                if len(cell_data) != len(column_names):
                    max_length = max(len(cell_data), len(column_names))
                    cell_data.extend([None] * (max_length - len(cell_data)))
                    column_names.extend([f"ExtraColumn{index+1}" for index in range(len(column_names), max_length)])
                # Create a dictionary using column names as keys
                row_data = {column_names[index]: value for index, value in enumerate(cell_data)}
                data.append(row_data)
finally:
    # Always close the browser, even if the wait timed out or a lookup raised;
    # the exception still propagates so no partial file is written.
    driver.quit()

# Generate a timestamp
current_time = datetime.datetime.now()
timestamp = current_time.strftime("%Y%m%d_%H%M%S")

# Write the data to a JSON file with timestamp in the filename
filename = f'output_data_{timestamp}.json'
with open(filename, 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file, ensure_ascii=False, indent=4)

print(f"Data has been saved to {filename}")


In [None]:
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import datetime

# Scrape the odds table from the VSiN NFL line-tracker page with headless
# Chrome and save the rows as a timestamped JSON file under data/.
import os

# Configure Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Run Chrome in headless mode

# Initialize the Chrome WebDriver with the options
driver = webdriver.Chrome(options=chrome_options)

# Page to scrape
url = 'https://data.vsin.com/nfl/vegas-odds-linetracker/'

# Defined before the try block so both names always exist afterwards, even if
# navigation or the wait fails (otherwise the save step below would raise a
# NameError or silently reuse values left over from an earlier cell).
data = []          # one dict per table row
column_names = []  # header names of the most recent <thead>

try:
    # Navigate inside the try block so the driver is still closed if this fails
    driver.get(url)

    # Wait for the table to be present.
    # NOTE(review): absolute XPath; it breaks whenever the page layout changes.
    table_xpath = '/html/body/div[6]/div[2]/div/div[3]/div/div/div/div[2]/b/div[2]/table'
    table = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, table_xpath))
    )

    # Get all immediate child elements of the table (both thead and tbody)
    table_children = table.find_elements(By.XPATH, './*')

    # Walk the children in document order: each <thead> replaces the current
    # column names, each <tbody> contributes rows keyed by those names.
    for child in table_children:
        tag = child.tag_name.lower()
        if tag == 'thead':
            header_cells = child.find_elements(By.XPATH, './tr/th')
            column_names = [cell.text.strip() for cell in header_cells]
            # Give empty headers a positional placeholder name
            column_names = [name if name else f"Column{index+1}" for index, name in enumerate(column_names)]
        elif tag == 'tbody':
            for row in child.find_elements(By.TAG_NAME, "tr"):
                cells = row.find_elements(By.TAG_NAME, "td")
                cell_data = [cell.text.strip() for cell in cells]
                # Rows without <td> cells carry no data
                if not cell_data:
                    continue
                # Reconcile row width with header width: pad short rows with
                # None and name any surplus cells "ExtraColumnN".
                if len(cell_data) != len(column_names):
                    max_length = max(len(cell_data), len(column_names))
                    cell_data.extend([None] * (max_length - len(cell_data)))
                    column_names.extend([f"ExtraColumn{index+1}" for index in range(len(column_names), max_length)])
                # Create a dictionary using column names as keys
                row_data = {column_names[index]: value for index, value in enumerate(cell_data)}
                data.append(row_data)
        # Any other child element type is ignored

except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # Close the WebDriver whether or not the scrape succeeded
    driver.quit()

# Generate a timestamp (minute resolution: two runs within the same minute
# write to the same file; use "%Y%m%d_%H%M%S" for per-second names)
current_time = datetime.datetime.now()
timestamp = current_time.strftime("%Y%m%d_%H%M")

# Write the data to a JSON file with timestamp in the filename
filename = f'data/nfl_odds_vsin_{timestamp}.json'
if data:
    # Make sure the output directory exists before opening the file
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)
    print(f"Data has been saved to {filename}")
else:
    # Do not claim success (or write an empty snapshot) when nothing was scraped
    print("No rows were scraped; nothing was written.")

In [None]:
import os

# Create the output directory used by the scraping cells. exist_ok=True makes
# the cell safe to re-run, and pure Python avoids depending on a shell.
os.makedirs('data', exist_ok=True)