In [1]:
import pandas as pd
import mysql.connector
import warnings

warnings.filterwarnings('ignore')

# Connect to database
conn = mysql.connector.connect(
    host="127.0.0.1",
    port="3307",
    user="root",
    database="odds_tracker"
)

# Modified query to include line posting times
query = """
WITH WeekBefore AS (
    SELECT 
        s.id as spread_id,
        s.game_id,
        s.spread,
        c.name as casino,
        s.recorded_at,
        g.commence_time,
        ABS(TIMESTAMPDIFF(HOUR, g.commence_time, s.recorded_at) - 168) as hours_from_target,
        ROW_NUMBER() OVER (PARTITION BY s.game_id, c.name 
                          ORDER BY ABS(TIMESTAMPDIFF(HOUR, g.commence_time, s.recorded_at) - 168)) as rn
    FROM spreads s
    JOIN casinos c ON s.casino_id = c.id
    JOIN games g ON s.game_id = g.id
    JOIN spread_results sr ON s.id = sr.spread_id
    WHERE c.name IN ('fanduel', 'pinnacle')
)
SELECT 
    g.commence_time,
    ht.name as home_team,
    at.name as away_team,
    MAX(CASE WHEN wb.casino = 'pinnacle' THEN wb.spread END) as pinnacle_spread,
    MAX(CASE WHEN wb.casino = 'pinnacle' THEN wb.recorded_at END) as pinnacle_time,
    MAX(CASE WHEN wb.casino = 'fanduel' THEN wb.spread END) as fanduel_spread,
    MAX(CASE WHEN wb.casino = 'fanduel' THEN wb.recorded_at END) as fanduel_time,
    sc.home_score,
    sc.away_score
FROM WeekBefore wb
JOIN spread_results sr ON wb.spread_id = sr.spread_id
JOIN games g ON wb.game_id = g.id
JOIN teams ht ON g.home_team_id = ht.id
JOIN teams at ON g.away_team_id = at.id
JOIN scores sc ON sr.score_id = sc.id
JOIN sports sp ON g.sport_id = sp.id
WHERE wb.rn = 1
AND sp.key = 'americanfootball_nfl'
GROUP BY g.id, g.commence_time, ht.name, at.name, sc.home_score, sc.away_score
HAVING pinnacle_spread IS NOT NULL AND fanduel_spread IS NOT NULL
ORDER BY g.commence_time;
"""

# Load data into pandas
df = pd.read_sql(query, conn)

# Calculate actual margin and errors
df['actual_margin'] = df['home_score'] - df['away_score']
df['pinnacle_error'] = abs(df['actual_margin'] + df['pinnacle_spread'])
df['fanduel_error'] = abs(df['actual_margin'] + df['fanduel_spread'])

# Determine which casino had better line
df['better_line'] = df.apply(lambda row: 
    'Tie' if row['pinnacle_error'] == row['fanduel_error']
    else 'Pinnacle' if row['pinnacle_error'] < row['fanduel_error']
    else 'FanDuel', axis=1)

# Format the timestamps
df['game_time'] = pd.to_datetime(df['commence_time']).dt.strftime('%Y-%m-%d %H:%M')
df['pinnacle_line_time'] = pd.to_datetime(df['pinnacle_time']).dt.strftime('%Y-%m-%d %H:%M')
df['fanduel_line_time'] = pd.to_datetime(df['fanduel_time']).dt.strftime('%Y-%m-%d %H:%M')

# Format the output
output_df = df[[
    'game_time', 
    'home_team', 
    'away_team',
    'pinnacle_spread',
    'pinnacle_line_time',
    'fanduel_spread',
    'fanduel_line_time',
    'home_score',
    'away_score',
    'better_line'
]]

# Save to CSV
output_df.to_csv('line_comparison.csv', index=False)

# Print summary statistics
print("\nOverall Results:")
print("=" * 50)
results = df['better_line'].value_counts()
percentages = (df['better_line'].value_counts(normalize=True) * 100).round(2)

print("\nWho had the better line:")
for result in results.index:
    print(f"{result}: {results[result]} games ({percentages[result]}%)")

# Calculate average error for each casino
print("\nAverage Absolute Error:")
print(f"Pinnacle: {df['pinnacle_error'].mean():.2f} points")
print(f"FanDuel: {df['fanduel_error'].mean():.2f} points")

# Calculate average time before game
df['pinnacle_hours_before'] = (pd.to_datetime(df['commence_time']) - 
                              pd.to_datetime(df['pinnacle_time'])).dt.total_seconds() / 3600
df['fanduel_hours_before'] = (pd.to_datetime(df['commence_time']) - 
                             pd.to_datetime(df['fanduel_time'])).dt.total_seconds() / 3600

print("\nAverage Hours Before Game:")
print(f"Pinnacle: {df['pinnacle_hours_before'].mean():.1f} hours")
print(f"FanDuel: {df['fanduel_hours_before'].mean():.1f} hours")

# Show first few rows of the CSV
print("\nFirst few rows of the generated CSV:")
print(output_df.head().to_string())

# Close connection
conn.close()


Overall Results:

Who had the better line:
Tie: 457 games (43.98%)
FanDuel: 302 games (29.07%)
Pinnacle: 280 games (26.95%)

Average Absolute Error:
Pinnacle: 9.51 points
FanDuel: 9.05 points

Average Hours Before Game:
Pinnacle: 88.1 hours
FanDuel: 58.0 hours

First few rows of the generated CSV:
          game_time             home_team            away_team  pinnacle_spread pinnacle_line_time  fanduel_spread fanduel_line_time  home_score  away_score better_line
0  2020-09-11 00:20    Kansas City Chiefs       Houston Texans             -9.5   2020-09-10 00:00            -9.5  2020-09-10 00:00          34          20         Tie
1  2020-09-13 17:00   Washington Redskins  Philadelphia Eagles              6.0   2020-07-09 00:00             6.5  2020-07-11 00:00          27          17    Pinnacle
2  2020-09-13 17:00         Buffalo Bills        New York Jets             -6.5   2020-09-12 00:00            -6.5  2020-09-12 00:00          27          17         Tie
3  2020-09-13 17:00     