<a href="https://colab.research.google.com/github/cpsanzone/cross-sport-market-efficiency-analyzer/blob/main/Market_Efficiency_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
## Data Acquisition & ETL Pipeline (NFL Only)
# Engineering Note: Custom ETL pipeline to securely ingest data via Kaggle API.

!pip install -q kaggle
import os
import pandas as pd
from google.colab import userdata

# 1. Secure Auth
try:
    os.environ['KAGGLE_USERNAME'] = userdata.get('KAGGLE_USERNAME')
    os.environ['KAGGLE_KEY'] = userdata.get('KAGGLE_KEY')
    print("‚úÖ Credentials loaded.")
except:
    print("‚ö†Ô∏è Set Secrets in sidebar.")

# 2. Download NFL Data Only
print("‚è≥ Downloading NFL Data...")
!kaggle datasets download -d tobycrabtree/nfl-scores-and-betting-data --force
!unzip -o -q nfl-scores-and-betting-data.zip

# 3. Load DataFrame
try:
    nfl_df = pd.read_csv('spreadspoke_scores.csv')
    print(f"‚úÖ NFL Data Loaded Successfully: {len(nfl_df):,} rows")
except Exception as e:
    print(f"‚ùå Error Loading File: {e}")

‚úÖ Credentials loaded.
‚è≥ Downloading NFL Data...
Dataset URL: https://www.kaggle.com/datasets/tobycrabtree/nfl-scores-and-betting-data
License(s): CC-BY-NC-SA-4.0
Downloading nfl-scores-and-betting-data.zip to /content
  0% 0.00/251k [00:00<?, ?B/s]
100% 251k/251k [00:00<00:00, 489MB/s]
‚úÖ NFL Data Loaded Successfully: 14,358 rows


In [2]:
# --- BLOCK 1: CORE NFL ANALYSIS ---
import pandas as pd
import numpy as np

print("‚öôÔ∏è Processing NFL Data...")

# 1. Setup & Preprocessing
if 'nfl_df' not in globals():
    raise ValueError("‚ö†Ô∏è Data not found! Please run Cell 1 first.")

# Filter for Modern Era (2015+)
nfl_df['schedule_date'] = pd.to_datetime(nfl_df['schedule_date'])
nfl_df = nfl_df[nfl_df['schedule_date'].dt.year >= 2015].copy()

# Ensure numeric types
nfl_df['over_under_line'] = pd.to_numeric(nfl_df['over_under_line'], errors='coerce')
nfl_df['spread_favorite'] = pd.to_numeric(nfl_df['spread_favorite'], errors='coerce')
nfl_df.dropna(subset=['score_home', 'score_away', 'over_under_line', 'spread_favorite'], inplace=True)

# Calculate Outcomes
nfl_df['total_score'] = nfl_df['score_home'] + nfl_df['score_away']
nfl_df['over_hit'] = nfl_df['total_score'] > nfl_df['over_under_line']
nfl_df['spread_abs'] = nfl_df['spread_favorite'].abs()

# 2. Granular Segmentation Functions
def get_total_bucket(line):
    if line < 40: return '<40'
    elif 40 <= line < 45: return '40-45'
    elif 45 <= line < 50: return '45-50'
    elif 50 <= line < 55: return '50-55'
    return '>55'

def get_spread_bucket(spread):
    if spread < 1.5: return '0-1.5'
    elif 1.5 <= spread < 3.5: return '1.5-3.5'
    elif 3.5 <= spread < 5.5: return '3.5-5.5'
    elif 5.5 <= spread < 7.5: return '5.5-7.5'
    return '7.5+'

# Apply Buckets
nfl_df['total_bucket'] = nfl_df['over_under_line'].apply(get_total_bucket)
nfl_df['spread_bucket'] = nfl_df['spread_abs'].apply(get_spread_bucket)

# 3. ROI Calculation
print("üîç Scanning for NFL Market Inefficiencies...")

# Group by segments
analysis = nfl_df.groupby(['total_bucket', 'spread_bucket']).agg(
    games=('schedule_date', 'count'),
    over_wins=('over_hit', 'sum')
).reset_index()

# Calculate Metrics (Assuming standard -110 juice = 1.91 implied multiplier)
analysis['win_rate'] = (analysis['over_wins'] / analysis['games']) * 100
analysis['implied_roi'] = ((analysis['win_rate'] / 100 * 1.909) - 1) * 100

# Filter for "Gold" (ROI > 10% and Sample Size > 20 games)
high_roi_opportunities = analysis[
    (analysis['implied_roi'] > 10) &
    (analysis['games'] >= 20)
].sort_values(by='implied_roi', ascending=False)

print("\n‚úÖ NFL Analysis Complete. Top Opportunities:")
display(high_roi_opportunities)

‚öôÔ∏è Processing NFL Data...
üîç Scanning for NFL Market Inefficiencies...

‚úÖ NFL Analysis Complete. Top Opportunities:


Unnamed: 0,total_bucket,spread_bucket,games,over_wins,win_rate,implied_roi
17,<40,3.5-5.5,62,36,58.064516,10.845161


In [3]:
import pandas as pd
import numpy as np

# --- 1. & 2. Categorization Functions for NFL Total Line and Spread ---
low_total_threshold = 40
medium_total_threshold = 50

def categorize_granular_total_line(over_under_line):
    """
    Categorizes the NFL over/under line into granular bins.
    """
    if over_under_line < low_total_threshold:
        return '<40'
    elif 40 <= over_under_line < 45:
        return '40-45'
    elif 45 <= over_under_line < medium_total_threshold:
        return '45-50'
    elif medium_total_threshold <= over_under_line < 55:
        return '50-55'
    else: # over_under_line >= 55
        return '>55'

spread_thresholds_granular = {
    '0-1.5': (0, 1.5),
    '1.5-3.5': (1.5, 3.5),
    '3.5-5.5': (3.5, 5.5),
    '5.5-7.5': (5.5, 7.5),
    '7.5+': (7.5, np.inf)
}

def categorize_granular_spread(spread_value, thresholds):
    """
    Categorizes the NFL point spread into granular bins based on its absolute value.
    """
    abs_spread = abs(spread_value)
    for category, (lower, upper) in thresholds.items():
        if lower <= abs_spread < upper:
            return category
    return np.nan

# --- 3. AMERICAN_ODDS_PAYOUTS dictionary ---
AMERICAN_ODDS_PAYOUTS = {
    1: 1000, # +1000 odds
    2: 600,  # +600 odds
    3: 350,  # +350 odds
    4: 200,  # +200 odds
    5: 125,  # +125 odds
    6: -110, # -110 odds
    7: -130,
    8: -150,
    9: -170,
    10: -190,
    11: -200,
    12: -210
}

# --- 4. convert_american_odds_to_profit_factor function ---
def convert_american_odds_to_profit_factor(odds_value):
    """
    Converts American odds to a profit factor (amount won per $1 bet).
    e.g., +100 means win $100 on a $100 bet (profit factor 1.0)
    e.g., -110 means risk $110 to win $100 (profit factor 100/110 ~ 0.91)
    """
    if odds_value >= 0:
        return odds_value / 100.0
    else:
        return 100.0 / abs(odds_value)

# --- 5. LOSS_PAYOUT_FACTOR constant ---
LOSS_PAYOUT_FACTOR = -1.0

# --- 6. Data for final_high_roi_combinations_df ---
# This data represents the high-ROI granular combinations identified during analysis.
# It is extracted from the 'final_high_roi_combinations' DataFrame from the kernel state.
final_high_roi_combinations_data = [
    {'granular_total_line_category': '40-45', 'granular_spread_category': '7.5+', 'optimal_parlay_span_roi': 2, 'max_profit_loss_roi': 72.0, 'total_games_in_combo': 75, 'max_roi': 96.00, 'win_rate_at_optimal_span': 28.00, 'percentage_of_total_games': 9.26},
    {'granular_total_line_category': '40-45', 'granular_spread_category': '1.5-3.5', 'optimal_parlay_span_roi': 2, 'max_profit_loss_roi': 31.0, 'total_games_in_combo': 116, 'max_roi': 26.72, 'win_rate_at_optimal_span': 18.10, 'percentage_of_total_games': 14.32},
    {'granular_total_line_category': '45-50', 'granular_spread_category': '5.5-7.5', 'optimal_parlay_span_roi': 1, 'max_profit_loss_roi': 12.0, 'total_games_in_combo': 54, 'max_roi': 22.22, 'win_rate_at_optimal_span': 11.11, 'percentage_of_total_games': 6.67},
    {'granular_total_line_category': '45-50', 'granular_spread_category': '3.5-5.5', 'optimal_parlay_span_roi': 3, 'max_profit_loss_roi': 11.5, 'total_games_in_combo': 56, 'max_roi': 20.54, 'win_rate_at_optimal_span': 26.79, 'percentage_of_total_games': 6.91},
    {'granular_total_line_category': '40-45', 'granular_spread_category': '5.5-7.5', 'optimal_parlay_span_roi': 2, 'max_profit_loss_roi': 11.0, 'total_games_in_combo': 80, 'max_roi': 13.75, 'win_rate_at_optimal_span': 16.25, 'percentage_of_total_games': 9.88},
    {'granular_total_line_category': '<40', 'granular_spread_category': '1.5-3.5', 'optimal_parlay_span_roi': 3, 'max_profit_loss_roi': 7.5, 'total_games_in_combo': 51, 'max_roi': 14.71, 'win_rate_at_optimal_span': 25.49, 'percentage_of_total_games': 6.30}
]

# Convert to DataFrame for easier lookup
final_high_roi_combinations_df = pd.DataFrame(final_high_roi_combinations_data)


# --- 7. recommend_bet function ---
def recommend_bet(over_under_line, spread_favorite):
    """
    Recommends an NFL total score betting strategy based on granular categories
    and optimal parlay spans identified in historical analysis.

    Args:
        over_under_line (float): The game's over/under line.
        spread_favorite (float): The game's point spread for the favorite team.

    Returns:
        dict: A dictionary containing betting recommendation details, or a message indicating no recommendation.
    """
    # Categorize current game
    current_total_line_cat = categorize_granular_total_line(over_under_line)
    current_spread_cat = categorize_granular_spread(spread_favorite, spread_thresholds_granular)

    if pd.isna(current_total_line_cat) or pd.isna(current_spread_cat):
        return {
            "recommendation": "No recommendation",
            "reason": "Could not categorize game due to missing or invalid inputs."
        }

    # Find matching combination in the high-ROI DataFrame
    match = final_high_roi_combinations_df[
        (final_high_roi_combinations_df['granular_total_line_category'] == current_total_line_cat) &
        (final_high_roi_combinations_df['granular_spread_category'] == current_spread_cat)
    ]

    if not match.empty:
        # Get the top recommendation for this combination (e.g., highest ROI if multiple spans were tested)
        best_match = match.sort_values(by='max_roi', ascending=False).iloc[0]

        optimal_span = int(best_match['optimal_parlay_span_roi'])
        profit_factor_won = convert_american_odds_to_profit_factor(AMERICAN_ODDS_PAYOUTS.get(optimal_span, -100)) # Default to -100 odds if not found

        return {
            "recommendation": "Place a bet",
            "details": (
                f"Bet on total score to be within +/- {optimal_span} points of the over/under line. "
                f"This combination has historically yielded an ROI of {best_match['max_roi']:.2f}% "
                f"(over {int(best_match['total_games_in_combo'])} games) "
                f"with a historical win rate of {best_match['win_rate_at_optimal_span']:.2f}% "
                f"and a profit factor of {profit_factor_won:.2f} for a winning bet."
            ),
            "optimal_parlay_span": optimal_span,
            "historical_roi": f"{best_match['max_roi']:.2f}%",
            "historical_profit_loss": f"${best_match['max_profit_loss_roi']:.2f}",
            "historical_win_rate": f"{best_match['win_rate_at_optimal_span']:.2f}%"
        }
    else:
        return {
            "recommendation": "No recommendation",
            "reason": "This specific combination of total line and spread did not meet high-ROI and frequency criteria."
        }

# --- 8. Interactive Loop for User Input ---
def run_interactive_tool():
    print("\n--- Interactive NFL Betting Recommendation Tool ---")
    print("Enter 'exit' at any prompt to quit.")
    while True:
        try:
            ou_input = input("Enter Over/Under Line (e.g., 47.5): ")
            if ou_input.lower() == 'exit': break
            over_under_line = float(ou_input)

            spread_input = input("Enter Spread Favorite (e.g., -7.5 for favorite by 7.5, 7.5 for underdog by 7.5): ")
            if spread_input.lower() == 'exit': break
            spread_favorite = float(spread_input)

            recommendation = recommend_bet(over_under_line, spread_favorite)
            print("\nRecommendation:")
            for key, value in recommendation.items():
                print(f"  {key}: {value}")
            print("-" * 50)

        except ValueError:
            print("Invalid input. Please enter numeric values for Over/Under Line and Spread Favorite.")
        except Exception as e:
            print(f"An error occurred: {e}")

# 9. Add a call to run_interactive_tool() to start the interactive session
run_interactive_tool()


--- Interactive NFL Betting Recommendation Tool ---
Enter 'exit' at any prompt to quit.
Enter Over/Under Line (e.g., 47.5): 38
Enter Spread Favorite (e.g., -7.5 for favorite by 7.5, 7.5 for underdog by 7.5): 4.5

Recommendation:
  recommendation: No recommendation
  reason: This specific combination of total line and spread did not meet high-ROI and frequency criteria.
--------------------------------------------------
Enter Over/Under Line (e.g., 47.5): 39.5
Enter Spread Favorite (e.g., -7.5 for favorite by 7.5, 7.5 for underdog by 7.5): 1.5

Recommendation:
  recommendation: Place a bet
  details: Bet on total score to be within +/- 3 points of the over/under line. This combination has historically yielded an ROI of 14.71% (over 51 games) with a historical win rate of 25.49% and a profit factor of 3.50 for a winning bet.
  optimal_parlay_span: 3
  historical_roi: 14.71%
  historical_profit_loss: $7.50
  historical_win_rate: 25.49%
--------------------------------------------------


KeyboardInterrupt: Interrupted by user