In [28]:
# Configuration
BASE_DIRECTORY = r"C:\Users\howar\football_research_advanced\2.0\England\Football_Predictions\data"
DATABASE_PATH = os.path.join(BASE_DIRECTORY, "football_data.db")
BASE_URL = "https://v3.football.api-sports.io"
API_KEY = os.getenv("x-rapidapi-key")

In [29]:
# Configuration
BASE_DIRECTORY = r"C:\Users\howar\football_research_advanced\2.0\England\Football_Predictions\data"
DATABASE_PATH = os.path.join(BASE_DIRECTORY, "football_data.db")

import sqlite3
import pandas as pd
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Step 1: Fetch Training Data
def get_training_data():
    conn = sqlite3.connect(DATABASE_PATH)
    query = """
    SELECT
        f.fixture_id,
        f.date,
        f.home_team AS home_team_name,
        f.away_team AS away_team_name,
        f.home_goals,
        f.away_goals,
        s1.league_position AS home_league_position,
        s2.league_position AS away_league_position,
        s1.points AS home_points,
        s2.points AS away_points,
        rf_home.rolling_points AS home_rolling_points,
        rf_home.rolling_wins AS home_rolling_wins,
        rf_home.rolling_draws AS home_rolling_draws,
        rf_home.rolling_losses AS home_rolling_losses,
        rf_away.rolling_points AS away_rolling_points,
        rf_away.rolling_wins AS away_rolling_wins,
        rf_away.rolling_draws AS away_rolling_draws,
        rf_away.rolling_losses AS away_rolling_losses,
        fs1.shots_on_goal AS home_shots_on_goal,
        fs2.shots_on_goal AS away_shots_on_goal,
        fs1.shots_off_goal AS home_shots_off_goal,
        fs2.shots_off_goal AS away_shots_off_goal,
        fs1.total_shots AS home_total_shots,
        fs2.total_shots AS away_total_shots,
        fs1.blocked_shots AS home_blocked_shots,
        fs2.blocked_shots AS away_blocked_shots,
        fs1.shots_insidebox AS home_shots_insidebox,
        fs2.shots_insidebox AS away_shots_insidebox,
        fs1.shots_outsidebox AS home_shots_outsidebox,
        fs2.shots_outsidebox AS away_shots_outsidebox,
        fs1.ball_possession AS home_ball_possession,
        fs2.ball_possession AS away_ball_possession,
        fs1.yellow_cards AS home_yellow_cards,
        fs2.yellow_cards AS away_yellow_cards,
        fs1.red_cards AS home_red_cards,
        fs2.red_cards AS away_red_cards,
        fs1.goalkeeper_saves AS home_goalkeeper_saves,
        fs2.goalkeeper_saves AS away_goalkeeper_saves,
        fs1.total_passes AS home_total_passes,
        fs2.total_passes AS away_total_passes,
        fs1.passes_accurate AS home_passes_accurate,
        fs2.passes_accurate AS away_passes_accurate,
        fs1.passes_percentage AS home_passes_percentage,
        fs2.passes_percentage AS away_passes_percentage
    FROM fixtures f
    LEFT JOIN standings s1
        ON f.home_team_id = s1.team_id AND f.league_season = s1.season
    LEFT JOIN standings s2
        ON f.away_team_id = s2.team_id AND f.league_season = s2.season
    LEFT JOIN recent_form rf_home ON f.home_team_id = rf_home.team_id
    LEFT JOIN recent_form rf_away ON f.away_team_id = rf_away.team_id
    LEFT JOIN fixture_statistics fs1
        ON f.fixture_id = fs1.fixture_id AND f.home_team_id = fs1.team_id
    LEFT JOIN fixture_statistics fs2
        ON f.fixture_id = fs2.fixture_id AND f.away_team_id = fs2.team_id
    WHERE f.status = 'Match Finished';
    """
    df = pd.read_sql_query(query, conn)
    conn.close()
    return df

# Step 2: Prepare Features
def prepare_features(df):
    df['result'] = df.apply(
        lambda row: 0 if row['home_goals'] > row['away_goals'] else
        (2 if row['home_goals'] < row['away_goals'] else 1), axis=1
    )

    features = [
        'home_league_position', 'away_league_position', 'home_points', 'away_points',
        'home_rolling_points', 'home_rolling_wins', 'home_rolling_draws', 'home_rolling_losses',
        'away_rolling_points', 'away_rolling_wins', 'away_rolling_draws', 'away_rolling_losses',
        'home_shots_on_goal', 'away_shots_on_goal', 'home_shots_off_goal', 'away_shots_off_goal',
        'home_total_shots', 'away_total_shots', 'home_blocked_shots', 'away_blocked_shots',
        'home_shots_insidebox', 'away_shots_insidebox', 'home_shots_outsidebox', 'away_shots_outsidebox',
        'home_ball_possession', 'away_ball_possession', 'home_yellow_cards', 'away_yellow_cards',
        'home_red_cards', 'away_red_cards', 'home_goalkeeper_saves', 'away_goalkeeper_saves',
        'home_total_passes', 'away_total_passes', 'home_passes_accurate', 'away_passes_accurate',
        'home_passes_percentage', 'away_passes_percentage'
    ]

    X = df[features].fillna(0)
    y = df['result']
    return X, y, features

# Step 3: Train Models
def train_models(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    models = {
        "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
        "LightGBM": LGBMClassifier(),
        "CatBoost": CatBoostClassifier(verbose=0)
    }

    for model_name, model in models.items():
        print(f"Training {model_name}...")
        model.fit(X_train, y_train)

    return models, X.columns.tolist()

# Step 4: Predict Upcoming Fixtures and Save to Database
def predict_upcoming_fixtures(models, features):
    today = datetime.now()
    next_week = today + timedelta(days=7)

    conn = sqlite3.connect(DATABASE_PATH)
    query = f"""
    SELECT
        f.fixture_id,
        f.date,
        f.home_team AS home_team_name,
        f.away_team AS away_team_name,
        s1.league_position AS home_league_position,
        s2.league_position AS away_league_position,
        s1.points AS home_points,
        s2.points AS away_points,
        rf_home.rolling_points AS home_rolling_points,
        rf_home.rolling_wins AS home_rolling_wins,
        rf_home.rolling_draws AS home_rolling_draws,
        rf_home.rolling_losses AS home_rolling_losses,
        rf_away.rolling_points AS away_rolling_points,
        rf_away.rolling_wins AS away_rolling_wins,
        rf_away.rolling_draws AS away_rolling_draws,
        rf_away.rolling_losses AS away_rolling_losses
    FROM fixtures f
    LEFT JOIN standings s1
        ON f.home_team_id = s1.team_id AND f.league_season = s1.season
    LEFT JOIN standings s2
        ON f.away_team_id = s2.team_id AND f.league_season = s2.season
    LEFT JOIN recent_form rf_home ON f.home_team_id = rf_home.team_id
    LEFT JOIN recent_form rf_away ON f.away_team_id = rf_away.team_id
    WHERE f.status = 'Not Started'
      AND f.date BETWEEN '{today.strftime('%Y-%m-%d')}' AND '{next_week.strftime('%Y-%m-%d')}';
    """
    upcoming_fixtures = pd.read_sql_query(query, conn)

    for feature in features:
        if feature not in upcoming_fixtures.columns:
            upcoming_fixtures[feature] = 0  # Fill missing stats with 0

    X_upcoming = upcoming_fixtures[features].fillna(0)

    best_model = models["XGBoost"]
    predictions = best_model.predict(X_upcoming)
    predicted_probabilities = best_model.predict_proba(X_upcoming)

    prediction_labels = {0: 'Home Win', 1: 'Draw', 2: 'Away Win'}
    upcoming_fixtures['predicted_result'] = pd.Series(predictions).map(prediction_labels)
    upcoming_fixtures['home_probability'] = (predicted_probabilities[:, 0] * 100).round(2)
    upcoming_fixtures['draw_probability'] = (predicted_probabilities[:, 1] * 100).round(2)
    upcoming_fixtures['away_probability'] = (predicted_probabilities[:, 2] * 100).round(2)

    # Save predictions to the database
    cursor = conn.cursor()
    cursor.execute('''
        CREATE
        TABLE IF NOT EXISTS predictions (
            fixture_id INTEGER PRIMARY KEY,
            date TEXT,
            home_team_name TEXT,
            away_team_name TEXT,
            predicted_result TEXT,
            home_probability REAL,
            draw_probability REAL,
            away_probability REAL,
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')
    conn.commit()

    # Insert or replace predictions
    for _, row in upcoming_fixtures.iterrows():
        cursor.execute('''
            INSERT OR REPLACE INTO predictions (
                fixture_id, date, home_team_name, away_team_name, predicted_result, 
                home_probability, draw_probability, away_probability, last_updated
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ''', (
            row['fixture_id'], row['date'], row['home_team_name'], row['away_team_name'],
            row['predicted_result'], row['home_probability'], row['draw_probability'], row['away_probability']
        ))
    
    conn.commit()
    conn.close()
    print("Predictions saved to the database.")

# Main Execution Flow
if __name__ == "__main__":
    print("Fetching training data...")
    training_data = get_training_data()
    print(f"Training data fetched: {len(training_data)} rows.")

    print("Preparing features...")
    X, y, features = prepare_features(training_data)

    print("Training models...")
    models, feature_order = train_models(X, y)

    print("Predicting upcoming fixtures...")
    predict_upcoming_fixtures(models, feature_order)

    print("Process complete!")


Fetching training data...
Training data fetched: 27408 rows.
Preparing features...
Training models...
Training XGBoost...


Parameters: { "use_label_encoder" } are not used.



Training LightGBM...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000718 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1764
[LightGBM] [Info] Number of data points in the train set: 21926, number of used features: 38
[LightGBM] [Info] Start training from score -0.837962
[LightGBM] [Info] Start training from score -1.348260
[LightGBM] [Info] Start training from score -1.178575
Training CatBoost...
Predicting upcoming fixtures...
Predictions saved to the database.
Process complete!


In [30]:
#Predict Odds

In [31]:
import sqlite3
import os
import requests
import datetime
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Configuration
BASE_DIRECTORY = r"C:\Users\howar\football_research_advanced\2.0\England\Football_Predictions\data"
DATABASE_PATH = os.path.join(BASE_DIRECTORY, "football_data.db")
BASE_URL = "https://v3.football.api-sports.io"
API_KEY = os.getenv("x-rapidapi-key")


def fetch_odds(fixture_id, bet=1):
    headers = {
        "x-rapidapi-host": "v3.football.api-sports.io",
        "x-rapidapi-key": API_KEY,
    }
    params = {
        "fixture": fixture_id,
        "bet": bet
    }

    try:
        response = requests.get(f"{BASE_URL}/odds", headers=headers, params=params)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching odds: {e}")
        return None


def save_odds_to_db(fixture_id, odds_data):
    conn = sqlite3.connect(DATABASE_PATH)
    cursor = conn.cursor()

    # Create the odds table if it doesn't exist
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS odds (
            fixture_id INTEGER PRIMARY KEY,
            best_home_odd REAL,
            best_draw_odd REAL,
            best_away_odd REAL,
            best_home_bookmaker TEXT,
            best_draw_bookmaker TEXT,
            best_away_bookmaker TEXT,
            last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')

    bookmakers = odds_data.get('response', [])[0].get('bookmakers', [])
    best_odds = {
        "Home": {"odd": 0, "bookmaker": None},
        "Draw": {"odd": 0, "bookmaker": None},
        "Away": {"odd": 0, "bookmaker": None},
    }

    for bookmaker in bookmakers:
        bookmaker_name = bookmaker['name']
        bets = bookmaker.get('bets', [])
        for bet in bets:
            if bet['id'] == 1:  # Match Winner bet type
                for value in bet['values']:
                    outcome = value['value']
                    odd = float(value['odd'])
                    if odd > best_odds[outcome]["odd"]:
                        best_odds[outcome] = {"odd": odd, "bookmaker": bookmaker_name}

    best_home_odd = best_odds["Home"]["odd"]
    best_home_bookmaker = best_odds["Home"]["bookmaker"]
    best_draw_odd = best_odds["Draw"]["odd"]
    best_draw_bookmaker = best_odds["Draw"]["bookmaker"]
    best_away_odd = best_odds["Away"]["odd"]
    best_away_bookmaker = best_odds["Away"]["bookmaker"]

    cursor.execute('''
        INSERT OR REPLACE INTO odds (
            fixture_id, best_home_odd, best_draw_odd, best_away_odd, 
            best_home_bookmaker, best_draw_bookmaker, best_away_bookmaker, last_updated
        ) VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
    ''', (fixture_id, best_home_odd, best_draw_odd, best_away_odd,
          best_home_bookmaker, best_draw_bookmaker, best_away_bookmaker))

    conn.commit()
    conn.close()
    print(f"Odds for fixture ID {fixture_id} saved successfully.")


def get_fixtures_for_next_7_days():
    conn = sqlite3.connect(DATABASE_PATH)
    cursor = conn.cursor()

    today = datetime.date.today()
    next_week = today + datetime.timedelta(days=7)

    query = f'''
    SELECT fixture_id
    FROM fixtures
    WHERE date BETWEEN '{today}' AND '{next_week}';
    '''
    fixture_ids = cursor.execute(query).fetchall()
    conn.close()

    return [fixture_id[0] for fixture_id in fixture_ids]


def fetch_and_save_odds_for_fixtures():
    fixture_ids = get_fixtures_for_next_7_days()
    print(f"Found {len(fixture_ids)} fixtures in the next 7 days.")

    for fixture_id in fixture_ids:
        odds_response = fetch_odds(fixture_id, bet=1)

        if odds_response and odds_response.get("results", 0) > 0:
            save_odds_to_db(fixture_id, odds_response)
        else:
            print(f"No odds data available for fixture ID: {fixture_id}")


if __name__ == "__main__":
    fetch_and_save_odds_for_fixtures()


Found 64 fixtures in the next 7 days.
Odds for fixture ID 1208133 saved successfully.
Odds for fixture ID 1208134 saved successfully.
Odds for fixture ID 1208135 saved successfully.
Odds for fixture ID 1208136 saved successfully.
Odds for fixture ID 1208137 saved successfully.
Odds for fixture ID 1208138 saved successfully.
Odds for fixture ID 1208139 saved successfully.
Odds for fixture ID 1208140 saved successfully.
Odds for fixture ID 1208141 saved successfully.
Odds for fixture ID 1208142 saved successfully.
Odds for fixture ID 1216023 saved successfully.
Odds for fixture ID 1216024 saved successfully.
Odds for fixture ID 1216025 saved successfully.
Odds for fixture ID 1216026 saved successfully.
Odds for fixture ID 1216027 saved successfully.
Odds for fixture ID 1216028 saved successfully.
Odds for fixture ID 1216029 saved successfully.
Odds for fixture ID 1216030 saved successfully.
Odds for fixture ID 1216031 saved successfully.
Odds for fixture ID 1216032 saved successfully.
Od

In [32]:
# New script for saving to DB

In [34]:
import sqlite3
import pandas as pd

# Configuration
BASE_DIRECTORY = r"C:\Users\howar\football_research_advanced\2.0\England\Football_Predictions\data"
DATABASE_PATH = os.path.join(BASE_DIRECTORY, "football_data.db")

def ensure_predictions_table_columns():
    """
    Ensure the `predictions` table has all the necessary columns.
    """
    conn = sqlite3.connect(DATABASE_PATH)
    cursor = conn.cursor()
    
    # Add the required columns if they do not exist
    try:
        cursor.execute("ALTER TABLE predictions ADD COLUMN best_home_odd REAL;")
    except sqlite3.OperationalError:
        pass  # Column already exists

    try:
        cursor.execute("ALTER TABLE predictions ADD COLUMN best_draw_odd REAL;")
    except sqlite3.OperationalError:
        pass  # Column already exists

    try:
        cursor.execute("ALTER TABLE predictions ADD COLUMN best_away_odd REAL;")
    except sqlite3.OperationalError:
        pass  # Column already exists

    try:
        cursor.execute("ALTER TABLE predictions ADD COLUMN best_home_bookmaker TEXT;")
    except sqlite3.OperationalError:
        pass  # Column already exists

    try:
        cursor.execute("ALTER TABLE predictions ADD COLUMN best_draw_bookmaker TEXT;")
    except sqlite3.OperationalError:
        pass  # Column already exists

    try:
        cursor.execute("ALTER TABLE predictions ADD COLUMN best_away_bookmaker TEXT;")
    except sqlite3.OperationalError:
        pass  # Column already exists

    conn.commit()
    conn.close()

def get_predictions_with_odds():
    """
    Fetch predictions and odds, merge them, and save back to the database.
    """
    conn = sqlite3.connect(DATABASE_PATH)
    cursor = conn.cursor()

    # Fetch predictions joined with odds
    predictions_query = '''
    SELECT 
        p.fixture_id,
        p.date,
        p.home_team_name,
        p.away_team_name,
        p.predicted_result,
        p.home_probability,
        p.draw_probability,
        p.away_probability,
        o.best_home_odd,
        o.best_draw_odd,
        o.best_away_odd,
        o.best_home_bookmaker,
        o.best_draw_bookmaker,
        o.best_away_bookmaker
    FROM predictions p
    LEFT JOIN odds o
        ON p.fixture_id = o.fixture_id
    '''
    predictions = pd.read_sql_query(predictions_query, conn)

    # Update predictions table with odds
    for _, row in predictions.iterrows():
        cursor.execute('''
            INSERT OR REPLACE INTO predictions (
                fixture_id, date, home_team_name, away_team_name, predicted_result,
                home_probability, draw_probability, away_probability,
                best_home_odd, best_draw_odd, best_away_odd,
                best_home_bookmaker, best_draw_bookmaker, best_away_bookmaker,
                last_updated
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ''', (
            row['fixture_id'], row['date'], row['home_team_name'], row['away_team_name'],
            row['predicted_result'], row['home_probability'], row['draw_probability'],
            row['away_probability'], row['best_home_odd'], row['best_draw_odd'],
            row['best_away_odd'], row['best_home_bookmaker'], row['best_draw_bookmaker'],
            row['best_away_bookmaker']
        ))
    conn.commit()
    conn.close()
    print("Predictions updated with odds and saved to the database.")

if __name__ == "__main__":
    print("Ensuring predictions table has required columns...")
    ensure_predictions_table_columns()

    print("Integrating odds into predictions...")
    get_predictions_with_odds()

    print("Integration complete!")


Ensuring predictions table has required columns...
Integrating odds into predictions...
Predictions updated with odds and saved to the database.
Integration complete!


In [35]:
import sqlite3
import pandas as pd

# Configuration
BASE_DIRECTORY = r"C:\Users\howar\football_research_advanced\2.0\England\Football_Predictions\data"
DATABASE_PATH = os.path.join(BASE_DIRECTORY, "football_data.db")

# Step 1: Load Predictions and Odds
def load_predictions_with_odds():
    conn = sqlite3.connect(DATABASE_PATH)
    query = """
    SELECT 
        p.fixture_id, 
        p.date, 
        p.home_team_name, 
        p.away_team_name, 
        p.predicted_result, 
        p.home_probability, 
        p.draw_probability, 
        p.away_probability,
        o.best_home_odd, 
        o.best_draw_odd, 
        o.best_away_odd
    FROM predictions p
    LEFT JOIN odds o ON p.fixture_id = o.fixture_id
    """
    df = pd.read_sql_query(query, conn)
    conn.close()
    return df

# Step 2: Calculate Implied Probabilities and Value Bets
def calculate_value_bets(predictions_df):
    # Implied probabilities from odds
    predictions_df['implied_home_prob'] = 1 / predictions_df['best_home_odd']
    predictions_df['implied_draw_prob'] = 1 / predictions_df['best_draw_odd']
    predictions_df['implied_away_prob'] = 1 / predictions_df['best_away_odd']
    
    # Flag value bets
    predictions_df['value_home'] = predictions_df['home_probability'] / 100 > predictions_df['implied_home_prob']
    predictions_df['value_draw'] = predictions_df['draw_probability'] / 100 > predictions_df['implied_draw_prob']
    predictions_df['value_away'] = predictions_df['away_probability'] / 100 > predictions_df['implied_away_prob']
    
    return predictions_df

# Step 3: Safely Update Predictions Table
def update_predictions_table(predictions_df):
    conn = sqlite3.connect(DATABASE_PATH)
    cursor = conn.cursor()
    
    # Add columns to the predictions table if they don't exist
    try:
        cursor.execute('ALTER TABLE predictions ADD COLUMN value_home BOOLEAN DEFAULT 0;')
    except sqlite3.OperationalError:
        pass  # Column already exists
    
    try:
        cursor.execute('ALTER TABLE predictions ADD COLUMN value_draw BOOLEAN DEFAULT 0;')
    except sqlite3.OperationalError:
        pass  # Column already exists
    
    try:
        cursor.execute('ALTER TABLE predictions ADD COLUMN value_away BOOLEAN DEFAULT 0;')
    except sqlite3.OperationalError:
        pass  # Column already exists
    
    # Update each row with the value bets
    for _, row in predictions_df.iterrows():
        cursor.execute('''
            UPDATE predictions
            SET value_home = ?,
                value_draw = ?,
                value_away = ?
            WHERE fixture_id = ?
        ''', (row['value_home'], row['value_draw'], row['value_away'], row['fixture_id']))
    
    conn.commit()
    conn.close()
    print("Value bets updated in the predictions table.")

# Main Script
if __name__ == "__main__":
    # Load predictions and odds
    predictions_df = load_predictions_with_odds()
    
    # Calculate value bets
    predictions_df = calculate_value_bets(predictions_df)
    
    # Update the predictions table
    update_predictions_table(predictions_df)
    print("Process complete: Value bets added to the predictions table.")



Value bets updated in the predictions table.
Process complete: Value bets added to the predictions table.
