In [1]:
import pandas as pd
from thefuzz import fuzz
from helpers import classification as pf
from helpers import stats as bf
from services import BetExplorerService, MySQLService
import os
from joblib import load
from termcolor import colored

from helpers.file import load_from_file
from helpers.options import filtered_cols, selected_stats, strategy



Setup Complete


In [2]:
season = 2024
league = "mls"
country = "usa"

In [3]:
def load_saved_predictor_utils(league):
    predictor_utils = load(f"../dist/betting/{league}.joblib")

    return predictor_utils

model_data = load_saved_predictor_utils(league)

pipeline = model_data["pipeline"]
best_model_name = model_data["best_model_name"]
features = model_data["filtered_cols"]
min_odds = model_data["min_odds"]
min_games_played = model_data["min_games_played"]
min_games_played_at = model_data["min_games_played_at"]
strategy = model_data["strategy"]

print(f"Model {best_model_name}, features, and min odds have been loaded successfully.")
print(f"Loaded features: {features}")
print(f"Loaded min odds: {min_odds}")
print(f"Loaded mininum games played: {min_games_played}")
print(f"Loaded mininum games played at: {min_games_played_at}")

Model voting_classifier, features, and min odds have been loaded successfully.
Loaded features: ['home_points_pct', 'home_win_pct', 'home_draw_pct', 'home_loss_pct', 'home_points_pct_last_games', 'home_win_pct_last_games', 'home_draw_pct_last_games', 'home_loss_pct_last_games', 'home_home_win_pct', 'home_home_draw_pct', 'home_home_loss_pct', 'home_team_score', 'home_opp_score', 'home_home_team_score', 'home_home_opp_score', 'away_points_pct', 'away_win_pct', 'away_draw_pct', 'away_loss_pct', 'away_points_pct_last_games', 'away_win_pct_last_games', 'away_draw_pct_last_games', 'away_loss_pct_last_games', 'away_away_win_pct', 'away_away_draw_pct', 'away_away_loss_pct', 'away_team_score', 'away_opp_score', 'away_away_opp_score', 'away_away_team_score', 'home_odds', 'away_odds', 'draw_odds', 'home_elo', 'away_elo']
Loaded min odds: 2.5
Loaded mininum games played: 10
Loaded mininum games played at: 5


In [4]:
mysql = MySQLService()

season_games, teams_elo = bf.initialize_matches(league, season)

teams_query = f"SELECT DISTINCT(home_team) as team FROM matches WHERE season = {season} and league = '{league}'"

teams = mysql.execute_query(teams_query)

Generating teams ELOs...


100%|██████████| 5242/5242 [00:00<00:00, 8507.54it/s]

Successfully generated teams ELOs.





In [5]:
display(season_games)

Unnamed: 0,season,league,date,week,home_team,home_xg,home_score,away_score,away_xg,away_team,home_odds,away_odds,draw_odds,result,home_elo,away_elo
4739,2024,mls,2024-02-21,1,Inter Miami,1.4,2,0,0.8,Real Salt Lake,1.55,5.11,4.60,H,1436.81,1509.20
4740,2024,mls,2024-02-24,1,Austin,1.1,1,2,3.0,Minnesota Utd,2.14,3.25,3.51,A,1465.35,1473.47
4741,2024,mls,2024-02-24,1,Charlotte,2.0,1,0,0.5,NYCFC,2.34,3.09,3.25,H,1476.60,1525.30
4742,2024,mls,2024-02-24,1,Columbus Crew,1.8,1,0,0.5,Atlanta Utd,1.73,4.27,4.06,H,1638.58,1547.04
4743,2024,mls,2024-02-24,1,D.C. United,4.5,3,1,0.8,NE Revolution,2.05,3.46,3.56,H,1438.14,1531.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5237,2024,mls,2024-10-27,Round One,Orlando City,1.6,2,0,0.8,Charlotte,,,,H,1585.43,1542.86
5238,2024,mls,2024-10-28,Round One,FC Cincinnati,3.3,1,0,0.3,NYCFC,,,,H,1570.16,1527.92
5239,2024,mls,2024-10-28,Round One,Seattle Sounders FC,1.0,0,0,0.5,Houston Dynamo,,,,D,1610.47,1561.82
5240,2024,mls,2024-10-29,Round One,Columbus Crew,1.1,0,1,0.9,NY Red Bulls,,,,A,1668.25,1502.29


In [6]:
# Getting odds for next games

bet_explorer = BetExplorerService(country, league)

next_games = bet_explorer.get_next_games()

print(next_games)

[{'home_team': 'Los Angeles FC', 'away_team': 'Vancouver Whitecaps', 'home_odds': 1.55, 'draw_odds': 4.43, 'away_odds': 5.13, 'date': datetime.datetime(2024, 11, 9, 0, 0)}, {'home_team': 'FC Cincinnati', 'away_team': 'NYFC', 'home_odds': 1.81, 'draw_odds': 3.85, 'away_odds': 3.88, 'date': datetime.datetime(2024, 11, 9, 0, 0)}, {'home_team': 'Orlando City', 'away_team': 'Charlotte', 'home_odds': 1.83, 'draw_odds': 3.71, 'away_odds': 4.01, 'date': datetime.datetime(2024, 11, 10, 0, 0)}, {'home_team': 'Inter Miami', 'away_team': 'Atlanta Utd', 'home_odds': 1.39, 'draw_odds': 5.1, 'away_odds': 6.42, 'date': datetime.datetime(2024, 11, 10, 0, 0)}]


In [7]:
def get_most_compatible_team(team):
    team_compatibility = teams

    team_compatibility["score"] = team_compatibility.apply(
        lambda x: fuzz.ratio(team, x["team"]),
        axis=1,
    )
    team_compatibility = team_compatibility.sort_values(
        by="score", ascending=False
    ).reset_index(drop=True)
    
    return team_compatibility.iloc[0]["team"]

In [8]:
data_model = []
for game in next_games:
    home_team_compatible = get_most_compatible_team(game["home_team"])
    print(f"\n{game['home_team']} --> {home_team_compatible}")
    game["home_team_translated"] = home_team_compatible
    
    away_team_compatible = get_most_compatible_team(game["away_team"])
    print(f"{game['away_team']} --> {away_team_compatible}")
    game["away_team_translated"] = away_team_compatible
    
    home_stats_dict = bf.get_team_previous_games_stats(
        game["home_team_translated"], season, game["date"], "H", min_games_played, min_games_played_at, season_games
    )
    if not home_stats_dict:
        continue

    away_stats_dict = bf.get_team_previous_games_stats(
        game["away_team_translated"], season, game["date"], "A", min_games_played, min_games_played_at, season_games
    )
    if not away_stats_dict:
        continue

    game_info_keys = [
        "date",
        "season",
        "home_team_translated",
        "away_team_translated",
        "home_odds",
        "away_odds",
        "draw_odds",
        "result",
        "home_score",
        "away_score",
    ]
    game_info_dict = {key: game.get(key) for key in game_info_keys}

    home_elo = teams_elo.get(game["home_team_translated"])
    away_elo = teams_elo.get(game["away_team_translated"])

    data_model.append({**home_stats_dict, **away_stats_dict, **game_info_dict, "home_elo": home_elo, "away_elo": away_elo})

data_df = pd.DataFrame(data_model)


Los Angeles FC --> LAFC
Vancouver Whitecaps --> Vancouver W'caps

FC Cincinnati --> FC Cincinnati
NYFC --> NYCFC

Orlando City --> Orlando City
Charlotte --> Charlotte

Inter Miami --> Inter Miami
Atlanta Utd --> Atlanta Utd


In [9]:
display(data_df)

Unnamed: 0,home_points_pct,home_win_pct,home_draw_pct,home_loss_pct,home_points_pct_last_games,home_win_pct_last_games,home_draw_pct_last_games,home_loss_pct_last_games,home_home_win_pct,home_home_draw_pct,...,home_team_translated,away_team_translated,home_odds,away_odds,draw_odds,result,home_score,away_score,home_elo,away_elo
0,0.638095,0.571429,0.2,0.228571,0.633333,0.6,0.1,0.3,0.666667,0.222222,...,LAFC,Vancouver W'caps,1.55,5.13,4.43,,,,1631.94,1504.81
1,0.590476,0.542857,0.142857,0.314286,0.466667,0.4,0.2,0.4,0.444444,0.166667,...,FC Cincinnati,NYCFC,1.81,3.88,3.85,,,,1578.95,1519.13
2,0.52381,0.457143,0.2,0.342857,0.7,0.7,0.0,0.3,0.444444,0.222222,...,Orlando City,Charlotte,1.83,4.01,3.71,,,,1597.85,1530.44
3,0.733333,0.657143,0.228571,0.114286,0.8,0.7,0.3,0.0,0.666667,0.222222,...,Inter Miami,Atlanta Utd,1.39,6.42,5.1,,,,1634.47,1498.94


In [10]:
X = data_df[filtered_cols]

odds_cols = [
    "date",
    "season",
    "home_team_translated",
    "away_team_translated",
    "home_odds",
    "away_odds",
    "draw_odds",
    "home_elo",
    "away_elo",
]
odds_df = data_df[odds_cols]

for c in odds_cols:
    if "odds" in c:
        odds_df[c] = pd.to_numeric(odds_df[c], errors="coerce")

predictions = pipeline.predict(X)
probabilities = pipeline.predict_proba(X)

probs_test_df = pd.DataFrame(
    probabilities,
    index=data_df.index,
    columns=["away_probs", "draw_probs", "home_probs"],
)
preds_test_df = pd.DataFrame(predictions, index=data_df.index, columns=["pred"])
test_results_df = pd.concat([preds_test_df, probs_test_df, odds_df], axis=1)

test_results_df.dropna(subset=["home_odds"], inplace=True)
test_results_df = test_results_df[test_results_df["home_odds"] != " "]

test_results_df = test_results_df.astype(
    {"home_odds": float, "draw_odds": float, "away_odds": float}
)

In [11]:
today_bets = 0
for _, game in test_results_df.iterrows():
    bet_value = 1 # pf.get_bet_value_by_row(game, bankroll, strategy)
    odds, probs = pf.get_bet_odds_probs(game)
    
    bet_worth_it = pf.bet_worth_it(
        game["pred"],
        odds,
        1/probs,
        min_odds,
        bet_value
    )

    today_bets += 1

    if bet_worth_it:
        print(colored(f"{game['home_team_translated']} ({round(game['home_elo'], 2)}) x ({round(game['away_elo'], 2)}) {game['away_team_translated']}: {game['pred']} @ {odds}", "green"))
    else:
        print(colored(f"{game['home_team_translated']} ({round(game['home_elo'], 2)}) x ({round(game['away_elo'], 2)}) {game['away_team_translated']}: {game['pred']} @ {odds}", "red"))

if not today_bets:
    print("\nSorry, there are no bets for today.")

[31mLAFC (1631.94) x (1504.81) Vancouver W'caps: H @ 1.55[0m
[31mFC Cincinnati (1578.95) x (1519.13) NYCFC: H @ 1.81[0m
[31mOrlando City (1597.85) x (1530.44) Charlotte: H @ 1.83[0m
[31mInter Miami (1634.47) x (1498.94) Atlanta Utd: H @ 1.39[0m
