In [1]:
import utils.helper_functions as hf

import numpy as np
import requests
import json
import time
import pandas as pd
import mysql.connector
from IPython.display import clear_output
from datetime import datetime, timedelta
from config import api_football_key, conn_host, conn_database, conn_user, conn_password
import os
from termcolor import colored
import pickle
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup as soup
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from dateutil import tz
from joblib import load
import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_winner(home_score, away_score):
    """Return the result code for a score line.

    'H' when the home side scored more, 'A' when the away side scored
    more, and 'D' in every other case.
    """
    if home_score > away_score:
        return 'H'
    return 'A' if away_score > home_score else 'D'

In [3]:
# Leagues this notebook can be run for. `league_id` is the id sent to the
# football API and used in the database queries; `country` and `league` are
# the slugs interpolated into the odds-page URL.
leagues = [
    dict(league_id=league_id_, country=country_, league=league_)
    for league_id_, country_, league_ in (
        (71, 'brazil', 'serie-a'),
        (72, 'brazil', 'serie-b'),
        (253, 'usa', 'mls'),
        (128, 'argentina', 'liga-profesional'),
        (98, 'japan', 'j1-league'),
        (40, 'england', 'championship'),
        (39, 'england', 'premier-league'),
        (78, 'germany', 'bundesliga'),
        (140, 'spain', 'laliga'),
        (61, 'france', 'ligue-1'),
    )
]

In [4]:
# Season whose fixtures are imported and predicted.
current_season = 2022

# Index into `leagues` (negative values count from the end of the list).
league_selected_index = -5
league_id, country, league = (
    leagues[league_selected_index][field]
    for field in ('league_id', 'country', 'league')
)

# Number of previous games handed to hf.get_team_previous_games_stats.
n_last_games = 5
# Current time as a Unix timestamp; fixtures are split into past/upcoming against it.
now = time.time()
now_datetime = datetime.today()

# Odds outside [min_threshold, max_threshold] are reported as a bad bet.
min_threshold = 2
max_threshold = 10

# Predicted score gaps up to draw_threshold are treated as a draw; gaps above
# it but below min_score_diff_threshold are reported as a bad bet.
draw_threshold = 0.025
min_score_diff_threshold = 0.4

# Timestamps are interpreted in from_zone and converted to to_zone.
from_zone = tz.gettz('UTC')
to_zone = tz.gettz('America/Sao_Paulo')

# Headers sent with every football API request.
headers = {
    'X-RapidAPI-Key': api_football_key,
    'X-RapidAPI-Host': 'api-football-v1.p.rapidapi.com',
}

In [5]:
# Load the whole `teams` table; get_betting_odds() matches scraped team names
# against its `name` column. (Presumably a DataFrame - verify hf.execute_query.)
# NOTE(review): this runs before the fixture import cell inserts teams, so
# teams added during the current run are missing until this cell is re-run.
teams = hf.execute_query("SELECT * FROM teams")

In [6]:
def get_league_season_fixtures(season):
    """Fetch the fixtures of the selected league for one season.

    Uses the module-level `league_id` and `headers`.

    Args:
        season: Season year sent as the `season` query parameter.

    Returns:
        The value stored under the `response` key of the decoded JSON body.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within 30 seconds.
        KeyError: If the decoded body has no `response` key.
    """
    response = requests.get(
        "https://api-football-v1.p.rapidapi.com/v3/fixtures",
        params={'league': league_id, 'season': season},
        headers=headers,
        timeout=30,  # without a timeout a stalled connection blocks the cell forever
    )
    # Report auth/quota failures as HTTP errors instead of a later KeyError.
    response.raise_for_status()
    return response.json()['response']

In [7]:
def _sql_text(value):
    """Return `value` as a single-quoted SQL string literal.

    Backslashes and single quotes are escaped so names containing an
    apostrophe do not terminate the literal early.
    NOTE(review): a parameterised query would be preferable if
    hf.execute_query supports bound parameters - its signature is not
    visible from this notebook.
    """
    escaped = str(value).replace('\\', '\\\\').replace("'", "''")
    return f"'{escaped}'"


def add_match_info_to_db(fixture):
    """Insert one API fixture (and its league and teams) into the database.

    All four statements are `INSERT IGNORE`, so rows whose id already exists
    are left untouched.

    Args:
        fixture: One fixture dict from the API response; the keys read are
            `fixture.id`, `fixture.timestamp`, `league.id/name/country/season`,
            `teams.home/away.id/name` and `goals.home/away`.
    """
    match_info = fixture['fixture']
    league_info = fixture['league']
    home, away = fixture['teams']['home'], fixture['teams']['away']
    home_score, away_score = fixture['goals']['home'], fixture['goals']['away']

    league_name = f"{league_info['name']} ({league_info['country']})"
    # The Unix timestamp is read as from_zone time and stored as to_zone
    # wall-clock time (datetime.utcfromtimestamp is deprecated).
    fixture_date = datetime.fromtimestamp(match_info['timestamp'], tz=from_zone).astimezone(to_zone)
    fixture_date_converted = fixture_date.strftime('%Y-%m-%d %H:%M:%S')

    hf.execute_query(f"INSERT IGNORE INTO leagues (id, name) VALUES ({league_info['id']}, {_sql_text(league_name)})", False)
    hf.execute_query(f"INSERT IGNORE INTO teams (id, name) VALUES ({home['id']}, {_sql_text(home['name'])})", False)
    hf.execute_query(f"INSERT IGNORE INTO teams (id, name) VALUES ({away['id']}, {_sql_text(away['name'])})", False)
    # NOTE(review): if the API reports null goals, the scores below are sent
    # as the text 'None' - confirm how the matches table handles that.
    hf.execute_query(f"INSERT IGNORE INTO matches (id, date, league_id, season, home_id, away_id, home_score, away_score) VALUES ({match_info['id']}, '{fixture_date_converted}', {league_info['id']}, {league_info['season']}, {home['id']}, {away['id']}, '{home_score}', '{away_score}')", False)

In [8]:
# Import every fixture that has already kicked off into the database and
# remember the fixtures starting within the next 24 hours.
# Bound up front so later cells never hit an undefined name.
next_fixtures = []
for season in range(current_season, current_season+1):
    fixtures_resp = get_league_season_fixtures(season)
    fixtures = [fixture for fixture in fixtures_resp if fixture['fixture']['timestamp'] < now]
    if season == current_season:
        next_fixtures = sorted(
            (fixture for fixture in fixtures_resp if now <= fixture['fixture']['timestamp'] <= now + 24*60*60),
            key=lambda x: x['fixture']['timestamp'],
        )

    # Count from 1 so the final progress line reads N/N instead of N-1/N.
    for index, fixture in enumerate(fixtures, start=1):
        clear_output(wait=True)
        print(f"Loading fixtures for the {season} season: {index}/{len(fixtures)}")
        add_match_info_to_db(fixture)

Loading fixtures for the 2022 season: 272/273


In [9]:
# Ids of the selected league/season matches whose `home_passes_pct` is still
# NULL; these are the matches handed to the statistics download below.
query = f"select m.id from matches as m where m.league_id = {league_id} and m.season = {current_season} and m.home_passes_pct is null"
missing_values_fixtures = hf.execute_query(query)

In [10]:
# Preview the last few match ids that still lack statistics.
missing_values_fixtures.tail()

Unnamed: 0,id
6,880989
7,880990
8,880991
9,880992
10,880993


In [11]:
# Column suffixes of the per-team statistics in `matches`, in the positional
# order update_match_stats() reads them from the statistics lists.
_STAT_COLUMNS = (
    'shots_on_goal', 'shots_off_goal', 'total_shots', 'blocked_shots',
    'shots_inside_box', 'shots_outside_box', 'fouls', 'corners', 'offsides',
    'possession', 'yellow_cards', 'red_cards', 'saves', 'total_passes',
    'passes_accurate', 'passes_pct',
)
# Columns whose value has its '%' characters removed before being written.
_PERCENT_COLUMNS = frozenset({'possession', 'passes_pct'})


def get_fixture_statistics(fixture_id):
    """Fetch the per-team statistics of one fixture from the football API.

    Named differently from get_league_season_fixtures() on purpose: reusing
    that name here would replace the season fetcher for the rest of the session.

    Args:
        fixture_id: Id sent as the `fixture` query parameter.

    Returns:
        The value stored under the `response` key of the decoded JSON body.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If no answer arrives within 30 seconds.
        KeyError: If the decoded body has no `response` key.
    """
    response = requests.get(
        "https://api-football-v1.p.rapidapi.com/v3/fixtures/statistics",
        params={'fixture': fixture_id},
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()['response']


def _stat_assignments(side, stats):
    """Build the `<side>_<column> = <value>` list for one team's statistics."""
    assignments = []
    for position, column in enumerate(_STAT_COLUMNS):
        value = stats[position]  # IndexError when the list is too short
        if column in _PERCENT_COLUMNS:
            text = value.replace('%', '')  # AttributeError when the value is missing
        else:
            text = str(value or 0)  # missing counts are written as 0
        # The text goes into the SQL unquoted, so refuse anything non-numeric.
        float(text)
        assignments.append(f"{side}_{column} = {text}")
    return ", ".join(assignments)


def update_match_stats(fixture_id, home_stats, away_stats):
    """Build the UPDATE statement that stores both teams' statistics.

    Args:
        fixture_id: Id of the `matches` row to update.
        home_stats: Statistic values of the home team; the first 16 entries
            are read in the order of `_STAT_COLUMNS`.
        away_stats: Same, for the away team.

    Returns:
        The SQL text; nothing is executed here.

    Raises:
        IndexError: If a list has fewer than 16 entries.
        AttributeError: If a percentage value is not a string.
        ValueError: If a value is not numeric.
    """
    home_update_query = _stat_assignments('home', home_stats)
    away_update_query = _stat_assignments('away', away_stats)
    return f"UPDATE matches SET {home_update_query}, {away_update_query} WHERE id = {fixture_id}"


def get_fixtures_statistics(fixtures):
    """Download statistics for each listed fixture and store them in one batch.

    Fixtures whose download or parsing fails are skipped; the failures are
    printed after the loop, because clear_output() would otherwise erase them.

    Args:
        fixtures: DataFrame with an `id` column holding the fixture ids.
    """
    update_queries = []
    failures = []
    total = len(fixtures.index)

    for position, (_, g) in enumerate(fixtures.iterrows(), start=1):
        clear_output(wait=True)
        print(f"{position}/{total}")
        try:
            stats = get_fixture_statistics(g['id'])
            # assumes entry 0 is the home team and entry 1 the away team - TODO confirm
            home_stats = [s['value'] for s in stats[0]['statistics']]
            away_stats = [s['value'] for s in stats[1]['statistics']]
            update_queries.append(update_match_stats(g['id'], home_stats, away_stats))
        except Exception as e:
            failures.append((g['id'], e))

    for failed_id, error in failures:
        print(f"fixture {failed_id} skipped: {type(error).__name__}: {error}")

    hf.execute_multiple_queries(update_queries)

In [12]:
# Download and store statistics for the matches that still lack them.
get_fixtures_statistics(missing_values_fixtures)

10/11


In [13]:
# Load every match of the selected league/season, joined with the league and
# team names and ordered by date, including odds and both teams' statistics.
query = f"SELECT m.id, m.date, m.season, l.name AS league, ht.id as home_id, at.id as away_id, ht.name as home_team, at.name as away_team, m.home_score, m.away_score, m.home_odds, m.away_odds, m.draw_odds, " + \
                            "m.home_shots_on_goal, m.home_shots_off_goal, m.home_total_shots, m.home_blocked_shots, m.home_shots_inside_box, m.home_shots_outside_box, m.home_fouls, m.home_corners, m.home_offsides, m.home_possession, m.home_yellow_cards, m.home_red_cards, m.home_saves, m.home_total_passes, m.home_passes_accurate, m.home_passes_pct, " + \
                            "m.away_shots_on_goal, m.away_shots_off_goal, m.away_total_shots, m.away_blocked_shots, m.away_shots_inside_box, m.away_shots_outside_box, m.away_fouls, m.away_corners, m.away_offsides, m.away_possession, m.away_yellow_cards, m.away_red_cards, m.away_saves, m.away_total_passes, m.away_passes_accurate, m.away_passes_pct " + \
                            f"FROM matches AS m INNER JOIN teams AS ht ON (m.home_id = ht.id) INNER JOIN teams AS at ON (m.away_id = at.id) INNER JOIN leagues AS l ON (m.league_id = l.id) WHERE (l.id = {league_id} AND m.season = {current_season}) ORDER BY m.date ASC"
fixtures_df = hf.execute_query(query)
# Derive the result code ('H', 'A' or 'D') of each match from its score.
fixtures_df['winner'] = fixtures_df.apply(lambda x: get_winner(x['home_score'], x['away_score']), axis=1)

In [14]:
# Preview the first few matches of the season.
fixtures_df.head()

Unnamed: 0,id,date,season,league,home_id,away_id,home_team,away_team,home_score,away_score,...,away_corners,away_offsides,away_possession,away_yellow_cards,away_red_cards,away_saves,away_total_passes,away_passes_accurate,away_passes_pct,winner
0,874671,2022-07-29 16:00:00,2022,Championship (England),37,44,Huddersfield,Burnley,0,1,...,5.0,3.0,69.0,2.0,0.0,0.0,599.0,506.0,84.0,A
1,874672,2022-07-30 11:00:00,2022,Championship (England),67,72,Blackburn,QPR,1,0,...,0.0,3.0,50.0,1.0,0.0,3.0,529.0,430.0,81.0,H
2,874673,2022-07-30 11:00:00,2022,Championship (England),1356,53,Blackpool,Reading,1,0,...,10.0,1.0,52.0,0.0,0.0,4.0,422.0,321.0,76.0,H
3,874674,2022-07-30 11:00:00,2022,Championship (England),43,71,Cardiff,Norwich,1,0,...,7.0,2.0,55.0,2.0,1.0,1.0,432.0,336.0,78.0,H
4,874675,2022-07-30 11:00:00,2022,Championship (England),64,56,Hull,Bristol City,2,1,...,3.0,4.0,45.0,0.0,0.0,2.0,382.0,306.0,80.0,H


In [15]:
def get_betting_odds():
    """Scrape the listed matches and their odds for the selected league.

    Opens the oddsportal.com page built from the module-level `country` and
    `league` in headless Chrome, reads the `tournamentTable` element and
    parses its rows. Team names are resolved against the module-level `teams`
    frame by case-insensitive comparison with its `name` column.

    Returns:
        A list of dicts sorted by 'date', each with the keys 'date',
        'home_id', 'away_id', 'home_name', 'away_name', 'home_odds',
        'draw_odds' and 'away_odd' (singular - get_match_with_odds reads
        that exact key). Rows that cannot be parsed are printed and skipped.
    """
    months = dict(Jan=1,Feb=2,Mar=3,Apr=4,May=5,Jun=6,Jul=7,Aug=8,Sep=9,Oct=10,Nov=11,Dec=12)
    # The page's date headers are parsed without a year, so the current one is used.
    year = datetime.now().year
    base_url = f"https://www.oddsportal.com/soccer/{country}/{league}/"

    def find_team(name):
        """Return the `teams` row whose name equals `name`, ignoring case."""
        matches = teams.loc[teams['name'].str.lower() == name.lower()]
        if matches.empty:
            raise LookupError(f"team '{name}' not found in the teams table")
        return matches.iloc[0]

    option = Options()
    option.headless = True
    # NOTE(review): newer Selenium 4 releases expect the driver path wrapped
    # in a Service object - confirm against the installed version.
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=option)
    try:
        driver.get(base_url)
        time.sleep(5)  # give the page time to render the table
        element = driver.find_element('id', "tournamentTable")
        html_content = element.get_attribute('outerHTML')
    finally:
        # Always release the browser, even when loading or lookup fails.
        driver.quit()

    page_soup = soup(html_content, "html.parser")

    games = []
    date_text = None  # text of the most recent date header row

    for tr in page_soup.find_all('tr'):
        try:
            tr_class = tr.get('class')
            if tr_class == ['center', 'nob-border']:
                # Date header row: the date is read from the text after the first comma.
                date_info_splitted = tr.contents[0].text.split(',')
                if len(date_info_splitted) == 1:
                    # A header without a comma ends the parsing.
                    break
                date_text = date_info_splitted[1].strip()
            elif not tr_class or tr_class == ['odd']:
                if date_text is None:
                    raise ValueError("match row found before any date header row")
                date_parts = date_text.split(' ')
                time_parts = tr.contents[0].text.strip().split(':')
                # The scraped time is treated as from_zone time and converted to to_zone.
                match_date = datetime(year,
                                      months[date_parts[1]],
                                      int(date_parts[0]),
                                      hour=int(time_parts[0]),
                                      minute=int(time_parts[1]),
                                      second=0).replace(tzinfo=from_zone).astimezone(to_zone)
                team_names = tr.contents[1].text.split(' - ')
                home_team = find_team(team_names[0].strip())
                away_team = find_team(team_names[1].strip())
                games.append({'date': match_date,
                              'home_id': home_team['id'],
                              'away_id': away_team['id'],
                              'home_name': home_team['name'],
                              'away_name': away_team['name'],
                              'home_odds': float(tr.contents[2].text),
                              'draw_odds': float(tr.contents[3].text),
                              'away_odd': float(tr.contents[4].text)})
        except Exception as e:
            # Best effort: report the row that failed and keep going.
            print(e)
            continue

    return sorted(games, key=lambda d: d['date'])

In [16]:
# Scrape the odds once; get_match_with_odds() reads this list.
games_odds = get_betting_odds()




In [17]:
def get_match_with_odds(home_id, away_id):
    """Look up the scraped odds of one pairing in the module-level `games_odds`.

    Returns the (home odds, draw odds, away odds, date) of the first entry
    whose 'home_id' and 'away_id' both match; raises StopIteration when no
    entry matches.
    """
    game = next(
        entry for entry in games_odds
        if entry['home_id'] == home_id and entry['away_id'] == away_id
    )
    return game['home_odds'], game['draw_odds'], game['away_odd'], game['date']

In [18]:
# Load the trained model stored for the selected league.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load model files from a trusted source.
model = load(f"leagues/{league_id}/model.joblib")

In [20]:
# Predict each upcoming fixture that has scraped odds and print whether the
# predicted outcome is worth betting on.
parlay = []
parlay_odds = 1
for fixture in next_fixtures:
    # Only the fields used below are read; the configured `league_id` is
    # deliberately not reassigned from the fixture.
    season = fixture['league']['season']
    home_id, home_name = fixture['teams']['home']['id'], fixture['teams']['home']['name']
    away_id, away_name = fixture['teams']['away']['id'], fixture['teams']['away']['name']
    # Naive to_zone wall-clock time, matching how add_match_info_to_db stores
    # match dates, so the result does not depend on this machine's timezone.
    fixture_date_converted = datetime.fromtimestamp(fixture['fixture']['timestamp'], tz=to_zone).replace(tzinfo=None)

    try:
        home_odds, draw_odds, away_odds, _parsed_date = get_match_with_odds(home_id, away_id)
    except StopIteration:
        print(f"{home_name} x {away_name}: no odds found, skipping")
        continue
    except Exception as error:
        # Stay best-effort, but say why the fixture was dropped.
        print(f"{home_name} x {away_name}: odds lookup failed ({type(error).__name__}: {error}), skipping")
        continue

    home_stats = hf.get_team_previous_games_stats(home_id, season, fixture_date_converted, 'H', n_last_games, fixtures_df)
    if home_stats is None:
        continue

    away_stats = hf.get_team_previous_games_stats(away_id, season, fixture_date_converted, 'A', n_last_games, fixtures_df)
    if away_stats is None:
        continue

    game_stats = [home_stats + away_stats]

    predictions = model.predict(game_stats)

    home_score_pred, away_score_pred = predictions[0]

    # Default to a draw; pick a side only when the predicted gap exceeds draw_threshold.
    prediction = 'D'
    selected_odds = draw_odds
    prediction_text = 'Draw'
    score_pred_diff = abs(home_score_pred - away_score_pred)
    if home_score_pred > away_score_pred and score_pred_diff > draw_threshold:
        prediction = 'H'
        selected_odds = home_odds
        prediction_text = home_name
    elif home_score_pred < away_score_pred and score_pred_diff > draw_threshold:
        prediction = 'A'
        selected_odds = away_odds
        prediction_text = away_name

    print('-'*10)
    print(f"{home_name} x {away_name}")
    if score_pred_diff > draw_threshold and score_pred_diff < min_score_diff_threshold:
        # A winner is predicted, but the gap is too small to trust.
        print(colored(f"BAD BET: {prediction_text} @ {selected_odds} ({round(home_score_pred, 2)} x {round(away_score_pred, 2)})\n", 'red'))
    elif selected_odds < min_threshold or selected_odds > max_threshold:
        ml_good_bet_conditions_text = f"Good bet if {min_threshold} < odds < {max_threshold}"
        print(colored(f"BAD BET: {prediction_text} @ {selected_odds} ({ml_good_bet_conditions_text})\n", 'red'))
    else:
        print(colored(f'GOOD BET: {prediction_text} @ {selected_odds}\n', 'green'))
        

----------
Wigan x Sheffield Utd
[31mBAD BET: Sheffield Utd @ 1.94 (1.03 x 1.37)
[0m
