In [76]:
import pandas as pd 
import numpy as np 
import json 
import bs4 
from bs4 import BeautifulSoup 
import datetime 
import requests
import os

## Bets on match statistics (number of goals, goal difference, etc.)

We scraped two major betting websites, https://www.winamax.fr/ and https://www.betclic.fr/. Because we are located in France, we only have access to the French versions of these sites, so the bet names are in French; we acknowledge that this is not optimal for an international application.

In [77]:
def get_list_team_premier_league():
  """Scrape the Premier League team names from the onefootball standings page.

  Returns:
    list[str]: the ``aria-label`` of the first link found in each standings
    row, in page order. The list is empty when the response status is not
    200 or when no row matches the expected CSS classes.

  Raises:
    requests.exceptions.RequestException: on network failure or timeout
    (propagated from ``requests.get``).
  """
  link = "https://onefootball.com/en/competition/premier-league-9/table"
  # Bound the wait: without a timeout a stalled connection blocks the notebook forever.
  rep = requests.get(link, timeout=10)
  pl_teams = []
  if rep.status_code == 200:
    page = BeautifulSoup(rep.text, "html.parser")
    # NOTE(review): these class names look build-generated and may change when
    # the site is redeployed; an empty result most likely means they did.
    rankings = page.find_all("li", class_="Standing_standings__row__5sdZG Standing_standings__rowLink__Skr86")
    for row in rankings:
      anchor = row.find("a")
      team = anchor.get("aria-label") if anchor is not None else None
      # Skip rows without a usable label so callers never receive None entries.
      if team:
        pl_teams.append(team)

  return pl_teams

In [78]:
# Unique Premier League team names, used below to keep only PL fixtures.
pl_teams = {*get_list_team_premier_league()}

In [80]:
def has_subwords(w, words_set):
    """Return True when `w` occurs as a substring of at least one entry of `words_set`.

    Returns False when no entry contains `w`, including when `words_set` is empty.
    """
    return any(candidate.find(w) >= 0 for candidate in words_set)

def replace_null_with_zero(data):
    """Return a copy of a nested dict/list structure with inner None values set to 0.

    Dicts and lists are rebuilt recursively; any other value is returned
    unchanged. A None passed directly as `data` is therefore returned as None:
    only None values found *inside* a dict or list are replaced.
    """
    if isinstance(data, dict):
        cleaned = {}
        for key, value in data.items():
            cleaned[key] = 0 if value is None else replace_null_with_zero(value)
        return cleaned

    if isinstance(data, list):
        return [0 if item is None else replace_null_with_zero(item) for item in data]

    return data

In [81]:
def analyse_winamax(winamax_match, pl_teams):
    """Flatten one scraped Winamax match file into a long-format odds table.

    Parameters
    ----------
    winamax_match : str or path-like
        Path to a JSON file shaped as
        ``{bet_type: {outcome: {"odds": ..., "percentage": ...}}}``.
    pl_teams : iterable of str
        Team names used to decide whether the match should be kept; both
        teams must be a substring of at least one entry (see ``has_subwords``).

    Returns
    -------
    pandas.DataFrame or None
        One row per (bet type, outcome) with columns ``home_team``,
        ``away_team``, ``bet_type``, ``outcome``, ``odd`` and ``percentage``.
        ``None`` when the file has no "Résultat" market, when that market has
        fewer than three outcomes, or when either team is not in `pl_teams`.
    """
    # The files hold accented French labels ("Résultat"); decode explicitly as
    # UTF-8 instead of relying on the platform default encoding.
    with open(winamax_match, 'r', encoding="utf-8") as json_file:
        match_odds = json.load(json_file)

    if not isinstance(match_odds, dict) or "Résultat" not in match_odds:
        return None

    # The "Résultat" outcomes are read positionally: home team, draw, away team.
    result_outcomes = list(match_odds["Résultat"].keys())
    if len(result_outcomes) < 3:
        # Incomplete market: the teams cannot be identified, so skip the file
        # instead of failing with an IndexError.
        return None
    home_team = result_outcomes[0]
    away_team = result_outcomes[2]

    if not (has_subwords(home_team, pl_teams) and has_subwords(away_team, pl_teams)):
        return None

    d = {"home_team": [], "away_team": [], "bet_type": [], "outcome": [], "odd": [], "percentage": []}
    for bet_type, outcomes in match_odds.items():
        for outcome, quote in outcomes.items():
            d["home_team"].append(home_team)
            d["away_team"].append(away_team)
            d["bet_type"].append(bet_type)
            d["outcome"].append(outcome)
            d["odd"].append(quote["odds"])
            d["percentage"].append(quote["percentage"])

    return pd.DataFrame(d)

In [82]:
import glob

# Parse every scraped Winamax file; sorted() keeps the row order reproducible
# because glob returns paths in filesystem-dependent order.
list_bets = sorted(glob.glob("tutorial/*Winamax.json"))

# Collect the per-match frames and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previous rows on every iteration.
winamax_frames = []
for match in list_bets:
    df = analyse_winamax(match, pl_teams=pl_teams)
    # analyse_winamax returns None for files it rejects.
    if df is not None:
        winamax_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
table_winamax = pd.concat(winamax_frames, ignore_index=True) if winamax_frames else pd.DataFrame()

table_winamax.to_csv("winamax_bets_PL_matches.csv")
table_winamax

Unnamed: 0,home_team,away_team,bet_type,outcome,odd,percentage
0,Arsenal,Crystal Palace,Résultat,Arsenal,1.28,98
1,Arsenal,Crystal Palace,Résultat,Match nul,4.30,1
2,Arsenal,Crystal Palace,Résultat,Crystal Palace,7.25,1
3,Arsenal,Crystal Palace,Double chance,Arsenal ou match nul,1.03,87
4,Arsenal,Crystal Palace,Double chance,Arsenal ou Crystal Palace,1.13,13
...,...,...,...,...,...,...
5065,Wolverhampton,Manchester United,Mi-temps - Double chance et les deux équipes ...,Wolverhampton/match nul et oui,4.50,0
5066,Wolverhampton,Manchester United,Mi-temps - Double chance et les deux équipes ...,Wolverhampton/Manchester United et oui,7.50,0
5067,Wolverhampton,Manchester United,Mi-temps avec le plus de buts,1re mi-temps,2.95,0
5068,Wolverhampton,Manchester United,Mi-temps avec le plus de buts,Égalité,3.35,0


In [83]:
def analyse_betclic(betclic_match, pl_teams):
    """Flatten one scraped Betclic match file into a long-format odds table.

    Parameters
    ----------
    betclic_match : str or path-like
        Path to a JSON file shaped as ``{bet_type: {outcome: odd}}`` where
        each odd is a string that may use a decimal comma.
    pl_teams : iterable of str
        Team names used to decide whether the match should be kept; both
        teams must be a substring of at least one entry (see ``has_subwords``).

    Returns
    -------
    pandas.DataFrame or None
        One row per (bet type, outcome) with columns ``home_team``,
        ``away_team``, ``bet_type``, ``outcome`` and ``odd``. ``odd`` stays a
        string, with any decimal comma replaced by a dot. ``None`` when the
        teams cannot be read from the "Double chance" market or when either
        team is not in `pl_teams`.
    """
    # The files hold accented French labels; decode explicitly as UTF-8
    # instead of relying on the platform default encoding.
    with open(betclic_match, 'r', encoding="utf-8") as json_file:
        match_odds = json.load(json_file)

    if not isinstance(match_odds, dict) or "Double chance" not in match_odds:
        return None

    # The second "Double chance" outcome is read as "<home> ou <away>".
    double_chance_outcomes = list(match_odds["Double chance"].keys())
    if len(double_chance_outcomes) < 2:
        # Incomplete market: skip the file instead of failing with an IndexError.
        return None
    teams_involved = double_chance_outcomes[1].split(" ou ")
    if len(teams_involved) < 2:
        # Label does not follow the "<home> ou <away>" pattern.
        return None
    home_team = teams_involved[0]
    away_team = teams_involved[1]

    if not (has_subwords(home_team, pl_teams) and has_subwords(away_team, pl_teams)):
        return None

    d = {"home_team": [], "away_team": [], "bet_type": [], "outcome": [], "odd": []}
    for bet_type, outcomes in match_odds.items():
        for outcome, odd in outcomes.items():
            d["home_team"].append(home_team)
            d["away_team"].append(away_team)
            d["bet_type"].append(bet_type)
            d["outcome"].append(outcome)
            # Normalise the French decimal comma; the value is kept as a string.
            d["odd"].append(odd.replace(",", "."))

    return pd.DataFrame(d)

In [84]:
# Parse every scraped Betclic file; sorted() keeps the row order reproducible
# because glob returns paths in filesystem-dependent order.
list_bets = sorted(glob.glob("tutorial/*Betclic.json"))

# Collect the per-match frames and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previous rows on every iteration.
betclic_frames = []
for match in list_bets:
    df = analyse_betclic(match, pl_teams=pl_teams)
    # analyse_betclic returns None for files it rejects.
    if df is not None:
        betclic_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
table_betclic = pd.concat(betclic_frames, ignore_index=True) if betclic_frames else pd.DataFrame()

table_betclic.to_csv("betclic_bets_PL_matches.csv")
table_betclic

Unnamed: 0,home_team,away_team,bet_type,outcome,odd
0,Arsenal,Crystal Palace,Résultat du match (temps réglementaire),,8.00
1,Arsenal,Crystal Palace,Double chance,Arsenal ou Nul,1.06
2,Arsenal,Crystal Palace,Double chance,Arsenal ou Crystal Palace,1.11
3,Arsenal,Crystal Palace,Double chance,Nul ou Crystal Palace,2.97
4,Arsenal,Crystal Palace,Nombre total de buts,"+ de 1,5",1.19
...,...,...,...,...,...
857,Wolverhampton,Manchester United,Score exact,2 - 2,12
858,Wolverhampton,Manchester United,Score exact,1 - 2,9.60
859,Wolverhampton,Manchester United,1ère mi-temps - Résultat,Wolverhampton,2.98
860,Wolverhampton,Manchester United,1ère mi-temps - Résultat,Nul,2.13
