In [1]:
import pandas as pd 
import numpy as np 
import json 
import bs4 
from bs4 import BeautifulSoup 
import datetime 
import requests

In [2]:
def get_list_team_premier_league(
    link="https://onefootball.com/en/competition/premier-league-9/table",
    timeout=10,
):
  """Scrape the team names listed in a OneFootball standings table.

  Args:
    link: URL of the standings page to fetch. Defaults to the Premier
      League table.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout ``requests.get`` can block forever on a stalled connection.

  Returns:
    A list with the ``aria-label`` of the first link found in each
    standings row, in page order. The list is empty when the response
    status is not 200 or when no row matches the expected CSS classes.

  Raises:
    requests.exceptions.RequestException: on connection errors or when
      the timeout expires.
  """
  rep = requests.get(link, timeout=timeout)
  if rep.status_code != 200:
    return []

  page = BeautifulSoup(rep.text, "html.parser")
  # NOTE(review): these class names carry hash-like suffixes and look
  # build-generated; presumably they change when the site is redeployed.
  # Check them first if this function starts returning an empty list.
  rankings = page.find_all("li", class_="Standing_standings__row__5sdZG Standing_standings__rowLink__Skr86")

  pl_teams = []
  for row in rankings:
    anchor = row.find("a")
    # Skip rows without a link or without a label instead of crashing or
    # storing None, which downstream string matching cannot handle.
    team = anchor.get("aria-label") if anchor is not None else None
    if team:
      pl_teams.append(team)

  return pl_teams

In [3]:
# Fetch the team names once (this performs a network request) and keep them
# as a set. The converter functions defined in later cells use this object as
# the default value of their `pl_teams` parameter, so it is bound when those
# cells are executed: re-run them after changing `pl_teams`.
pl_teams = set(get_list_team_premier_league())

In [8]:
import os
def has_subwords(w, words_set):
    """Tell whether `w` occurs as a substring of at least one entry.

    Args:
        w: The text to look for.
        words_set: Iterable of strings to search in.

    Returns:
        True if some entry of `words_set` contains `w`, False otherwise
        (including when `words_set` is empty).
    """
    return any(candidate.find(w) != -1 for candidate in words_set)

def replace_null_with_zero(data):
    """Return a copy of a nested dict/list structure with None values set to 0.

    Only values stored inside dicts and lists are replaced; the walk recurses
    through nested dicts and lists. Any other object (including a top-level
    None, or a tuple and its contents) is returned unchanged.
    """
    def _clean(value):
        # None becomes 0; everything else is processed recursively.
        return 0 if value is None else replace_null_with_zero(value)

    if isinstance(data, dict):
        return {key: _clean(value) for key, value in data.items()}
    if isinstance(data, list):
        return [_clean(item) for item in data]
    return data


def betsWinamax_json_to_csv(link="tutorial/betsWinamax.json", pl_teams=pl_teams):
    """Turn the Winamax odds JSON into a DataFrame and write it to CSV.

    The JSON is expected to be a mapping whose keys look like
    ``"<home> - <away>"``. An entry is kept when both halves of the key are
    substrings of some name in `pl_teams`. For every kept entry the first and
    last keys of the match dict are used as the home and away team names,
    ``'Match nul'`` holds the draw, and ``formated_time`` holds the kick-off
    as ``YYYY-MM-DD HH:MM:SS``. Each outcome is indexed as ``[0]`` for the
    odd and ``[1]`` for a number that is treated as a bet count.

    Args:
        link: Path of the JSON file to read.
        pl_teams: Collection of team names used to filter the matches.

    Returns:
        A DataFrame with one row per kept match. It is also written to
        ``betsWinamax.csv`` in the current working directory. When no match
        is kept, the frame is empty but still has all the columns.
    """
    # Only the parsing needs the file handle, so it is closed before the
    # transformation starts. The encoding is explicit so that reading does
    # not depend on the platform default.
    with open(link, 'r', encoding="utf-8") as json_file:
        data_dict = replace_null_with_zero(json.load(json_file))

    columns = [
        "team_home", "team_away", "datetime",
        "odd_home_win", "odd_draw", "odd_away_win",
        "bets_home_win_percentage", "bets_draw_win_percentage", "bets_away_win_percentage",
    ]

    def bet_share(count, total):
        # Nulls are turned into 0 above, so a match can end up with a total
        # of 0 bets; report 0% rather than dividing by zero.
        return int(count / total * 100) if total else 0

    records = []
    for key, m in data_dict.items():
        s = key.split(" - ")
        if len(s) != 2:
            continue
        if not (has_subwords(s[0], pl_teams) and has_subwords(s[1], pl_teams)):
            continue

        # Team names are taken from the key order of the match dict.
        features = list(m.keys())
        team_home, team_away = features[0], features[-1]
        home, draw, away = m[team_home], m['Match nul'], m[team_away]

        kickoff = datetime.datetime.strptime(m["formated_time"], '%Y-%m-%d %H:%M:%S')
        num_bets = home[1] + away[1] + draw[1]

        records.append({
            "team_home": team_home,
            "team_away": team_away,
            "datetime": kickoff.strftime("%d/%m/%Y"),
            "odd_home_win": home[0],
            "odd_draw": draw[0],
            "odd_away_win": away[0],
            "bets_home_win_percentage": bet_share(home[1], num_bets),
            "bets_draw_win_percentage": bet_share(draw[1], num_bets),
            "bets_away_win_percentage": bet_share(away[1], num_bets),
        })

    # Build the frame once from all records; passing the columns keeps the
    # layout stable even when nothing matched.
    df = pd.DataFrame(records, columns=columns)
    df.to_csv("betsWinamax.csv")
    return df
    


In [9]:
# Build the Winamax table from the default JSON path and display it.
# Side effect: the call also writes betsWinamax.csv to the working directory.
betsWinamax_json_to_csv()

Unnamed: 0,team_home,team_away,datetime,odd_home_win,odd_draw,odd_away_win,bets_home_win_percentage,bets_draw_win_percentage,bets_away_win_percentage
0,Tottenham,Burnley,05/01/2024,1.4,5.2,6.25,99,1,0
1,Chelsea,Fulham,13/01/2024,1.6,4.2,5.2,52,45,3
2,Sheffield United,West Ham,21/01/2024,3.55,3.3,1.78,1,1,98
3,Brentford,Nottingham Forest,20/01/2024,1.88,3.2,3.25,4,92,4
4,Brighton,Wolverhampton,22/01/2024,1.56,3.65,4.3,32,52,16
5,Newcastle,Manchester City,13/01/2024,5.4,4.3,1.56,2,10,88
6,Burnley,Luton Town,15/01/2024,1.94,3.55,3.85,33,60,7
7,Manchester United,Tottenham,14/01/2024,2.15,3.8,3.0,28,26,46
8,Everton,Aston Villa,14/01/2024,2.75,3.5,2.45,6,28,65
9,Bournemouth,Liverpool,21/01/2024,4.2,3.8,1.54,0,3,97


In [10]:
def betsBetclic_json_to_csv(link="tutorial/betsBetclic.json", pl_teams=pl_teams):
    """Turn the Betclic odds JSON into a DataFrame and write it to CSV.

    The JSON is expected to be a mapping whose keys look like
    ``"<home> - <away>"``. An entry is kept when both halves of the key are
    substrings of some name in `pl_teams`; the two halves become the home and
    away team names. Each match dict provides ``match_time`` formatted as
    ``DD/MM/YYYY HH:MM``, and ``odds`` and ``bet_percentage`` sequences that
    are read at positions 0, 1, 2 as home win, draw, away win.

    Args:
        link: Path of the JSON file to read.
        pl_teams: Collection of team names used to filter the matches.

    Returns:
        A DataFrame with one row per kept match. It is also written to
        ``betsBetclic.csv`` in the current working directory. Odds are
        returned as floats (decimal commas are accepted); an odd that cannot
        be parsed becomes NaN. When no match is kept, the frame is empty but
        still has all the columns.
    """
    # Only the parsing needs the file handle, so it is closed before the
    # transformation starts. The encoding is explicit so that reading does
    # not depend on the platform default.
    with open(link, 'r', encoding="utf-8") as json_file:
        data_dict = replace_null_with_zero(json.load(json_file))

    columns = [
        "team_home", "team_away", "datetime",
        "odd_home_win", "odd_draw", "odd_away_win",
        "bets_home_win_percentage", "bets_draw_win_percentage", "bets_away_win_percentage",
    ]

    records = []
    for m_key, match in data_dict.items():
        features = m_key.split(" - ")
        if len(features) != 2:
            continue
        if not (has_subwords(features[0], pl_teams) and has_subwords(features[1], pl_teams)):
            continue

        kickoff = datetime.datetime.strptime(match["match_time"], '%d/%m/%Y %H:%M')
        odds = match["odds"]
        shares = match["bet_percentage"]

        records.append({
            "team_home": features[0],
            "team_away": features[1],
            "datetime": kickoff.strftime("%d/%m/%Y"),
            "odd_home_win": _parse_decimal_comma(odds[0]),
            "odd_draw": _parse_decimal_comma(odds[1]),
            "odd_away_win": _parse_decimal_comma(odds[2]),
            "bets_home_win_percentage": shares[0],
            "bets_draw_win_percentage": shares[1],
            "bets_away_win_percentage": shares[2],
        })

    # Build the frame once from all records; passing the columns keeps the
    # layout stable even when nothing matched.
    df = pd.DataFrame(records, columns=columns)
    df.to_csv("betsBetclic.csv")
    return df


def _parse_decimal_comma(value):
    """Convert an odd such as "1,56", "1.56" or 0 to a float (NaN if invalid)."""
    # str() first: null odds have already been replaced by the integer 0,
    # which has no .replace method.
    try:
        return float(str(value).replace(',', '.'))
    except ValueError:
        # One unreadable odd should not abort the whole export.
        return float("nan")

In [11]:
# Build the Betclic table from the default JSON path and display it.
# Side effect: the call also writes betsBetclic.csv to the working directory.
betsBetclic_json_to_csv()

Unnamed: 0,team_home,team_away,datetime,odd_home_win,odd_draw,odd_away_win,bets_home_win_percentage,bets_draw_win_percentage,bets_away_win_percentage
0,Chelsea,Fulham,13/01/2024,1.56,3.9,4.9,97,2,1
1,Newcastle,Manchester City,13/01/2024,5.05,4.05,1.52,3,1,96
2,Everton,Aston Villa,14/01/2024,2.66,3.35,2.35,19,6,75
3,Burnley,Luton,15/01/2024,1.88,3.35,3.7,83,6,11
4,Arsenal,Crystal Palace,20/01/2024,1.31,4.65,8.0,99,1,0
5,Brentford,Nottingham Forest,20/01/2024,1.94,3.33,3.52,60,7,33
6,Bournemouth,Liverpool,21/01/2024,4.33,3.93,1.58,1,0,99
7,Brighton,Wolverhampton,22/01/2024,1.58,3.73,4.55,91,2,7
8,Nottingham Forest,Arsenal,30/01/2024,5.9,4.1,1.41,1,0,99
9,Fulham,Everton,30/01/2024,2.42,3.12,2.59,87,7,6
