# Web Scraper

In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import numpy as np
import os
import warnings
import time
from scipy.stats import poisson


# Functions from Module created

# Scraping Functions
from module import scraper_understat, scrape_fbref_xG, scrape_standings_mls, scrape_fbref_NonxG, fixtures_api, fixtures_scraper

# Poisson functions
from module import ud_predict_game_results, ud_predict_game_winner, fbref_predict_game_result, fbref_predict_game_winner, fbref_predict_game_result_Goals, fbref_predict_game_winner_Goals

warnings.filterwarnings('ignore')

## Understat.com

In [47]:
# Defining URLs and their corresponding league names
understat_urls = {
    'La Liga': 'https://understat.com/league/La_liga/',
    'EPL': 'https://understat.com/league/EPL/',
    'Bundesliga': 'https://understat.com/league/Bundesliga/',
    'Serie A': 'https://understat.com/league/Serie_A/',
    'Ligue 1': 'https://understat.com/league/Ligue_1/',
    #'Russia': 'https://understat.com/league/RFPL/'
}

# Initializing empty dictionary to store frames
dfs_u = {}

# Defining target directory
output_dir = 'Standings'
os.makedirs(output_dir, exist_ok=True)

# For loop to o over the URLs and scrape data
for league, url in understat_urls.items():
    try:
        dfs_u[league] = scraper_understat(url)
        # Save the dataframe to a CSV file
        output_path = os.path.join(output_dir, f"{league}_standings.csv")
        dfs_u[league].to_csv(output_path, index=False)
        print(f"Successfully scraped and saved {league} data.")
    except Exception as e:
        print(f"Failed to scrape {league} data: {e}")

Successfully scraped and saved La Liga data.
Successfully scraped and saved EPL data.
Successfully scraped and saved Bundesliga data.
Successfully scraped and saved Serie A data.
Successfully scraped and saved Ligue 1 data.


## Fbref.com - xG Data

In [2]:
# Defining URLs and their corresponding league names
fbref_urls = {
    'Eredivisie': 'https://fbref.com/en/comps/23/2023-2024/2023-2024-Eredivisie-Stats',
    'Bundesliga_2': 'https://fbref.com/en/comps/33/2-Bundesliga-Stats',
    'Jupiler': 'https://fbref.com/en/comps/37/Belgian-Pro-League-Stats',
    'Liga MX': 'https://fbref.com/en/comps/31/Liga-MX-Stats',
    'Primeira Liga': 'https://fbref.com/en/comps/32/Primeira-Liga-Stats',
    'Liga Argentina': 'https://fbref.com/en/comps/21/Primera-Division-Stats',
    'Brasileirao': 'https://fbref.com/en/comps/24/Serie-A-Stats',
    'MLS': 'https://fbref.com/en/comps/22/Major-League-Soccer-Stats',
    
    # Females
    'Premier W': 'https://fbref.com/en/comps/189/Womens-Super-League-Stats',
    'MLS W': 'https://fbref.com/en/comps/182/NWSL-Stats',
    'Spain W': 'https://fbref.com/en/comps/230/Liga-F-Stats',
    'Bundesliga W': 'https://fbref.com/en/comps/183/Frauen-Bundesliga-Stats'
}

# Initializing empty dictionary to store frames
dfs = {}

# Defining target directory
output_dir = 'Standings'
os.makedirs(output_dir, exist_ok=True)

# For loop to o over the URLs and scrape data
for league, url in fbref_urls.items(): 
    
    if league == 'MLS':
        
        try:
            mls_eastern = scrape_fbref_xG(url)
            mls_western = scrape_standings_mls(url)
            mls_st = pd.concat([mls_eastern, mls_western], ignore_index=True)
            dfs[league] = mls_st
            # Save the dataframe to a CSV file
            output_path = os.path.join(output_dir, f"{league}_standings.csv")
            dfs[league].to_csv(output_path, index=False)
            print(f"Successfully scraped and saved {league} data.")
        except Exception as e:
            print(f"Failed to scrape {league} data: {e}")
    else:
        try:
            dfs[league] = scrape_fbref_xG(url)
            # Save the dataframe to a CSV file
            output_path = os.path.join(output_dir, f"{league}_standings.csv")
            dfs[league].to_csv(output_path, index=False)
            print(f"Successfully scraped and saved {league} data.")
        except Exception as e:
            print(f"Failed to scrape {league} data: {e}")
            
    time.sleep(5)

Successfully scraped and saved Eredivisie data.
Successfully scraped and saved Bundesliga_2 data.
Failed to scrape Jupiler data: could not convert string to float: ''
Successfully scraped and saved Liga MX data.
Successfully scraped and saved Primeira Liga data.
Successfully scraped and saved Liga Argentina data.
Successfully scraped and saved Brasileirao data.
Successfully scraped and saved MLS data.
Successfully scraped and saved Premier W data.
Successfully scraped and saved MLS W data.
Failed to scrape Spain W data: could not convert string to float: ''
Failed to scrape Bundesliga W data: could not convert string to float: ''


## FBRef.com - Goals

In [3]:
# Defining URLs and their corresponding league names
fbref_G_urls = {
    'Peru': 'https://fbref.com/en/comps/44/Liga-1-Stats',                      # Apertura, Clausura
    'Ecuador': 'https://fbref.com/en/comps/58/Serie-A-Stats',                  # Apertura, Clausura
    'Paraguay': 'https://fbref.com/en/comps/61/Primera-Division-Stats',        # Apertura, Clausura
    'Uruguay': 'https://fbref.com/en/comps/45/Primera-Division-Stats',         # Apertura, Clausura
    'Chile': 'https://fbref.com/en/comps/35/Primera-Division-Stats',
    'Hungary': 'https://fbref.com/en/comps/46/NB-I-Stats',
    'Romania': 'https://fbref.com/en/comps/47/Liga-I-Stats',
    'Serbia': 'https://fbref.com/en/comps/54/Serbian-SuperLiga-Stats',
    'Turkey': 'https://fbref.com/en/comps/26/Super-Lig-Stats',
    'Ukraine': 'https://fbref.com/en/comps/39/Ukrainian-Premier-League-Stats',
    'Poland': 'https://fbref.com/en/comps/36/Ekstraklasa-Stats',
    'Sweden': 'https://fbref.com/en/comps/29/Allsvenskan-Stats',              # Allsvenskan - Year Calendar
    'Norway': 'https://fbref.com/en/comps/28/Eliteserien-Stats', 
    'Switzerland': 'https://fbref.com/en/comps/57/Swiss-Super-League-Stats',  # Will not work, the table index should be 3 here
    'Bulgaria': 'https://fbref.com/en/comps/67/Bulgarian-First-League-Stats',
    'Austria': 'https://fbref.com/en/comps/56/Austrian-Bundesliga-Stats',
    'Greece': 'https://fbref.com/en/comps/27/Super-League-Greece-Stats',
    'Czechia': 'https://fbref.com/en/comps/66/Czech-First-League-Stats',
    'Croatia': 'https://fbref.com/en/comps/63/Hrvatska-NL-Stats',
    'South Korea': 'https://fbref.com/en/comps/55/K-League-1-Stats',           # Year Calendar
    'Japan': 'https://fbref.com/en/comps/25/J1-League-Stats',                  # Year Calendar
    'Saudi': 'https://fbref.com/en/comps/70/Saudi-Professional-League-Stats', 
    'Denmark': 'https://fbref.com/en/comps/50/Danish-Superliga-Stats',
    
    # Females
    'Brasil W': 'https://fbref.com/en/comps/206/Serie-A1-Stats',
    'Denmark W': 'https://fbref.com/en/comps/340/Kvindeligaen-Stats',
    

}

# Initializing empty dictionary to store frames
# dfs = {}

# Defining target directory
output_dir = 'Standings'
os.makedirs(output_dir, exist_ok=True)

# For loop to o over the URLs and scrape data
for league, url in fbref_G_urls.items():
    try:
        dfs[league] = scrape_fbref_NonxG(url)
        # Save the dataframe to a CSV file
        output_path = os.path.join(output_dir, f"{league}_standings.csv")
        dfs[league].to_csv(output_path, index=False)
        print(f"Successfully scraped and saved {league} data.")
    except Exception as e:
        print(f"Failed to scrape {league} data: {e}")
        
    time.sleep(5)

Successfully scraped and saved Peru data.
Successfully scraped and saved Ecuador data.
Successfully scraped and saved Paraguay data.
Successfully scraped and saved Uruguay data.
Successfully scraped and saved Chile data.
Successfully scraped and saved Hungary data.
Failed to scrape Romania data: could not convert string to float: ''
Successfully scraped and saved Serbia data.
Failed to scrape Turkey data: could not convert string to float: ''
Failed to scrape Ukraine data: could not convert string to float: ''
Successfully scraped and saved Poland data.
Successfully scraped and saved Sweden data.
Successfully scraped and saved Norway data.
Failed to scrape Switzerland data: Length mismatch: Expected axis has 15 elements, new values have 19 elements
Failed to scrape Bulgaria data: could not convert string to float: ''
Failed to scrape Austria data: could not convert string to float: ''
Failed to scrape Greece data: could not convert string to float: ''
Successfully scraped and saved Cze

## Fixture Scraper

### Football-data.org API

In [4]:
## For understat.com data we will be leveraging the API (Except Netherlands and Russia)

# site: https://www.football-data.org/client/home
# Defining the api_key and the url where the requests will be sent
api_key = "c16a0945a1f741b8a1ac14b7246c7595"

# Different leagues url
epl_url = f"https://api.football-data.org/v2/competitions/PL/matches?status=SCHEDULED"
laliga_url = f"https://api.football-data.org/v2/competitions/PD/matches?status=SCHEDULED"
bundesliga_url = f"https://api.football-data.org/v2/competitions/BL1/matches?status=SCHEDULED"
seriea_url = f"https://api.football-data.org/v2/competitions/SA/matches?status=SCHEDULED"
ligue1_url = f"https://api.football-data.org/v2/competitions/FL1/matches?status=SCHEDULED"

# Defining headers
headers = {"X-Auth-Token": api_key}

# Extracting data
epl_fixtures = fixtures_api(epl_url, headers)
laliga_fixtures = fixtures_api(laliga_url, headers)
bundesliga_fixtures = fixtures_api(bundesliga_url, headers)
seriea_fixtures = fixtures_api(seriea_url, headers)
ligue1_fixtures = fixtures_api(ligue1_url, headers)
#russia_fixtures = get_fixtures_url('https://fbref.com/en/comps/30/Russian-Premier-League-Stats')

# Stores in memory only ~ for now

Sucessfully extracted data for https://api.football-data.org/v2/competitions/PL/matches?status=SCHEDULED
Sucessfully extracted data for https://api.football-data.org/v2/competitions/PD/matches?status=SCHEDULED
Sucessfully extracted data for https://api.football-data.org/v2/competitions/BL1/matches?status=SCHEDULED
Sucessfully extracted data for https://api.football-data.org/v2/competitions/SA/matches?status=SCHEDULED
Sucessfully extracted data for https://api.football-data.org/v2/competitions/FL1/matches?status=SCHEDULED


### Fbref.com Fixtures

In [5]:
# Defining a function to extract pattern from standings url and transform url into fixtures url
# No need to store in Module since it is small
def get_fixtures_url(standings_url):
    base_url = standings_url.rsplit('/', 1)[0]
    competition_id = standings_url.split('/')[-2]
    return f"{base_url}/schedule/{competition_id}-Scores-and-Fixtures"

# Defining new dictionary
fixtures_url = {}

# Getting new urls for FBRef leagues with xG data
for country, url in fbref_urls.items():
    fixtures_url[country] = get_fixtures_url(url)
    
# Getting new urls for FBRef leagues with Non-xG data
for country, url in fbref_G_urls.items():
    fixtures_url[country] = get_fixtures_url(url)
    
fixtures_url

{'Eredivisie': 'https://fbref.com/en/comps/23/2023-2024/schedule/2023-2024-Scores-and-Fixtures',
 'Bundesliga_2': 'https://fbref.com/en/comps/33/schedule/33-Scores-and-Fixtures',
 'Jupiler': 'https://fbref.com/en/comps/37/schedule/37-Scores-and-Fixtures',
 'Liga MX': 'https://fbref.com/en/comps/31/schedule/31-Scores-and-Fixtures',
 'Primeira Liga': 'https://fbref.com/en/comps/32/schedule/32-Scores-and-Fixtures',
 'Liga Argentina': 'https://fbref.com/en/comps/21/schedule/21-Scores-and-Fixtures',
 'Brasileirao': 'https://fbref.com/en/comps/24/schedule/24-Scores-and-Fixtures',
 'MLS': 'https://fbref.com/en/comps/22/schedule/22-Scores-and-Fixtures',
 'Premier W': 'https://fbref.com/en/comps/189/schedule/189-Scores-and-Fixtures',
 'MLS W': 'https://fbref.com/en/comps/182/schedule/182-Scores-and-Fixtures',
 'Spain W': 'https://fbref.com/en/comps/230/schedule/230-Scores-and-Fixtures',
 'Bundesliga W': 'https://fbref.com/en/comps/183/schedule/183-Scores-and-Fixtures',
 'Peru': 'https://fbref.c

In [6]:
# Creating new dictionary for storing scraped data
fixtures_data = {}

# Scrapping the data and storing n new dictionary
for league, url in fixtures_url.items():
    key = f'{league} Fixture'
    try:
        fixtures_data[key] = fixtures_scraper(url)
        print(f'Success: Fixtures for {league}')
    except Exception as e:
        print(f'Failed to scrape {league} data: {e}')
        continue
    time.sleep(5)

Success: Fixtures for Eredivisie
Success: Fixtures for Bundesliga_2
Success: Fixtures for Jupiler
Success: Fixtures for Liga MX
Success: Fixtures for Primeira Liga
Success: Fixtures for Liga Argentina
Success: Fixtures for Brasileirao
Success: Fixtures for MLS
Success: Fixtures for Premier W
Success: Fixtures for MLS W
Success: Fixtures for Spain W
Success: Fixtures for Bundesliga W
Success: Fixtures for Peru
Success: Fixtures for Ecuador
Success: Fixtures for Paraguay
Success: Fixtures for Uruguay
Success: Fixtures for Chile
Success: Fixtures for Hungary
Success: Fixtures for Romania
Success: Fixtures for Serbia
Success: Fixtures for Turkey
Success: Fixtures for Ukraine
Success: Fixtures for Poland
Success: Fixtures for Sweden
Success: Fixtures for Norway
Success: Fixtures for Switzerland
Success: Fixtures for Bulgaria
Success: Fixtures for Austria
Success: Fixtures for Greece
Success: Fixtures for Czechia
Success: Fixtures for Croatia
Success: Fixtures for South Korea
Success: Fixtur

##### ** Data Engineering section finalized

## Poisson Modeling

In [7]:
leagues_with_8_teams = []
leagues_with_9_teams = []

predictions_dict_ou = {}
predictions_dict_h2h = {}

### Understat

In [8]:
# For loop to o over the URLs and scrape data
for league, df in dfs_u.items():
    
    # Dynamically access fixture dataframe based on league name
    df_fixtures = globals()[f'{league.lower().replace(" ","")}_fixtures']
    
    # Determine fixture records to process - in relation to the number of teams in the league
    if league in leagues_with_8_teams:
        df_matchdays = df_fixtures.loc[0:7]
    elif league in leagues_with_9_teams:
        df_matchdays = df_fixtures.loc[0:8]
    else:
        df_matchdays = df_fixtures.loc[0:0]
    
    # ******************* O/U Predictions ******************* #
    
    try:
        
        # creating an empty list to store the predictions for each game
        predictions = []

        for i, row in df_matchdays.iterrows():
            home_team = row['home_team']
            away_team = row['away_team']
            #print(f"Match {i+1}: {home_team} vs {away_team}")
            #print(" ")
            result = ud_predict_game_results(home_team, away_team, df)
            predictions.append({
                'league': league,
                'Source': 'ud',
                'home_team': home_team,
                'away_team': away_team,
                '+1.5(%)' : result['prob_over_1_goal'],
                '+2.5(%)': result['prob_over_2_goals'],
                '+3.5(%)': result['prob_over_3_goals'],
                'H+1.5(%)': result['h_+1.5'],
                'A+1.5(%)': result['a_+1.5'],
                #'AA(%)': result['AA'],
                'xG': result['expected_goals']

            })

        
        # Appending into the dictionary
        predictions_dict_ou[league] = predictions
        print(f'Sucess O/U for {league}') 
        
    except Exception as e:
        print(f"Failed to generate O/U predictions for {league} data: {e}")
        
    # ******************* H2H Predictions ******************* #
    
    try:
        
        # creating an empty list to store the predictions for each game
        predictions_h2h = []

        for i, row in df_matchdays.iterrows():
            home_team = row['home_team']
            away_team = row['away_team']


            #print(f"Match {i+1}: {home_team} vs {away_team}")
            #print(" ")
            result_h2h = ud_predict_game_winner(home_team, away_team, df)
            predictions_h2h.append({
                'League': league,
                'Source': 'ud',
                'home_team': home_team,
                'away_team': away_team,
                'Home (%)': result_h2h['home_win_prob'],
                'Draw (%)': result_h2h['draw_prob'],
                'Away (%)': result_h2h['away_win_prob'],
            })
            #print(result_h2h)
            #print(" ")

            # Appending into dictionary
            predictions_dict_h2h[league] = predictions
            print(f'Success H2H for {league}')
        
    except Exception as e:
        print(f"Failed to generate H2H predictions for {league} data: {e}")

NameError: name 'dfs_u' is not defined

### FBref xG

In [66]:
# For loop to o over the URLs and scrape data
for league, df in dfs.items():
    
    # Dynamically access fixture dataframe based on league name
    # df_fixtures = globals()[f'{league.lower().replace(" ","")}_fixtures']
    df_fixtures = fixtures_data[f'{league} Fixture']
    
    # Determine fixture records to process - in relation to the number of teams in the league
    if league in leagues_with_8_teams:
        df_matchdays = df_fixtures.loc[0:7]
    elif league in leagues_with_9_teams:
        df_matchdays = df_fixtures.loc[0:8]
    else:
        df_matchdays = df_fixtures.loc[0:0]
    
    # ******************* O/U Predictions ******************* #
    
    # ****************** Predictions for Leagues with xG data ******************* #
    
    try:
        
        # creating an empty list to store the predictions for each game
        predictions = []

        for i, row in df_matchdays.iterrows():
            home_team = row['home_team']
            away_team = row['away_team']
            #print(f"Match {i+1}: {home_team} vs {away_team}")
            #print(" ")
            result = fbref_predict_game_result(home_team, away_team, df)
            predictions.append({
                'league': league,
                'Source': 'fbxg',
                'home_team': home_team,
                'away_team': away_team,
                '+1.5(%)' : result['prob_over_1_goal'],
                '+2.5(%)': result['prob_over_2_goals'],
                '+3.5(%)': result['prob_over_3_goals'],
                'H+1.5(%)': result['h_+1.5'],
                'A+1.5(%)': result['a_+1.5'],
                #'AA(%)': result['AA'],
                'xG': result['expected_goals']

            })

        
        # Appending into the dictionary
        predictions_dict_ou[league] = predictions
        print(f'Sucess O/U for {league}') 
        
    
        
    except Exception as e:
        
        try:
            
            # creating an empty list to store the predictions for each game
            predictions = []

            for i, row in df_matchdays.iterrows():
                home_team = row['home_team']
                away_team = row['away_team']
                #print(f"Match {i+1}: {home_team} vs {away_team}")
                #print(" ")
                result = fbref_predict_game_result_Goals(home_team, away_team, df)
                predictions.append({
                    'league': league,
                    'Source': 'fbxg_n',
                    'home_team': home_team,
                    'away_team': away_team,
                    '+1.5(%)' : result['prob_over_1_goal'],
                    '+2.5(%)': result['prob_over_2_goals'],
                    '+3.5(%)': result['prob_over_3_goals'],
                    'H+1.5(%)': result['h_+1.5'],
                    'A+1.5(%)': result['a_+1.5'],
                    #'AA(%)': result['AA'],
                    'xG': result['expected_goals']

                })


            # Appending into the dictionary
            predictions_dict_ou[league] = predictions
            print(f'Sucess O/U for {league}') 
            
        except Exception as e:
            print(f"Failed to generate O/U predictions for {league} data: {e}")
        
        
    # ******************* H2H Predictions ******************* #
    
    try:
        
        # creating an empty list to store the predictions for each game
        predictions_h2h = []

        for i, row in df_matchdays.iterrows():
            home_team = row['home_team']
            away_team = row['away_team']


            #print(f"Match {i+1}: {home_team} vs {away_team}")
            #print(" ")
            result_h2h = fbref_predict_game_winner(home_team, away_team, df)
            predictions_h2h.append({
                'League': league,
                'Source': 'fbxg',
                'home_team': home_team,
                'away_team': away_team,
                'Home (%)': result_h2h['home_win_prob'],
                'Draw (%)': result_h2h['draw_prob'],
                'Away (%)': result_h2h['away_win_prob'],
            })
            #print(result_h2h)
            #print(" ")

            # Appending into dictionary
            predictions_dict_h2h[league] = predictions
            print(f'Success H2H for {league}')
        
    except Exception as e:
        
        try:
        
            # creating an empty list to store the predictions for each game
            predictions_h2h = []

            for i, row in df_matchdays.iterrows():
                home_team = row['home_team']
                away_team = row['away_team']


                #print(f"Match {i+1}: {home_team} vs {away_team}")
                #print(" ")
                result_h2h = fbref_predict_game_winner_Goals(home_team, away_team, df)
                predictions_h2h.append({
                    'League': league,
                    'Source': 'fbxg',
                    'home_team': home_team,
                    'away_team': away_team,
                    'Home (%)': result_h2h['home_win_prob'],
                    'Draw (%)': result_h2h['draw_prob'],
                    'Away (%)': result_h2h['away_win_prob'],
                })
                #print(result_h2h)
                #print(" ")

                # Appending into dictionary
                predictions_dict_h2h[league] = predictions
                print(f'Success H2H for {league}')
                
        except Exception as e:
            print(f"Failed to generate H2H predictions for {league} data: {e}") 

Sucess O/U for Eredivisie
Sucess O/U for Liga MX
Success H2H for Liga MX
Sucess O/U for Liga Argentina
Success H2H for Liga Argentina
Sucess O/U for Brasileirao
Success H2H for Brasileirao
Sucess O/U for MLS
Success H2H for MLS
Sucess O/U for Premier W
Sucess O/U for MLS W
Success H2H for MLS W
Sucess O/U for Spain W
Sucess O/U for Bundesliga W
Sucess O/U for Peru
Success H2H for Peru
Sucess O/U for Ecuador
Success H2H for Ecuador
Sucess O/U for Paraguay
Success H2H for Paraguay
Sucess O/U for Uruguay
Success H2H for Uruguay
Sucess O/U for Chile
Success H2H for Chile
Sucess O/U for Poland
Success H2H for Poland
Sucess O/U for Sweden
Success H2H for Sweden
Sucess O/U for Norway
Success H2H for Norway
Sucess O/U for Czechia
Success H2H for Czechia
Sucess O/U for Japan
Success H2H for Japan


# Data Consolidation

In [73]:
flat_data_ou = [item for sublist in predictions_dict_ou.values() for item in sublist]

# Creating DataFrame
df_ou = pd.DataFrame(flat_data_ou)

# Displaying DataFrame
df_ou

Unnamed: 0,league,Source,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG
0,La Liga,ud,Athletic Club,Getafe,0.920103,0.746304,0.523122,0.718437,0.098971,4.819945
1,EPL,ud,Manchester United,Fulham,0.922523,0.72531,0.464594,0.615335,0.285897,5.038781
2,Bundesliga,ud,Borussia M.Gladbach,Bayer Leverkusen,0.928202,0.761308,0.538863,0.12368,0.727621,4.99308
3,Serie A,ud,Genoa,Inter,0.806997,0.541225,0.302305,0.050744,0.5167,3.565097
4,Ligue 1,ud,Brest,Marseille,0.778519,0.483161,0.241452,0.436066,0.083511,3.439446
5,Liga MX,fbxg,Necaxa,FC Juárez,0.992955,0.958067,0.8704,0.938031,0.323324,8.0
6,Liga Argentina,fbxg,Instituto,Platense,0.667974,0.334933,0.128502,0.269379,0.089724,2.85
7,Brasileirao,fbxg,Juventude,Botafogo (RJ),0.603599,0.267889,0.088939,0.19803,0.08711,2.563636
8,MLS,fbxg,Vancouver W'caps,LAFC,0.945805,0.777561,0.518787,0.556137,0.498855,5.578512
9,MLS W,fbxg,Wave,Angel City,0.718143,0.384584,0.154824,0.284724,0.13961,3.142857


In [72]:
flat_data_h2h = [item for sublist in predictions_dict_h2h.values() for item in sublist]

# Creating DataFrame
df_h2h = pd.DataFrame(flat_data_h2h)

# Displaying DataFrame
df_h2h

Unnamed: 0,league,Source,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG
0,La Liga,ud,Athletic Club,Getafe,0.920103,0.746304,0.523122,0.718437,0.098971,4.819945
1,EPL,ud,Manchester United,Fulham,0.922523,0.72531,0.464594,0.615335,0.285897,5.038781
2,Bundesliga,ud,Borussia M.Gladbach,Bayer Leverkusen,0.928202,0.761308,0.538863,0.12368,0.727621,4.99308
3,Serie A,ud,Genoa,Inter,0.806997,0.541225,0.302305,0.050744,0.5167,3.565097
4,Ligue 1,ud,Brest,Marseille,0.778519,0.483161,0.241452,0.436066,0.083511,3.439446
5,Liga MX,fbxg,Necaxa,FC Juárez,0.992955,0.958067,0.8704,0.938031,0.323324,8.0
6,Liga Argentina,fbxg,Instituto,Platense,0.667974,0.334933,0.128502,0.269379,0.089724,2.85
7,Brasileirao,fbxg,Juventude,Botafogo (RJ),0.603599,0.267889,0.088939,0.19803,0.08711,2.563636
8,MLS,fbxg,Vancouver W'caps,LAFC,0.945805,0.777561,0.518787,0.556137,0.498855,5.578512
9,MLS W,fbxg,Wave,Angel City,0.718143,0.384584,0.154824,0.284724,0.13961,3.142857


In [74]:
'''

import nbformat
from nbconvert import PythonExporter

# Load the notebook
with open('Scraper.ipynb', 'r') as notebook_file:
    notebook_content = notebook_file.read()

# Parse the notebook
notebook = nbformat.reads(notebook_content, as_version=4)

# Convert notebook to Python script
python_exporter = PythonExporter()
python_script, _ = python_exporter.from_notebook_node(notebook)

# Write the Python script to a file
with open('ScraperScript.py', 'w') as script_file:
    script_file.write(python_script)
'''

In [79]:
from datetime import datetime

### Saving dataframes into csv's #####
today = datetime.today()

today = today.strftime("%m/%d/%Y")

In [80]:
today

'08/23/2024'