In [57]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from datetime import datetime
from scrapping import *

In [22]:

def rate_limit():
    """
    Fonction pour limiter le taux de requêtes et éviter de surcharger le serveur.
    Utilise un délai d'attente entre les requêtes pour respecter les limitations.
    """
    MIN_REQUEST_INTERVAL = 2.5
    last_request_time = getattr(rate_limit, "last_request_time", None)
    if last_request_time is not None:
        elapsed_time = time.time() - last_request_time
        if elapsed_time < MIN_REQUEST_INTERVAL:
            time.sleep(MIN_REQUEST_INTERVAL - elapsed_time)
    rate_limit.last_request_time = time.time()


In [23]:
def scrape_latest_ligue1_data():
    """
    Récupère les dernières données disponibles pour chaque équipe de la Ligue 1.

    Returns:
        DataFrame: Un DataFrame Pandas avec les dernières données de chaque équipe.
    """
    # Configuration initiale similaire à scrape_ligue1_data()
    url_ligue1 = "https://fbref.com/en/comps/13/Ligue-1-Stats"
    headers = {'User-Agent': 'Mozilla/5.0'}
    latest_data = []

    response = requests.get(url_ligue1, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    teams_urls = ["https://fbref.com" + equipe.get("href") 
                  for equipe in soup.select("table.stats_table")[0].find_all("a") 
                  if "squads" in equipe.get("href", "")]

    # Traitement similaire à scrape_ligue1_data() pour chaque équipe
    for team_url in teams_urls:
        rate_limit()
        team_response = requests.get(team_url, headers=headers)
        team_data = pd.read_html(team_response.text, match="Scores")[0]
        team_name = team_url.split("/")[-1].replace("-Stats", "").replace("-", " ")
        team_data["Team"] = team_name
        latest_data.append(team_data)

    # Retourne les données concaténées de toutes les équipes
    return pd.concat(latest_data, ignore_index=True)


In [48]:
A = scrape_latest_ligue1_data()

  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  team_dat

In [49]:
A['DateTime'] = pd.to_datetime(A['Date'] + ' ' + A['Time'])
A.drop(["Date", "Time"], axis=1, inplace=True)

In [50]:
A = A[['DateTime'] + [col for col in A if col != 'DateTime']]


In [51]:

mapping_equipe = {
    'Nimes': 'Nîmes',
    'Paris S-G': 'Paris Saint Germain',
    'Saint Etienne': 'Saint-Étienne'
}

In [52]:

A['Team'] = A['Team'].map(mapping_equipe).fillna(A['Team'])

# Garder que les matchs 'Ligue 1' (pas de matchs de Coupe)
A = A[A["Comp"] == "Ligue 1"]

# Extraire uniquement le numéro de chaque journée
A['Round'] = A['Round'].str.extract(r'(\d+)').astype(int)


In [53]:
A.sort_values(by="DateTime")

Unnamed: 0,DateTime,Comp,Round,Day,Venue,Result,GF,GA,Opponent,xG,xGA,Poss,Attendance,Captain,Formation,Referee,Match Report,Notes,Team
149,2023-08-11 21:00:00,Ligue 1,1,Fri,Away,D,1.0,1.0,Nice,1.3,1.2,60.0,29436.0,Benjamin André,4-2-3-1,Benoît Bastien,Match Report,,Lille
44,2023-08-11 21:00:00,Ligue 1,1,Fri,Home,D,1.0,1.0,Lille,1.2,1.3,40.0,29436.0,Dante,4-3-3,Benoît Bastien,Match Report,,Nice
193,2023-08-12 17:00:00,Ligue 1,1,Sat,Home,W,2,1,Reims,1.0,0.8,42.0,63787.0,Valentin Rongier,4-4-2,Eric Wattellier,Match Report,,Marseille
281,2023-08-12 17:00:00,Ligue 1,1,Sat,Away,L,1.0,2.0,Marseille,0.8,1.0,58.0,63787.0,Yunis Abdelhamid,4-2-3-1,Eric Wattellier,Match Report,,Reims
0,2023-08-12 21:00:00,Ligue 1,1,Sat,Home,D,0.0,0.0,Lorient,1.2,0.1,78.0,47000.0,Danilo Pereira,4-3-3,Bastien Dechepy,Match Report,,Paris Saint Germain
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,NaT,Ligue 1,28,Sun,Away,,,,Paris S-G,,,,,,,,Head-to-Head,,Clermont Foot
677,NaT,Ligue 1,29,Sun,Home,,,,Montpellier,,,,,,,,Head-to-Head,,Clermont Foot
678,NaT,Ligue 1,30,Sun,Away,,,,Lens,,,,,,,,Head-to-Head,,Clermont Foot
679,NaT,Ligue 1,31,Sun,Home,,,,Reims,,,,,,,,Head-to-Head,,Clermont Foot


In [54]:
aujourdhui = datetime.now()

A = A[(A['DateTime'] >= aujourdhui) & (A["Round"] == 18)]

In [56]:
A.to_csv("A.csv")

In [59]:
a = scrape_latest_ligue1_data()

  team_data = pd.read_html(team_response.text, match="Scores")[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  team_data = pd.read_html(team_response.text, match="Scores")[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats = pd.read_html(stats_response.text)[0]
  detailed_stats =

In [68]:
D = add_new_matches(base_initiale=b, base_nouvelle=a)

In [60]:
b = pd.read_csv("/home/onyxia/work/Fbref_model/Projet-python/Fbref_alex/SOCCER_201223_18h.csv")