<a href="https://colab.research.google.com/github/fopamesmin/4MAc/blob/main/_league1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests as req
from bs4 import BeautifulSoup as BS
import logging
import pandas as pd
from google.colab import drive

# Constants
# NOTE(review): despite its name, this URL targets the Ligue 1 ranking page.
sofifa_url = "https://www.ligue1.com/ranking"

# NOTE(review): not read anywhere in the visible code; the DataFrame built
# further down spells out its own (capitalised) column names.
COLUMNS = [
    "position",
    "team",
    "points",
    "played",
    "won",
    "drawn",
    "lost",
    "gf",
    "ga",
]

# Season labels attached to every scraped row.
ID_seasonId = [
    "2020/2021",
    "2021/2022",
    "2022/2023",
]

# Browser-like User-Agent sent with each HTTP request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/50.0.2661.102 Safari/537.36"
    ),
}

# Show INFO-level (and above) messages from the root logger.
logging.basicConfig(level=logging.INFO)

def _parse_row(li, season):
    """Extract one standings row from a ``GeneralStats-row`` list item.

    Args:
        li: The BeautifulSoup tag for a single ``<li>`` row.
        season: Season label stored as the first field of the row.

    Returns:
        ``[season, position, team, points, played, won, drawn, lost, gf, ga]``
        with every scraped value as stripped text, or ``None`` when the row
        has fewer than 11 ``GeneralStats-item`` cells (an error is logged).

    Raises:
        AttributeError: If one of the expected elements is missing, because
            ``find`` then returns ``None``.
    """
    position = li.find("div", class_="GeneralStats-item--position").text.strip()
    team = (
        li.find("div", class_="GeneralStats-item--club")
        .find("span", class_="desktop-item")
        .text.strip()
    )
    points = li.find("div", class_="GeneralStats-item--points").text.strip()
    stats = li.find_all("div", class_="GeneralStats-item")

    # Make sure the row has enough cells before indexing into it.
    if len(stats) < 11:
        logging.error(f"Not enough stats found for team: {team}")
        return None

    # Cells 3..8 are read as played, won, drawn, lost, goals for, goals
    # against -- presumably the page's column order; verify against the site.
    played, won, drawn, lost, gf, ga = (cell.text.strip() for cell in stats[3:9])
    return [season, position, team, points, played, won, drawn, lost, gf, ga]


def fetch_data(season, timeout=10):
    """Scrape the ranking page and return one row per team.

    Failures are logged rather than raised, so the caller can keep going with
    whatever data was collected.

    NOTE(review): ``season`` is only used to label the rows and in log
    messages; it is not sent to the server, so every call requests the same
    URL. Confirm how the site selects a season and add it to the request.

    Args:
        season: Season label stored as the first field of every row.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block forever.

    Returns:
        A list of rows, each
        ``[season, position, team, points, played, won, drawn, lost, gf, ga]``.
        Empty when the request fails, the status is not OK, or no row could
        be parsed.
    """
    try:
        response = req.get(sofifa_url, headers=headers, timeout=timeout)
    except req.RequestException as e:
        # Network problems are treated like a bad status code: log and
        # return nothing instead of aborting the whole run.
        logging.error(f"Request failed for season {season}: {e}")
        return []

    if response.status_code != req.codes.ok:
        logging.error(f"Failed to retrieve data for season {season}")
        return []

    soup = BS(response.content, "lxml")
    teams_list = []

    for ul in soup.select("div.classement-table-body ul"):
        for li in ul.find_all("li", class_="GeneralStats-row"):
            try:
                row = _parse_row(li, season)
            except AttributeError as e:
                # A missing element makes ``find`` return None; skip the row.
                logging.error(f"Error parsing row: {e}")
                continue
            if row is not None:
                teams_list.append(row)

    return teams_list

# Mount Google Drive so the CSV can be written to it.
drive.mount('/content/drive')

# Collect the rows for every season into a single flat list.
all_teams_list = []
for season in ID_seasonId:
    all_teams_list += fetch_data(season)

# Turn the list of rows into a pandas DataFrame.
df = pd.DataFrame(
    all_teams_list,
    columns=[
        "Season",
        "Position",
        "Team",
        "Points",
        "Played",
        "Won",
        "Drawn",
        "Lost",
        "GF",
        "GA",
    ],
)

# Convert the scraped text to numbers; values that cannot be parsed become NaN.
for column in ("Points", "Played", "Won", "Drawn", "Lost", "GF", "GA"):
    df[column] = pd.to_numeric(df[column], errors="coerce")

# Save the DataFrame to Google Drive as CSV, without the index column.
file_path = '/content/drive/My Drive/ligue1_data.csv'
df.to_csv(file_path, index=False)

print(f"Données enregistrées dans Google Drive à l'emplacement: {file_path}")

Mounted at /content/drive
Données enregistrées dans Google Drive à l'emplacement: /content/drive/My Drive/ligue1_data.csv
