<a href="https://colab.research.google.com/github/fopamesmin/4MAc/blob/main/_premi%C3%A8re_league.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests as req
from bs4 import BeautifulSoup as BS
import logging
import pandas as pd
from google.colab import drive

# Constants

# Page the league table is scraped from.
premier_league_url = "https://www.premierleague.com/tables"

# Column names for the scraped table (the "Season" column is added separately).
COLUMNS = [
    "Position",
    "Club",
    "Played",
    "Won",
    "Drawn",
    "Lost",
    "GF",
    "GA",
    "GD",
    "Points",
]

# Season labels the extraction loop iterates over.
ID_seasonId = [
    "2020/2021",
    "2021/2022",
    "2022/2023",
]

# HTTP headers sent with the request; carries a desktop-browser User-Agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
}

# Show INFO-level (and above) log records.
logging.basicConfig(level=logging.INFO)

def _parse_row(row, season):
    """Turn one ``<tr>`` of the league table into a flat record.

    Args:
        row: BeautifulSoup tag for a table row.
        season: Season label stored as the first field of the record.

    Returns:
        ``[season, position, club, played, won, drawn, lost, gf, ga, gd,
        points]`` with every scraped value as stripped text, or ``None`` when
        the row lacks the position span, the team cell, or has fewer than ten
        ``<td>`` cells.
    """
    position_span = row.find("span", class_="league-table__value")
    team_td = row.find("td", class_="league-table__team")
    cells = row.find_all("td")

    if not (position_span and team_td and len(cells) >= 10):
        return None

    record = [season, position_span.text.strip(), team_td.text.strip()]
    # Cells 2..9 are read in order as: played, won, drawn, lost, GF, GA, GD, points.
    record.extend(cell.text.strip() for cell in cells[2:10])
    return record


def fetch_data(season, *, timeout=30):
    """Scrape the league table and return one record per team.

    Every failure (network error, non-OK status, missing table body) is logged
    and results in an empty list, so callers can always extend a list with the
    return value.

    Args:
        season: Label stored in the first field of every returned record.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A list of ``[season, position, club, played, won, drawn, lost, gf, ga,
        gd, points]`` lists; all scraped values are strings.
    """
    # NOTE(review): ``season`` only labels the rows. The request below always
    # fetches the same URL, so every season receives the same table. Selecting
    # a season presumably needs a site-specific query parameter - TODO confirm
    # and pass it here.
    try:
        response = req.get(premier_league_url, headers=headers, timeout=timeout)
    except req.exceptions.RequestException as exc:
        # Treat connection errors and timeouts like any other failed retrieval.
        logging.error(f"Failed to retrieve data: {exc}")
        return []

    if response.status_code != req.codes.ok:
        logging.error("Failed to retrieve data.")
        return []

    soup = BS(response.content, "html.parser")
    tbody = soup.find("tbody", class_="league-table__tbody")
    if not tbody:
        logging.error("Table body not found.")
        return []

    teams_list = []
    for row in tbody.find_all("tr"):
        try:
            team_data = _parse_row(row, season)
        except Exception as e:
            # Best effort: one malformed row must not discard the whole table.
            logging.error(f"Error parsing row: {e}")
            continue
        if team_data is not None:
            teams_list.append(team_data)

    return teams_list



# Extract the data for every season into one flat list of records
all_teams_list = [
    team_record
    for season_label in ID_seasonId
    for team_record in fetch_data(season_label)
]

# Convert the list into a pandas DataFrame
df = pd.DataFrame(all_teams_list, columns=["Season", *COLUMNS])

# Convert the statistic columns to numeric types (unparseable values become NaN)
for stat_column in ("Played", "Won", "Drawn", "Lost", "GF", "GA", "GD", "Points"):
    df[stat_column] = pd.to_numeric(df[stat_column], errors="coerce")

# Mount Google Drive
drive.mount('/content/drive')

# Save the DataFrame to Google Drive as CSV (header row, no index column)
file_path = '/content/drive/My Drive/test10premier_league_data.csv'
df.to_csv(file_path, index=False, header=True)

print(f"Données enregistrées dans Google Drive à l'emplacement: {file_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Données enregistrées dans Google Drive à l'emplacement: /content/drive/My Drive/test10premier_league_data.csv
