<a href="https://colab.research.google.com/github/fopamesmin/movie-project/blob/main/premi%C3%A8religue.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import requests as req
from bs4 import BeautifulSoup as BS
import logging
import pandas as pd
from tabulate import tabulate

# Constants
# Page that the scraper downloads and parses.
premier_league_url = "https://www.premierleague.com/tables"

# Table column labels (without the "Season" column).
# NOTE(review): not referenced by the code visible in this section — confirm
# whether it is still needed.
COLUMNS = [
    "Position",
    "Club",
    "Played",
    "Won",
    "Drawn",
    "Lost",
    "GF",
    "GA",
    "GD",
    "Points",
]

# Season labels iterated over by the extraction loop.
ID_seasonId = [
    "2020/2021",
    "2021/2022",
    "2022/2023",
]

# HTTP headers sent with every request (browser-like User-Agent).
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/50.0.2661.102 Safari/537.36'
    ),
}

# Emit INFO-and-above log records through the root logger.
logging.basicConfig(level=logging.INFO)

def fetch_data(season, timeout=10):
    """Download the league table page and return one row per team.

    NOTE(review): ``season`` is only used to label the returned rows. The
    request always goes to ``premier_league_url`` with no season-specific
    parameter, so every call parses whatever table that page serves at the
    time of the request. Selecting a given season needs a site-specific
    filter that is not visible here — TODO confirm and wire it in.

    Args:
        season: Label stored as the first element of every returned row.
        timeout: Seconds to wait for the HTTP response before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of rows shaped ``[season, position, club, played, won, drawn,
        lost, gf, ga, gd, points]``. Every value except ``season`` is the
        stripped text of the corresponding HTML element. The list is empty
        when the request fails, the status is not OK, or the table body is
        missing; rows that lack the expected elements are skipped.
    """
    teams_list = []

    try:
        response = req.get(premier_league_url, headers=headers, timeout=timeout)
    except req.RequestException as exc:
        # Network-level failures (DNS, refused connection, timeout) are
        # treated like a bad status: log and return no rows so the caller's
        # loop over seasons keeps going.
        logging.error("Failed to retrieve data: %s", exc)
        return teams_list

    if response.status_code != req.codes.ok:
        logging.error("Failed to retrieve data.")
        return teams_list

    soup = BS(response.content, "html.parser")
    tbody = soup.find("tbody", class_="league-table__tbody")
    if not tbody:
        logging.error("Table body not found.")
        return teams_list

    for row in tbody.find_all("tr"):
        try:
            position_span = row.find("span", class_="league-table__value")
            team_td = row.find("td", class_="league-table__team")
            stats = row.find_all("td")

            # Skip rows that do not look like a team line (e.g. rows without
            # the position/team cells or with fewer than ten cells).
            if not (position_span and team_td and len(stats) >= 10):
                continue

            position = position_span.text.strip()
            # NOTE(review): the captured output shows the club cell text
            # spanning two lines (full name, then abbreviation); kept as-is.
            team = team_td.text.strip()

            # Cells 2..9 hold, in order: played, won, drawn, lost, GF, GA,
            # GD and points.
            figures = [cell.text.strip() for cell in stats[2:10]]

            teams_list.append([season, position, team, *figures])
        except Exception as e:
            # Best effort: one malformed row must not discard the others.
            logging.error(f"Error parsing row: {e}")
            continue

    return teams_list

# Gather the rows returned by fetch_data for every configured season
# into a single flat list.
all_teams_list = []
for season in ID_seasonId:
    all_teams_list += fetch_data(season)

# Turn the collected rows into a pandas DataFrame.
df = pd.DataFrame(
    all_teams_list,
    columns=[
        "Season", "Position", "Club", "Played", "Won", "Drawn",
        "Lost", "GF", "GA", "GD", "Points",
    ],
)

# Convert the statistic columns from scraped text to numbers; values that
# cannot be parsed become NaN (errors='coerce').
for stat_column in ("Played", "Won", "Drawn", "Lost", "GF", "GA", "GD", "Points"):
    df[stat_column] = pd.to_numeric(df[stat_column], errors='coerce')

# Totals over every row of the table.
total_played = df['Played'].sum()
total_drawn = df['Drawn'].sum()
total_won = df['Won'].sum()

# Overall draw / win percentages relative to the summed "Played" column.
# Fall back to 0 when nothing was played to avoid dividing by zero.
if total_played > 0:
    total_draw_percentage = (total_drawn / total_played) * 100
    total_win_percentage = (total_won / total_played) * 100
else:
    total_draw_percentage = 0
    total_win_percentage = 0

# Show the full table followed by the two summary figures.
print(tabulate(df, headers="keys", tablefmt="grid"))

print("\nPourcentage total de matchs nuls:", total_draw_percentage)
print("Pourcentage total de matchs gagnés:", total_win_percentage)

+----+-----------+------------+--------------------------+----------+-------+---------+--------+------+------+------+----------+
|    | Season    |   Position | Club                     |   Played |   Won |   Drawn |   Lost |   GF |   GA |   GD |   Points |
|  0 | 2020/2021 |          1 | Manchester City          |       38 |    28 |       7 |      3 |   96 |   34 |   62 |       91 |
|    |           |            | MCI                      |          |       |         |        |      |      |      |          |
+----+-----------+------------+--------------------------+----------+-------+---------+--------+------+------+------+----------+
|  1 | 2020/2021 |          2 | Arsenal                  |       38 |    28 |       5 |      5 |   91 |   29 |   62 |       89 |
|    |           |            | ARS                      |          |       |         |        |      |      |      |          |
+----+-----------+------------+--------------------------+----------+-------+---------+--------+-

In [5]:
import pandas as pd

# Placeholder standings for the 2020/2021, 2021/2022 and 2022/2023 seasons.
# Replace them with the real scraped data, provided as a pandas DataFrame
# that has the same columns.
season_labels = ['2020/2021', '2021/2022', '2022/2023']
teams_per_season = 20
season_count = len(season_labels)

data = {
    # Each season label repeated once per team, seasons in order.
    'Season': [label for label in season_labels for _ in range(teams_per_season)],
    # Sample values below — replace with the real data.
    'Played': [38] * (teams_per_season * season_count),
    'Won': [
        20, 18, 22, 24, 19, 20, 16, 15, 17, 18,
        15, 14, 13, 12, 10, 8, 7, 6, 5, 4,
    ] * season_count,
    'Drawn': [
        8, 10, 6, 4, 9, 8, 12, 13, 11, 10,
        13, 14, 15, 16, 18, 20, 21, 22, 23, 24,
    ] * season_count,
    'Lost': [10] * (teams_per_season * season_count),
}

df = pd.DataFrame(data)

# Per-season draw and win percentages: the season's summed draws (or wins)
# divided by the season's summed "Played" values.
season_totals = df.groupby('Season')[['Played', 'Won', 'Drawn']].sum()
average_draw_percentage = season_totals['Drawn'] / season_totals['Played'] * 100
average_win_percentage = season_totals['Won'] / season_totals['Played'] * 100

# Prediction for the 2024 season: the plain mean of the per-season percentages.
predicted_draw_percentage_2024 = average_draw_percentage.mean()
predicted_win_percentage_2024 = average_win_percentage.mean()

# Display the predictions.
print("\nPrédictions pour la saison 2024:")
print(f"Pourcentage prédit de matchs nuls: {predicted_draw_percentage_2024:.2f}%")
print(f"Pourcentage prédit de matchs gagnés: {predicted_win_percentage_2024:.2f}%")


Prédictions pour la saison 2024:
Pourcentage prédit de matchs nuls: 36.45%
Pourcentage prédit de matchs gagnés: 37.24%
