<a href="https://colab.research.google.com/github/fopamesmin/movie-project/blob/main/ligue.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install colorama

Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6


In [None]:
from google.colab import drive
# Mount the user's Google Drive at /content/drive inside the Colab runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Directory under the /content/drive mount point; despite the name, this is a
# filesystem path rather than a URL.
# NOTE(review): no other cell visible in this notebook reads it — confirm it is still needed.
dataset_url = "/content/drive/MyDrive/DataSet"

In [None]:
import requests as req
from bs4 import BeautifulSoup as BS
import logging
import pandas as pd
from tabulate import tabulate

# Constants
# Ranking page requested by fetch_data.
sofifa_url = "https://www.ligue1.com/ranking"

# Lower-case field names for one standings row.
COLUMNS = [
    "position",
    "team",
    "points",
    "played",
    "won",
    "drawn",
    "lost",
    "gf",
    "ga",
]

# Season labels, one per consecutive pair of years from 2020/2021 to 2022/2023.
ID_seasonId = [f"{start_year}/{start_year + 1}" for start_year in range(2020, 2023)]

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/50.0.2661.102 Safari/537.36'
    ),
}

logging.basicConfig(level=logging.INFO)

def _parse_team_row(li, season):
    """Convert one ``li.GeneralStats-row`` element into a standings record.

    Args:
        li: Parsed ``<li>`` element of the ranking table.
        season: Season label stored as the first field of the record.

    Returns:
        ``[season, position, team, points, played, won, drawn, lost, gf, ga]``
        with every scraped value as a stripped string, or ``None`` (after
        logging an error) when the row has fewer than 11 stat cells.

    Raises:
        AttributeError: If an expected ``div``/``span`` is missing from the
            row (``find`` returned ``None``).
    """
    position = li.find("div", class_="GeneralStats-item--position").text.strip()
    club_cell = li.find("div", class_="GeneralStats-item--club")
    team = club_cell.find("span", class_="desktop-item").text.strip()
    points = li.find("div", class_="GeneralStats-item--points").text.strip()
    stats = li.find_all("div", class_="GeneralStats-item")

    # A complete row must expose at least 11 stat cells; only cells 3..8 are read.
    if len(stats) < 11:
        logging.error(f"Not enough stats found for team: {team}")
        return None

    played, won, drawn, lost, gf, ga = (cell.text.strip() for cell in stats[3:9])
    return [season, position, team, points, played, won, drawn, lost, gf, ga]


def fetch_data(season, timeout=10):
    """Scrape the ranking table for one season.

    Args:
        season: Season label such as ``"2020/2021"``. It is stored in every
            returned row and also sent to the site to select the season.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of rows, each
        ``[season, position, team, points, played, won, drawn, lost, gf, ga]``
        with the scraped values as strings. The list is empty when the request
        fails or returns a non-OK status; rows that cannot be parsed are
        logged and skipped.
    """
    teams_list = []

    # Previously the season was never sent with the request, so every season
    # fetched the same page and produced identical rows under different labels.
    # NOTE(review): assumes the page selects the season through a `seasonId`
    # query parameter written as "YYYY-YYYY" — confirm against the live site.
    query = {"seasonId": str(season).replace("/", "-")}

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = req.get(sofifa_url, headers=headers, params=query, timeout=timeout)
    except req.exceptions.RequestException as e:
        # A network failure for one season should not abort the other seasons.
        logging.error(f"Failed to retrieve data for season {season}: {e}")
        return teams_list

    if response.status_code != req.codes.ok:
        logging.error(f"Failed to retrieve data for season {season}")
        return teams_list

    soup = BS(response.content, "lxml")
    for ul in soup.select("div.classement-table-body ul"):
        for li in ul.find_all("li", class_="GeneralStats-row"):
            try:
                team_data = _parse_team_row(li, season)
            except Exception as e:
                # Best effort: one malformed row must not discard the rest.
                logging.error(f"Error parsing row: {e}")
                continue
            if team_data is not None:
                teams_list.append(team_data)

    return teams_list

# Gather the scraped rows for every configured season
all_teams_list = []
for season in ID_seasonId:
    all_teams_list += fetch_data(season)

# Build a pandas DataFrame from the collected rows
table_headers = ["Season", "Position", "Team", "Points", "Played", "Won", "Drawn", "Lost", "GF", "GA"]
df = pd.DataFrame(all_teams_list, columns=table_headers)

# Cast the statistic columns to numeric dtypes (unparseable values become NaN)
for numeric_column in ("Points", "Played", "Won", "Drawn", "Lost", "GF", "GA"):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')

# Totals of games played, drawn and won across all rows
total_played = df['Played'].sum()
total_drawn = df['Drawn'].sum()
total_won = df['Won'].sum()

# Overall percentages; fall back to 0 when no games were recorded
if total_played > 0:
    total_draw_percentage = total_drawn / total_played * 100
    total_win_percentage = total_won / total_played * 100
else:
    total_draw_percentage = 0
    total_win_percentage = 0

# Show the results
print("Données extraites pour les saisons:")
print(tabulate(all_teams_list, headers=table_headers))

print("\nPourcentage total de matchs nuls:", total_draw_percentage)
print("Pourcentage total de matchs gagnés:", total_win_percentage)

Données extraites pour les saisons:
Season       Position  Team                      Points    Played    Won    Drawn    Lost    GF    GA
---------  ----------  ----------------------  --------  --------  -----  -------  ------  ----  ----
2020/2021           1  PARIS SAINT-GERMAIN           76        34     22       10       2    81    33
2020/2021           2  AS MONACO                     67        34     20        7       7    68    42
2020/2021           3  STADE BRESTOIS 29             61        34     17       10       7    53    34
2020/2021           4  LOSC LILLE                    59        34     16       11       7    52    34
2020/2021           5  OGC NICE                      55        34     15       10       9    40    29
2020/2021           6  OLYMPIQUE LYONNAIS            53        34     16        5      13    49    55
2020/2021           7  RC LENS                       51        34     14        9      11    45    37
2020/2021           8  OLYMPIQUE DE MARSEILLE 

In [None]:
import pandas as pd

# Totals of games played, drawn and won across all rows of df
total_played = df['Played'].sum()
total_drawn = df['Drawn'].sum()
total_won = df['Won'].sum()

# Overall percentages; fall back to 0 when no games were recorded
if total_played > 0:
    total_draw_percentage = total_drawn / total_played * 100
    total_win_percentage = total_won / total_played * 100
else:
    total_draw_percentage = 0
    total_win_percentage = 0

# The 2024 "prediction" simply reuses the overall historical percentages
predicted_draw_percentage_2024, predicted_win_percentage_2024 = (
    total_draw_percentage,
    total_win_percentage,
)

print("Prédiction pour l'année 2024:")
print("Pourcentage prédit de matchs nuls:", predicted_draw_percentage_2024)
print("Pourcentage prédit de matchs gagnés:", predicted_win_percentage_2024)

Prédiction pour l'année 2024:
Pourcentage prédit de matchs nuls: 26.47058823529412
Pourcentage prédit de matchs gagnés: 36.76470588235294
