<a href="https://colab.research.google.com/github/fopamesmin/4MAc/blob/main/_modele_IA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from google.colab import drive
import numpy as np

# Monter Google Drive
drive.mount('/content/drive')

# Chemins vers les fichiers CSV dans Google Drive
file_paths = {
    "Premier League": "/content/drive/My Drive/premier_league_data.csv",
    "Ligue 1": "/content/drive/My Drive/ligue1_data.csv",
    "La Liga": "/content/drive/My Drive/laliga_data.csv"
}

# Fonction pour charger, renommer et préparer les données
def load_and_prepare_data(path):
    df = pd.read_csv(path)

    # Sélectionner les colonnes pertinentes et les renommer si nécessaire
    df = df[['Played', 'Won', 'Drawn', 'Lost']]  # Sélection des colonnes spécifiques

    # Vérifier les colonnes attendues
    expected_columns = ['Played', 'Won', 'Drawn', 'Lost']
    for col in expected_columns:
        if col not in df.columns:
            raise ValueError(f"Missing expected column: {col}")

    # Conversion des colonnes en types numériques si nécessaire
    for col in expected_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Calcul des pourcentages
    df['Win_Percentage'] = (df['Won'] / df['Played']) * 100
    df['Draw_Percentage'] = (df['Drawn'] / df['Played']) * 100
    df['Lost_Percentage'] = (df['Lost'] / df['Played']) * 100

    return df

# Fonction pour entraîner un modèle de régression linéaire
def train_model(df, target_column):
    X = df[['Played', 'Won', 'Drawn', 'Lost']]  # Utiliser uniquement ces colonnes comme features
    y = df[target_column]

    # Vérifier les NaN dans y
    if y.isnull().any():
        raise ValueError(f"La variable cible '{target_column}' contient des valeurs NaN.")

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)

    return model, mae

# Charger les données pour toutes les ligues
league_data = {}
for league, path in file_paths.items():
    try:
        df = load_and_prepare_data(path)
        league_data[league] = df
    except Exception as e:
        print(f"Error loading data for {league}: {e}")

# Entraîner des modèles pour chaque ligue et chaque pourcentage
models = {}
maes = {}
for league, df in league_data.items():
    models[league] = {}
    maes[league] = {}
    for target in ['Win_Percentage', 'Draw_Percentage', 'Lost_Percentage']:
        try:
            model, mae = train_model(df, target)
            models[league][target] = model
            maes[league][target] = mae
        except ValueError as ve:
            print(f"Skipping training for {league} - {target}: {ve}")

# Comparer les ligues en fonction des MAE (Mean Absolute Error)
best_league = min(maes, key=lambda league: sum(maes[league].values()))

# Afficher les résultats
print("Meilleure ligue en fonction des performances de prédiction:")
print(best_league)

# Afficher les MAE pour chaque ligue
for league, errors in maes.items():
    print(f"\n{league} MAE:")
    for target, mae in errors.items():
        print(f"  {target}: {mae}")

# Prédictions pour une nouvelle saison (exemple)
hypothetical_data = np.array([[38, 80, 40, 40]])  # Exemple: [Played, Won, Drawn, Lost]
predictions = {}
for league, model_dict in models.items():
    predictions[league] = {}
    for target, model in model_dict.items():
        predictions[league][target] = model.predict(hypothetical_data)[0]

print("\nPrédictions pour une nouvelle saison:")
for league, preds in predictions.items():
    print(f"\n{league}:")
    for target, pred in preds.items():
        print(f"  {target}: {pred:.2f}%")

# Sauvegarder les données avec les noms de colonnes spécifiés
for league, df in league_data.items():
    save_path = f"/content/drive/My Drive/{league.lower().replace(' ', '_')}_prepared_data.csv"
    df[['Played', 'Won', 'Drawn', 'Lost']].to_csv(save_path, index=False)
    print(f"Data saved for {league} at {save_path}")

Mounted at /content/drive
Meilleure ligue en fonction des performances de prédiction:
Ligue 1

Premier League MAE:
  Win_Percentage: 1.080617077301819e-14
  Draw_Percentage: 2.812564995717063e-15
  Lost_Percentage: 7.919590908992783e-15

Ligue 1 MAE:
  Win_Percentage: 7.751375299201092e-15
  Draw_Percentage: 5.490557503600774e-15
  Lost_Percentage: 2.5837917664003644e-15

La Liga MAE:
  Win_Percentage: 2.197142131058917
  Draw_Percentage: 3.02014916115481
  Lost_Percentage: 1.475790049452165

Prédictions pour une nouvelle saison:

Premier League:
  Win_Percentage: 103.51%
  Draw_Percentage: -1.75%
  Lost_Percentage: -1.75%

Ligue 1:
  Win_Percentage: 111.76%
  Draw_Percentage: -5.88%
  Lost_Percentage: -5.88%

La Liga:
  Win_Percentage: 82.63%
  Draw_Percentage: 5.84%
  Lost_Percentage: 11.53%




Data saved for Premier League at /content/drive/My Drive/premier_league_prepared_data.csv
Data saved for Ligue 1 at /content/drive/My Drive/ligue_1_prepared_data.csv
Data saved for La Liga at /content/drive/My Drive/la_liga_prepared_data.csv
