In [1]:
!pip install scikit-learn==1.4.2



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib

# Load the dataset
data = pd.read_csv('/content/matchss.csv')

# Encode the team names.
# The encoder is fitted ONCE on the union of both columns: calling
# `fit_transform` a second time would discard the first mapping, so the same
# club could receive different integers in `team_encoded` and
# `opponent_encoded`, and the encoder saved at the end of this cell would only
# know the teams seen in `Opponent`.
encoder = LabelEncoder()
encoder.fit(pd.concat([data['team_name'], data['Opponent']], ignore_index=True))
data['team_encoded'] = encoder.transform(data['team_name'])
data['opponent_encoded'] = encoder.transform(data['Opponent'])

# Head-to-head record between two teams in earlier seasons
def get_direct_matches(data, home_team, away_team, current_season):
    """Count past head-to-head results between two teams.

    Only rows whose season starts strictly before the start year of
    ``current_season`` are considered, in either orientation
    (home vs. away or away vs. home).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``team_name``, ``Opponent``, ``saison`` and
        ``Result``. ``saison`` values are strings whose part before the first
        ``'-'`` is the season's start year.
    home_team, away_team : str
        Team names as they appear in ``team_name`` / ``Opponent``.
    current_season : str
        Season label in the same format as ``saison``.

    Returns
    -------
    tuple of (int, int, int)
        ``(home_wins, away_wins, draws)``: rows with ``Result == 'Win'``
        recorded for ``home_team``, the same for ``away_team``, and every
        matching row with ``Result == 'Draw'``.

    Notes
    -----
    Side effect: a ``season_start_year`` column is (re)written on ``data`` at
    every call; the calling code drops it afterwards.
    """
    cutoff_year = int(current_season.split('-')[0])
    data['season_start_year'] = data['saison'].map(lambda label: int(label.split('-')[0]))

    # A fixture between the two clubs may be recorded from either side.
    as_home_row = (data['team_name'] == home_team) & (data['Opponent'] == away_team)
    as_away_row = (data['team_name'] == away_team) & (data['Opponent'] == home_team)
    earlier_season = data['season_start_year'] < cutoff_year
    history = data[(as_home_row | as_away_row) & earlier_season]

    won = history['Result'] == 'Win'
    home_wins = int((won & (history['team_name'] == home_team)).sum())
    away_wins = int((won & (history['team_name'] == away_team)).sum())
    draws = int((history['Result'] == 'Draw').sum())
    return home_wins, away_wins, draws

# Compute the head-to-head record for every row of the DataFrame.
# Each element of `direct_stats` is a (home_wins, away_wins, draws) tuple.
direct_stats = data.apply(
    lambda row: get_direct_matches(data, row['team_name'], row['Opponent'], row['saison']),
    axis=1,
)

# Spread the tuples over three dedicated columns in a single pass
direct_stats_table = pd.DataFrame(direct_stats.tolist(), index=data.index)
data['home_team_direct_wins'] = direct_stats_table[0]
data['away_team_direct_wins'] = direct_stats_table[1]
data['direct_draws'] = direct_stats_table[2]

# `season_start_year` is a helper column written by get_direct_matches
data.drop(columns=['season_start_year'], inplace=True)

# Features used for training
# NOTE(review): `scored` and `conceded` look like the goals of the very match
# whose `Result` is being predicted; if so, they leak the target and would
# explain a near-perfect test accuracy. TODO confirm against the dataset.
features = [
    'team_encoded', 'opponent_encoded',
    'scored', 'conceded', 'xG', 'xGA',
    'Prob Win', 'Prob Draw',
    'home_team_direct_wins', 'away_team_direct_wins', 'direct_draws',
]
X = data[features]
y = data['Result']

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build and train the model (`fit` returns the fitted estimator itself)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Evaluate on the held-out rows
test_predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, test_predictions)
print(f"Accuracy of the model: {accuracy * 100:.2f}%")

# Persist the model and the team-name encoder
joblib.dump(model, 'football_prediction_model.pkl')
joblib.dump(encoder, 'team_encoder.pkl')


Accuracy of the model: 99.67%


['team_encoder.pkl']

In [3]:
def predict_match():
    """Interactively predict the outcome probabilities of one match.

    Loads the saved model and team encoder from the working directory, asks
    for the home and away team names on standard input, and prints the
    predicted probabilities of a home win, an away win and a draw.

    Returns
    -------
    None
        Results are printed. If a team name is unknown to the encoder, a
        message is printed and the function returns early.
    """
    print("Hello! I'm your football match result predictor.")

    # Load the model and the encoder.
    # joblib.load unpickles arbitrary objects: only load files you produced yourself.
    model = joblib.load('football_prediction_model.pkl')
    encoder = joblib.load('team_encoder.pkl')

    # Ask for the team names; surrounding whitespace would make a valid name unrecognised
    home_team = input("Enter the name of the home team: ").strip()
    away_team = input("Enter the name of the away team: ").strip()

    # Encode the team names
    try:
        home_encoded, away_encoded = encoder.transform([home_team, away_team])
    except ValueError:
        print("One of the team names is not recognized. Please enter valid team names.")
        return

    # Neutral placeholder values for the remaining features (dummy example with
    # 50% win / 25% draw); these should be based on real or estimated data.
    row = [home_encoded, away_encoded, 0, 0, 0, 0, 50, 25, 0, 0, 0]

    # A model fitted on a DataFrame remembers its column names; passing the same
    # names back avoids scikit-learn's feature-name mismatch warning.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is not None:
        features = pd.DataFrame([row], columns=feature_names)
    else:
        features = [row]

    # Predicted probabilities, looked up by class label rather than by position
    # so the output does not depend on how the labels happen to sort.
    probability = model.predict_proba(features)[0]
    by_label = dict(zip(model.classes_, probability))
    home_win = by_label.get('Win', 0.0)
    draw = by_label.get('Draw', 0.0)
    # Any label other than 'Win' / 'Draw' is a defeat of the home side, i.e. an away win.
    away_win = sum(p for label, p in by_label.items() if label not in ('Win', 'Draw'))

    # Display the predicted results
    print(f"Predicted Probability of Home Win for {home_team}: {home_win*100:.2f}%")
    print(f"Predicted Probability of Away Win for {away_team}: {away_win*100:.2f}%")
    print(f"Predicted Probability of Draw: {draw*100:.2f}%")

# The function can now be called to try out predictions
predict_match()


Hello! I'm your football match result predictor.
Enter the name of the home team: Chelsea
Enter the name of the away team: Liverpool
Predicted Probability of Home Win for Chelsea: 5.00%
Predicted Probability of Away Win for Liverpool: 3.00%
Predicted Probability of Draw: 92.00%


