In [None]:

import pandas as pd # type: ignore

from sklearn.compose import ColumnTransformer # type: ignore
from sklearn.ensemble import RandomForestRegressor # type: ignore
from sklearn.metrics import root_mean_squared_error # type: ignore
from sklearn.model_selection import train_test_split # type: ignore
from sklearn.preprocessing import StandardScaler, OneHotEncoder # type: ignore

# Number of previous matches averaged into a team's "form" feature.
FORM_WINDOW = 5

# Load the dataset
matches = pd.read_csv('totaldata.csv')

# Drop rows that lack one of the columns actually used below. A blanket
# dropna() would also discard matches for gaps in columns this cell never reads.
matches.dropna(subset=['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'], inplace=True)

# Define the target variables for scores
y_home = matches['FTHG']
y_away = matches['FTAG']

# Define features.
# goal_difference is computed from the targets, so each row's form must only
# use *earlier* matches: shift(1) drops the current match from its own rolling
# window (otherwise the feature leaks the value being predicted).
# NOTE(review): this assumes the rows are in chronological order -- confirm.
matches['goal_difference'] = matches['FTHG'] - matches['FTAG']
matches['home_team_form'] = (
    matches.groupby('HomeTeam')['goal_difference']
    .transform(lambda diffs: diffs.shift(1).rolling(FORM_WINDOW).mean())
)
matches['away_team_form'] = (
    matches.groupby('AwayTeam')['goal_difference']
    .transform(lambda diffs: diffs.shift(1).rolling(FORM_WINDOW).mean())
)

features = ['HomeTeam', 'AwayTeam', 'home_team_form', 'away_team_form']
# fillna returns a new frame, so no in-place write on a slice of `matches`.
# Rows without a full window of earlier matches get a neutral form of 0.
X = matches[features].fillna(0)

# Split the data into training and testing sets. A single call keeps the
# feature rows and both targets aligned on exactly the same partition.
X_train, X_test, y_home_train, y_home_test, y_away_train, y_away_test = train_test_split(
    X, y_home, y_away, test_size=0.2, random_state=42)

# Preprocessing pipeline: scale the numeric form features, one-hot encode teams
numeric_features = ['home_team_form', 'away_team_form']
categorical_features = ['HomeTeam', 'AwayTeam']

numeric_transformer = StandardScaler()
# handle_unknown='ignore' lets transform() accept team names unseen during fit
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Fit the preprocessing on the training rows only, then apply it to the test rows
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Train a RandomForestRegressor for home team goals
home_goal_model = RandomForestRegressor(n_estimators=100, random_state=42)
home_goal_model.fit(X_train, y_home_train)

# Train a RandomForestRegressor for away team goals
away_goal_model = RandomForestRegressor(n_estimators=100, random_state=42)
away_goal_model.fit(X_train, y_away_train)

# Evaluate the models on the held-out rows
y_home_pred = home_goal_model.predict(X_test)
y_away_pred = away_goal_model.predict(X_test)

rmse_home = root_mean_squared_error(y_home_test, y_home_pred)
rmse_away = root_mean_squared_error(y_away_test, y_away_pred)
print(f'RMSE for home goals: {rmse_home:.2f}')
print(f'RMSE for away goals: {rmse_away:.2f}')


def custom_round(value, threshold=0.8):
    """Convert a predicted (fractional) goal count into a whole number of goals.

    The value is rounded up only when its fractional part reaches
    ``threshold``; otherwise it is truncated.

    Args:
        value: Predicted goal count (any real number convertible to float).
        threshold: Fractional part at or above which the value rounds up.
            Defaults to 0.8.

    Returns:
        int: A non-negative whole number of goals.
    """
    value = float(value)

    # A goal count can never be negative.
    if value <= 0:
        return 0

    whole = int(value)
    fraction = value - whole

    # Small tolerance: binary floats give e.g. 2.8 - 2 == 0.7999999999999998,
    # which would otherwise miss the ">= .8" rule.
    if fraction >= threshold - 1e-9:
        return whole + 1
    return whole


def calculate_average_form(team, historical_matches, home_or_away, window=5):
    """Return a team's mean goal difference over its most recent matches.

    Args:
        team: Team name to look up.
        historical_matches: DataFrame with 'HomeTeam', 'AwayTeam' and
            'goal_difference' columns.
        home_or_away: 'home' selects the rows where the team is 'HomeTeam';
            any other value selects the rows where it is 'AwayTeam'.
        window: Number of most recent matching rows to average. Defaults to 5.

    Returns:
        The rolling mean of 'goal_difference' at the team's last matching row.
        NaN when the team has fewer than ``window`` matching rows (including
        none at all).
    """
    team_column = 'HomeTeam' if home_or_away == 'home' else 'AwayTeam'
    goal_diffs = historical_matches.loc[
        historical_matches[team_column] == team, 'goal_difference'
    ]

    # Without this guard .iloc[-1] raises IndexError for a team with no rows.
    if goal_diffs.empty:
        return float('nan')

    return goal_diffs.rolling(window).mean().iloc[-1]

# Note: calculate_average_form returns a single number, the team's average goal difference over its last 5 home (or away) matches. It can be positive, negative, or zero.

def predict_match(HomeTeam, AwayTeam, historical_matches):
    """Predict the score of a fixture, print it, and return the outcome label.

    Args:
        HomeTeam: Name of the home side.
        AwayTeam: Name of the away side.
        historical_matches: DataFrame with 'HomeTeam', 'AwayTeam' and
            'goal_difference' columns, used to derive both form features.

    Returns:
        'Home Win', 'Away Win' or 'Draw', based on the rounded goal predictions.
    """
    # Home side: rolling form needs at least five home matches; with fewer,
    # fall back to the plain mean of whatever is available.
    home_rows = historical_matches[historical_matches['HomeTeam'] == HomeTeam]
    if home_rows.shape[0] >= 5:
        home_team_form = calculate_average_form(HomeTeam, historical_matches, 'home')
    else:
        home_team_form = home_rows['goal_difference'].mean()

    # Away side: same rule applied to the team's away matches.
    away_rows = historical_matches[historical_matches['AwayTeam'] == AwayTeam]
    if away_rows.shape[0] >= 5:
        away_team_form = calculate_average_form(AwayTeam, historical_matches, 'away')
    else:
        away_team_form = away_rows['goal_difference'].mean()

    # A team with no usable history yields NaN; use a neutral form of 0 instead.
    home_team_form = 0 if pd.isna(home_team_form) else home_team_form
    away_team_form = 0 if pd.isna(away_team_form) else away_team_form

    # Single-row frame with the same columns the preprocessor was fitted on
    fixture = {
        'HomeTeam': [HomeTeam],
        'AwayTeam': [AwayTeam],
        'home_team_form': [home_team_form],
        'away_team_form': [away_team_form],
    }
    new_match = pd.DataFrame(fixture)

    # Apply the fitted preprocessing, then query one model per side
    new_match_preprocessed = preprocessor.transform(new_match)
    home_goals = custom_round(home_goal_model.predict(new_match_preprocessed)[0])
    away_goals = custom_round(away_goal_model.predict(new_match_preprocessed)[0])

    print(f'Predicted goals: {HomeTeam} {home_goals:} - {away_goals:} {AwayTeam}')

    # Map the predicted scoreline to an outcome label
    if home_goals == away_goals:
        return 'Draw'
    return 'Home Win' if home_goals > away_goals else 'Away Win'

# Example usage: predict one fixture using the full match history
historical_matches = matches.copy()
HomeTeam, AwayTeam = 'Arsenal', 'Leeds'
result = predict_match(HomeTeam, AwayTeam, historical_matches)

# Turn the outcome label returned by predict_match into a readable message
if result in ('Home Win', 'Away Win'):
    winning_team = HomeTeam if result == 'Home Win' else AwayTeam
    print(f'The match result prediction: {winning_team} wins the match!')
else:
    winning_team = 'Draw'
    print('The match result prediction: The match ends in a draw!')

FileNotFoundError: [Errno 2] No such file or directory: 'totaldata.csv'

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import warnings

# Ignore warnings (blanket filter: this silences every warning category)
warnings.filterwarnings('ignore')

# Load the dataset
matches = pd.read_csv('epldata.csv')

# Drop rows that lack one of the columns actually used below. A blanket
# dropna() would also discard matches for gaps in unused columns -- including
# the NaN form values present once the processed frame has been written back
# to 'epldata.csv', which would shrink the data on every re-run.
matches.dropna(subset=['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG'], inplace=True)

# Define the target variables for the scores
y_home = matches['FTHG']  # Full Time Home Goals
y_away = matches['FTAG']  # Full Time Away Goals

# Create new features.
# goal_difference is computed from the targets, so each row's form must only
# use *earlier* matches: shift(1) drops the current match from its own rolling
# window (otherwise the feature leaks the value being predicted).
# NOTE(review): this assumes the rows are in chronological order -- confirm.
matches['goal_difference'] = matches['FTHG'] - matches['FTAG']
matches['home_team_form'] = (
    matches.groupby('HomeTeam')['goal_difference']
    .transform(lambda diffs: diffs.shift(1).rolling(5).mean())
)
matches['away_team_form'] = (
    matches.groupby('AwayTeam')['goal_difference']
    .transform(lambda diffs: diffs.shift(1).rolling(5).mean())
)

# Define the features; rows without a full window of earlier matches get a
# neutral form of 0
features = ['HomeTeam', 'AwayTeam', 'home_team_form', 'away_team_form']
X = matches[features].fillna(0)

# Data preprocessing: scale the numeric form features, one-hot encode teams
numeric_features = ['home_team_form', 'away_team_form']
categorical_features = ['HomeTeam', 'AwayTeam']

numeric_transformer = StandardScaler()
# handle_unknown='ignore' lets transform() accept team names unseen during fit
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Split the data into training and test sets. A single call keeps the feature
# rows and both targets aligned on exactly the same partition.
X_train, X_test, y_home_train, y_home_test, y_away_train, y_away_test = train_test_split(
    X, y_home, y_away, test_size=0.2, random_state=42)

# Pipeline for the home-goals XGBoost model
home_goal_model = XGBRegressor(objective='reg:squarederror', random_state=42)
pipeline_home = Pipeline(steps=[('preprocessor', preprocessor),
                                 ('model', home_goal_model)])

# Hyperparameter search space for the home model
param_grid_home = {
    'model__n_estimators': [100, 200, 500],
    'model__max_depth': [3, 5, 7],
    'model__learning_rate': [0.01, 0.1, 0.2],
    'model__subsample': [0.8, 1.0]
}

grid_search_home = GridSearchCV(pipeline_home, param_grid_home, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)
grid_search_home.fit(X_train, y_home_train)

# Keep the best pipeline found by the search and score it on the test rows
best_pipeline_home = grid_search_home.best_estimator_
y_home_pred = best_pipeline_home.predict(X_test)
rmse_home = np.sqrt(mean_squared_error(y_home_test, y_home_pred))
print(f'Optimized RMSE pour les buts à domicile: {rmse_home:.2f}')

# Pipeline for the away-goals XGBoost model
away_goal_model = XGBRegressor(objective='reg:squarederror', random_state=42)
pipeline_away = Pipeline(steps=[('preprocessor', preprocessor),
                                 ('model', away_goal_model)])

# The away model searches the same space as the home model
param_grid_away = dict(param_grid_home)

grid_search_away = GridSearchCV(pipeline_away, param_grid_away, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)
grid_search_away.fit(X_train, y_away_train)

# Keep the best pipeline found by the search and score it on the test rows
best_pipeline_away = grid_search_away.best_estimator_
y_away_pred = best_pipeline_away.predict(X_test)
rmse_away = np.sqrt(mean_squared_error(y_away_test, y_away_pred))
print(f'Optimized RMSE pour les buts à l’extérieur: {rmse_away:.2f}')

# Fonction de prédiction de match
def custom_round(value):
    """Round a predicted goal count to the nearest integer, never below zero."""
    nearest = int(round(value))
    return nearest if nearest > 0 else 0

def predict_match(HomeTeam, AwayTeam, historical_matches):
    """Predict the score of a fixture, print it, and return the outcome label.

    Args:
        HomeTeam: Name of the home side.
        AwayTeam: Name of the away side.
        historical_matches: DataFrame with 'HomeTeam', 'AwayTeam' and
            'goal_difference' columns, used to derive both form features.

    Returns:
        'Home Win', 'Away Win' or 'Draw', based on the rounded goal predictions.
    """
    def _recent_form(team_column, team, window=5):
        # Mean goal difference over the team's last `window` matching rows
        # (all of them when fewer exist); 0 when the team has no rows at all.
        goal_diffs = historical_matches.loc[
            historical_matches[team_column] == team, 'goal_difference'
        ]
        form = goal_diffs.tail(window).mean()
        return 0 if pd.isna(form) else form

    # The models are trained on a 5-match rolling mean, so the inference
    # features use the most recent 5 matches rather than the all-time mean.
    home_team_form = _recent_form('HomeTeam', HomeTeam)
    away_team_form = _recent_form('AwayTeam', AwayTeam)

    new_match = pd.DataFrame({
        'HomeTeam': [HomeTeam],
        'AwayTeam': [AwayTeam],
        'home_team_form': [home_team_form],
        'away_team_form': [away_team_form]
    })

    # The fitted pipelines handle both preprocessing and prediction
    home_goals = custom_round(best_pipeline_home.predict(new_match)[0])
    away_goals = custom_round(best_pipeline_away.predict(new_match)[0])

    print(f'Predicted goals: {HomeTeam} {home_goals} - {away_goals} {AwayTeam}')

    if home_goals > away_goals:
        return 'Home Win'
    elif home_goals < away_goals:
        return 'Away Win'
    else:
        return 'Draw'
# End of the training script: persist the fitted artefacts
import joblib

# Save the tuned pipelines
joblib.dump(best_pipeline_home, 'home_model.joblib')
joblib.dump(best_pipeline_away, 'away_model.joblib')

# Also save the dataset for future predictions.
# NOTE(review): this writes the processed frame (rows dropped, derived columns
# added) over 'epldata.csv', the same file this cell loads -- confirm that
# replacing the input data is intended.
matches.to_csv('epldata.csv', index=False)

# Example usage
historical_matches = matches.copy()
HomeTeam, AwayTeam = 'Liverpool', 'Leicester'
result = predict_match(HomeTeam, AwayTeam, historical_matches)

# Report the predicted outcome
if result not in ('Home Win', 'Away Win'):
    print('La prédiction du résultat du match : Match nul !')
elif result == 'Home Win':
    print(f'La prédiction du résultat du match : {HomeTeam} gagne !')
else:
    print(f'La prédiction du résultat du match : {AwayTeam} gagne !')



FileNotFoundError: [Errno 2] No such file or directory: 'epldata.csv'

In [None]:

# Example usage: predict a second fixture from the same match history
historical_matches = matches.copy()
HomeTeam, AwayTeam = 'Liverpool', 'Arsenal'
result = predict_match(HomeTeam, AwayTeam, historical_matches)

# Report the predicted outcome
if result not in ('Home Win', 'Away Win'):
    print('La prédiction du résultat du match : Match nul !')
elif result == 'Home Win':
    print(f'La prédiction du résultat du match : {HomeTeam} gagne !')
else:
    print(f'La prédiction du résultat du match : {AwayTeam} gagne !')

Predicted goals: Liverpool 1 - 1 Arsenal
La prédiction du résultat du match : Match nul !
