# Championship 2023-2024 Predictions

In [161]:
# import data
import pandas as pd
import numpy as np

#df = pd.read_csv('F2.csv')
df = pd.read_csv('ligue_2.csv')
#df = pd.read_csv('epl/E0 4.csv')
df.tail()

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
195,F2,13/01/2024,18:00,Quevilly Rouen,Guingamp,0,1,A,0,0,...,1.51,0.0,2.05,1.8,2.14,1.78,2.14,1.84,2.03,1.79
196,F2,13/01/2024,18:00,Rodez,Pau FC,2,1,H,0,0,...,2.1,-0.5,1.83,2.03,1.86,2.03,1.9,2.07,1.81,1.99
197,F2,13/01/2024,18:00,Troyes,Ajaccio,3,1,H,1,1,...,1.45,0.0,1.98,1.88,1.99,1.9,2.04,1.92,1.95,1.86
198,F2,13/01/2024,18:00,Valenciennes,Amiens,0,1,A,0,1,...,1.46,0.25,1.8,2.05,1.81,2.09,1.87,2.12,1.77,2.05
199,F2,15/01/2024,19:45,Auxerre,Bordeaux,3,1,H,1,1,...,2.09,-0.5,1.93,1.93,1.92,1.97,1.93,2.15,1.84,1.95


In [162]:
## Data Cleaning

In [163]:
# steps:
# 1. Check for missing values
missing_values = df.isnull().sum()

# 2. Check for duplicates
duplicate_rows = df.duplicated().sum()

# Display the results of the checks
missing_values, duplicate_rows

(Div         0
 Date        0
 Time        0
 HomeTeam    0
 AwayTeam    0
            ..
 PCAHA       0
 MaxCAHH     0
 MaxCAHA     0
 AvgCAHH     0
 AvgCAHA     0
 Length: 105, dtype: int64,
 0)

In [164]:
# Checking for missing values
missing_values = df.isnull().sum()




# Checking the balance of the target variable 'FTR'
target_distribution = df['FTR'].value_counts()

# drop the div column
df.drop('Div', axis=1, inplace=True)
df.drop('Time', axis=1, inplace=True)

missing_values, target_distribution


(Div         0
 Date        0
 Time        0
 HomeTeam    0
 AwayTeam    0
            ..
 PCAHA       0
 MaxCAHH     0
 MaxCAHA     0
 AvgCAHH     0
 AvgCAHA     0
 Length: 105, dtype: int64,
 FTR
 H    80
 A    61
 D    59
 Name: count, dtype: int64)

In [165]:
# covert date to datetime
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')

## Feature engineering

### steps:
* Encode Team Names: Use label encoding for 'HomeTeam' and 'AwayTeam'. This will convert team names into numeric values, making them usable for the model.

* Recent Form: Calculate the recent form for each team based on the last 5 matches. We'll use the 'FTR' column to determine wins (W), losses (L), and draws (D). This feature will provide insight into the current performance of the teams.

* Average Goals per Game: Compute the average goals scored per game for both home and away teams. This feature helps understand the offensive strength of the teams.

* Team Points: Calculate the total points accumulated by each team so far in the season. Points are awarded based on wins (3 points), draws (1 point), and losses (0 points).

* Head-to-Head Statistics: Analyze the outcomes of matches between the same pairs of teams earlier in the season.

* Other Statistical Features: Depending on the data available, we can include additional features like average possession, number of shots on target, defensive strength, etc.

In [166]:
# Reset the index of the DataFrame (if necessary)
df.reset_index(drop=True, inplace=True)

# Rolling averages for goals
window_sizes = [3, 5]
for window in window_sizes:
    df[f'HomeGoalsScoredAvg_{window}'] = df.groupby('HomeTeam')['FTHG'].transform(lambda x: x.rolling(window, min_periods=1).mean())
    df[f'AwayGoalsScoredAvg_{window}'] = df.groupby('AwayTeam')['FTAG'].transform(lambda x: x.rolling(window, min_periods=1).mean())
    df[f'HomeGoalsConcededAvg_{window}'] = df.groupby('HomeTeam')['FTAG'].transform(lambda x: x.rolling(window, min_periods=1).mean())
    df[f'AwayGoalsConcededAvg_{window}'] = df.groupby('AwayTeam')['FTHG'].transform(lambda x: x.rolling(window, min_periods=1).mean())



In [167]:

# Variance in team performance
df['HomeGoalsScoredVariance'] = df.groupby('HomeTeam')['FTHG'].transform(lambda x: x.rolling(5, min_periods=1).var())
df['AwayGoalsScoredVariance'] = df.groupby('AwayTeam')['FTAG'].transform(lambda x: x.rolling(5, min_periods=1).var())




In [168]:


from sklearn.preprocessing import LabelEncoder

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Combine all unique team names from both HomeTeam and AwayTeam columns
all_teams = pd.concat([df['HomeTeam'], df['AwayTeam']]).unique()

# Fit the LabelEncoder with all unique team names
label_encoder.fit(all_teams)

# Transform both HomeTeam and AwayTeam using the fitted LabelEncoder
df['HomeTeam_encoded'] = label_encoder.transform(df['HomeTeam'])
df['AwayTeam_encoded'] = label_encoder.transform(df['AwayTeam'])

# # Now you can transform individual team names
# home_team_encoded = label_encoder.transform(['St Etienne'])[0]
# away_team_encoded = label_encoder.transform(['Laval'])[0]

# # Debugging: Print encoded values
# print(f"Encoded Home Team: {home_team_encoded}, Encoded Away Team: {away_team_encoded}")

print(label_encoder.classes_)


['Ajaccio' 'Amiens' 'Angers' 'Annecy' 'Auxerre' 'Bastia' 'Bordeaux' 'Caen'
 'Concarneau' 'Dunkerque' 'Grenoble' 'Guingamp' 'Laval' 'Paris FC'
 'Pau FC' 'Quevilly Rouen' 'Rodez' 'St Etienne' 'Troyes' 'Valenciennes']


In [169]:
df.tail()

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,HS,...,HomeGoalsConcededAvg_3,AwayGoalsConcededAvg_3,HomeGoalsScoredAvg_5,AwayGoalsScoredAvg_5,HomeGoalsConcededAvg_5,AwayGoalsConcededAvg_5,HomeGoalsScoredVariance,AwayGoalsScoredVariance,HomeTeam_encoded,AwayTeam_encoded
195,2024-01-13,Quevilly Rouen,Guingamp,0,1,A,0,0,D,10,...,1.0,0.666667,1.4,1.0,1.2,0.4,2.3,1.5,15,11
196,2024-01-13,Rodez,Pau FC,2,1,H,0,0,D,11,...,1.0,1.333333,1.4,1.2,0.8,1.4,0.8,0.7,16,14
197,2024-01-13,Troyes,Ajaccio,3,1,H,1,1,D,12,...,1.666667,2.333333,1.6,1.0,1.4,1.8,1.3,1.5,18,0
198,2024-01-13,Valenciennes,Amiens,0,1,A,0,1,A,11,...,0.666667,1.333333,0.4,1.0,1.0,1.0,0.8,0.5,19,1
199,2024-01-15,Auxerre,Bordeaux,3,1,H,1,1,D,18,...,1.0,2.0,2.6,1.6,1.2,2.0,3.3,0.3,4,6


In [170]:
# Encoding the target variable 'FTR'
from sklearn.preprocessing import LabelEncoder

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform the target variable using label_encoder_y
df['FTR_encoded'] = label_encoder.fit_transform(df['FTR'])




In [171]:

# Average points per game
df['HomeTeamPoints'] = df.groupby('HomeTeam')['FTR_encoded'].transform(lambda x: x.expanding().sum().shift(1)) / df.groupby('HomeTeam')['FTR_encoded'].transform(lambda x: x.expanding().count().shift(1))
df['AwayTeamPoints'] = df.groupby('AwayTeam')['FTR_encoded'].transform(lambda x: x.expanding().sum().shift(1)) / df.groupby('AwayTeam')['FTR_encoded'].transform(lambda x: x.expanding().count().shift(1))



In [172]:
# Average goals per game
df['HomeTeamAvgGoals'] = df.groupby('HomeTeam')['FTHG'].transform(lambda x: x.expanding().mean().shift(1)) # Average goals scored by the home team
df['AwayTeamAvgGoals'] = df.groupby('AwayTeam')['FTAG'].transform(lambda x: x.expanding().mean().shift(1)) # Average goals scored by the away team



In [173]:
# recent form
def calculate_form_points(team, df):
    # Get the last 5 matches of the home and away teams
    home_matches = df[df['HomeTeam'] == team].tail(5)
    away_matches = df[df['AwayTeam'] == team].tail(5)

    # Calculate the points obtained in the last 5 matches
    home_points = home_matches['FTR_encoded'].sum()
    away_points = away_matches['FTR_encoded'].sum()

    # Calculate the average points obtained
    home_avg_points = home_points / 15
    away_avg_points = away_points / 15

    # Return the average points
    return home_avg_points, away_avg_points

# Calculate the average points obtained by each team in the last 5 matches
df['HomeTeamRecentForm'], df['AwayTeamRecentForm'] = zip(*df['HomeTeam'].apply(lambda x: calculate_form_points(x, df)))



### Recent Form

In [174]:
from sklearn.model_selection import train_test_split

# Convert 'Date' to datetime and extract useful features
df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')
df['DayOfWeek'] = df['Date'].dt.dayofweek  # 0: Monday, 6: Sunday
df['IsWeekend'] = df['DayOfWeek'].isin([5, 6]).astype(int)  # 1 for weekend, 0 for weekdays



In [175]:
#select relevant columns or features and the target
features = [
    'HomeTeam_encoded', 'AwayTeam_encoded', 'HomeTeamRecentForm', 'AwayTeamRecentForm', 
    'HomeTeamAvgGoals', 'AwayTeamAvgGoals', 'HomeTeamPoints', 'AwayTeamPoints', 'HomeGoalsScoredVariance', 'AwayGoalsScoredVariance', 'HomeGoalsScoredAvg_3', 'AwayGoalsScoredAvg_3', 'HomeGoalsConcededAvg_3', 'AwayGoalsConcededAvg_3', 'HomeGoalsScoredAvg_5', 'AwayGoalsScoredAvg_5', 'HomeGoalsConcededAvg_5', 'AwayGoalsConcededAvg_5',
]

target = 'FTR_encoded'

# Split the data into training and test sets
X = df[features]
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_test_split(X, y, test_size=0.2, random_state=42)

# Previewing the processed features
X_train.head(7), y_train.head(7)

# encoding the categorical variables
from sklearn.preprocessing import LabelEncoder

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform the target variable using label_encoder_y
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)


In [176]:
# scaling the features
from sklearn.preprocessing import StandardScaler

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the StandardScaler
scaler.fit(X_train)

# Transform both the training and testing sets
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



### Create Model


In [177]:
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize the models
rf_model = RandomForestClassifier(random_state=42)
xgb_model = XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')
lgbm_model = LGBMClassifier(random_state=42)   
catboost_model = CatBoostClassifier(random_state=42, verbose=False)


# Train the models
#rf_model.fit(X_train, y_train_encoded)
xgb_model.fit(X_train_scaled, y_train_encoded)
lgbm_model.fit(X_train_scaled, y_train_encoded)
catboost_model.fit(X_train_scaled, y_train_encoded)







[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000081 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 350
[LightGBM] [Info] Number of data points in the train set: 160, number of used features: 18
[LightGBM] [Info] Start training from score -1.225026
[LightGBM] [Info] Start training from score -1.086190
[LightGBM] [Info] Start training from score -0.997636


<catboost.core.CatBoostClassifier at 0x195040df0d0>

In [178]:
from sklearn.metrics import roc_auc_score

#evaluate the models using the metrics
models = [ xgb_model, lgbm_model, catboost_model]
model_names = [ 'XGBoost', 'LightGBM', 'CatBoost', 'AdaBoost']

for model, name in zip(models, model_names):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
    print(f"{name} Model Metrics")
    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1}")
    print("\n")
    
    

XGBoost Model Metrics
Accuracy: 0.275
Precision: 0.4656084656084656
Recall: 0.3428571428571428
F1 Score: 0.27414021164021163


LightGBM Model Metrics
Accuracy: 0.25
Precision: 0.5106732348111659
Recall: 0.3349206349206349
F1 Score: 0.2607282913165266


CatBoost Model Metrics
Accuracy: 0.225
Precision: 0.38095238095238093
Recall: 0.3460317460317461
F1 Score: 0.209013209013209



# creating objective function and hyper opts for hyperparaemters

In [179]:
from hyperopt import hp
import numpy as np
# Define the hyperparameter space
space = {
    'iterations': hp.quniform('iterations', 100, 1000, 50),
    'depth': hp.choice('depth', np.arange(3, 11, dtype=int)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'l2_leaf_reg': hp.uniform('l2_leaf_reg', 1, 10),
    'bagging_temperature': hp.uniform('bagging_temperature', 0, 1),
    'random_strength': hp.uniform('random_strength', 0, 1),

    
    
    
}


In [180]:
from sklearn.model_selection import cross_val_score
from hyperopt import STATUS_OK
from sklearn.model_selection import StratifiedKFold


def objective(params):
    params['iterations'] = int(params['iterations'])
    clf = CatBoostClassifier(**params, loss_function='MultiClass', verbose=False)
    score = cross_val_score(clf, X_train_scaled, y_train_encoded, scoring='accuracy', cv=StratifiedKFold(10)).mean()
    return {'loss': -score, 'status': STATUS_OK}



In [181]:
# run the hyperparameter optimization
from hyperopt import tpe, Trials, fmin

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)


100%|██████████| 10/10 [05:19<00:00, 31.90s/trial, best loss: -0.65625]


In [182]:
from sklearn.metrics import classification_report

best_params = {k: int(v) if k in ['iterations', 'depth'] else v for k, v in best.items()}
final_model = CatBoostClassifier(**best_params, loss_function='MultiClass', eval_metric='Accuracy', verbose=False)
final_model.fit(X_train_scaled, y_train_encoded)

# Evaluate the model
y_pred = final_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred, zero_division=0))

              precision    recall  f1-score   support

           0       1.00      0.64      0.78        14
           1       0.31      0.80      0.44         5
           2       0.89      0.76      0.82        21

    accuracy                           0.73        40
   macro avg       0.73      0.73      0.68        40
weighted avg       0.86      0.72      0.76        40


# Xgboost with params

In [191]:
# hyperparameter tuning for xgboost using hyperopt
from hyperopt import hp
import numpy as np
# Define the hyperparameter space

space = {
    'n_estimators': hp.quniform('n_estimators', 100, 1000, 50),
    'max_depth': hp.choice('max_depth', np.arange(3, 11, dtype=int)),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.2)),
    'reg_lambda': hp.uniform('reg_lambda', 1, 10),
    'reg_alpha': hp.uniform('reg_alpha', 1, 10),
    'gamma': hp.uniform('gamma', 0, 1),
    'min_child_weight': hp.uniform('min_child_weight', 0, 10)
    
}

In [192]:
from hyperopt import STATUS_OK
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from xgboost import XGBClassifier

def objective(params):
    params['n_estimators'] = int(params['n_estimators'])
    clf = XGBClassifier(**params, objective='multi:softmax', num_class=3, eval_metric='mlogloss', verbosity=0)
    score = cross_val_score(clf, X_train_scaled, y_train_encoded, scoring='accuracy', cv=StratifiedKFold(10)).mean()
    return {'loss': -score, 'status': STATUS_OK}

In [193]:
# run the hyperparameter optimization
from hyperopt import tpe, Trials, fmin

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

from sklearn.metrics import classification_report

best_params = {k: int(v) if k in ['n_estimators', 'max_depth'] else v for k, v in best.items()}
best_model_2 = XGBClassifier(**best_params, objective='multi:softmax', num_class=3, eval_metric='mlogloss', verbosity=0)
best_model_2.fit(X_train_scaled, y_train_encoded)

100%|██████████| 10/10 [00:36<00:00,  3.66s/trial, best loss: -0.60625]


In [196]:
# Evaluate the model
y_pred = best_model_2.predict(X_test_scaled)
print(classification_report(y_test, y_pred, zero_division=0))


      

              precision    recall  f1-score   support

           0       0.80      0.57      0.67        14
           1       0.22      0.40      0.29         5
           2       0.76      0.76      0.76        21

    accuracy                           0.65        40
   macro avg       0.59      0.58      0.57        40
weighted avg       0.71      0.65      0.67        40


In [183]:

from sklearn.preprocessing import LabelEncoder

# Initialize the LabelEncoder
label_encoder = LabelEncoder()

# Combine all unique team names from both HomeTeam and AwayTeam columns
all_teams = pd.concat([df['HomeTeam'], df['AwayTeam']]).unique()

# Fit the LabelEncoder with all unique team names
label_encoder.fit(all_teams)

# Transform both HomeTeam and AwayTeam using the fitted LabelEncoder
df['HomeTeam_encoded'] = label_encoder.transform(df['HomeTeam'])
df['AwayTeam_encoded'] = label_encoder.transform(df['AwayTeam'])

# # Now you can transform individual team names
# home_team_encoded = label_encoder.transform(['St Etienne'])[0]
# away_team_encoded = label_encoder.transform(['Laval'])[0]
# 
# # Debugging: Print encoded values
# print(f"Encoded Home Team: {home_team_encoded}, Encoded Away Team: {away_team_encoded}")

print(label_encoder.classes_)


['Ajaccio' 'Amiens' 'Angers' 'Annecy' 'Auxerre' 'Bastia' 'Bordeaux' 'Caen'
 'Concarneau' 'Dunkerque' 'Grenoble' 'Guingamp' 'Laval' 'Paris FC'
 'Pau FC' 'Quevilly Rouen' 'Rodez' 'St Etienne' 'Troyes' 'Valenciennes']


In [184]:

def predict_match(home_team, away_team, xgb_model, df, label_encoder):
     # Debugging: Check if the team names are in label_encoder's classes
    if home_team not in label_encoder.classes_:
        raise ValueError(f"Home team name '{home_team}' not recognized.")
    if away_team not in label_encoder.classes_:
        raise ValueError(f"Away team name '{away_team}' not recognized.")

    # Transform team names
    home_team_encoded = label_encoder.transform([home_team])[0]
    away_team_encoded = label_encoder.transform([away_team])[0]

    # Debugging: Print encoded values
    print(f"Encoded Home Team: {home_team_encoded}, Encoded Away Team: {away_team_encoded}")

    # Prepare the match data with relevant features
    match_data = {
        'HomeTeam_encoded': label_encoder.transform([home_team])[0],
        'AwayTeam_encoded': label_encoder.transform([away_team])[0],
        'HomeTeamRecentForm': calculate_form_points(home_team, df)[0],
        'AwayTeamRecentForm': calculate_form_points(away_team, df)[1],
        'HomeTeamAvgGoals': df[df['HomeTeam'] == home_team]['FTHG'].mean(),
        'AwayTeamAvgGoals': df[df['AwayTeam'] == away_team]['FTAG'].mean(),
        'HomeTeamPoints': df[df['HomeTeam'] == home_team]['FTR_encoded'].sum() / df[df['HomeTeam'] == home_team]['FTR_encoded'].count(),
        'AwayTeamPoints': df[df['AwayTeam'] == away_team]['FTR_encoded'].sum() / df[df['AwayTeam'] == away_team]['FTR_encoded'].count(),
        'HomeGoalsScoredVariance': df[df['HomeTeam'] == home_team]['FTHG'].var(),
        'AwayGoalsScoredVariance': df[df['AwayTeam'] == away_team]['FTAG'].var(),
        'HomeGoalsScoredAvg_3': df[df['HomeTeam'] == home_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_3': df[df['AwayTeam'] == away_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_3': df[df['HomeTeam'] == home_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_3': df[df['AwayTeam'] == away_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsScoredAvg_5': df[df['HomeTeam'] == home_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_5': df[df['AwayTeam'] == away_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_5': df[df['HomeTeam'] == home_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_5': df[df['AwayTeam'] == away_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        
        # Add other features as required by the model, calculated or retrieved as done during training
        # ...
    }

    match_df = pd.DataFrame([match_data])

    # Make predictions
    probabilities = xgb_model.predict_proba(match_df)[0]
    home_team_win_prob = probabilities[0]
    draw_prob = probabilities[1]
    away_team_win_prob = probabilities[2]

    print(f"{home_team} win probability: {home_team_win_prob}")
    print(f"Draw probability: {draw_prob}")
    print(f"{away_team} win probability: {away_team_win_prob}")

    return home_team_win_prob, draw_prob, away_team_win_prob



print('##########################')
print('Today\'s matches')

predict_match('Amiens', 'Annecy', xgb_model, df, label_encoder)

predict_match('Angers', 'Quevilly Rouen', xgb_model, df, label_encoder)

predict_match('Bordeaux', 'Valenciennes', xgb_model, df, label_encoder)

predict_match('Concarneau', 'Troyes', xgb_model, df, label_encoder)

predict_match('Dunkerque', 'Ajaccio', xgb_model, df, label_encoder)

predict_match('Guingamp', 'Rodez', xgb_model, df, label_encoder)

predict_match('Laval', 'Paris FC', xgb_model, df, label_encoder)

predict_match('Pau FC', 'St Etienne', xgb_model, df, label_encoder)

predict_match('Bastia', 'Caen', xgb_model, df, label_encoder)

# predict_match('Sheffield United', 'West Ham', xgb_model, df, label_encoder)
# 
# predict_match('Bournemouth', 'Liverpool', xgb_model, df, label_encoder)

##########################
Today's matches
Encoded Home Team: 1, Encoded Away Team: 3
Amiens win probability: 0.3297712504863739
Draw probability: 0.6173827648162842
Annecy win probability: 0.05284600704908371
Encoded Home Team: 2, Encoded Away Team: 15
Angers win probability: 0.12891191244125366
Draw probability: 0.5698744654655457
Quevilly Rouen win probability: 0.30121365189552307
Encoded Home Team: 6, Encoded Away Team: 19
Bordeaux win probability: 0.4814999997615814
Draw probability: 0.42215490341186523
Valenciennes win probability: 0.09634514153003693
Encoded Home Team: 8, Encoded Away Team: 18
Concarneau win probability: 0.17260992527008057
Draw probability: 0.8124164938926697
Troyes win probability: 0.01497358363121748
Encoded Home Team: 9, Encoded Away Team: 0
Dunkerque win probability: 0.7940241694450378
Draw probability: 0.08733724057674408
Ajaccio win probability: 0.11863861232995987
Encoded Home Team: 11, Encoded Away Team: 16
Guingamp win probability: 0.1839970350265503
D

(0.29326, 0.493308, 0.21343201)

# xgboost with hyperprams predictions

In [197]:

def predict_match(home_team, away_team, best_model_2, df, label_encoder):
     # Debugging: Check if the team names are in label_encoder's classes
    if home_team not in label_encoder.classes_:
        raise ValueError(f"Home team name '{home_team}' not recognized.")
    if away_team not in label_encoder.classes_:
        raise ValueError(f"Away team name '{away_team}' not recognized.")

    # Transform team names
    home_team_encoded = label_encoder.transform([home_team])[0]
    away_team_encoded = label_encoder.transform([away_team])[0]

    # Debugging: Print encoded values
    print(f"Encoded Home Team: {home_team_encoded}, Encoded Away Team: {away_team_encoded}")

    # Prepare the match data with relevant features
    match_data = {
        'HomeTeam_encoded': label_encoder.transform([home_team])[0],
        'AwayTeam_encoded': label_encoder.transform([away_team])[0],
        'HomeTeamRecentForm': calculate_form_points(home_team, df)[0],
        'AwayTeamRecentForm': calculate_form_points(away_team, df)[1],
        'HomeTeamAvgGoals': df[df['HomeTeam'] == home_team]['FTHG'].mean(),
        'AwayTeamAvgGoals': df[df['AwayTeam'] == away_team]['FTAG'].mean(),
        'HomeTeamPoints': df[df['HomeTeam'] == home_team]['FTR_encoded'].sum() / df[df['HomeTeam'] == home_team]['FTR_encoded'].count(),
        'AwayTeamPoints': df[df['AwayTeam'] == away_team]['FTR_encoded'].sum() / df[df['AwayTeam'] == away_team]['FTR_encoded'].count(),
        'HomeGoalsScoredVariance': df[df['HomeTeam'] == home_team]['FTHG'].var(),
        'AwayGoalsScoredVariance': df[df['AwayTeam'] == away_team]['FTAG'].var(),
        'HomeGoalsScoredAvg_3': df[df['HomeTeam'] == home_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_3': df[df['AwayTeam'] == away_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_3': df[df['HomeTeam'] == home_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_3': df[df['AwayTeam'] == away_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsScoredAvg_5': df[df['HomeTeam'] == home_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_5': df[df['AwayTeam'] == away_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_5': df[df['HomeTeam'] == home_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_5': df[df['AwayTeam'] == away_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        
        # Add other features as required by the model, calculated or retrieved as done during training
        # ...
    }

    match_df = pd.DataFrame([match_data])

    # Make predictions
    probabilities = best_model_2.predict_proba(match_df)[0]
    home_team_win_prob = probabilities[0]
    draw_prob = probabilities[1]
    away_team_win_prob = probabilities[2]

    print(f"{home_team} win probability: {home_team_win_prob}")
    print(f"Draw probability: {draw_prob}")
    print(f"{away_team} win probability: {away_team_win_prob}")

    return home_team_win_prob, draw_prob, away_team_win_prob



print('##########################')
print('Today\'s matches')

predict_match('Amiens', 'Annecy', best_model_2, df, label_encoder)

predict_match('Angers', 'Quevilly Rouen', best_model_2, df, label_encoder)

predict_match('Bordeaux', 'Valenciennes', best_model_2, df, label_encoder)

predict_match('Concarneau', 'Troyes', best_model_2, df, label_encoder)

predict_match('Dunkerque', 'Ajaccio', best_model_2, df, label_encoder)

predict_match('Guingamp', 'Rodez', best_model_2, df, label_encoder)

predict_match('Laval', 'Paris FC', best_model_2, df, label_encoder)

predict_match('Pau FC', 'St Etienne', best_model_2, df, label_encoder)

predict_match('Bastia', 'Caen', best_model_2, df, label_encoder)

predict_match('Grenoble', 'Auxerre', best_model_2, df, label_encoder)

##########################
Today's matches
Encoded Home Team: 1, Encoded Away Team: 3
Amiens win probability: 0.29702356457710266
Draw probability: 0.29851263761520386
Annecy win probability: 0.4044637382030487
Encoded Home Team: 2, Encoded Away Team: 15
Angers win probability: 0.2844446301460266
Draw probability: 0.28587064146995544
Quevilly Rouen win probability: 0.42968469858169556
Encoded Home Team: 6, Encoded Away Team: 19
Bordeaux win probability: 0.3038293719291687
Draw probability: 0.29562264680862427
Valenciennes win probability: 0.4005480110645294
Encoded Home Team: 8, Encoded Away Team: 18
Concarneau win probability: 0.3087896704673767
Draw probability: 0.3004489541053772
Troyes win probability: 0.3907614052295685
Encoded Home Team: 9, Encoded Away Team: 0
Dunkerque win probability: 0.4879855811595917
Draw probability: 0.2985893487930298
Ajaccio win probability: 0.21342508494853973
Encoded Home Team: 11, Encoded Away Team: 16
Guingamp win probability: 0.3038293719291687
Draw

(0.30117095, 0.3037606, 0.39506847)

# catboost with hyperparameter

In [185]:
def predict_match(home_team, away_team, final_model, df, label_encoder):
     # Debugging: Check if the team names are in label_encoder's classes
    if home_team not in label_encoder.classes_:
        raise ValueError(f"Home team name '{home_team}' not recognized.")
    if away_team not in label_encoder.classes_:
        raise ValueError(f"Away team name '{away_team}' not recognized.")

    # Transform team names
    home_team_encoded = label_encoder.transform([home_team])[0]
    away_team_encoded = label_encoder.transform([away_team])[0]

    # Debugging: Print encoded values
    print(f"Encoded Home Team: {home_team_encoded}, Encoded Away Team: {away_team_encoded}")

    # Prepare the match data with relevant features
    match_data = {
        'HomeTeam_encoded': label_encoder.transform([home_team])[0],
        'AwayTeam_encoded': label_encoder.transform([away_team])[0],
        'HomeTeamRecentForm': calculate_form_points(home_team, df)[0],
        'AwayTeamRecentForm': calculate_form_points(away_team, df)[1],
        'HomeTeamAvgGoals': df[df['HomeTeam'] == home_team]['FTHG'].mean(),
        'AwayTeamAvgGoals': df[df['AwayTeam'] == away_team]['FTAG'].mean(),
        'HomeTeamPoints': df[df['HomeTeam'] == home_team]['FTR_encoded'].sum() / df[df['HomeTeam'] == home_team]['FTR_encoded'].count(),
        'AwayTeamPoints': df[df['AwayTeam'] == away_team]['FTR_encoded'].sum() / df[df['AwayTeam'] == away_team]['FTR_encoded'].count(),
        'HomeGoalsScoredVariance': df[df['HomeTeam'] == home_team]['FTHG'].var(),
        'AwayGoalsScoredVariance': df[df['AwayTeam'] == away_team]['FTAG'].var(),
        'HomeGoalsScoredAvg_3': df[df['HomeTeam'] == home_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_3': df[df['AwayTeam'] == away_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_3': df[df['HomeTeam'] == home_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_3': df[df['AwayTeam'] == away_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsScoredAvg_5': df[df['HomeTeam'] == home_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_5': df[df['AwayTeam'] == away_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_5': df[df['HomeTeam'] == home_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_5': df[df['AwayTeam'] == away_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        
        # Add other features as required by the model, calculated or retrieved as done during training
        # ...
    }

    match_df = pd.DataFrame([match_data])

    # Make predictions
    probabilities = final_model.predict_proba(match_df)[0]
    home_team_win_prob = probabilities[0]
    draw_prob = probabilities[1]
    away_team_win_prob = probabilities[2]

    print(f"{home_team} win probability: {home_team_win_prob}")
    print(f"Draw probability: {draw_prob}")
    print(f"{away_team} win probability: {away_team_win_prob}")

    return home_team_win_prob, draw_prob, away_team_win_prob

print('Today\'s matches')

predict_match('Amiens', 'Annecy', final_model, df, label_encoder)

predict_match('Angers', 'Quevilly Rouen', final_model, df, label_encoder)

predict_match('Bordeaux', 'Valenciennes', final_model, df, label_encoder)

predict_match('Concarneau', 'Troyes', final_model, df, label_encoder)

predict_match('Dunkerque', 'Ajaccio', final_model, df, label_encoder)

predict_match('Guingamp', 'Rodez', final_model, df, label_encoder)

predict_match('Laval', 'Paris FC', final_model, df, label_encoder)

predict_match('Pau FC', 'St Etienne', final_model, df, label_encoder)

predict_match('Bastia', 'Caen', final_model, df, label_encoder)

predict_match('Grenoble', 'Auxerre', final_model, df, label_encoder)

Today's matches
Encoded Home Team: 1, Encoded Away Team: 3
Amiens win probability: 0.2761832621146922
Draw probability: 0.34683061143588423
Annecy win probability: 0.37698612644942353
Encoded Home Team: 2, Encoded Away Team: 15
Angers win probability: 0.285857621591056
Draw probability: 0.32580422267780695
Quevilly Rouen win probability: 0.3883381557311371
Encoded Home Team: 6, Encoded Away Team: 19
Bordeaux win probability: 0.2749811969814781
Draw probability: 0.3499158274161195
Valenciennes win probability: 0.37510297560240247
Encoded Home Team: 8, Encoded Away Team: 18
Concarneau win probability: 0.27640567800701127
Draw probability: 0.3749205156972292
Troyes win probability: 0.3486738062957595
Encoded Home Team: 9, Encoded Away Team: 0
Dunkerque win probability: 0.33224779935400117
Draw probability: 0.35225227973837886
Ajaccio win probability: 0.31549992090762
Encoded Home Team: 11, Encoded Away Team: 16
Guingamp win probability: 0.28017029808163113
Draw probability: 0.352675663533

(0.2753972886339138, 0.3682836562919781, 0.35631905507410816)

In [186]:

def predict_match(home_team, away_team, catboost_model, df, label_encoder):
     # Debugging: Check if the team names are in label_encoder's classes
    if home_team not in label_encoder.classes_:
        raise ValueError(f"Home team name '{home_team}' not recognized.")
    if away_team not in label_encoder.classes_:
        raise ValueError(f"Away team name '{away_team}' not recognized.")

    # Transform team names
    home_team_encoded = label_encoder.transform([home_team])[0]
    away_team_encoded = label_encoder.transform([away_team])[0]

    # Debugging: Print encoded values
    print(f"Encoded Home Team: {home_team_encoded}, Encoded Away Team: {away_team_encoded}")

    # Prepare the match data with relevant features
    match_data = {
        'HomeTeam_encoded': label_encoder.transform([home_team])[0],
        'AwayTeam_encoded': label_encoder.transform([away_team])[0],
        'HomeTeamRecentForm': calculate_form_points(home_team, df)[0],
        'AwayTeamRecentForm': calculate_form_points(away_team, df)[1],
        'HomeTeamAvgGoals': df[df['HomeTeam'] == home_team]['FTHG'].mean(),
        'AwayTeamAvgGoals': df[df['AwayTeam'] == away_team]['FTAG'].mean(),
        'HomeTeamPoints': df[df['HomeTeam'] == home_team]['FTR_encoded'].sum() / df[df['HomeTeam'] == home_team]['FTR_encoded'].count(),
        'AwayTeamPoints': df[df['AwayTeam'] == away_team]['FTR_encoded'].sum() / df[df['AwayTeam'] == away_team]['FTR_encoded'].count(),
        'HomeGoalsScoredVariance': df[df['HomeTeam'] == home_team]['FTHG'].var(),
        'AwayGoalsScoredVariance': df[df['AwayTeam'] == away_team]['FTAG'].var(),
        'HomeGoalsScoredAvg_3': df[df['HomeTeam'] == home_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_3': df[df['AwayTeam'] == away_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_3': df[df['HomeTeam'] == home_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_3': df[df['AwayTeam'] == away_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsScoredAvg_5': df[df['HomeTeam'] == home_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_5': df[df['AwayTeam'] == away_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_5': df[df['HomeTeam'] == home_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_5': df[df['AwayTeam'] == away_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        
        # Add other features as required by the model, calculated or retrieved as done during training
        # ...
    }

    match_df = pd.DataFrame([match_data])

    # Make predictions
    probabilities = catboost_model.predict_proba(match_df)[0]
    home_team_win_prob = probabilities[0]
    draw_prob = probabilities[1]
    away_team_win_prob = probabilities[2]

    print(f"{home_team} win probability: {home_team_win_prob}")
    print(f"Draw probability: {draw_prob}")
    print(f"{away_team} win probability: {away_team_win_prob}")

    return home_team_win_prob, draw_prob, away_team_win_prob



print('##########################')
print('Today\'s matches')

predict_match('Amiens', 'Annecy', catboost_model, df, label_encoder)

predict_match('Angers', 'Quevilly Rouen', catboost_model, df, label_encoder)

predict_match('Bordeaux', 'Valenciennes', catboost_model, df, label_encoder)

predict_match('Concarneau', 'Troyes', catboost_model, df, label_encoder)

predict_match('Dunkerque', 'Ajaccio', catboost_model, df, label_encoder)

predict_match('Guingamp', 'Rodez', catboost_model, df, label_encoder)

predict_match('Laval', 'Paris FC', catboost_model, df, label_encoder)

predict_match('Pau FC', 'St Etienne', catboost_model, df, label_encoder)

predict_match('Bastia', 'Caen', catboost_model, df, label_encoder)

# predict_match('Sheffield United', 'West Ham', catboost_model, df, label_encoder)
# 
# predict_match('Bournemouth', 'Liverpool', catboost_model, df, label_encoder)


##########################
Today's matches
Encoded Home Team: 1, Encoded Away Team: 3
Amiens win probability: 0.437402478311337
Draw probability: 0.3608135005431558
Annecy win probability: 0.20178402114550714
Encoded Home Team: 2, Encoded Away Team: 15
Angers win probability: 0.36771253137539134
Draw probability: 0.3454135584347304
Quevilly Rouen win probability: 0.2868739101898782
Encoded Home Team: 6, Encoded Away Team: 19
Bordeaux win probability: 0.4036275505763567
Draw probability: 0.40156919408249653
Valenciennes win probability: 0.19480325534114687
Encoded Home Team: 8, Encoded Away Team: 18
Concarneau win probability: 0.28516116388787066
Draw probability: 0.5685205471346554
Troyes win probability: 0.146318288977474
Encoded Home Team: 9, Encoded Away Team: 0
Dunkerque win probability: 0.5432330463279129
Draw probability: 0.31439639214400367
Ajaccio win probability: 0.14237056152808333
Encoded Home Team: 11, Encoded Away Team: 16
Guingamp win probability: 0.3980515676352216
Draw 

(0.43520478221026415, 0.32926718127872423, 0.2355280365110116)

In [187]:
from sklearn.impute import SimpleImputer
import numpy as np

# Initialize the imputer (you can change the strategy to 'median' or 'most_frequent' if more appropriate)
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

# Fit on the training data and transform both training and testing data
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)




## using random forest

In [188]:
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV



# Then scale the imputed data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Hyperparameter Tuning
model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
    
}
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='f1_macro')
grid_search.fit(X_train_scaled, y_train_encoded)

# Best Model Evaluation
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.89      0.57      0.70        14
           1       0.25      0.60      0.35         5
           2       0.89      0.81      0.85        21

    accuracy                           0.70        40
   macro avg       0.68      0.66      0.63        40
weighted avg       0.81      0.70      0.73        40


In [189]:

def predict_match(home_team, away_team, best_model, df, label_encoder,scaler):
     # Debugging: Check if the team names are in label_encoder's classes
    if home_team not in label_encoder.classes_:
        raise ValueError(f"Home team name '{home_team}' not recognized.")
    if away_team not in label_encoder.classes_:
        raise ValueError(f"Away team name '{away_team}' not recognized.")

    # Transform team names
    home_team_encoded = label_encoder.transform([home_team])[0]
    away_team_encoded = label_encoder.transform([away_team])[0]

    # Debugging: Print encoded values
    print(f"Encoded Home Team: {home_team_encoded}, Encoded Away Team: {away_team_encoded}")

    # Prepare the match data with relevant features
    match_data = {
        'HomeTeam_encoded': label_encoder.transform([home_team])[0],
        'AwayTeam_encoded': label_encoder.transform([away_team])[0],
        'HomeTeamRecentForm': calculate_form_points(home_team, df)[0],
        'AwayTeamRecentForm': calculate_form_points(away_team, df)[1],
        'HomeTeamAvgGoals': df[df['HomeTeam'] == home_team]['FTHG'].mean(),
        'AwayTeamAvgGoals': df[df['AwayTeam'] == away_team]['FTAG'].mean(),
        'HomeTeamPoints': df[df['HomeTeam'] == home_team]['FTR_encoded'].sum() / df[df['HomeTeam'] == home_team]['FTR_encoded'].count(),
        'AwayTeamPoints': df[df['AwayTeam'] == away_team]['FTR_encoded'].sum() / df[df['AwayTeam'] == away_team]['FTR_encoded'].count(),
        'HomeGoalsScoredVariance': df[df['HomeTeam'] == home_team]['FTHG'].var(),
        'AwayGoalsScoredVariance': df[df['AwayTeam'] == away_team]['FTAG'].var(),
        'HomeGoalsScoredAvg_3': df[df['HomeTeam'] == home_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_3': df[df['AwayTeam'] == away_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_3': df[df['HomeTeam'] == home_team]['FTAG'].rolling(3, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_3': df[df['AwayTeam'] == away_team]['FTHG'].rolling(3, min_periods=1).mean().iloc[-1],
        'HomeGoalsScoredAvg_5': df[df['HomeTeam'] == home_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsScoredAvg_5': df[df['AwayTeam'] == away_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'HomeGoalsConcededAvg_5': df[df['HomeTeam'] == home_team]['FTAG'].rolling(5, min_periods=1).mean().iloc[-1],
        'AwayGoalsConcededAvg_5': df[df['AwayTeam'] == away_team]['FTHG'].rolling(5, min_periods=1).mean().iloc[-1],
        
        # Add other features as required by the model, calculated or retrieved as done during training
        # ...
    }

    match_df = pd.DataFrame([match_data])
    match_scaled = scaler.transform(match_df)
    # Make predictions
    home_team_win_prob = best_model.predict_proba(match_scaled)[0][0]
    away_team_win_prob = best_model.predict_proba(match_scaled)[0][2]
    draw_prob = best_model.predict_proba(match_scaled)[0][1]
    # Print the results
    print(f"{home_team} win probability: {home_team_win_prob}")
    print(f"{away_team} win probability: {away_team_win_prob}")
    print(f"Draw probability: {draw_prob}")
    
    return home_team_win_prob, away_team_win_prob, draw_prob



print('##########################')
print('Today\'s matches')

predict_match('Amiens', 'Annecy', best_model, df, label_encoder, scaler)

predict_match('Angers', 'Quevilly Rouen', best_model, df, label_encoder, scaler)

predict_match('Bordeaux', 'Valenciennes', best_model, df, label_encoder, scaler)

predict_match('Concarneau', 'Troyes', best_model, df, label_encoder, scaler)

predict_match('Dunkerque', 'Ajaccio', best_model, df, label_encoder, scaler)

predict_match('Guingamp', 'Rodez', best_model, df, label_encoder, scaler)

predict_match('Laval', 'Paris FC', best_model, df, label_encoder, scaler)

predict_match('Pau FC', 'St Etienne', best_model, df, label_encoder, scaler)

predict_match('Bastia', 'Caen', best_model, df, label_encoder, scaler)

# predict_match('Sheffield United', 'West Ham', best_model, df, label_encoder,scaler)
# 
# predict_match('Bournemouth', 'Liverpool', best_model, df, label_encoder,scaler)


##########################
Today's matches
Encoded Home Team: 1, Encoded Away Team: 3
Amiens win probability: 0.12
Annecy win probability: 0.24
Draw probability: 0.64
Encoded Home Team: 2, Encoded Away Team: 15
Angers win probability: 0.11
Quevilly Rouen win probability: 0.81
Draw probability: 0.08
Encoded Home Team: 6, Encoded Away Team: 19
Bordeaux win probability: 0.24
Valenciennes win probability: 0.21
Draw probability: 0.55
Encoded Home Team: 8, Encoded Away Team: 18
Concarneau win probability: 0.45
Troyes win probability: 0.15
Draw probability: 0.4
Encoded Home Team: 9, Encoded Away Team: 0
Dunkerque win probability: 0.63
Ajaccio win probability: 0.13
Draw probability: 0.24
Encoded Home Team: 11, Encoded Away Team: 16
Guingamp win probability: 0.34




Rodez win probability: 0.1
Draw probability: 0.56
Encoded Home Team: 12, Encoded Away Team: 13
Laval win probability: 0.69
Paris FC win probability: 0.01
Draw probability: 0.3
Encoded Home Team: 14, Encoded Away Team: 17
Pau FC win probability: 0.05
St Etienne win probability: 0.38
Draw probability: 0.57
Encoded Home Team: 5, Encoded Away Team: 7
Bastia win probability: 0.02
Caen win probability: 0.44
Draw probability: 0.54




(0.02, 0.44, 0.54)

In [190]:
# # save the 3 models
# import joblib
# 
# # Save the model as a pickle file
# joblib.dump(best_model, 'models/best_model.pkl')
# # joblib.dump(label_encoder, 'label_encoder.pkl')
# # joblib.dump(scaler, 'scaler.pkl')
#     
# # save the xgb model
# import pickle
# 
# # Save the model as a pickle file
# pickle.dump(xgb_model, open('models/xgb_model.pkl', 'wb'))
# # pickle.dump(label_encoder, open('label_encoder.pkl', 'wb'))
# # pickle.dump(scaler, open('scaler.pkl', 'wb'))
# 
# # save the catboost model
# import pickle
# 
# # Save the model as a pickle file
# pickle.dump(catboost_model, open('models/catboost_model.pkl', 'wb'))
# pickle.dump(label_encoder, open('models/label_encoder.pkl', 'wb'))
# pickle.dump(scaler, open('models/scaler.pkl', 'wb'))
# 
