In [10]:
import pandas as pd
import numpy as np
import os as os
from IPython.display import display
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.naive_bayes import MultinomialNB,CategoricalNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,f1_score
from sklearn.preprocessing import MinMaxScaler #fixed import
from statistics import mean

### Load data

In [11]:
dfs_train = {}
dfs_test = {}

In [12]:
def drop_columns_with_div(df):
    # Get a list of columns containing "Div" in their name
    div_cols = [col for col in df.columns if 'Div' in col]

    # Drop the identified columns
    df.drop(columns=div_cols, inplace=True)

    return df

In [13]:
for root, directory, files in os.walk("data/train_preprocessed", topdown=False):
    if files:
        for file in files:
            dfs_train[file[:-4]] = pd.read_csv(f"{root}/{file}")
            dfs_train[file[:-4]] = drop_columns_with_div(dfs_train[file[:-4]])
            try:
                dfs_train[file[:-4]] = pd.get_dummies(dfs_train[file[:-4]], columns=["Avg_bookie_prediction"], prefix='Bookie_Prediction')
            except KeyError:
                pass
lens_test = 0
for root, directory, files in os.walk("data/test_preprocessed", topdown=False):
    if files:
        for file in files:
            dfs_test[file[:-4]] = pd.read_csv(f"{root}/{file}")
            dfs_test[file[:-4]] = drop_columns_with_div(dfs_test[file[:-4]])
    try:
        dfs_test[file[:-4]] = pd.get_dummies(dfs_test[file[:-4]], columns=["Avg_bookie_prediction"], prefix='Bookie_Prediction')
    except KeyError:
        pass
            # lens_test += dfs_test[file[:-4]].shape[0]
print("---")
lens_orig = 0
df_test_y = pd.DataFrame()
for root, directory, files in os.walk("data/orig_data", topdown=False):
    if files:
        for file in files:
            # print(pd.read_csv(f"{root}/{file}").shape)
            tmp = pd.read_csv(f"{root}/{file}")
            lens_orig += tmp.shape[0]
            tmp["country"] = file[:-5]
            
            df_test_y = pd.concat([df_test_y, tmp], axis=0)

---


In [14]:
dfs_test_copy = dfs_test

In [15]:
for country in df_test_y["country"].unique():
    col1 = df_test_y[df_test_y["country"] == country]["FTHG"].reset_index()
    col2 = df_test_y[df_test_y["country"] == country]["FTAG"].reset_index()

    target_values = col1["FTHG"] + col2["FTAG"]

    dfs_test[f"df_{country}"]["Target_regr"] = target_values
    dfs_test[f"df_{country}"]["FTHG"] = col1["FTHG"]
    dfs_test[f"df_{country}"]["FTAG"] = col2["FTAG"]
    dfs_test[f"df_{country}"]['Target_clas'] = [0 if a > h else 1 if h > a else -1 for a, h in zip(dfs_test[f"df_{country}"]['FTAG'], dfs_test[f"df_{country}"]['FTHG'])]
    dfs_test[f"df_{country}"].drop(columns=["FTHG", "FTAG", "Unnamed: 0"], inplace=True)
    

In [17]:
def impute_nan_values(dfs):
    for df in dfs.values():
        for col in df.columns:
            if df[col].dtype == np.float64 or df[col].dtype == np.int64:
                df[col] = df.groupby("season")[col].transform(lambda x: x.fillna(x.mean()))
        df.dropna(inplace=True)
impute_nan_values(dfs_train)
impute_nan_values(dfs_test)

In [18]:
# validation set
dfs_valid_reg_X = {}
dfs_valid_reg_y = {}
dfs_train_reg_X = {}
dfs_train_reg_y = {}
dfs_valid_clas_X = {}
dfs_valid_clas_y = {}
dfs_train_clas_X = {}
dfs_train_clas_y = {}
dfs_test_clas_X = {}
dfs_test_clas_y = {}
dfs_test_reg_X = {}
dfs_test_reg_y = {}

cols_to_drop = ['FTHG', 'FTAG', 'MatchTeams', 'SameHomeTeam', 'Target', 'Target_regr', 'Target_clas', "Unnamed: 0", "index", "Date", "Time"]

for country in dfs_train:
    dfs_train[country] = dfs_train[country][dfs_train[country]["season"] > 17]
    dfs_train[country]['Target_clas'] = [0 if a > h else 1 if h > a else -1 for a, h in zip(dfs_train[country]['FTAG'], dfs_train[country]['FTHG'])]

    dfs_valid_reg_y[country[3:]] = dfs_train[country][dfs_train[country]["season"] == 21]["Target_regr"]
    dfs_valid_reg_X[country[3:]] = dfs_train[country][dfs_train[country]["season"] == 21].drop(columns=cols_to_drop, errors='ignore')
    
    dfs_train_reg_y[country[3:]] = dfs_train[country][dfs_train[country]["season"] != 21]["Target_regr"]
    dfs_train_reg_X[country[3:]] = dfs_train[country][dfs_train[country]["season"] != 21].drop(columns=cols_to_drop, errors='ignore')
    
    dfs_train_clas_y[country[3:]] = dfs_train[country][dfs_train[country]["season"] != 21]["Target_clas"]
    dfs_train_clas_X[country[3:]] = dfs_train[country][dfs_train[country]["season"] != 21].drop(columns=cols_to_drop, errors='ignore')

    dfs_valid_clas_y[country[3:]] = dfs_train[country][dfs_train[country]["season"] == 21]["Target_clas"]
    dfs_valid_clas_X[country[3:]] = dfs_train[country][dfs_train[country]["season"] == 21].drop(columns=cols_to_drop, errors='ignore')

for country in dfs_test:
    dfs_test_reg_y[country[3:]] = dfs_test[country][dfs_test[country]["season"] != 21]["Target_regr"]
    dfs_test_reg_X[country[3:]] = dfs_test[country][dfs_test[country]["season"] != 21].drop(columns=cols_to_drop, errors='ignore')

    dfs_test_clas_y[country[3:]] = dfs_test[country][dfs_test[country]["season"] != 21]["Target_clas"]
    dfs_test_clas_X[country[3:]] = dfs_test[country][dfs_test[country]["season"] != 21].drop(columns=cols_to_drop, errors='ignore')

In [19]:
train_results = pd.DataFrame(columns=["country"])
test_results = pd.DataFrame(columns=["country"])

In [20]:
train_results["country"] = dfs_train_clas_X.keys()
test_results["country"] = dfs_train_clas_X.keys()

In [21]:
leagues = {}
for country in dfs_train_clas_X.keys():
    leagues[country] = dfs_test_clas_X[country]["league"]

In [22]:
scaler = MinMaxScaler()
for country in dfs_train_clas_X.keys():
    leagues[country] = dfs_test_clas_X[country]["league"]

    # Identify numeric columns
    numeric_columns = dfs_train_clas_X[country].select_dtypes(include=['number']).columns

    # Scale only the numeric columns
    dfs_train_clas_X[country][numeric_columns] = scaler.fit_transform(dfs_train_clas_X[country][numeric_columns])
    dfs_valid_clas_X[country][numeric_columns] = scaler.transform(dfs_valid_clas_X[country][numeric_columns])
    dfs_test_clas_X[country][numeric_columns] = scaler.transform(dfs_test_clas_X[country][numeric_columns])

In [23]:
leagues["england"].unique()

array([0., 1., 2., 3.])

# Classification task
We decided to go with Voting Classifier consisting of 3 classification algorithms -  Gaussian Naive Bayes, RandomForestClassifier & Logistic Regression

### Logistic Regression

In [24]:
dfs_test_predict = {}
dfs_train_predict = {}
dfs_valid_predict = {}
dfs_best_params_lr = {}
C_values = [0.001, 0.01, 0.1, 1, 10, 100]  # Example values for regularization parameter C
penalty_values = ['l1', 'l2']  # Example values for penalty (regularization term)

for country in dfs_train_clas_X.keys():
    max_value = 0
    result = [[0 for _ in range(len(penalty_values))] for _ in range(len(C_values))]
    max_i = -1
    max_j = -1

    for i, C in enumerate(C_values):
        for j, penalty in enumerate(penalty_values):
            lr = LogisticRegression(C=C, penalty=penalty, solver='liblinear', max_iter=10000)
            lr.fit(dfs_train_clas_X[country], dfs_train_clas_y[country])
            dfs_valid_predict[country] = lr.predict(dfs_valid_clas_X[country])
            result[i][j] = f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')

    for i in range(len(C_values)):
        for j in range(len(penalty_values)):
            current_value = result[i][j]
            if current_value > max_value:
                max_value = current_value
                max_i = i
                max_j = j

    best_C = C_values[max_i]
    best_penalty = penalty_values[max_j]
    dfs_best_params_lr[country] = {"C": best_C, "penalty": best_penalty}
    test_results.loc[train_results["country"] == country, "lr"] = max_value
    print(f"Best performance on validation data for {country} is {max_value} for C = {best_C} and penalty = {best_penalty}")


Best performance on validation data for belgium is 0.40676487255434624 for C = 10 and penalty = l1
Best performance on validation data for england is 0.39070511142421166 for C = 10 and penalty = l1
Best performance on validation data for france is 0.4037236424660721 for C = 1 and penalty = l1
Best performance on validation data for germany is 0.366087962962963 for C = 0.1 and penalty = l1
Best performance on validation data for greece is 0.43630099444052933 for C = 0.1 and penalty = l2
Best performance on validation data for italy is 0.4450274784647433 for C = 1 and penalty = l1
Best performance on validation data for netherlands is 0.4488143754348474 for C = 10 and penalty = l2
Best performance on validation data for portugal is 0.4419271748038871 for C = 100 and penalty = l2
Best performance on validation data for scotland is 0.39457980191270264 for C = 10 and penalty = l2
Best performance on validation data for spain is 0.409007792346506 for C = 100 and penalty = l1
Best performance

### Random Forest Model

Utilization of basic grid search to find best parameters

In [25]:
rfm = RandomForestClassifier(random_state=42)

In [26]:
train_results_random_forest = {}
def random_forest(params):
    rfm.set_params(**params)
    rfm.fit(dfs_train_clas_X[country], dfs_train_clas_y[country])
    dfs_train_predict[country] = rfm.predict(dfs_train_clas_X[country])
    dfs_valid_predict[country] = rfm.predict(dfs_valid_clas_X[country])
    dfs_test_predict[country] = rfm.predict(dfs_test_clas_X[country])
        
    # print(f"Average Depth of Decision Trees for {country}: {average_depth}")
    train_results[params["max_depth"]] = f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')
    result_train = f1_score(dfs_train_clas_y[country], dfs_train_predict[country], average='macro')
    result_valid = f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')
    result_test =  f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')
    return result_valid

In [27]:
dfs_train_predict = {}
dfs_valid_predict = {}
dfs_best_params_rfm = {}
max_depth_values = [i for i in range(10, 25)]
min_samples_split_values = [2, 5, 15, 10, 20]

for country in dfs_train_clas_X.keys():
    max_value = 0
    result = [[0 for _ in range(len(min_samples_split_values))] for _ in range(len(max_depth_values))]
    max_i = -1
    max_j = -1

    for i, max_depth in enumerate(max_depth_values):
        for j, min_samples_split in enumerate(min_samples_split_values):
            result[i][j] = random_forest({"max_depth": max_depth + 1, "min_samples_split": min_samples_split})

    for i in range(len(max_depth_values)):
        for j in range(len(min_samples_split_values)):
            current_value = result[i][j]
            if current_value > max_value:
                max_value = current_value
                max_i = i
                max_j = j

    best_max_depth = max_depth_values[max_i]
    best_min_samples_split = min_samples_split_values[max_j]
    test_results.loc[test_results["country"] == country, "rfm"] = max_value
    dfs_best_params_rfm[country] = {"max_depth": best_max_depth, "min_samples_split": best_min_samples_split}

    print(f"Best performance on validation data for {country} is {max_value} for max_depth = {best_max_depth} and min_samples_split = {best_min_samples_split}")

Best performance on validation data for belgium is 0.4469785533615321 for max_depth = 12 and min_samples_split = 2
Best performance on validation data for england is 0.40347792539676997 for max_depth = 24 and min_samples_split = 2
Best performance on validation data for france is 0.4501253123850208 for max_depth = 23 and min_samples_split = 2
Best performance on validation data for germany is 0.4146716155013576 for max_depth = 24 and min_samples_split = 2
Best performance on validation data for greece is 0.4473618453930929 for max_depth = 16 and min_samples_split = 5
Best performance on validation data for italy is 0.45523025012097684 for max_depth = 17 and min_samples_split = 5
Best performance on validation data for netherlands is 0.46151989456288867 for max_depth = 17 and min_samples_split = 2
Best performance on validation data for portugal is 0.5231190871478641 for max_depth = 11 and min_samples_split = 2
Best performance on validation data for scotland is 0.4111449774066567 for m

### MultinomialNaive Bayes

In [28]:
nb = MultinomialNB()

In [29]:
dfs_train_predict = {}
dfs_valid_predict = {}
for country in dfs_train_clas_X.keys():
    scaler = MinMaxScaler()
    
    # X_train = scaler.fit_transform(dfs_train_clas_X[country])
    # X_valid = scaler.fit_transform(dfs_valid_clas_X[country])
    # X_test = scaler.fit_transform(dfs_test_clas_X[country])
    
    nb.fit(dfs_train_clas_X[country], dfs_train_clas_y[country])
    dfs_train_predict[country] = nb.predict(dfs_train_clas_X[country])
    dfs_valid_predict[country] = nb.predict(dfs_valid_clas_X[country])
    dfs_test_predict[country] = nb.predict(dfs_test_clas_X[country])

    test_results.loc[test_results["country"] == country, "nb"] = f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')
    # test_results.loc[test_results["country"] == country, "nb"] = f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')

    print(f"F1-score for baseline Multinomial Naive Bayes model on train data for {country}: {f1_score(dfs_train_clas_y[country], dfs_train_predict[country], average='macro')}")
    print(f"F1-score for baseline Multinomial Naive Bayes model on validation data for {country}: {f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')}")
    print(f"F1-score for baseline Multinomial Naive Bayes model on test data for {country}: {f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')}\n --------")


F1-score for baseline Multinomial Naive Bayes model on train data for belgium: 0.42855553042622935
F1-score for baseline Multinomial Naive Bayes model on validation data for belgium: 0.4018406843007843
F1-score for baseline Multinomial Naive Bayes model on test data for belgium: 0.45186126065854165
 --------
F1-score for baseline Multinomial Naive Bayes model on train data for england: 0.46418180895356476
F1-score for baseline Multinomial Naive Bayes model on validation data for england: 0.37246179695679243
F1-score for baseline Multinomial Naive Bayes model on test data for england: 0.36166073721735464
 --------
F1-score for baseline Multinomial Naive Bayes model on train data for france: 0.4854222740765013
F1-score for baseline Multinomial Naive Bayes model on validation data for france: 0.41928853654021686
F1-score for baseline Multinomial Naive Bayes model on test data for france: 0.4240027166856435
 --------
F1-score for baseline Multinomial Naive Bayes model on train data for ger

### KNeighborsClassifier

In [30]:
kn = KNeighborsClassifier()

In [31]:
dfs_train_predict = {}
dfs_valid_predict = {}
for country in dfs_train_clas_X.keys():
    kn.fit(dfs_train_clas_X[country], dfs_train_clas_y[country])
    dfs_train_predict[country] = kn.predict(dfs_train_clas_X[country])
    dfs_valid_predict[country] = kn.predict(dfs_valid_clas_X[country])
    dfs_test_predict[country] = kn.predict(dfs_test_clas_X[country])
    test_results.loc[test_results["country"] == country, "kn"] = f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')
    # test_results.loc[test_results["country"] == country, "kn"] = f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')

    print(f"F1-score for baseline KNeighbors model on train data for {country}: {f1_score(dfs_train_clas_y[country], dfs_train_predict[country], average='macro')}")
    print(f"F1-score for baseline KNeighbors model on validation data for {country}: {f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')}")
    print(f"F1-score for baseline KNeighbors model on test data for {country}: {f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')}\n --------")

F1-score for baseline KNeighbors model on train data for belgium: 0.5956657523796139
F1-score for baseline KNeighbors model on validation data for belgium: 0.4063827335635992
F1-score for baseline KNeighbors model on test data for belgium: 0.3828666746990084
 --------
F1-score for baseline KNeighbors model on train data for england: 0.5894038189987336
F1-score for baseline KNeighbors model on validation data for england: 0.3672349280260135
F1-score for baseline KNeighbors model on test data for england: 0.3526896760514356
 --------
F1-score for baseline KNeighbors model on train data for france: 0.5893777623544728
F1-score for baseline KNeighbors model on validation data for france: 0.3873262007695735
F1-score for baseline KNeighbors model on test data for france: 0.3935528295993412
 --------
F1-score for baseline KNeighbors model on train data for germany: 0.6071573809090861
F1-score for baseline KNeighbors model on validation data for germany: 0.3734543373915255
F1-score for baseline

### Gradient Boosting Classifier

In [32]:
gbc = GradientBoostingClassifier(random_state=42)

In [33]:
dfs_train_predict = {}
dfs_valid_predict = {}
for country in dfs_train_clas_X.keys():
    gbc.fit(dfs_train_clas_X[country], dfs_train_clas_y[country])
    dfs_train_predict[country] = gbc.predict(dfs_train_clas_X[country])
    dfs_valid_predict[country] = gbc.predict(dfs_valid_clas_X[country])
    dfs_test_predict[country] = gbc.predict(dfs_test_clas_X[country])

    test_results.loc[test_results["country"] == country, "gbc"] = f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')
    # test_results.loc[test_results["country"] == country, "gbc"] = f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')
    print(f"F1-score for baseline GradientBoostingClassifier model on train data for {country}: {f1_score(dfs_train_clas_y[country], dfs_train_predict[country], average='macro')}")
    print(f"F1-score for baseline GradientBoostingClassifier model on validation data for {country}: {f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')}")
    print(f"F1-score for baseline GradientBoostingClassifier model on test data for {country}: {f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')}\n --------")

F1-score for baseline GradientBoostingClassifier model on train data for belgium: 0.9732463877195846
F1-score for baseline GradientBoostingClassifier model on validation data for belgium: 0.4180688162886305
F1-score for baseline GradientBoostingClassifier model on test data for belgium: 0.40327229419433985
 --------
F1-score for baseline GradientBoostingClassifier model on train data for england: 0.5974750127516913
F1-score for baseline GradientBoostingClassifier model on validation data for england: 0.3831094568174671
F1-score for baseline GradientBoostingClassifier model on test data for england: 0.37912762263840727
 --------
F1-score for baseline GradientBoostingClassifier model on train data for france: 0.8224382023286632
F1-score for baseline GradientBoostingClassifier model on validation data for france: 0.4069527603870234
F1-score for baseline GradientBoostingClassifier model on test data for france: 0.4078353502279672
 --------
F1-score for baseline GradientBoostingClassifier m

### Ada Boosting Classifier

In [34]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import f1_score

dfs_train_predict = {}
dfs_valid_predict = {}
dfs_test_predict = {}

for country in dfs_train_clas_X.keys():
    # Create an AdaBoostClassifier
    abc = AdaBoostClassifier(n_estimators=50, random_state=42)  # You can adjust the hyperparameters as needed

    # Train the AdaBoostClassifier
    abc.fit(dfs_train_clas_X[country], dfs_train_clas_y[country])

    # Make predictions on train, validation, and test sets
    dfs_train_predict[country] = abc.predict(dfs_train_clas_X[country])
    dfs_valid_predict[country] = abc.predict(dfs_valid_clas_X[country])
    dfs_test_predict[country] = abc.predict(dfs_test_clas_X[country])

    # Calculate and store F1-score on validation set
    test_results.loc[test_results["country"] == country, "abc"] = f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')
    # test_results.loc[test_results["country"] == country, "abc"] = f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')

    # Print F1-scores
    print(f"F1-score for baseline AdaBoostClassifier model on train data for {country}: {f1_score(dfs_train_clas_y[country], dfs_train_predict[country], average='macro')}")
    print(f"F1-score for baseline AdaBoostClassifier model on validation data for {country}: {f1_score(dfs_valid_clas_y[country], dfs_valid_predict[country], average='macro')}")
    print(f"F1-score for baseline AdaBoostClassifier model on test data for {country}: {f1_score(dfs_test_clas_y[country], dfs_test_predict[country], average='macro')}\n --------")


F1-score for baseline AdaBoostClassifier model on train data for belgium: 0.6071820013242607
F1-score for baseline AdaBoostClassifier model on validation data for belgium: 0.41347361772033153
F1-score for baseline AdaBoostClassifier model on test data for belgium: 0.402224017729969
 --------
F1-score for baseline AdaBoostClassifier model on train data for england: 0.44678403450765036
F1-score for baseline AdaBoostClassifier model on validation data for england: 0.4015629213123116
F1-score for baseline AdaBoostClassifier model on test data for england: 0.36640692210123965
 --------
F1-score for baseline AdaBoostClassifier model on train data for france: 0.5385211687702087
F1-score for baseline AdaBoostClassifier model on validation data for france: 0.39494952744692063
F1-score for baseline AdaBoostClassifier model on test data for france: 0.42741770309795557
 --------
F1-score for baseline AdaBoostClassifier model on train data for germany: 0.5390574650849018
F1-score for baseline AdaBo

### Implementing Voting Classifier
Our voting classifier is designed to use the 3 best performing classifiers and use them for the final prediction 

In [35]:
best_models = []
for i, row in test_results.iterrows():
    best_models.append(pd.to_numeric(row[["lr", "rfm", "nb", "kn", "gbc", "abc"]]).nlargest(4).index.tolist())

In [36]:
def voting_classifier(best_models_country, X_train, y_train, X_val, y_val, country: str, X_test, y_test):
    """"""
    models = {
        "nb": nb,
        "kn": kn,
        "gbc": GradientBoostingClassifier(random_state=42),
        "rfm": RandomForestClassifier(random_state=42),
        "lr": LogisticRegression(random_state=42, solver='liblinear', max_iter=10000),
        "abc": AdaBoostClassifier()
        
    }
    print(best_models_country)
    clf1 = models[best_models_country[0]] if best_models_country[0] != "rfm" else models[best_models_country[0]].set_params(**dfs_best_params_rfm[country]) if best_models_country[0] != "lr" else models[best_models_country[0]].set_params(**dfs_best_params_lr[country])
    clf2 = models[best_models_country[1]] if best_models_country[1] != "rfm" else models[best_models_country[1]].set_params(**dfs_best_params_rfm[country]) if best_models_country[0] != "lr" else models[best_models_country[0]].set_params(**dfs_best_params_lr[country])
    clf3 = models[best_models_country[2]] if best_models_country[2] != "rfm" else models[best_models_country[2]].set_params(**dfs_best_params_rfm[country]) if best_models_country[0] != "lr" else models[best_models_country[0]].set_params(**dfs_best_params_lr[country])
    
    eclf = VotingClassifier(
        estimators=[(best_models_country[0], clf1 ), (best_models_country[1], clf2 ),(best_models_country[2], clf3 )],
        voting='soft', weights=[1.6,1,1]
    ) 
    eclf.fit(X_train,y_train)
    
    y_train_predict = eclf.predict(X_train)
    y_val_predict = eclf.predict(X_val)
    
    y_test_predict = eclf.predict(X_test)
    clf1.fit(X_train, y_train)
    y_valid_best_model = clf1.predict(X_val)
    y_test_best_model = clf1.predict(X_test)
    
    print(f"F1-score for voting classifier model on train data for {country}: {f1_score(y_train, y_train_predict, average='macro')}")
    print(f"F1-score for voting classifier model on validation data for {country}: {f1_score(y_val, y_val_predict, average='macro')}")
    print(f"F1-score for voting classifier model on test data for {country}: {f1_score(y_test, y_test_predict, average='macro')}")
    print("\n")
    print(f"F1-score for best model ({best_models_country[0]}) on valid data for {country}: {f1_score(y_val, y_valid_best_model, average='macro')}")
    print(f"F1-score for best model ({best_models_country[0]}) on test data for {country}: {f1_score(y_test, y_test_best_model, average='macro')}\n --------")
    final_prediction = y_test_predict if f1_score(y_val, y_val_predict, average='macro') > f1_score(y_val, y_valid_best_model, average='macro') else y_test_best_model

    return final_prediction, max(f1_score(y_test, y_test_best_model, average='macro'), f1_score(y_val, y_val_predict, average='macro'))
    

In [37]:
final_predict = {}
final_f1_score = [0 for _ in range(len(dfs_train_clas_X.keys()))]
for i, country in enumerate(dfs_train_clas_X.keys()):
    final_predict[country], final_f1_score[i] = voting_classifier(best_models[i], dfs_train_clas_X[country], dfs_train_clas_y[country], dfs_valid_clas_X[country], dfs_valid_clas_y[country],country, dfs_test_clas_X[country], dfs_test_clas_y[country])

['rfm', 'gbc', 'abc', 'lr']
F1-score for voting classifier model on train data for belgium: 0.9978463190364916
F1-score for voting classifier model on validation data for belgium: 0.4338941580062557
F1-score for voting classifier model on test data for belgium: 0.4215135106439454


F1-score for best model (rfm) on valid data for belgium: 0.42281606967515284
F1-score for best model (rfm) on test data for belgium: 0.4370509795070321
 --------
['rfm', 'abc', 'lr', 'gbc']
F1-score for voting classifier model on train data for england: 0.9078504752768765
F1-score for voting classifier model on validation data for england: 0.3892855945580786
F1-score for voting classifier model on test data for england: 0.36515996995259137


F1-score for best model (rfm) on valid data for england: 0.3920182822725506
F1-score for best model (rfm) on test data for england: 0.37781303618620377
 --------
['rfm', 'nb', 'gbc', 'lr']
F1-score for voting classifier model on train data for france: 0.9712747716131406


In [38]:
# we used test data only for our curiosity, we made no choices about the models used as that would defeat the purpose of the assignment
mean(final_f1_score)

0.4408238628501137

In [39]:
final_f1_score 

[0.4370509795070321,
 0.3892855945580786,
 0.42720436753807217,
 0.4106279101142915,
 0.49360018403496664,
 0.44874209063570475,
 0.46408987084529985,
 0.48973871080773,
 0.405895631925397,
 0.4378340396416451,
 0.44499311174303324]

In [40]:
for country in final_predict:
    dfs_test_clas_X[country]["season"] = leagues[country]
    dfs_test_clas_X[country]["final_predict"] = np.where(final_predict[country] == 0, 'A', np.where(final_predict[country] == 1, 'H', 'D'))
    df = dfs_test_clas_X[country]
    for league in list(df["league"].unique()):
        df[df["league"] == league]["final_predict"].to_csv(f"final_results/{country}_{int(league)}.csv", index= False, header=False)