In [275]:
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

pd.set_option('display.max_columns', 150)


In [276]:
training_dataset = pd.read_csv("../data/training_clean.csv", index_col=0)
testing_dataset = pd.read_csv("../data/testing_clean.csv", index_col=0)

# <h1 style='font-size:30px;'>Encoding</h1>

In this step, we transform categorical team names and matches into numerical representations for machine learning models. Methods:

1. **Label Encoding**: Assigns a unique integer to result of each match.
2. **One-Hot Encoding**: Creates binary columns for each team, with a `1` indicating the presence of a particular team.

In [277]:
def team_encode(df):
    all_teams = pd.concat([df['HomeTeam'], df['AwayTeam']]).unique()
    one_hot_encoder = OneHotEncoder(sparse_output=False)
    team_encoded = one_hot_encoder.fit_transform(df[['HomeTeam', 'AwayTeam']])
    team_encoded_df = pd.DataFrame(team_encoded, columns=one_hot_encoder.get_feature_names_out(['HomeTeam', 'AwayTeam']))
    df = pd.concat([df.reset_index(drop=True), team_encoded_df.reset_index(drop=True)], axis=1)
    return df, team_encoded_df

def match_encode(df):
    label_encoder = LabelEncoder()
    df["FTR_encoded"] = label_encoder.fit_transform(df["FTR"])
    return df

In [278]:
training_dataset, training_team_encoded_df = team_encode(training_dataset)
testing_dataset, testing_team_encoded_df = team_encode(testing_dataset)

training_dataset = match_encode(training_dataset)
testing_dataset = match_encode(testing_dataset)

Let's look at the match encoded. We see:

0: Away team wins

1: Draw

2: Home team wins

In [279]:
training_dataset[["FTR_encoded", "FTR"]]

Unnamed: 0,FTR_encoded,FTR
0,2,H
1,2,H
2,0,A
3,2,H
4,0,A
...,...,...
4175,1,D
4176,2,H
4177,0,A
4178,1,D


# <h1 style='font-size:30px;'>Feature Engineering</h1>

In [280]:
training_dataset

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,Max>2.5,Max<2.5,AHh,MaxAHH,MaxAHA,PSH,PSD,PSA,PSCH,PSCD,PSCA,HomeTeam_Alaves,HomeTeam_Almeria,HomeTeam_Ath Bilbao,HomeTeam_Ath Madrid,HomeTeam_Barcelona,HomeTeam_Betis,HomeTeam_Celta,HomeTeam_Cordoba,HomeTeam_Eibar,HomeTeam_Elche,HomeTeam_Espanol,HomeTeam_Getafe,HomeTeam_Girona,HomeTeam_Granada,HomeTeam_Hercules,HomeTeam_Huesca,HomeTeam_La Coruna,HomeTeam_Las Palmas,HomeTeam_Leganes,HomeTeam_Levante,HomeTeam_Malaga,HomeTeam_Mallorca,HomeTeam_Numancia,HomeTeam_Osasuna,HomeTeam_Real Madrid,HomeTeam_Recreativo,HomeTeam_Santander,HomeTeam_Sevilla,HomeTeam_Sociedad,HomeTeam_Sp Gijon,HomeTeam_Tenerife,HomeTeam_Valencia,HomeTeam_Valladolid,HomeTeam_Vallecano,HomeTeam_Villarreal,HomeTeam_Xerez,HomeTeam_Zaragoza,AwayTeam_Alaves,AwayTeam_Almeria,AwayTeam_Ath Bilbao,AwayTeam_Ath Madrid,AwayTeam_Barcelona,AwayTeam_Betis,AwayTeam_Celta,AwayTeam_Cordoba,AwayTeam_Eibar,AwayTeam_Elche,AwayTeam_Espanol,AwayTeam_Getafe,AwayTeam_Girona,AwayTeam_Granada,AwayTeam_Hercules,AwayTeam_Huesca,AwayTeam_La Coruna,AwayTeam_Las Palmas,AwayTeam_Leganes,AwayTeam_Levante,AwayTeam_Malaga,AwayTeam_Mallorca,AwayTeam_Numancia,AwayTeam_Osasuna,AwayTeam_Real Madrid,AwayTeam_Recreativo,AwayTeam_Santander,AwayTeam_Sevilla,AwayTeam_Sociedad,AwayTeam_Sp Gijon,AwayTeam_Tenerife,AwayTeam_Valencia,AwayTeam_Valladolid,AwayTeam_Vallecano,AwayTeam_Villarreal,AwayTeam_Xerez,AwayTeam_Zaragoza,FTR_encoded
0,SP1,30/08/08,Espanol,Valladolid,1,0,H,0,0,D,10,11,2,1,18,17,1,9,3,5,0,0,2.00,3.30,3.80,1.80,3.25,4.10,1.75,3.20,4.30,1.83,3.20,3.75,1.90,3.25,3.50,2.09,3.40,4.50,2.12,1.83,0.0,1.46,3.00,1.90,3.25,3.99,1.90,3.25,3.99,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2
1,SP1,30/08/08,Valencia,Mallorca,3,0,H,2,0,H,17,16,6,2,17,14,5,6,4,0,0,0,1.70,3.60,5.25,1.65,3.35,5.00,1.70,3.30,4.50,1.67,3.30,4.50,1.65,3.40,4.50,1.75,3.88,5.40,2.00,1.93,-0.5,1.76,2.21,1.70,3.42,4.85,1.70,3.42,4.85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,SP1,31/08/08,Ath Bilbao,Almeria,1,3,A,0,2,A,10,11,4,5,35,20,2,6,2,4,0,0,2.00,3.30,3.80,1.90,3.20,3.80,2.00,3.00,3.60,1.91,3.20,3.50,1.90,3.20,3.60,2.10,3.40,4.00,2.25,1.70,0.0,1.47,2.75,1.96,3.23,3.70,1.96,3.23,3.70,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,SP1,31/08/08,Ath Madrid,Malaga,4,0,H,3,0,H,25,7,9,2,16,13,11,7,1,2,0,0,1.44,4.20,7.50,1.40,3.80,7.95,1.45,3.60,7.00,1.40,3.75,7.00,1.45,3.75,7.00,1.50,4.70,9.00,1.80,2.11,0.0,1.14,6.50,1.44,3.92,7.42,1.44,3.92,7.42,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,SP1,31/08/08,Betis,Recreativo,0,1,A,0,1,A,8,13,2,6,17,18,2,2,3,1,0,0,2.00,3.25,3.80,1.75,3.30,4.40,1.90,3.10,3.80,1.80,3.25,3.80,1.75,3.25,4.35,2.10,3.36,4.60,2.25,1.80,0.0,1.40,3.25,1.86,3.26,4.15,1.86,3.26,4.15,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4175,SP1,18/05/2019,Levante,Ath Madrid,2,2,D,2,0,H,17,17,7,8,9,7,6,4,0,1,0,1,3.75,3.75,1.95,3.50,3.75,2.00,3.55,3.80,1.97,3.60,3.80,1.95,3.60,3.80,2.05,3.75,3.90,2.05,1.71,2.31,0.5,1.91,2.04,3.67,3.87,2.02,4.34,4.10,1.81,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4176,SP1,18/05/2019,Sevilla,Ath Bilbao,2,0,H,1,0,H,9,9,3,1,14,19,2,3,4,2,0,0,1.85,3.40,4.33,1.87,3.40,4.60,1.87,3.40,4.60,1.85,3.30,4.60,1.93,3.50,4.20,1.93,3.50,4.90,1.81,2.16,-1.0,2.70,1.60,1.90,3.42,4.81,2.17,3.08,4.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4177,SP1,18/05/2019,Valladolid,Valencia,0,2,A,0,1,A,19,9,4,6,12,10,8,2,1,3,0,0,8.00,5.25,1.36,8.00,4.75,1.40,7.50,4.85,1.40,8.50,4.80,1.38,8.00,5.20,1.40,8.60,5.25,1.42,1.69,2.33,1.5,1.78,2.20,8.05,4.91,1.42,8.01,5.13,1.40,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0
4178,SP1,19/05/2019,Eibar,Barcelona,2,2,D,2,2,D,15,6,8,3,11,3,5,2,4,2,0,0,3.60,4.00,1.90,3.70,4.10,1.87,3.75,4.00,1.87,3.70,4.00,1.88,3.70,4.10,1.87,3.85,4.20,1.94,1.50,2.85,0.5,2.03,1.92,3.83,4.20,1.89,4.96,4.55,1.65,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [281]:
def team_last_matches_performance(df, team, date, number_of_matches):
    past_n_matches = df.loc[((df["HomeTeam"] == team) | (df["AwayTeam"] == team)) & (df["Date"] < date), :].tail(number_of_matches)

    goal_scored = (past_n_matches.loc[past_n_matches["HomeTeam"] == team, "FTHG"].sum() + 
                past_n_matches.loc[past_n_matches["AwayTeam"] == team, "FTAG"].sum()
    )

    goals_conceded = (past_n_matches.loc[past_n_matches["HomeTeam"] == team, "FTAG"].sum() + 
                past_n_matches.loc[past_n_matches["AwayTeam"] == team, "FTHG"].sum()
    )

    avg_goal_diff = (goal_scored - goals_conceded) / number_of_matches

    points = 0
    for _, match in past_n_matches.iterrows():
        if ((match["HomeTeam"] == team) and (match["FTR_encoded"] == 2)) or (
            (match["AwayTeam"] == team) and (match["FTR_encoded"] == 0)
        ):
            points += 3
        elif ((match["HomeTeam"] == team) and (match["FTR_encoded"] == 0)) or (
            (match["AwayTeam"] == team) and (match["FTR_encoded"] == 2)
        ):
            points += 0
        else:
            points += 1

    shot_on_target = (past_n_matches.loc[past_n_matches["HomeTeam"] == team, "HST"].sum() +
                    past_n_matches.loc[past_n_matches["AwayTeam"] == team, "AST"].sum()
    ) / number_of_matches
    
    return avg_goal_diff, points, shot_on_target

In [282]:
training_columns = training_dataset.columns.to_list()

training_dataset[["HomeTeam_avg_goal_diff", "HomeTeam_points", "HomeTeam_ShotOnTarget"]] = training_dataset.apply(
    lambda row: pd.Series(
        team_last_matches_performance(training_dataset, row["HomeTeam"], row["Date"], 5)
    ),
    axis=1
)


training_dataset[["AwayTeam_avg_goal_diff", "AwayTeam_points", "AwayTeam_ShotOnTarget"]] = training_dataset.apply(
    lambda row: pd.Series(
        team_last_matches_performance(training_dataset, row["AwayTeam"], row["Date"], 5)
    ),
    axis=1
)


training_dataset = training_dataset[training_columns[:training_columns.index("FTHG")]
                                    +['HomeTeam_avg_goal_diff', 'HomeTeam_points', "HomeTeam_ShotOnTarget", "AwayTeam_avg_goal_diff", "AwayTeam_points", "AwayTeam_ShotOnTarget"] 
                                    + training_columns[training_columns.index("FTHG"):]]


testing_columns = testing_dataset.columns.to_list()

testing_dataset[["HomeTeam_avg_goal_diff", "HomeTeam_points",  "HomeTeam_ShotOnTarget"]] = testing_dataset.apply(
    lambda row: pd.Series(
        team_last_matches_performance(testing_dataset, row["HomeTeam"], row["Date"], 5)
    ),
    axis=1
)

testing_dataset[["AwayTeam_avg_goal_diff", "AwayTeam_points",  "AwayTeam_ShotOnTarget"]] = testing_dataset.apply(
    lambda row: pd.Series(
        team_last_matches_performance(testing_dataset, row["AwayTeam"], row["Date"], 5)
    ),
    axis=1
)

testing_dataset = testing_dataset[testing_columns[:testing_columns.index("FTHG")]
                                  +['HomeTeam_avg_goal_diff', 'HomeTeam_points', "HomeTeam_ShotOnTarget", "AwayTeam_avg_goal_diff", "AwayTeam_points", "AwayTeam_ShotOnTarget"] 
                                  + testing_columns[testing_columns.index("FTHG"):]]

In [283]:
training_dataset = training_dataset.drop(columns=["FTHG", "FTAG", "FTR", "HTHG", "HTAG", "HTR", "HF", "AF", "HY", "AY", "HR", "AR"])
testing_dataset = testing_dataset.drop(columns=["FTHG", "FTAG", "FTR", "HTHG", "HTAG", "HTR", "HF", "AF", "HY", "AY", "HR", "AR"])

In [284]:
training_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4180 entries, 0 to 4179
Columns: 120 entries, Div to FTR_encoded
dtypes: float64(109), int64(7), object(4)
memory usage: 3.8+ MB


In [285]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
training_columns_to_scale = training_dataset.loc[:, "HomeTeam_avg_goal_diff":"HomeTeam_Alaves"].columns
training_dataset[training_columns_to_scale] = training_dataset[training_columns_to_scale].astype(float)
training_dataset.loc[:, training_columns_to_scale] = scaler.fit_transform(training_dataset[training_columns_to_scale])


In [286]:
testing_columns_to_scale = testing_dataset.loc[: ,"HomeTeam_avg_goal_diff" : "HomeTeam_Alaves"].columns.tolist()
testing_dataset[testing_columns_to_scale] = testing_dataset[testing_columns_to_scale].astype(float)
testing_dataset.loc[:, testing_columns_to_scale] = scaler.fit_transform(testing_dataset[testing_columns_to_scale])

In [287]:
training_dataset

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,HomeTeam_avg_goal_diff,HomeTeam_points,HomeTeam_ShotOnTarget,AwayTeam_avg_goal_diff,AwayTeam_points,AwayTeam_ShotOnTarget,HS,AS,HST,AST,HC,AC,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,Max>2.5,Max<2.5,AHh,MaxAHH,MaxAHA,PSH,PSD,PSA,PSCH,PSCD,PSCA,HomeTeam_Alaves,HomeTeam_Almeria,HomeTeam_Ath Bilbao,HomeTeam_Ath Madrid,HomeTeam_Barcelona,HomeTeam_Betis,HomeTeam_Celta,HomeTeam_Cordoba,HomeTeam_Eibar,HomeTeam_Elche,HomeTeam_Espanol,HomeTeam_Getafe,HomeTeam_Girona,HomeTeam_Granada,HomeTeam_Hercules,HomeTeam_Huesca,HomeTeam_La Coruna,HomeTeam_Las Palmas,HomeTeam_Leganes,HomeTeam_Levante,HomeTeam_Malaga,HomeTeam_Mallorca,HomeTeam_Numancia,HomeTeam_Osasuna,HomeTeam_Real Madrid,HomeTeam_Recreativo,HomeTeam_Santander,HomeTeam_Sevilla,HomeTeam_Sociedad,HomeTeam_Sp Gijon,HomeTeam_Tenerife,HomeTeam_Valencia,HomeTeam_Valladolid,HomeTeam_Vallecano,HomeTeam_Villarreal,HomeTeam_Xerez,HomeTeam_Zaragoza,AwayTeam_Alaves,AwayTeam_Almeria,AwayTeam_Ath Bilbao,AwayTeam_Ath Madrid,AwayTeam_Barcelona,AwayTeam_Betis,AwayTeam_Celta,AwayTeam_Cordoba,AwayTeam_Eibar,AwayTeam_Elche,AwayTeam_Espanol,AwayTeam_Getafe,AwayTeam_Girona,AwayTeam_Granada,AwayTeam_Hercules,AwayTeam_Huesca,AwayTeam_La Coruna,AwayTeam_Las Palmas,AwayTeam_Leganes,AwayTeam_Levante,AwayTeam_Malaga,AwayTeam_Mallorca,AwayTeam_Numancia,AwayTeam_Osasuna,AwayTeam_Real Madrid,AwayTeam_Recreativo,AwayTeam_Santander,AwayTeam_Sevilla,AwayTeam_Sociedad,AwayTeam_Sp Gijon,AwayTeam_Tenerife,AwayTeam_Valencia,AwayTeam_Valladolid,AwayTeam_Vallecano,AwayTeam_Villarreal,AwayTeam_Xerez,AwayTeam_Zaragoza,FTR_encoded
0,SP1,30/08/08,Espanol,Valladolid,0.846154,0.733333,0.511628,0.576923,0.600000,0.488372,0.250000,0.282051,0.111111,0.0625,0.05,0.529412,0.039231,0.055172,0.068136,0.023355,0.049708,0.060309,0.037955,0.051852,0.078240,0.032426,0.056088,0.053486,0.024878,0.031915,0.036711,0.027282,0.029508,0.049744,0.231850,0.066884,0.538462,0.043129,0.139168,0.024600,0.033045,0.062487,0.023504,0.036716,0.058293,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2
1,SP1,30/08/08,Valencia,Mallorca,0.807692,0.600000,0.534884,0.653846,0.533333,0.744186,0.444444,0.410256,0.333333,0.1250,0.25,0.352941,0.027222,0.075862,0.104459,0.018805,0.055556,0.078341,0.035319,0.059259,0.083130,0.026021,0.062927,0.068510,0.017729,0.038298,0.051881,0.018363,0.045246,0.062911,0.203747,0.083197,0.461538,0.073220,0.082496,0.018879,0.039429,0.081017,0.018038,0.043729,0.075521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,SP1,31/08/08,Ath Bilbao,Almeria,0.576923,0.466667,0.209302,0.346154,0.066667,0.302326,0.250000,0.282051,0.222222,0.3125,0.10,0.352941,0.039231,0.055172,0.068136,0.026388,0.046784,0.054298,0.051133,0.037037,0.061125,0.035629,0.056088,0.048478,0.024878,0.029787,0.038228,0.027545,0.029508,0.042429,0.262295,0.045677,0.538462,0.044132,0.121234,0.026316,0.032294,0.056238,0.025143,0.035891,0.052484,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,SP1,31/08/08,Ath Madrid,Malaga,0.538462,0.533333,0.511628,0.346154,0.200000,0.348837,0.666667,0.179487,0.500000,0.1250,0.55,0.411765,0.016813,0.117241,0.160822,0.011222,0.081871,0.137447,0.022140,0.081481,0.144254,0.015212,0.093707,0.118590,0.012010,0.053191,0.089806,0.011805,0.072131,0.115582,0.156909,0.112561,0.538462,0.011033,0.390244,0.011442,0.058205,0.136393,0.010932,0.064356,0.127003,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,SP1,31/08/08,Betis,Recreativo,0.500000,0.466667,0.651163,0.461538,0.200000,0.441860,0.194444,0.333333,0.111111,0.3750,0.10,0.117647,0.039231,0.051724,0.068136,0.021838,0.052632,0.066319,0.045862,0.044444,0.066015,0.031225,0.059508,0.054487,0.020589,0.031915,0.049606,0.027545,0.028197,0.051207,0.262295,0.061990,0.538462,0.037111,0.157102,0.023455,0.033421,0.065934,0.022410,0.037129,0.061498,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4175,SP1,18/05/2019,Levante,Ath Madrid,0.615385,0.466667,0.604651,0.538462,0.466667,0.488372,0.444444,0.435897,0.388889,0.5000,0.30,0.235294,0.109287,0.086207,0.021794,0.074917,0.078947,0.018233,0.132841,0.096296,0.021271,0.103283,0.097127,0.017428,0.073492,0.055319,0.014715,0.070829,0.045902,0.013899,0.135831,0.145188,0.615385,0.088265,0.070301,0.075229,0.056327,0.020039,0.090189,0.071782,0.014623,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4176,SP1,18/05/2019,Sevilla,Ath Bilbao,0.653846,0.666667,0.511628,0.653846,0.600000,0.511628,0.222222,0.230769,0.166667,0.0625,0.10,0.176471,0.033227,0.062069,0.081413,0.025478,0.058480,0.070327,0.044280,0.066667,0.085575,0.033227,0.062927,0.070513,0.025736,0.042553,0.047330,0.023085,0.032787,0.055596,0.159251,0.120718,0.384615,0.167503,0.038737,0.024600,0.039429,0.080155,0.030883,0.029703,0.061498,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4177,SP1,18/05/2019,Valladolid,Valencia,0.538462,0.466667,0.441860,0.846154,0.800000,0.558140,0.500000,0.230769,0.222222,0.3750,0.40,0.117647,0.279424,0.189655,0.007014,0.211404,0.137427,0.006211,0.341065,0.174074,0.007335,0.299440,0.165527,0.006010,0.199314,0.114894,0.004854,0.198059,0.090164,0.004682,0.131148,0.148450,0.769231,0.075226,0.081779,0.200515,0.095381,0.007111,0.190489,0.114274,0.006410,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0
4178,SP1,19/05/2019,Eibar,Barcelona,0.576923,0.466667,0.348837,0.653846,0.533333,0.697674,0.388889,0.153846,0.444444,0.1875,0.25,0.117647,0.103283,0.103448,0.020541,0.080983,0.099415,0.015628,0.143384,0.111111,0.018826,0.107286,0.110807,0.016026,0.076351,0.068085,0.011984,0.073452,0.055738,0.012290,0.086651,0.233279,0.615385,0.100301,0.061693,0.079805,0.068719,0.017238,0.107133,0.090347,0.011418,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [288]:
training_dataset


Unnamed: 0,Div,Date,HomeTeam,AwayTeam,HomeTeam_avg_goal_diff,HomeTeam_points,HomeTeam_ShotOnTarget,AwayTeam_avg_goal_diff,AwayTeam_points,AwayTeam_ShotOnTarget,HS,AS,HST,AST,HC,AC,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,Max>2.5,Max<2.5,AHh,MaxAHH,MaxAHA,PSH,PSD,PSA,PSCH,PSCD,PSCA,HomeTeam_Alaves,HomeTeam_Almeria,HomeTeam_Ath Bilbao,HomeTeam_Ath Madrid,HomeTeam_Barcelona,HomeTeam_Betis,HomeTeam_Celta,HomeTeam_Cordoba,HomeTeam_Eibar,HomeTeam_Elche,HomeTeam_Espanol,HomeTeam_Getafe,HomeTeam_Girona,HomeTeam_Granada,HomeTeam_Hercules,HomeTeam_Huesca,HomeTeam_La Coruna,HomeTeam_Las Palmas,HomeTeam_Leganes,HomeTeam_Levante,HomeTeam_Malaga,HomeTeam_Mallorca,HomeTeam_Numancia,HomeTeam_Osasuna,HomeTeam_Real Madrid,HomeTeam_Recreativo,HomeTeam_Santander,HomeTeam_Sevilla,HomeTeam_Sociedad,HomeTeam_Sp Gijon,HomeTeam_Tenerife,HomeTeam_Valencia,HomeTeam_Valladolid,HomeTeam_Vallecano,HomeTeam_Villarreal,HomeTeam_Xerez,HomeTeam_Zaragoza,AwayTeam_Alaves,AwayTeam_Almeria,AwayTeam_Ath Bilbao,AwayTeam_Ath Madrid,AwayTeam_Barcelona,AwayTeam_Betis,AwayTeam_Celta,AwayTeam_Cordoba,AwayTeam_Eibar,AwayTeam_Elche,AwayTeam_Espanol,AwayTeam_Getafe,AwayTeam_Girona,AwayTeam_Granada,AwayTeam_Hercules,AwayTeam_Huesca,AwayTeam_La Coruna,AwayTeam_Las Palmas,AwayTeam_Leganes,AwayTeam_Levante,AwayTeam_Malaga,AwayTeam_Mallorca,AwayTeam_Numancia,AwayTeam_Osasuna,AwayTeam_Real Madrid,AwayTeam_Recreativo,AwayTeam_Santander,AwayTeam_Sevilla,AwayTeam_Sociedad,AwayTeam_Sp Gijon,AwayTeam_Tenerife,AwayTeam_Valencia,AwayTeam_Valladolid,AwayTeam_Vallecano,AwayTeam_Villarreal,AwayTeam_Xerez,AwayTeam_Zaragoza,FTR_encoded
0,SP1,30/08/08,Espanol,Valladolid,0.846154,0.733333,0.511628,0.576923,0.600000,0.488372,0.250000,0.282051,0.111111,0.0625,0.05,0.529412,0.039231,0.055172,0.068136,0.023355,0.049708,0.060309,0.037955,0.051852,0.078240,0.032426,0.056088,0.053486,0.024878,0.031915,0.036711,0.027282,0.029508,0.049744,0.231850,0.066884,0.538462,0.043129,0.139168,0.024600,0.033045,0.062487,0.023504,0.036716,0.058293,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2
1,SP1,30/08/08,Valencia,Mallorca,0.807692,0.600000,0.534884,0.653846,0.533333,0.744186,0.444444,0.410256,0.333333,0.1250,0.25,0.352941,0.027222,0.075862,0.104459,0.018805,0.055556,0.078341,0.035319,0.059259,0.083130,0.026021,0.062927,0.068510,0.017729,0.038298,0.051881,0.018363,0.045246,0.062911,0.203747,0.083197,0.461538,0.073220,0.082496,0.018879,0.039429,0.081017,0.018038,0.043729,0.075521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,SP1,31/08/08,Ath Bilbao,Almeria,0.576923,0.466667,0.209302,0.346154,0.066667,0.302326,0.250000,0.282051,0.222222,0.3125,0.10,0.352941,0.039231,0.055172,0.068136,0.026388,0.046784,0.054298,0.051133,0.037037,0.061125,0.035629,0.056088,0.048478,0.024878,0.029787,0.038228,0.027545,0.029508,0.042429,0.262295,0.045677,0.538462,0.044132,0.121234,0.026316,0.032294,0.056238,0.025143,0.035891,0.052484,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,SP1,31/08/08,Ath Madrid,Malaga,0.538462,0.533333,0.511628,0.346154,0.200000,0.348837,0.666667,0.179487,0.500000,0.1250,0.55,0.411765,0.016813,0.117241,0.160822,0.011222,0.081871,0.137447,0.022140,0.081481,0.144254,0.015212,0.093707,0.118590,0.012010,0.053191,0.089806,0.011805,0.072131,0.115582,0.156909,0.112561,0.538462,0.011033,0.390244,0.011442,0.058205,0.136393,0.010932,0.064356,0.127003,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,SP1,31/08/08,Betis,Recreativo,0.500000,0.466667,0.651163,0.461538,0.200000,0.441860,0.194444,0.333333,0.111111,0.3750,0.10,0.117647,0.039231,0.051724,0.068136,0.021838,0.052632,0.066319,0.045862,0.044444,0.066015,0.031225,0.059508,0.054487,0.020589,0.031915,0.049606,0.027545,0.028197,0.051207,0.262295,0.061990,0.538462,0.037111,0.157102,0.023455,0.033421,0.065934,0.022410,0.037129,0.061498,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4175,SP1,18/05/2019,Levante,Ath Madrid,0.615385,0.466667,0.604651,0.538462,0.466667,0.488372,0.444444,0.435897,0.388889,0.5000,0.30,0.235294,0.109287,0.086207,0.021794,0.074917,0.078947,0.018233,0.132841,0.096296,0.021271,0.103283,0.097127,0.017428,0.073492,0.055319,0.014715,0.070829,0.045902,0.013899,0.135831,0.145188,0.615385,0.088265,0.070301,0.075229,0.056327,0.020039,0.090189,0.071782,0.014623,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4176,SP1,18/05/2019,Sevilla,Ath Bilbao,0.653846,0.666667,0.511628,0.653846,0.600000,0.511628,0.222222,0.230769,0.166667,0.0625,0.10,0.176471,0.033227,0.062069,0.081413,0.025478,0.058480,0.070327,0.044280,0.066667,0.085575,0.033227,0.062927,0.070513,0.025736,0.042553,0.047330,0.023085,0.032787,0.055596,0.159251,0.120718,0.384615,0.167503,0.038737,0.024600,0.039429,0.080155,0.030883,0.029703,0.061498,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4177,SP1,18/05/2019,Valladolid,Valencia,0.538462,0.466667,0.441860,0.846154,0.800000,0.558140,0.500000,0.230769,0.222222,0.3750,0.40,0.117647,0.279424,0.189655,0.007014,0.211404,0.137427,0.006211,0.341065,0.174074,0.007335,0.299440,0.165527,0.006010,0.199314,0.114894,0.004854,0.198059,0.090164,0.004682,0.131148,0.148450,0.769231,0.075226,0.081779,0.200515,0.095381,0.007111,0.190489,0.114274,0.006410,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0
4178,SP1,19/05/2019,Eibar,Barcelona,0.576923,0.466667,0.348837,0.653846,0.533333,0.697674,0.388889,0.153846,0.444444,0.1875,0.25,0.117647,0.103283,0.103448,0.020541,0.080983,0.099415,0.015628,0.143384,0.111111,0.018826,0.107286,0.110807,0.016026,0.076351,0.068085,0.011984,0.073452,0.055738,0.012290,0.086651,0.233279,0.615385,0.100301,0.061693,0.079805,0.068719,0.017238,0.107133,0.090347,0.011418,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1


In [289]:
# unused_columns = ["AHh", "MaxAHH", "MaxAHA", "Max>2.5", "Max<2.5"] 

common_columns = training_dataset.columns.intersection(testing_dataset.columns)
X_train = training_dataset[common_columns].drop(columns= "FTR_encoded")
X_test = testing_dataset[common_columns].drop(columns= "FTR_encoded")

y_train = training_dataset["FTR_encoded"]
y_test = testing_dataset["FTR_encoded"]

In [290]:
X_train = X_train.drop(columns=["HS", "AS", "HST", "AST", "HC", "AC"])
X_test = X_test.drop(columns=["HS", "AS", "HST", "AST", "HC", "AC"])

In [291]:
X_train.to_csv("../data/X_train.csv")
y_train.to_csv("../data/y_train.csv")
X_test.to_csv("../data/X_test.csv")
y_test.to_csv("../data/y_test.csv")