In [None]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
import numpy as np
import pandas as pd

In [None]:
# Load the historical match data used to train the model.
dataset_path = 'data/def'
overview = pd.read_csv(dataset_path + "/results_def.csv")
# Split the table into the model inputs (11 feature columns) and the single
# target column ("result").
features = overview[[
    "home_team_id", "away_team_id", "recent",
    "home_team_fitness", "away_team_fitness",
    "home_team_level", "away_team_level",
    "championship_importance",
    "home_team_fifa_ranking", "away_team_fifa_ranking",
    "neutral",
]]
label = overview[["result"]]
# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0; it returns the same 2-D NumPy array.
x_train = features.values
y_train = label.values

In [None]:
# Build the network: a fully connected model over the 11 input features.
im_shape = (11,)
nn_model = Sequential()
# First layer has no activation argument, so it is a plain linear projection.
nn_model.add(Dense(11, input_shape=im_shape))
nn_model.add(Dropout(0.1))
# Two hidden layers of 128 sigmoid units each.
nn_model.add(Dense(128, activation='sigmoid'))
nn_model.add(Dense(128, activation='sigmoid'))
# Single sigmoid output, i.e. one score in [0, 1] per match.
nn_model.add(Dense(1, activation='sigmoid'))

In [None]:
# Configure training: Adam (learning rate 0.009) minimising the mean squared
# error between the predicted score and "result"; accuracy is reported too.
nn_model.compile(optimizer=Adam(lr=0.009), loss='mean_squared_error', metrics=['accuracy'])

In [None]:
# Initial training pass on the historical matches: 5 epochs, mini-batches of 10.
nn_model.fit(x_train, y_train, epochs=5, batch_size=10)

In [None]:
# Before running the next cell, make sure the file "group_phase.csv" is in the dataset directory.
# It must contain the matches of the group phase.

In [None]:
# Predict the group-phase matches, save the predictions, then continue training
# the model on the historical data plus those predictions.
group_phase = pd.read_csv(dataset_path + "/group_phase.csv")
# Select the same columns, in the same order, as the training frame `features`.
# `assign` returns a new frame, so "recent" is overridden without writing into
# a slice of `group_phase` (which triggers SettingWithCopyWarning).
features2 = group_phase[features.columns].assign(recent=0.7)

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
features_array2 = features2.values

# Score all matches with one batched predict() call instead of one call per
# row; an empty file yields no scores, as the original row loop did.
if len(features_array2):
    scores = nn_model.predict(features_array2).ravel()
else:
    scores = np.empty(0)

# Map the score to a result: below 0.4 -> 0, above 0.6 -> 1, otherwise 0.5
# (presumably a draw -- confirm against the encoding used in results_def.csv).
# The column is always float here, whatever dtype the CSV column had.
label2 = pd.DataFrame(
    {"result": np.select([scores < 0.4, scores > 0.6], [0.0, 1.0], default=0.5)},
    index=group_phase.index,
)

group_phase = pd.concat([group_phase[["group", "home_team", "away_team"]], features2, label2], axis=1)
group_phase.to_csv(path_or_buf=dataset_path + "/group_phase_results.csv")

# Train again on the historical data plus the predicted group-phase results.
# NOTE(review): the new labels are the model's own predictions, not real outcomes.
x_train = np.concatenate([features.values, features2.values])
y_train = np.concatenate([label.values, label2.values])

nn_model.fit(
    x_train, y_train, batch_size=10,
    epochs=5
)

In [None]:
# Before running the next cell, make sure the file "8ths.csv" is in the dataset directory.
# It must contain the matches of the round of 16 (eighth-finals).

In [None]:
# Predict the matches in 8ths.csv, save the predictions, then continue training
# the model on everything seen so far.
eight_phase = pd.read_csv(dataset_path + "/8ths.csv")
# Select the same columns, in the same order, as the training frame `features`.
# `assign` returns a new frame, so "recent" is overridden without writing into
# a slice of `eight_phase` (which triggers SettingWithCopyWarning).
features3 = eight_phase[features.columns].assign(recent=0.8)

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
features_array3 = features3.values

# Score all matches with one batched predict() call instead of one call per
# row; an empty file yields no scores, as the original row loop did.
if len(features_array3):
    scores = nn_model.predict(features_array3).ravel()
else:
    scores = np.empty(0)

# Binary outcome only (no 0.5 here): below 0.5 -> 0, otherwise 1. The strict
# `<` matches the 4ths, semis and final cells; this cell previously used `<=`,
# which only differed for a score of exactly 0.5.
# The column is always float here, whatever dtype the CSV column had.
label3 = pd.DataFrame(
    {"result": np.where(scores < 0.5, 0.0, 1.0)},
    index=eight_phase.index,
)

eight_phase = pd.concat([eight_phase[["group", "home_team", "away_team"]], features3, label3], axis=1)
eight_phase.to_csv(path_or_buf=dataset_path + "/8ths_results.csv")

# Train again on the historical data plus all predicted results so far.
# NOTE(review): the new labels are the model's own predictions, not real outcomes.
x_train = np.concatenate([features.values, features2.values, features3.values])
y_train = np.concatenate([label.values, label2.values, label3.values])

nn_model.fit(
    x_train, y_train, batch_size=10,
    epochs=5
)

In [None]:
# Before running the next cell, make sure the file "4ths.csv" is in the dataset directory.
# It must contain the matches of the quarter-finals.

In [None]:
# Predict the matches in 4ths.csv, save the predictions, then continue training
# the model on everything seen so far.
fourth_phase = pd.read_csv(dataset_path + "/4ths.csv")
# Select the same columns, in the same order, as the training frame `features`.
# `assign` returns a new frame, so "recent" is overridden without writing into
# a slice of `fourth_phase` (which triggers SettingWithCopyWarning).
features4 = fourth_phase[features.columns].assign(recent=0.9)

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
features_array4 = features4.values

# Score all matches with one batched predict() call instead of one call per
# row; an empty file yields no scores, as the original row loop did.
if len(features_array4):
    scores = nn_model.predict(features_array4).ravel()
else:
    scores = np.empty(0)

# Binary outcome only (no 0.5 here): below 0.5 -> 0, otherwise 1.
# The column is always float here, whatever dtype the CSV column had.
label4 = pd.DataFrame(
    {"result": np.where(scores < 0.5, 0.0, 1.0)},
    index=fourth_phase.index,
)

fourth_phase = pd.concat([fourth_phase[["group", "home_team", "away_team"]], features4, label4], axis=1)
fourth_phase.to_csv(path_or_buf=dataset_path + "/4ths_results.csv")

# Train again on the historical data plus all predicted results so far.
# NOTE(review): the new labels are the model's own predictions, not real outcomes.
x_train = np.concatenate([features.values, features2.values, features3.values, features4.values])
y_train = np.concatenate([label.values, label2.values, label3.values, label4.values])

nn_model.fit(
    x_train, y_train, batch_size=10,
    epochs=5
)

In [None]:
# Before running the next cell, make sure the file "semis.csv" is in the dataset directory.
# It must contain the matches of the semi-finals.

In [None]:
# Predict the matches in semis.csv, save the predictions, then continue
# training the model on everything seen so far.
semis_phase = pd.read_csv(dataset_path + "/semis.csv")
# Select the same columns, in the same order, as the training frame `features`.
# `assign` returns a new frame, so "recent" is overridden without writing into
# a slice of `semis_phase` (which triggers SettingWithCopyWarning).
features5 = semis_phase[features.columns].assign(recent=1)

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
features_array5 = features5.values

# Score all matches with one batched predict() call instead of one call per
# row; an empty file yields no scores, as the original row loop did.
if len(features_array5):
    scores = nn_model.predict(features_array5).ravel()
else:
    scores = np.empty(0)

# Binary outcome only (no 0.5 here): below 0.5 -> 0, otherwise 1.
# The column is always float here, whatever dtype the CSV column had.
label5 = pd.DataFrame(
    {"result": np.where(scores < 0.5, 0.0, 1.0)},
    index=semis_phase.index,
)

semis_phase = pd.concat([semis_phase[["group", "home_team", "away_team"]], features5, label5], axis=1)
semis_phase.to_csv(path_or_buf=dataset_path + "/semis_results.csv")

# Train again on the historical data plus all predicted results so far.
# NOTE(review): the new labels are the model's own predictions, not real outcomes.
x_train = np.concatenate([features.values, features2.values, features3.values, features4.values, features5.values])
y_train = np.concatenate([label.values, label2.values, label3.values, label4.values, label5.values])

nn_model.fit(
    x_train, y_train, batch_size=10,
    epochs=5
)

In [None]:
# Before running the next cell, make sure the file "final.csv" is in the dataset directory.
# It must contain the match(es) of the final.

In [None]:
# Predict the matches in final.csv and save the predictions. This is the last
# stage, so the model is not trained again afterwards.
final_phase = pd.read_csv(dataset_path + "/final.csv")
# Select the same columns, in the same order, as the training frame `features`.
# `assign` returns a new frame, so "recent" is overridden without writing into
# a slice of `final_phase` (which triggers SettingWithCopyWarning).
features6 = final_phase[features.columns].assign(recent=1)

# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
features_array6 = features6.values

# Score all matches with one batched predict() call instead of one call per
# row; an empty file yields no scores, as the original row loop did.
if len(features_array6):
    scores = nn_model.predict(features_array6).ravel()
else:
    scores = np.empty(0)

# Binary outcome only (no 0.5 here): below 0.5 -> 0, otherwise 1.
# The column is always float here, whatever dtype the CSV column had.
label6 = pd.DataFrame(
    {"result": np.where(scores < 0.5, 0.0, 1.0)},
    index=final_phase.index,
)

final_phase = pd.concat([final_phase[["group", "home_team", "away_team"]], features6, label6], axis=1)
final_phase.to_csv(path_or_buf=dataset_path + "/final_results.csv")