In [18]:
import pandas as pd
import numpy as np
import keras
from keras import layers
from keras import ops
from keras import utils
from keras.optimizers import Adam, SGD, RMSprop
from keras.metrics import MeanSquaredError

In [2]:
df = pd.read_csv('dataset/results.csv')

# Drop rows where at least one elelemt is missong
df = df.dropna()

# Convert data cplumn to date time 
df['date'] = pd.to_datetime(df['date'])

df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 47075 entries, 0 to 47074
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        47075 non-null  datetime64[ns]
 1   home_team   47075 non-null  object        
 2   away_team   47075 non-null  object        
 3   home_score  47075 non-null  float64       
 4   away_score  47075 non-null  float64       
 5   tournament  47075 non-null  object        
 6   city        47075 non-null  object        
 7   country     47075 non-null  object        
 8   neutral     47075 non-null  bool          
dtypes: bool(1), datetime64[ns](1), float64(2), object(5)
memory usage: 3.3+ MB


In [3]:
# Get the names of all the countries
unique_names = df[['home_team', 'away_team']].stack().drop_duplicates()
# Create a list of country names so they can be indexed
name_list = list(unique_names)

# Add day, month and year as seperate columns
df['day'] = [day_nr for day_nr in df['date'].dt.day]
df['month'] = [month_nr for month_nr in df['date'].dt.month]
df['year'] = [year_nr for year_nr in df['date'].dt.year]

#Add the index numbers into the dataframe
df['home_team_index'] = [name_list.index(team_name) for team_name in df['home_team']]
df['away_team_index'] = [name_list.index(team_name) for team_name in df['away_team']]



In [4]:
# Shuffle the data set
df =df.sample(frac=1)

# Create label dataset and feature dataset and convert to numpy arrays
df_features = df[['home_team_index','away_team_index','day','month', 'year']].to_numpy().astype(float)
df_labels = df[['home_score','away_score']].to_numpy().astype(float)

# Split it into a train-, validate- and test set
nrOfSamples = len(df_features)
train_size = int(nrOfSamples * 0.7)     # Train set is 70% of total set
val_size = int(train_size * 0.3)        # Val set is 30% of train set
test_size = int(nrOfSamples * 0.3)      # Test set is 30% of the total set

# split data set into train and validate
train_features, val_features = df_features[:train_size], df_features[train_size:]
train_labels, val_labels = df_labels[:train_size], df_labels[train_size:]


In [24]:
# Create a model
model = keras.Sequential(
    [
        keras.Input(shape=(5,)),
        layers.Dense(5, activation = "relu"),
        layers.Dense(16, activation = "relu"),
        layers.Dense(16, activation = "relu"),
        layers.Dense(2),
    ]
)

In [25]:
model.compile(
    #optimizer=Adam(learning_rate=0.001),
    #loss=keras.losses.CategoricalCrossentropy(),
    #metrics=[keras.metrics.CategoricalAccuracy()]

    optimizer=RMSprop(learning_rate=0.01),
    loss="mse",
    metrics=[MeanSquaredError()]
)

In [26]:
history=model.fit(
    train_features,
    train_labels,
    batch_size= 64,
    epochs=5, 
    validation_data=(val_features, val_labels)
)

Epoch 1/5
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - loss: 2997.4482 - mean_squared_error: 2997.4482 - val_loss: 2.9898 - val_mean_squared_error: 2.9898
Epoch 2/5
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - loss: 2.9813 - mean_squared_error: 2.9813 - val_loss: 2.5491 - val_mean_squared_error: 2.5491
Epoch 3/5
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - loss: 2.7044 - mean_squared_error: 2.7044 - val_loss: 2.5139 - val_mean_squared_error: 2.5139
Epoch 4/5
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 2.6122 - mean_squared_error: 2.6122 - val_loss: 2.4922 - val_mean_squared_error: 2.4922
Epoch 5/5
[1m515/515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 2.5577 - mean_squared_error: 2.5577 - val_loss: 2.5191 - val_mean_squared_error: 2.5191


In [21]:
for layer in model.layers:
    print (layer.get_config())
    print (layer.get_weights())       

{'name': 'dense_3', 'trainable': True, 'dtype': 'float32', 'units': 5, 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None}, 'kernel_regularizer': None, 'bias_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}
[array([[-2.93622303e+00, -7.73256302e-01, -1.23648383e-01,
         2.56179333e-01, -6.69052720e-01],
       [ 2.87414861e+00, -2.20062017e-01, -1.07229030e+00,
        -1.21932030e-02,  2.43397474e-01],
       [ 4.63437945e-01,  1.02935016e-01, -9.39563394e-01,
         6.83313370e-01, -3.16611648e-01],
       [ 2.70564175e+00,  3.93784761e-01, -8.70844364e-01,
        -3.03205669e-01, -3.99453491e-01],
       [-3.39667536e-02, -6.07432008e-01, -6.79071993e-04,
        -4.76921946e-01, -4.35741842e-01]], dtype=float32), a

In [42]:
home_team_index = name_list.index('Germany')
away_team_index = name_list.index('Scotland')

# Get records of all matches 
df_matches = df[((df['home_team_index']==home_team_index) & (df['away_team_index']==away_team_index)) |
                ((df['home_team_index']==away_team_index) & (df['away_team_index']==home_team_index))]

# Get wins of home_team
df_wins_as_home_team = df_matches[(df['home_team_index'] == home_team_index) & (df['home_score'] > df['away_score'])]
df_wins_as_away_team = df_matches[(df['away_team_index'] == home_team_index) & (df['home_score'] < df['away_score'])]

# Get equals
df_equals = df_matches[df['home_score'] == df['away_score']]

wins_home = len(df_wins_as_home_team) + len(df_wins_as_away_team)
equals = len(df_equals)
wins_away = len(df_matches) - wins_home - equals
print (wins_home, equals, wins_away)



8 5 4


  df_wins_as_home_team = df_matches[(df['home_team_index'] == home_team_index) & (df['home_score'] > df['away_score'])]
  df_wins_as_away_team = df_matches[(df['away_team_index'] == home_team_index) & (df['home_score'] < df['away_score'])]
  df_equals = df_matches[df['home_score'] == df['away_score']]


In [73]:
# Create list of matches
# Match day 1
matches = [['Germany', 'Scotland'], ['Hungary','Switzerland'],['Spain', 'Croatia'],['Italy','Albania']]
matches.extend([['Poland','Netherlands'],['Slovenia','Denmark'], ['Serbia', 'England'], ['Romania', "Ukraine"]])
matches.extend([['Belgium', 'Slovakia'],['Austria', 'France'], ['Turkey', 'Georgia'], ['Portugal', 'Czech Republic']])

# Match day 2
matches.extend([['Croatia', 'Albania'], ['Germany','Hungary'], ['Scotland', 'Switzerland'], ['Slovenia', 'Serbia']])
matches.extend([['Denmark', 'England'],['Spain', 'Italy'], ['Slovakia', 'Ukraine'], ['Poland', 'Austria']])
matches.extend([['Netherlands', 'France'], ['Georgia', 'Czech Republic'], ['Turkey', 'Portugal'], ['Belgium', 'Romania']])

# Match day 3
matches.extend([['Switzerland', 'Germany'], ['Scotland', 'Hungary'], ['Albania', 'Spain'], ['Croatia', 'Italy']])
matches.extend([['France', 'Poland'], ['Netherlands', 'Austria'], ['Denmark', 'Serbia'], ['England', 'Slovenia']])
matches.extend([['Slovakia', 'Romania'], ['Ukraine', 'Belgium'], ['Georgia', 'Portugal', 'Czech Republic', 'Turkey']])

for match in matches:
    home_team_index = name_list.index(match[0])
    away_team_index = name_list.index(match[1])

    # Get records of all matches 
    df_matches = df[((df['home_team_index']==home_team_index) & (df['away_team_index']==away_team_index)) |
                    ((df['home_team_index']==away_team_index) & (df['away_team_index']==home_team_index))]

    # Get wins of home_team
    df_wins_as_home_team = df_matches[(df_matches['home_team_index'] == home_team_index) & (df_matches['home_score'] > df_matches['away_score'])]
    df_wins_as_away_team = df_matches[(df_matches['away_team_index'] == home_team_index) & (df_matches['home_score'] < df_matches['away_score'])]

    # Get equals
    df_equals = df_matches[df_matches['home_score'] == df_matches['away_score']] 

    wins_home = len(df_wins_as_home_team) + len(df_wins_as_away_team)
    equals = len(df_equals) 
    lost_home = len(df_matches) - wins_home - equals
    print (match[0], "vs", match[1], "|", match[0], "wins", wins_home, "equals", equals, "lost", lost_home)

Germany vs Scotland Germany wins 8 equals 5 lost 4
Hungary vs Switzerland Hungary wins 30 equals 5 lost 11
Spain vs Croatia Spain wins 5 equals 2 lost 3
Italy vs Albania Italy wins 4 equals 0 lost 0
Poland vs Netherlands Poland wins 3 equals 7 lost 9
Slovenia vs Denmark Slovenia wins 0 equals 1 lost 5
Serbia vs England Serbia wins 0 equals 0 lost 1
Romania vs Ukraine Romania wins 3 equals 1 lost 2
Belgium vs Slovakia Belgium wins 1 equals 2 lost 0
Austria vs France Austria wins 9 equals 3 lost 13
Turkey vs Georgia Turkey wins 3 equals 1 lost 1
Portugal vs Czech Republic Portugal wins 4 equals 0 lost 1
Croatia vs Albania Croatia wins 0 equals 0 lost 0
Germany vs Hungary Germany wins 13 equals 12 lost 11
Scotland vs Switzerland Scotland wins 8 equals 3 lost 5
Slovenia vs Serbia Slovenia wins 1 equals 6 lost 1
Denmark vs England Denmark wins 4 equals 5 lost 13
Spain vs Italy Spain wins 12 equals 16 lost 11
Slovakia vs Ukraine Slovakia wins 2 equals 3 lost 3
Poland vs Austria Poland wins 5

In [44]:
# Wedstijd Duitsland - Schotland
# Get index of home_team
home_team_index = name_list.index('Germany')
away_team_index = name_list.index('Scotland')
input_x = np.array([home_team_index, away_team_index, 14.0, 6.0, 2024.0])
input_x = input_x[None,:]
prediction = model.predict(input_x)
print(prediction)
# 1873-03-08,England,Scotland,4,2
home_team_index = name_list.index('England')
away_team_index = name_list.index('Scotland')
input_x = np.array([home_team_index, away_team_index, 8.0, 3.0, 1873.0])
input_x = input_x[None,:]
prediction = model.predict(input_x)
print(prediction)

home_team_index = name_list.index('Netherlands')
away_team_index = name_list.index('Scotland')
input_x = np.array([home_team_index, away_team_index, 17.0, 6.0, 2024.0])
input_x = input_x[None,:]
prediction = model.predict(input_x)
print(prediction)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[[1.3833847 1.3474575]]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[[1.4826127 1.273158 ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[[1.3905692 1.342078 ]]


In [62]:
print(prediction)

[[-43.069107 200.61807 ]]
