In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from scipy.stats import poisson
import statistics as st
import matplotlib.pyplot as plt

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

print(tf.__version__)

2.6.0


In [3]:
caf = pd.read_csv("/Users/abhijotsingh/Desktop/AtlassianDatathon/CAF/INPUT/caf_input.csv")
print(caf)
# home_xg = conmebol["H_xG"]
# away_xg = conmebol["A_xG"]

            date     home_team   away_team  home_score  away_score  H_rank  \
0     11/08/2010       Algeria       Gabon           1           2      34   
1     11/08/2010         Benin       Niger           0           0      67   
2     11/08/2010  Burkina Faso       Congo           3           0      41   
3     11/08/2010         Egypt    DR Congo           6           3      10   
4     11/08/2010          Mali      Guinea           0           2      62   
...          ...           ...         ...         ...         ...     ...   
1595  13/07/2021        Malawi     Namibia           1           1     118   
1596  14/07/2021  South Africa      Zambia           0           0      74   
1597  14/07/2021    Mozambique     Namibia           1           0     113   
1598  16/07/2021  South Africa  Mozambique           3           0      74   
1599  18/07/2021  South Africa     Senegal           0           0      74   

      A_rank  Rank_value   Home_GF   Away_GA   Home_GA   Away_G

In [4]:
# Regression model - Neural network for home_xG, away_xg
home_xg_df = caf[['home_score', 'H_attack', 'A_defence']]
away_xg_df = caf[['away_score', 'H_defence', 'A_attack']]

# Splitting dataset
train_homexG = home_xg_df.sample(frac=0.8, random_state = 0)
test_homexG = home_xg_df.drop(train_homexG.index)

train_awayxG = away_xg_df.sample(frac=0.8, random_state = 0)
test_awayxG = away_xg_df.drop(train_awayxG.index)

# For home_xG
train_features = train_homexG.copy()
test_features = test_homexG.copy()

train_labels = train_features.pop('home_score')
test_labels = test_features.pop('home_score')

# For away_xG
train_features2 = train_awayxG.copy()
test_features2 = test_awayxG.copy()

train_labels2 = train_features2.pop('away_score')
test_labels2 = test_features2.pop('away_score')

In [5]:
# Pre-processing, home / away
normalizer = preprocessing.Normalization(axis=-1)
normalizer.adapt(np.array(train_features))

normalizer2 = preprocessing.Normalization(axis=-1)
normalizer2.adapt(np.array(train_features2))

# Model functions, home / away
# home
linear_model = tf.keras.Sequential([
    normalizer,
    layers.Dense(units=1)
])
# away
linear_model2 = tf.keras.Sequential([
    normalizer2,
    layers.Dense(units=1)
])

def build_and_compile_model(norm):
  model = keras.Sequential([
      norm,
      layers.Dense(64, activation='relu'),
      layers.Dense(64, activation='relu'),
      layers.Dense(1)
  ])

  model.compile(loss='mean_absolute_error',
                optimizer=tf.keras.optimizers.Adam(0.0025))
  return model

# Building model, home / away
dnn_model = build_and_compile_model(normalizer)
dnn_model.summary()

dnn_model2 = build_and_compile_model(normalizer2)
dnn_model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization (Normalization (None, 2)                 5         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                192       
_________________________________________________________________
dense_3 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 65        
Total params: 4,422
Trainable params: 4,417
Non-trainable params: 5
_________________________________________________________________
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization_1 (Normalizati (None, 2)                 5         
______________________________

In [6]:
%%time
history = dnn_model.fit(
    train_features, train_labels,
    validation_split=0.2,
    verbose=0, epochs=1500)

# training model home

CPU times: user 1min 16s, sys: 13.5 s, total: 1min 29s
Wall time: 58.3 s


In [7]:
%%time
history = dnn_model2.fit(
    train_features2, train_labels2,
    validation_split=0.2,
    verbose=0, epochs=1500)

# training model away

CPU times: user 1min 10s, sys: 12.2 s, total: 1min 22s
Wall time: 51.9 s


In [8]:
test_results = {}
test_results['dnn_model'] = dnn_model.evaluate(test_features, test_labels, verbose=0)
pd.DataFrame(test_results, index=['Mean absolute error [home_score]']).T

test_results2 = {}
test_results2['dnn_model2'] = dnn_model2.evaluate(test_features2, test_labels2, verbose=0)
pd.DataFrame(test_results2, index=['Mean absolute error [away_score]']).T


Unnamed: 0,Mean absolute error [away_score]
dnn_model2,0.748965


In [9]:
# Predicting home_xG and away_xG
home_xg_features = home_xg_df[['H_attack', 'A_defence']]
pred_home_xg = dnn_model.predict(home_xg_features).flatten()
pred_home_xg = [0 if i < 0 else i for i in pred_home_xg]
# print(pred_home_xg)
# pred_home_xg = np.array(pred_home_xg, dtype=np.float32)
# pred_home_xg = pred_home_xg * 1.453

away_xg_features = away_xg_df[['H_defence', 'A_attack']]
pred_away_xg = dnn_model2.predict(away_xg_features).flatten()
pred_away_xg = [0 if i < 0 else i for i in pred_away_xg]
# pred_away_xg = np.array(pred_away_xg, dtype=np.float32)
# pred_away_xg = pred_away_xg * 0.96

In [10]:
# Run the poisson results for the home team
home_outcome = []

for i in range(len(pred_home_xg)):
    expected = pred_home_xg[i]
    poi = poisson.rvs(expected, size=10000)
    # Select most common outcome and append to the poisson outcome list
    common = st.mode(poi)
    home_outcome.append(common)

home_outcome

[1,
 2,
 2,
 1,
 1,
 2,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 2,
 2,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 3,
 1,
 1,
 1,
 0,
 2,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 2,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 3,
 2,
 0,
 2,
 2,
 3,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 2,
 2,
 1,
 0,
 0,
 2,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 3,
 1,
 0,
 0,
 1,
 0,
 1,
 2,
 0,
 1,
 1,
 0,
 1,
 2,
 3,
 0,
 1,
 0,
 2,
 1,
 2,
 2,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 2,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 2,
 1,
 1,
 0,
 0,
 2,
 0,
 1,
 1,
 3,
 0,
 1,
 1,
 3,
 1,
 0,
 0,
 0,
 0,
 0,
 2,
 1,
 0,
 0,
 0,
 1,
 0,
 2,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 2,
 2,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 2,
 0,
 2,
 2,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 3,


In [11]:
# Run the poisson results for the away team
away_outcome = []

for i in range(len(pred_away_xg)):
    expected = pred_away_xg[i] + 0.25
    poi = poisson.rvs(expected, size=10000)
    # Select most common outcome and append to the poisson outcome list
    common = st.mode(poi)
    away_outcome.append(common)

away_outcome

[0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 2,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 3,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 2,
 1,
 3,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 2,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,


In [12]:
caf["Poisson_H_xG"] = home_outcome
caf["Poisson_A_xG"] = away_outcome
caf_output = caf[['date', 'home_team', 'away_team', 'home_score', 'away_score','H_rank', 'A_rank', 'Poisson_H_xG', 'Poisson_A_xG']]
ActualRes = []
outcomes = ["H_WIN", "DRAW", "A_WIN"]
for i in range(len(away_outcome)):
    if caf_output["home_score"][i] > caf_output["away_score"][i]:
        ActualRes.append(outcomes[0])
    elif caf_output["home_score"][i] == caf_output["away_score"][i]:
        ActualRes.append(outcomes[1])
    else:
        ActualRes.append(outcomes[2])

caf_output["Actual_Results"] = ActualRes

Pred_res = []
outcomes = ["H_WIN", "DRAW", "A_WIN"]
for i in range(len(away_outcome)):
    if caf_output["Poisson_H_xG"][i] > caf_output["Poisson_A_xG"][i]:
        Pred_res.append(outcomes[0])
    elif caf_output["Poisson_H_xG"][i] == caf_output["Poisson_A_xG"][i]:
        Pred_res.append(outcomes[1])
    else:
        Pred_res.append(outcomes[2])

caf_output["Pred_Results"] = Pred_res

outcome_match = []
for i in range(len(away_outcome)):
    if caf_output["Actual_Results"][i] == caf_output["Pred_Results"][i]:
        outcome_match.append(1)
    else:
        outcome_match.append(0)

caf_output["Outcome_match"] = outcome_match

success = st.mean(outcome_match)
print("The percentage of matching outcome is: " + str(success))
caf_output

The percentage of matching outcome is: 0.49625


Unnamed: 0,date,home_team,away_team,home_score,away_score,H_rank,A_rank,Poisson_H_xG,Poisson_A_xG,Actual_Results,Pred_Results,Outcome_match
0,11/08/2010,Algeria,Gabon,1,2,34,36,1,0,A_WIN,H_WIN,0
1,11/08/2010,Benin,Niger,0,0,67,124,2,0,DRAW,H_WIN,0
2,11/08/2010,Burkina Faso,Congo,3,0,41,114,2,0,H_WIN,H_WIN,1
3,11/08/2010,Egypt,DR Congo,6,3,10,127,1,1,H_WIN,DRAW,0
4,11/08/2010,Mali,Guinea,0,2,62,71,1,1,A_WIN,DRAW,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1595,13/07/2021,Malawi,Namibia,1,1,118,114,0,0,DRAW,DRAW,1
1596,14/07/2021,South Africa,Zambia,0,0,74,88,0,1,DRAW,A_WIN,0
1597,14/07/2021,Mozambique,Namibia,1,0,113,114,1,0,H_WIN,H_WIN,1
1598,16/07/2021,South Africa,Mozambique,3,0,74,113,1,0,H_WIN,H_WIN,1


In [13]:
#caf.to_csv("/Users/abhijotsingh/Desktop/AtlassianDatathon/CAF_results_dnn_rating.csv", index=False)