In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from scipy.stats import poisson
import statistics as st
import matplotlib.pyplot as plt

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

print(tf.__version__)

2.6.0


In [3]:
upcoming_conmebol = pd.read_csv("/Users/abhijotsingh/Desktop/AtlassianDatathon/CONMEBOL/INPUTS/upcoming_conmebol.csv")
conmebol = pd.read_csv("/Users/abhijotsingh/Desktop/AtlassianDatathon/CONMEBOL/INPUTS/conmebol_input.csv")
conmebol
# home_xg = conmebol["H_xG"]
# away_xg = conmebol["A_xG"]

Unnamed: 0,date,tournament,home_team,away_team,home_score,away_score,H_xG,A_xG,H_attack,H_defence,A_attack,A_defence
0,11/08/2010,Friendly,Bolivia,Colombia,1,1,1.179371,2.919989,0.880029,1.995878,1.710599,0.684783
1,03/09/2010,Friendly,Venezuela,Colombia,0,2,0.873540,1.726650,0.730123,1.087596,1.612716,0.750855
2,07/09/2010,Friendly,Venezuela,Ecuador,1,0,1.098438,1.128792,0.730123,1.087596,0.981654,1.017141
3,08/10/2010,Friendly,Colombia,Ecuador,1,0,1.211753,0.611160,0.805442,0.588855,0.922924,1.056784
4,17/11/2010,Friendly,Argentina,Brazil,1,0,0.558323,1.668290,1.218159,0.686382,2.388904,0.296220
...,...,...,...,...,...,...,...,...,...,...,...,...
365,03/07/2021,Copa América,Argentina,Ecuador,3,0,1.832667,0.712381,1.218159,0.686382,0.790781,1.145980
366,05/07/2021,Copa América,Brazil,Peru,1,0,1.694489,0.509514,1.368365,0.486332,0.912927,0.894166
367,06/07/2021,Copa América,Argentina,Colombia,1,1,1.457440,1.089690,1.218159,0.686382,1.519726,0.813622
368,09/07/2021,Copa América,Peru,Colombia,2,3,0.891843,2.635134,0.745421,1.659838,1.607821,0.754158


In [4]:
# Regression model - Neural network for home_xG, away_xg
home_xg_df = conmebol[['home_score', 'H_attack', 'A_defence']]
away_xg_df = conmebol[['away_score', 'H_defence', 'A_attack']]

# Splitting dataset
train_homexG = home_xg_df.sample(frac=0.8, random_state = 0)
test_homexG = home_xg_df.drop(train_homexG.index)

train_awayxG = away_xg_df.sample(frac=0.8, random_state = 0)
test_awayxG = away_xg_df.drop(train_awayxG.index)

# For home_xG
train_features = train_homexG.copy()
test_features = test_homexG.copy()

train_labels = train_features.pop('home_score')
test_labels = test_features.pop('home_score')

# For away_xG
train_features2 = train_awayxG.copy()
test_features2 = test_awayxG.copy()

train_labels2 = train_features2.pop('away_score')
test_labels2 = test_features2.pop('away_score')

In [5]:
# Pre-processing, home / away
normalizer = preprocessing.Normalization(axis=-1)
normalizer.adapt(np.array(train_features))

normalizer2 = preprocessing.Normalization(axis=-1)
normalizer2.adapt(np.array(train_features2))

# Model functions, home / away
# home
linear_model = tf.keras.Sequential([
    normalizer,
    layers.Dense(units=1)
])
# away
linear_model2 = tf.keras.Sequential([
    normalizer2,
    layers.Dense(units=1)
])

def build_and_compile_model(norm):
  model = keras.Sequential([
      norm,
      layers.Dense(64, activation='relu'),
      layers.Dense(64, activation='relu'),
      layers.Dense(1)
  ])

  model.compile(loss='mean_absolute_error',
                optimizer=tf.keras.optimizers.Adam(0.0025))
  return model

# Building model, home / away
dnn_model = build_and_compile_model(normalizer)
dnn_model.summary()

dnn_model2 = build_and_compile_model(normalizer2)
dnn_model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization (Normalization (None, 2)                 5         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                192       
_________________________________________________________________
dense_3 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 65        
Total params: 4,422
Trainable params: 4,417
Non-trainable params: 5
_________________________________________________________________
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization_1 (Normalizati (None, 2)                 5         
______________________________

In [6]:
%%time
history = dnn_model.fit(
    train_features, train_labels,
    validation_split=0.2,
    verbose=0, epochs=2000)

# training model home

CPU times: user 47.9 s, sys: 5.96 s, total: 53.9 s
Wall time: 42.1 s


In [7]:
%%time
history = dnn_model2.fit(
    train_features2, train_labels2,
    validation_split=0.2,
    verbose=0, epochs=2000)

# training model away

CPU times: user 47.1 s, sys: 5.98 s, total: 53.1 s
Wall time: 40.8 s


In [8]:
test_results = {}
test_results['dnn_model'] = dnn_model.evaluate(test_features, test_labels, verbose=0)
pd.DataFrame(test_results, index=['Mean absolute error [home_score]']).T

test_results2 = {}
test_results2['dnn_model2'] = dnn_model2.evaluate(test_features2, test_labels2, verbose=0)
pd.DataFrame(test_results2, index=['Mean absolute error [away_score]']).T


Unnamed: 0,Mean absolute error [away_score]
dnn_model2,0.911697


In [9]:
# Predicting home_xG and away_xG for upcoming fixtures
home_xg_features = upcoming_conmebol[['H_attack', 'A_defence']]
pred_home_xg = dnn_model.predict(home_xg_features).flatten()
pred_home_xg = [0 if i < 0 else i for i in pred_home_xg]

away_xg_features = upcoming_conmebol[['H_defence', 'A_attack']]
pred_away_xg = dnn_model2.predict(away_xg_features).flatten()
pred_away_xg = [0 if i < 0 else i for i in pred_away_xg]
#pred_away_xg

In [10]:
# Run the poisson results for the home team
home_outcome = []

for i in range(len(pred_home_xg)):
    expected = pred_home_xg[i]
    poi = poisson.rvs(expected, size=10000)
    # Select most common outcome and append to the poisson outcome list
    common = st.mode(poi)
    home_outcome.append(common)

#home_outcome

In [11]:
# Run the poisson results for the away team
away_outcome = []

for i in range(len(pred_away_xg)):
    expected = pred_away_xg[i]
    poi = poisson.rvs(expected, size=10000)
    # Select most common outcome and append to the poisson outcome list
    common = st.mode(poi)
    away_outcome.append(common)

#away_outcome

In [12]:
upcoming_conmebol["Poisson_H_xG"] = home_outcome
upcoming_conmebol["Poisson_A_xG"] = away_outcome
conmebol_result = upcoming_conmebol[['Date', 'Home', 'Away', 'Poisson_H_xG', 'Poisson_A_xG']]

result = []
outcomes = ["H_WIN", "DRAW", "A_WIN"]
for i in range(len(away_outcome)):
    if upcoming_conmebol["Poisson_H_xG"][i] > upcoming_conmebol["Poisson_A_xG"][i]:
        result.append(outcomes[0])
    elif upcoming_conmebol["Poisson_H_xG"][i] == upcoming_conmebol["Poisson_A_xG"][i]:
        result.append(outcomes[1])
    else:
        result.append(outcomes[2])

conmebol_result["Results"] = result

H_Points = []
A_Points = []
for i in range(len(away_outcome)):
    if result[i] == "H_WIN":
        H_Points.append(3)
        A_Points.append(0)
    elif result[i] == "DRAW":
        H_Points.append(1)
        A_Points.append(1)
    else:
        H_Points.append(0)
        A_Points.append(3)
       
        
conmebol_result["H_Points"] = H_Points
conmebol_result["A_Points"] = A_Points
conmebol_result

Unnamed: 0,Date,Home,Away,Poisson_H_xG,Poisson_A_xG,Results,H_Points,A_Points
0,7/10/21,Paraguay,Argentina,1,1,DRAW,1,1
1,7/10/21,Uruguay,Colombia,1,0,H_WIN,3,0
2,7/10/21,Ecuador,Bolivia,4,1,H_WIN,3,0
3,7/10/21,Peru,Chile,0,0,DRAW,1,1
4,7/10/21,Venezuela,Brazil,0,2,A_WIN,0,3
5,10/10/21,Colombia,Brazil,0,1,A_WIN,0,3
6,10/10/21,Venezuela,Ecuador,0,1,A_WIN,0,3
7,10/10/21,Bolivia,Peru,0,1,A_WIN,0,3
8,10/10/21,Argentina,Uruguay,1,0,H_WIN,3,0
9,10/10/21,Chile,Paraguay,0,1,A_WIN,0,3


In [24]:
# Create Points Table
result_table = pd.DataFrame(columns=['Team','GF','GA', 'Points'])
conmebol_home = conmebol_result.groupby("Home")["H_Points"].sum()
conmebol_away = conmebol_result.groupby("Away")["A_Points"].sum()
points = conmebol_home + conmebol_away
H_GF = conmebol_result.groupby("Home")["Poisson_H_xG"].sum()
A_GF = conmebol_result.groupby("Away")["Poisson_A_xG"].sum()
H_GA = conmebol_result.groupby("Home")["Poisson_A_xG"].sum()
A_GA = conmebol_result.groupby("Away")["Poisson_H_xG"].sum()

GF = H_GF + A_GF
GA = H_GA + A_GA

team_list = pd.unique(conmebol_result["Home"])
team_list_sort = sorted(team_list)

result_table["Points"] = points
result_table["GF"] = GF
result_table["GA"] = GA
result_table["Team"] = team_list_sort
result_table

Unnamed: 0_level_0,Team,GF,GA,Points
Home,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Argentina,Argentina,8,2,18
Bolivia,Bolivia,4,16,4
Brazil,Brazil,14,3,22
Chile,Chile,3,10,6
Colombia,Colombia,9,6,13
Ecuador,Ecuador,8,10,10
Paraguay,Paraguay,4,6,11
Peru,Peru,6,3,13
Uruguay,Uruguay,5,3,15
Venezuela,Venezuela,5,7,9


In [25]:
# conmebol_result.to_csv("/Users/abhijotsingh/Desktop/AtlassianDatathon/CONMEBOL/OUTPUT/conmebol_results.csv", index=False)
# result_table.to_csv("/Users/abhijotsingh/Desktop/AtlassianDatathon/CONMEBOL/OUTPUT/conmebol_points_table.csv", index=False)