In [178]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pickle

# Read the raw event log; the path is relative to the notebook's working directory.
df = pd.read_csv("events.csv")
# Preview the first three rows to check that the file parsed as expected.
df.head(3)

Unnamed: 0,id_odsp,id_event,sort_order,time,text,event_type,event_type2,side,event_team,opponent,...,player_in,player_out,shot_place,shot_outcome,is_goal,location,bodypart,assist_method,situation,fast_break
0,UFot0hit/,UFot0hit1,1,2,Attempt missed. Mladen Petric (Hamburg) left f...,1,12.0,2,Hamburg SV,Borussia Dortmund,...,,,6.0,2.0,0,9.0,2.0,1,1.0,0
1,UFot0hit/,UFot0hit2,2,4,"Corner, Borussia Dortmund. Conceded by Dennis...",2,,1,Borussia Dortmund,Hamburg SV,...,,,,,0,,,0,,0
2,UFot0hit/,UFot0hit3,3,4,"Corner, Borussia Dortmund. Conceded by Heiko ...",2,,1,Borussia Dortmund,Hamburg SV,...,,,,,0,,,0,,0


In [179]:
# Lookup from the numeric `event_type` code to a human-readable event name.
event_type_dict = {
    0: 'Announcement',
    1: 'Attempt',
    2: 'Corner',
    3: 'Foul',
    4: 'Yellow card',
    5: 'Second yellow card',
    6: 'Red card',
    7: 'Substitution',
    8: 'Free kick won',
    9: 'Offside',
    10: 'Hand ball',
    11: 'Penalty conceded',
}
df['event_name'] = df.event_type.map(event_type_dict)

# Event kinds that are kept for modelling; every other row is dropped.
kept_events = ['Attempt', 'Corner', 'Foul', 'Yellow card', 'Red card',
               'Offside', 'Hand ball', 'Penalty conceded']

# .copy() detaches the filtered frame from the original one. Later cells add
# columns to `df`; without the copy pandas treats those writes as assignments
# on a slice (SettingWithCopyWarning, currently hidden by the warnings filter).
df = df[df.event_name.isin(kept_events)].copy()
df.shape

(651239, 23)

In [180]:
# Resolve which club is at home / away for every event row.
# `side == 1` means the acting team (`event_team`) is the home side and
# `side == 2` means it is the away side; otherwise the `opponent` fills the slot.
# np.where evaluates the whole column at once instead of calling a Python
# lambda per row, which is far cheaper on a frame with hundreds of thousands of rows.
df['home_team'] = np.where(df['side'] == 1, df['event_team'], df['opponent'])
df['away_team'] = np.where(df['side'] == 2, df['event_team'], df['opponent'])

In [181]:
# Base label: "<event name>_<side>".
event_part = df['event_name'].astype(str)
side_part = df['side'].astype(str)
df['final_event_name'] = event_part + '_' + side_part

def attempt_outcome(x):
    """Return the final label for one event row.

    Attempts get the value of `is_goal` appended to their label
    (e.g. "Attempt_1_0" / "Attempt_1_1"); every other event keeps the
    label it already has in `final_event_name`.
    """
    label = x['final_event_name']
    if x['event_name'] != 'Attempt':
        return label
    return str(label) + '_' + str(x['is_goal'])

df['final_event_name'] = df.apply(attempt_outcome, axis=1)
df['final_event_name'].unique()

array(['Attempt_2_0', 'Corner_1', 'Foul_1', 'Hand ball_2', 'Corner_2',
       'Foul_2', 'Attempt_1_0', 'Attempt_1_1', 'Offside_1',
       'Yellow card_2', 'Offside_2', 'Attempt_2_1', 'Yellow card_1',
       'Hand ball_1', 'Red card_1', 'Penalty conceded_1',
       'Penalty conceded_2', 'Red card_2'], dtype=object)

In [182]:
# List the distinct labels that describe an attempt (shot), goal or no goal.
distinct_labels = df['final_event_name'].unique()
[label for label in distinct_labels if "Attempt" in label]

['Attempt_2_0', 'Attempt_1_0', 'Attempt_1_1', 'Attempt_2_1']

In [183]:
# Per-event goal flags: 1 when the row is a scored attempt by that side, else 0.
# A vectorized comparison replaces the per-element lambda.
df["home_goals"] = (df['final_event_name'] == "Attempt_1_1").astype('int64')
df["away_goals"] = (df['final_event_name'] == "Attempt_2_1").astype('int64')

cols = ['id_odsp', 'final_event_name', 'home_team', 'away_team', 'home_goals', 'away_goals']

# .copy() so the column assignments below write to an independent frame and
# not to a slice of `df`.
df_info = df[cols].copy()

# Running score inside each match (`id_odsp` identifies the match).
df_info['home_goals_cum'] = df_info.groupby('id_odsp')['home_goals'].cumsum()
df_info['away_goals_cum'] = df_info.groupby('id_odsp')['away_goals'].cumsum()

# NOTE: `time` here is the 1-based ordinal of the event within its match,
# not the match minute stored in the raw `time` column of `df`.
df_info['time'] = df.groupby('id_odsp').cumcount() + 1
df_info.head()

Unnamed: 0,id_odsp,final_event_name,home_team,away_team,home_goals,away_goals,home_goals_cum,away_goals_cum,time
0,UFot0hit/,Attempt_2_0,Borussia Dortmund,Hamburg SV,0,0,0,0,1
1,UFot0hit/,Corner_1,Borussia Dortmund,Hamburg SV,0,0,0,0,2
2,UFot0hit/,Corner_1,Borussia Dortmund,Hamburg SV,0,0,0,0,3
3,UFot0hit/,Foul_1,Borussia Dortmund,Hamburg SV,0,0,0,0,4
5,UFot0hit/,Hand ball_2,Borussia Dortmund,Hamburg SV,0,0,0,0,5


In [184]:
# Turn the string event labels into integer class ids and persist the mapping
# so predictions can be decoded back to label names later.
encoder = LabelEncoder().fit(df_info['final_event_name'])
df_info['target'] = encoder.transform(df_info['final_event_name'])

with open('encoders/target_label_encoder.pkl', 'wb') as files:
    pickle.dump(encoder, files)

In [185]:
# encoding team names
# Fit ONE encoder on the union of home and away names. Fitting separately on
# each column (as before) re-learns the vocabulary on the second call, so the
# two id columns may use different name -> id mappings and the pickled encoder
# only reflects the away-team column.
encoder = LabelEncoder()
encoder.fit(pd.concat([df_info['home_team'], df_info['away_team']], ignore_index=True))

df_info['home_team_id'] = encoder.transform(df_info['home_team'])
df_info['away_team_id'] = encoder.transform(df_info['away_team'])

df_info['home_team_id'] = df_info['home_team_id'].astype('category')
df_info['away_team_id'] = df_info['away_team_id'].astype('category')

# Persist the shared mapping so team ids can be decoded back to names later.
with open('encoders/team_name_encoder.pkl', 'wb') as files:
    pickle.dump(encoder, files)

In [186]:
# Snapshot the engineered frame to disk, then split it into model inputs and target.
feature_cols = [
    'home_team_id',
    'away_team_id',
    'time',
    'home_goals_cum',
    'away_goals_cum',
]
df_info.to_csv('data/data.csv')

y = df_info['target']
X = df_info[feature_cols]

In [187]:
# Fixed seed so the split and the fitted tree are identical on every
# "Restart & Run All"; without it each run produces a different model.
RANDOM_STATE = 42

# Split the data into training and testing sets (80 % / 20 %)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Create a decision tree classifier; depth is capped at 10 levels
clf = DecisionTreeClassifier(max_depth=10, random_state=RANDOM_STATE)

# Fit the classifier to the training data
clf.fit(X_train, y_train)

# Predict the classes of the testing data
y_pred = clf.predict(X_test)

# Persist the fitted model for later cells / other scripts
with open('models/classifier.pkl', 'wb') as files:
    pickle.dump(clf, files)

In [188]:
# Number of kept events per value of `side`.
df['side'].value_counts()

1    341922
2    309317
Name: side, dtype: int64

In [189]:
# Distinct values remaining in `side` and `event_type` after filtering.
tuple(df[column].nunique() for column in ('side', 'event_type'))

(2, 8)

In [190]:
from utils import get_encoder, get_model

# Load the model and both encoders through the project helpers.
model = get_model()
target_encoder, team_encoder = (
    get_encoder(k=encoder_key) for encoder_key in ('target_label', 'team_name')
)

# Re-read the engineered data from disk and keep only the model inputs.
feature_cols = ['home_team_id', 'away_team_id', 'time', 'home_goals_cum', 'away_goals_cum']
saved_frame = pd.read_csv("data/data.csv", index_col=0)
df_info = saved_frame[feature_cols]

# Check that the loaded model predicts on the held-out rows.
y_pred = model.predict(X_test)

In [194]:
# Display the reloaded feature frame (only the five feature columns are left at this point).
df_info

Unnamed: 0,home_team_id,away_team_id,time,home_goals_cum,away_goals_cum
0,20,58,1,0,0
1,20,58,2,0,0
2,20,58,3,0,0
3,20,58,4,0,0
5,20,58,5,0,0
...,...,...,...,...,...
941002,11,110,77,1,0
941003,11,110,78,1,0
941005,11,110,79,1,0
941006,11,110,80,1,0


In [244]:
import random
def event_prediction(test_row, clf):
    """Sample one event class index from the classifier's predicted distribution.

    Parameters
    ----------
    test_row : array-like
        One feature row; it is reshaped to ``(1, n_features)`` before being
        handed to ``clf.predict_proba``.
    clf : object
        Any fitted estimator exposing ``predict_proba``.

    Returns
    -------
    int
        Index of the sampled class (position in the ``predict_proba`` output).

    Notes
    -----
    Probabilities are converted to integer percentage weights, a few
    hand-tuned corrections are applied, and a single weighted random draw is
    made with the ``random`` module.
    """
    probs = clf.predict_proba(np.asarray(test_row).reshape(1, -1)).reshape(-1)

    # Round to the nearest whole percent. np.rint avoids the truncation that a
    # bare astype(int) applies to values such as 0.29 * 100 == 28.999...
    weights = np.rint(probs * 100).astype(int)

    # Hand-tuned corrections in percentage points, keyed by class index.
    # NOTE(review): with the 18 labels shown in the notebook and
    # LabelEncoder's sorted class order these appear to be 0=Attempt_1_0,
    # 1=Attempt_1_1, 3=Attempt_2_1, 6=Foul_1, 7=Foul_2 - confirm against
    # target_encoder.classes_ before changing them.
    adjustments = {0: -1, 6: -2, 7: -2, 1: 3, 3: 2}
    for class_ix, delta in adjustments.items():
        # Skip indices the model does not have instead of raising IndexError.
        if class_ix < len(weights):
            weights[class_ix] += delta

    # A correction may push a weight below zero; such a class must simply
    # never be drawn.
    weights = np.clip(weights, 0, None)

    # Degenerate case: nothing left to draw from -> take the most likely class.
    if weights.sum() == 0:
        return int(np.argmax(probs))

    # One weighted draw; equivalent to picking uniformly from a pool that
    # holds every index `weight` times, without materialising that pool.
    return random.choices(range(len(weights)), weights=weights.tolist(), k=1)[0]

In [245]:
# Class probabilities the model assigns to feature row 99.
probe_row = np.array(X.iloc[99]).reshape(1, -1)
model.predict_proba(probe_row)[0]

array([0.1675948 , 0.        , 0.15000442, 0.        , 0.07319014,
       0.0649695 , 0.18969327, 0.19420136, 0.00857421, 0.01025369,
       0.03314771, 0.03235216, 0.00309379, 0.00132591, 0.00026518,
       0.00070715, 0.03252895, 0.03809776])

In [250]:
# Draw one event class index for feature row 99. event_prediction samples
# randomly, so the value shown can differ between runs.
xx = event_prediction(np.array(X.iloc[99]), model)
xx

16

In [251]:
# Decode class index 3 back to its event label.
class_ids = np.array([3], dtype=np.int64)
target_encoder.inverse_transform(class_ids)[0]

'Attempt_2_1'

In [254]:
#def football_match():
# NOTE(review): the next two statements are not read anywhere in this cell;
# they are kept only in case other cells rely on these names.
player_score, player_sets, player_games = dict(), dict(), dict()

i = 0

# Draw two DISTINCT team ids. Sampling without replacement matters: if both
# sides were the same team, `tot_goals` would hold a single key, the two
# scores could never differ and the loop below would never terminate.
home_team, away_team = np.random.choice(
    df_info['home_team_id'].unique(), size=2, replace=False
)
t1, t2 = home_team, away_team

# Decode ids with the team encoder loaded alongside `model` and
# `target_encoder`, not with whatever `encoder` happens to be left in the
# kernel. inverse_transform expects a 1-D array, hence reshape(-1).
t1_name = team_encoder.inverse_transform(home_team.reshape(-1))[0]
t2_name = team_encoder.inverse_transform(away_team.reshape(-1))[0]
display_df = pd.DataFrame(columns=[t1_name, t2_name])
tot_goals = {t1_name: 0, t2_name: 0}
player1, player2 = t1_name, t2_name
win_prob = {player1: "NA", player2: "NA"}

clock = 0

time_limit = 90


# One simulated event per clock tick. When the limit is reached the match
# ends if the scores differ; otherwise another 30 ticks are added (repeatedly,
# until somebody leads at a limit).
while True:
    if clock == time_limit:
        if tot_goals[t1_name] != tot_goals[t2_name]:
            winner = max(tot_goals, key=tot_goals.get)
            break
        time_limit += 30

    # Feature order must match the training columns:
    # home id, away id, time, home goals so far, away goals so far.
    test_row = np.array([t1, t2, clock, tot_goals[t1_name], tot_goals[t2_name]])
    event = event_prediction(test_row, model)
    event_name = target_encoder.inverse_transform(np.int64(event).reshape(-1))[0]

    # Only scored attempts change the scoreboard.
    if event_name == "Attempt_1_1":
        tot_goals[t1_name] += 1
    elif event_name == "Attempt_2_1":
        tot_goals[t2_name] += 1

    # Every printed line now belongs to an event simulated in this very
    # iteration (previously a stale event was re-printed on a tie at the limit).
    clock += 1
    print(clock, event_name)
    print(tot_goals)

1 Corner_1
{'Schalke 04': 0, 'Bordeaux': 0}
2 Foul_1
{'Schalke 04': 0, 'Bordeaux': 0}
3 Offside_2
{'Schalke 04': 0, 'Bordeaux': 0}
4 Attempt_2_0
{'Schalke 04': 0, 'Bordeaux': 0}
5 Offside_1
{'Schalke 04': 0, 'Bordeaux': 0}
6 Corner_2
{'Schalke 04': 0, 'Bordeaux': 0}
7 Attempt_1_0
{'Schalke 04': 0, 'Bordeaux': 0}
8 Offside_1
{'Schalke 04': 0, 'Bordeaux': 0}
9 Attempt_2_0
{'Schalke 04': 0, 'Bordeaux': 0}
10 Offside_1
{'Schalke 04': 0, 'Bordeaux': 0}
11 Attempt_2_1
{'Schalke 04': 0, 'Bordeaux': 1}
12 Hand ball_2
{'Schalke 04': 0, 'Bordeaux': 1}
13 Foul_2
{'Schalke 04': 0, 'Bordeaux': 1}
14 Foul_2
{'Schalke 04': 0, 'Bordeaux': 1}
15 Foul_2
{'Schalke 04': 0, 'Bordeaux': 1}
16 Foul_1
{'Schalke 04': 0, 'Bordeaux': 1}
17 Attempt_1_1
{'Schalke 04': 1, 'Bordeaux': 1}
18 Attempt_2_1
{'Schalke 04': 1, 'Bordeaux': 2}
19 Yellow card_2
{'Schalke 04': 1, 'Bordeaux': 2}
20 Foul_2
{'Schalke 04': 1, 'Bordeaux': 2}
21 Attempt_1_1
{'Schalke 04': 2, 'Bordeaux': 2}
22 Foul_2
{'Schalke 04': 2, 'Bordeaux': 2}
