In [4]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import pickle

from sklearn.preprocessing import LabelEncoder,OneHotEncoder,MinMaxScaler
from sklearn.model_selection import RepeatedKFold,train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization,Dropout
from tensorflow.keras.models import Sequential
from sklearn.metrics import accuracy_score

import api.util
from api.time_series import *
from api.predictions_converter import PredictionsConverter
from api.sofa_dp import SofaDataProvider

from IPython.display import display
pd.options.display.max_columns = None
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
dp=SofaDataProvider(load=False)
data, labels, info, df=dp.provide_data()
#df=dp._load_data()

## Today

In [5]:
dp=SofaDataProvider(today=True)
df=dp._load_data()

In [6]:
df

Unnamed: 0,awayScoreHT,country,country_id,ds,homeScoreHT,liga,mid,round,sc1,sc2,t1,t2,tid1,tid2,winner,formation_h,formation_a,home_formation,away_formation,vote_home,vote_draw,vote_away,votes,y,pop_r
0,,south-america,40,2021-01-30 20:00:00+00:00,,conmebol-libertadores,9282231,1,,,palmeiras,santos,761,712,draw,4-4-2,4-4-2,20.0,20.0,0.436580,0.177032,0.386388,43094,2021,4.0
1,,england,15,2021-01-30 17:30:00+00:00,,premier-league,8897030,21,,,arsenal,manchester united,240,226,draw,4-2-3-1,4-2-3-1,14.0,14.0,0.324676,0.198349,0.476975,72312,2021,4.0
2,,england,15,2021-01-30 20:00:00+00:00,,premier-league,8897023,21,,,southampton,aston villa,247,96,draw,4-4-2,4-2-3-1,20.0,14.0,0.402716,0.313163,0.284121,27168,2021,4.0
3,,england,15,2021-01-31 12:00:00+00:00,,premier-league,8897050,21,,,chelsea,burnley,4,3,draw,4-3-3,4-4-2,18.0,20.0,0.796923,0.130741,0.072336,13064,2021,2.0
4,,spain,42,2021-01-30 17:30:00+00:00,,laliga,8966530,21,,,valencia,elche cf,101,144,draw,4-4-2,5-4-1,20.0,24.0,0.689419,0.250863,0.059718,22020,2021,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,,romania,34,2021-01-30 18:15:00+00:00,,liga-i,8849141,20,,,fcsb,csm politehnica iashi,607,636,draw,,,,,0.767565,0.193616,0.038819,6234,2021,1.0
76,,romania,34,2021-01-31 10:00:00+00:00,,liga-i,8849243,20,,,fc botosani,fc hermannstadt,499,1499,draw,,,,,0.618750,0.303125,0.078125,320,2021,
77,,romania,34,2021-01-31 12:00:00+00:00,,liga-i,8849250,20,,,astra giurgiu,fc viitorul constanta,627,594,draw,,,,,0.320000,0.306667,0.373333,225,2021,
78,,uruguay,47,2021-01-30 21:00:00+00:00,,primera-division-clausura,9305553,3,,,deportivo maldonado,rentistas,1682,1683,draw,,,,,0.189024,0.457317,0.353659,328,2021,


In [161]:
df=dp._provide_statistics()

TypeError: _provide_statistics() missing 1 required positional argument: 'df_src'

In [112]:
df=dp._provide_statistics()
nulls=pd.DataFrame(df.isna().sum(), columns=['n'])
drop_cols=list(nulls[nulls.n>50000].sort_values(by='n').index)+['Total shots']
cols_to_keep=[x for x in df.columns if not x in drop_cols]
df=df[cols_to_keep]
df=df.reset_index(drop=True)

scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df[df.columns[4:]])
df=pd.concat([df[df.columns[:4]],pd.DataFrame(df_scaled, columns=df.columns[4:])], axis=1)

cols_stats=['possession', 'shont', 'shofft', 'corners', 'offsides', 'fouls', 'cards', 'gksaves']
df1=df[df['ishome']==1].reset_index(drop=True).sort_values(by='mid')
df1=df1.drop(columns=['period', 'ishome'])
df1.columns=['mid']+[x+'1' for x in cols_stats]
df0=df[df['ishome']==0].reset_index(drop=True).sort_values(by='mid')
df0=df0.drop(columns=['mid','period', 'ishome'])
df0.columns=[x+'0' for x in cols_stats]
df=pd.concat([df1,df0], axis=1)
df=df.dropna()
df['possession1']=df['possession1'].str[:-1].astype(float)/100
df['possession0']=df['possession0'].str[:-1].astype(float)/100


In [162]:
df.drop_duplicates()

Unnamed: 0,awayScoreHT,country,country_id,ds,homeScoreHT,liga,mid,round,sc1,sc2,t1,t2,tid1,tid2,winner,formation_h,formation_a,home_formation,away_formation,graph1,graph2,vote_home,vote_draw,vote_away,pop_r,possession1,shont1,shofft1,corners1,offsides1,fouls1,cards1,gksaves1,possession2,shont2,shofft2,corners2,offsides2,fouls2,cards2,gksaves2
0,0.0,england,15,2015-01-10 12:15:00+00:00,0.0,championship,5583876,25,0.0,1.0,ipswich town,derby county,0,86,away,4-4-2,4-3-3,20,18,,,0.492176,0.236131,0.271693,2,0.46,0.16,0.206897,0.153846,0.0625,0.250000,0.272727,0.095238,0.54,0.12,0.103448,0.153846,0.0625,0.214286,0.090909,0.190476
2,0.0,france,19,2015-01-10 13:00:00+00:00,1.0,ligue-2,5510536,19,1.0,0.0,troyes,stade brestois 29,1,79,home,4-4-2,4-4-2,20,20,,,0.578125,0.300781,0.121094,1,0.49,0.04,0.034483,0.076923,0.1250,0.232143,0.181818,0.047619,0.51,0.08,0.275862,0.192308,0.2500,0.285714,0.090909,0.000000
4,1.0,greece,21,2015-01-10 13:00:00+00:00,1.0,super-league,6570345,11,1.0,1.0,veria nfc,ael kalloni,2,67,draw,4-2-3-1,4-2-3-1,14,14,,,0.678571,0.192857,0.128571,0,0.53,0.12,0.172414,0.192308,0.2500,0.303571,0.181818,0.095238,0.47,0.12,0.137931,0.038462,0.3125,0.285714,0.090909,0.142857
6,1.0,england,15,2015-01-10 15:00:00+00:00,2.0,premier-league,5582834,21,2.0,1.0,burnley,queens park rangers,3,442,home,4-4-1-1,4-4-1-1,19,19,,,0.413851,0.232545,0.353604,3,0.48,0.24,0.206897,0.384615,0.0625,0.232143,0.181818,0.285714,0.52,0.32,0.137931,0.230769,0.1875,0.142857,0.181818,0.190476
8,0.0,england,15,2015-01-10 15:00:00+00:00,1.0,premier-league,5582836,21,2.0,0.0,chelsea,newcastle united,4,105,home,4-2-3-1,4-4-2,14,20,,,0.771883,0.094132,0.133985,4,0.56,0.16,0.137931,0.269231,0.3750,0.267857,0.181818,0.238095,0.44,0.20,0.034483,0.153846,0.0000,0.178571,0.272727,0.047619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87180,0.0,portugal,33,2021-01-10 15:00:00+00:00,1.0,segunda-liga,8956894,15,1.0,0.0,ud oliveirense,feirense,283,280,home,4-3-3,4-3-3,18,18,0.423606,0.576394,0.160300,0.278652,0.561049,0,,,,,,,,,,,,,,,,
87181,0.0,portugal,33,2021-01-10 17:00:00+00:00,0.0,segunda-liga,8956856,15,1.0,1.0,casa pia,arouca,1647,39,draw,4-4-2,4-4-2,20,20,,,0.339020,0.402450,0.258530,0,,,,,,,,,,,,,,,,
87182,1.0,portugal,33,2021-01-10 17:00:00+00:00,1.0,segunda-liga,8956724,15,2.0,1.0,fc vizela,cd cova da piedade,1748,1293,home,4-3-3,4-4-2,18,20,0.584996,0.415004,0.633021,0.286765,0.080214,0,,,,,,,,,,,,,,,,
87183,1.0,romania,34,2021-01-10 17:00:00+00:00,1.0,liga-i,9270007,12,2.0,1.0,fc viitorul constanta,fc hermannstadt,594,1499,home,4-3-3,4-2-3-1,18,14,0.666844,0.333156,0.623808,0.307603,0.068589,0,0.45,0.36,0.413793,0.346154,0.0625,0.214286,0.181818,0.000000,0.55,0.04,0.034483,0.038462,0.0625,0.232143,0.090909,0.333333


Unnamed: 0,Shots on target,Shots off target,Corner kicks,Offsides,Fouls,Yellow cards,Goalkeeper saves
0,0.24,0.103448,0.115385,0.0000,0.232143,0.181818,0.190476
1,0.28,0.206897,0.115385,0.0625,0.232143,0.090909,0.285714
2,0.28,0.206897,0.461538,0.1875,0.160714,0.181818,0.095238
3,0.12,0.137931,0.192308,0.0000,0.285714,0.181818,0.285714
4,0.32,0.206897,0.192308,0.1250,0.482143,0.090909,0.095238
...,...,...,...,...,...,...,...
159143,0.16,0.344828,0.038462,0.1250,0.178571,0.090909,0.238095
159144,0.28,0.206897,0.307692,0.0625,0.285714,0.000000,0.047619
159145,0.12,0.172414,0.076923,0.0000,0.375000,0.181818,0.190476
159146,0.36,0.413793,0.346154,0.0625,0.214286,0.181818,0.000000


In [109]:
pd.concat([df[df.columns[:4]],pd.DataFrame(df_scaled, columns=df.columns[4:])], axis=1)

Unnamed: 0,mid,period,ishome,Ball possession,Shots on target,Shots off target,Corner kicks,Offsides,Fouls,Yellow cards,Goalkeeper saves
0,5076016,ALL,1,45%,0.24,0.103448,0.115385,0.0000,0.232143,0.181818,0.190476
1,5076016,ALL,0,55%,0.28,0.206897,0.115385,0.0625,0.232143,0.090909,0.285714
2,5076018,ALL,1,65%,0.28,0.206897,0.461538,0.1875,0.160714,0.181818,0.095238
3,5076018,ALL,0,35%,0.12,0.137931,0.192308,0.0000,0.285714,0.181818,0.285714
4,5076020,ALL,1,65%,0.32,0.206897,0.192308,0.1250,0.482143,0.090909,0.095238
...,...,...,...,...,...,...,...,...,...,...,...
159143,9258633,ALL,0,44%,0.16,0.344828,0.038462,0.1250,0.178571,0.090909,0.238095
159144,9258635,ALL,1,64%,0.28,0.206897,0.307692,0.0625,0.285714,0.000000,0.047619
159145,9258635,ALL,0,36%,0.12,0.172414,0.076923,0.0000,0.375000,0.181818,0.190476
159146,9270007,ALL,1,45%,0.36,0.413793,0.346154,0.0625,0.214286,0.181818,0.000000


In [96]:
cols_stats=['possession', 'shont', 'shofft', 'corners', 'offsides', 'fouls', 'cards', 'gksaves']
df1=df[df['ishome']==1].reset_index(drop=True).sort_values(by='mid')
df1=df1.drop(columns=['period', 'ishome'])
df1.columns=['mid']+[x+'1' for x in cols_stats]
df0=df[df['ishome']==0].reset_index(drop=True).sort_values(by='mid')
df0=df0.drop(columns=['mid','period', 'ishome'])
df0.columns=[x+'0' for x in cols_stats]
df=pd.concat([df1,df0], axis=1)
df=df.dropna()
df['possession1']=df['possession1'].str[:-1].astype(float)/100
df['possession0']=df['possession0'].str[:-1].astype(float)/100


In [97]:
df



Unnamed: 0,mid,possession1,shont1,shofft1,corners1,offsides1,fouls1,cards1,gksaves1,possession0,shont0,shofft0,corners0,offsides0,fouls0,cards0,gksaves0
0,5076016,0.45,6.0,3.0,3.0,0.0,13.0,2.0,4.0,0.55,7.0,6.0,3.0,1.0,13.0,1.0,6.0
1,5076018,0.65,7.0,6.0,12.0,3.0,9.0,2.0,2.0,0.35,3.0,4.0,5.0,0.0,16.0,2.0,6.0
2,5076020,0.65,8.0,6.0,5.0,2.0,27.0,1.0,2.0,0.35,3.0,4.0,0.0,2.0,10.0,1.0,7.0
3,5076028,0.45,2.0,5.0,3.0,0.0,17.0,5.0,6.0,0.55,8.0,3.0,3.0,2.0,20.0,5.0,0.0
11418,5076032,0.64,6.0,5.0,2.0,1.0,15.0,1.0,2.0,0.36,4.0,6.0,5.0,1.0,24.0,3.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79569,9258631,0.64,6.0,3.0,9.0,3.0,16.0,1.0,1.0,0.36,3.0,4.0,2.0,3.0,15.0,1.0,5.0
79570,9258632,0.52,10.0,3.0,7.0,2.0,11.0,1.0,1.0,0.48,2.0,6.0,4.0,5.0,10.0,2.0,7.0
79571,9258633,0.56,8.0,10.0,6.0,0.0,11.0,3.0,4.0,0.44,4.0,10.0,1.0,2.0,10.0,1.0,5.0
79572,9258635,0.64,7.0,6.0,8.0,1.0,16.0,0.0,1.0,0.36,3.0,5.0,2.0,0.0,21.0,2.0,4.0


In [67]:
df1.columns

Index(['mid', 'period', 'ishome', 'Ball possession', 'Shots on target',
       'Shots off target', 'Corner kicks', 'Offsides', 'Fouls', 'Yellow cards',
       'Goalkeeper saves', 'Total shots'],
      dtype='object')

In [57]:
nulls=pd.DataFrame(df.isna().sum(), columns=['n'])
drop_cols=nulls[nulls.n>50000].sort_values(by='n').index
cols_to_keep=[x for x in df.columns if not x in drop_cols]
drop_cols

Index(['Blocked shots', 'Duels won', 'Shots inside box', 'Shots outside box',
       'Passes', 'Accurate passes', 'Aerials won', 'Red cards',
       'Possession lost', 'Big chances', 'Clearances', 'Big chances missed',
       'Long balls', 'Crosses', 'Dribbles', 'Tackles', 'Interceptions',
       'Hit woodwork', 'Counter attacks', 'Counter attack shots',
       'Counter attack goals'],
      dtype='object')

In [20]:
df_graph.groupby('mid').graph.sum()/91

mid
5076016    -6.417582
5076018    13.714286
5076020    10.626374
5076028    -6.285714
5076032     8.406593
             ...    
9258631    16.098901
9258632     7.362637
9258633     2.846154
9258635    16.857143
9270007    18.318681
Name: graph, Length: 66130, dtype: float64

In [21]:
scaler.fit_transform(df_graph.groupby('mid')[['graph']].sum())

array([[0.46723419],
       [0.62968875],
       [0.60477077],
       ...,
       [0.54198812],
       [0.6550501 ],
       [0.66684402]])

In [14]:
scaler = MinMaxScaler()
df_scaled_graph = scaler.fit_transform(df_graph_pivoted.iloc[:, 1:].T).T
df_scaled_graph_rev = scaler.fit_transform((df_graph_pivoted.iloc[:, 1:]*-1).T).T

In [15]:
df_scaled_graph

array([[0.57718121, 0.59060403, 0.60402685, ..., 0.51677852, 0.51677852,
        0.33557047],
       [0.22868217, 0.22868217, 0.03100775, ..., 0.26356589, 0.24418605,
        1.        ],
       [0.44047619, 0.44047619, 0.44047619, ..., 0.55357143, 0.49404762,
        0.88095238],
       ...,
       [0.42857143, 0.09774436, 0.21052632, ..., 0.61654135, 0.58646617,
        0.81954887],
       [0.38135593, 0.52542373, 0.74576271, ..., 0.        , 0.1440678 ,
        0.00847458],
       [0.42937853, 0.42372881, 0.59887006, ..., 0.51977401, 0.48022599,
        0.43502825]])

In [12]:
find_kmeans(df_scaled_graph, "softdtw", 10)

  0%|          | 0/9 [01:23<?, ?it/s]


KeyboardInterrupt: 

In [12]:
nulls=pd.DataFrame(df_graph_pivoted.isna().sum(), columns=['n'])
#nulls[nulls.n>10000].to_csv('data/nulls.csv')
nulls[nulls.n>1000]

Unnamed: 0_level_0,n
time,Unnamed: 1_level_1
G45.5,11231
G90.5,1067


In [7]:

df_graph

Unnamed: 0,mid,time,graph
0,5076016,1.0,6
1,5076016,2.0,8
2,5076016,3.0,10
3,5076016,4.0,1
4,5076016,5.0,-40
...,...,...,...
6885171,9270007,87.0,68
6885172,9270007,88.0,36
6885173,9270007,89.0,15
6885174,9270007,90.0,8


In [6]:
df.columns

Index(['awayScoreHT', 'country', 'country_id', 'ds', 'homeScoreHT', 'liga',
       'mid', 'round', 'sc1', 'sc2', 't1', 't2', 'tid1', 'tid2', 'winner',
       'formation_h', 'formation_a', 'home_formation', 'away_formation',
       'vote_home', 'vote_draw', 'vote_away', 'pop_r'],
      dtype='object')

In [3]:
data[0]

array([0.49217639, 0.23613087, 0.27169275, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [4]:
labels[0]

array([1., 0., 0.])

# Analysis

In [3]:
data_train, data_test, labels_train, labels_test, info_train, info_test = train_test_split(data, labels, info, test_size=0.2, random_state=42)
print(data_train.shape, data_test.shape)

(69716, 156) (17429, 156)


df.isnull().any()

In [4]:
def get_model(n_inputs, n_outputs):
    model = Sequential()
    model.add(Dense(1024, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    #model.add(Dropout(0.2))
    model.add(Dense(512, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    #model.add(Dropout(0.2))
    model.add(Dense(64, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(16, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

def evaluate_model(X, y):
    results = list()
    n_inputs, n_outputs = X.shape[1], y.shape[1]
    # define evaluation procedure
    cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)
    # enumerate folds
    for train_ix, test_ix in cv.split(X):
        # prepare data
        X_train, X_test = X[train_ix], X[test_ix]
        y_train, y_test = y[train_ix], y[test_ix]
        # define model
        model = get_model(n_inputs, n_outputs)
        # fit model
        model.fit(X_train, y_train, epochs=10)
        # make a prediction on the test set
        yhat = model.predict(X_test)
        # round probabilities to class labels
        yhat = yhat.round()
        # calculate accuracy
        acc = accuracy_score(y_test, yhat)
        # store result
        print('>%.3f' % acc)
        results.append(acc)
        break
    return results, model

In [5]:
results, model = evaluate_model(data_train, labels_train)
# summarize performance
print('Accuracy: %.3f (%.3f)' % (np.mean(results), np.std(results)))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
>0.500
Accuracy: 0.500 (0.000)


In [10]:
yhat = model.predict(data_test)

In [14]:
def odds2prob(df):
    df['odds_away']=1/df['odds_away']
    df['odds_draw']=1/df['odds_draw']
    df['odds_home']=1/df['odds_home']
    df['margin']=df[['odds_away','odds_draw','odds_home']].sum(axis=1)
    df['odds_away']=df['odds_away']/df['margin']
    df['odds_draw']=df['odds_draw']/df['margin']
    df['odds_home']=df['odds_home']/df['margin']
    return df[['odds_away','odds_draw','odds_home']]

In [18]:
info_test

Unnamed: 0,mid,ts,country,tournament,home_tid,away_tid,homeTeamShort,homeScoreHT,homeScoreFT,awayTeamShort,awayScoreHT,awayScoreFT,winner
10600,6897580,2015-11-07 14:00:00+00:00,italy,serie-b,1212,419,Latina Calcio 1932,0.0,1.0,Cesena,0.0,0.0,home
47569,7895571,2018-11-10 15:15:00+00:00,spain,laliga,925,2106,Getafe,0.0,0.0,Valencia,0.0,1.0,away
17246,6834033,2016-10-07 00:45:00+00:00,south-america,world-cup-qualification-conmebol,324,302,Brazil,4.0,5.0,Bolivia,0.0,0.0,home
71236,8747958,2020-08-23 19:00:00+00:00,brazil,brasileiro-serie-a,2119,961,Vasco,0.0,0.0,Grêmio,0.0,0.0,draw
64437,8246175,2020-02-15 19:00:00+00:00,france,ligue-1,2017,1490,Toulouse,0.0,0.0,Nice,1.0,2.0,away
...,...,...,...,...,...,...,...,...,...,...,...,...,...
64995,8247260,2020-02-28 19:00:00+00:00,france,ligue-2,1883,1694,Sochaux,0.0,1.0,Rodez,0.0,1.0,draw
40229,7471896,2017-09-30 17:30:00+00:00,romania,liga-i,582,169,Dinamo B.,0.0,1.0,Astra,0.0,1.0,draw
80026,8747900,2020-09-05 22:00:00+00:00,brazil,brasileiro-serie-a,493,316,Corinthians,1.0,2.0,Botafogo,1.0,2.0,draw
80557,9030877,2020-09-27 23:30:00+00:00,paraguay,primera-division-apertura,912,966,General Díaz,0.0,0.0,Guaraní,0.0,1.0,away


In [30]:
from api.predictions_converter import PredictionsConverter
#conv_bookies=PredictionsConverter('op', api.util.odds2prob(info_test.copy()).values, labels_test, info_test.copy(), odds=False)
#conv_bookies.make_df()
conv=PredictionsConverter('op', yhat, labels_test, info_test.copy(), odds=False)
conv.make_df()

#conv_bookies.profit()
#conv.profit()
#conv_bookies.performance_metrics()
conv.performance_metrics()

Unnamed: 0,Name,TP,TN,FP,FN,Accuracy,Prevalence,Sensitivity,Specificity,PPV,NPV,AUC,F1,Threshold
0,HOME,4686,4837,2930,1898,0.664,0.459,0.712,0.623,0.615,0.718,0.667,0.66,0.5
1,DRAW,1446,8871,1944,2090,0.719,0.246,0.409,0.82,0.427,0.809,0.615,0.418,0.5
2,AWAY,1930,8674,1446,2301,0.739,0.295,0.456,0.857,0.572,0.79,0.657,0.507,0.5


In [31]:
#conv_bookies1=PredictionsConverter('op', odds2prob(info_test.copy()).values, labels_test, info_test.copy())
#conv_bookies1.make_df(threshold='max')
conv1=PredictionsConverter('op', yhat, labels_test, info_test.copy(), odds=False)
conv1.make_df(threshold='max')

#conv_bookies1.profit()
#conv1.profit()
#conv_bookies1.performance_metrics()
conv1.performance_metrics()

Unnamed: 0,Name,TP,TN,FP,FN,Accuracy,Prevalence,Sensitivity,Specificity,PPV,NPV,AUC,F1,Threshold
0,HOME,5201,5544,3525,2373,0.646,0.455,0.687,0.611,0.596,0.7,0.649,0.638,0.5
1,DRAW,1620,10126,2353,2544,0.706,0.25,0.389,0.811,0.408,0.799,0.6,0.398,0.5
2,AWAY,2137,9931,1807,2768,0.725,0.295,0.436,0.846,0.542,0.782,0.641,0.483,0.5
