<a href="https://colab.research.google.com/github/cappelchi/calcio_notebooks/blob/main/draft/football_GRAPH_spektral_models_train_test_220918.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Project config

In [None]:
!pip install neptune-client neptune-tensorflow-keras

In [None]:
import neptune.new as neptune
#from neptune.new.integrations.tensorflow_keras import NeptuneCallback
def get_credential(frmwork = 'neptune_team'):
    """Look up a login/secret pair for ``frmwork`` in ``cred_andrey.txt``.

    The file is scanned line by line. The first line that contains ``frmwork``
    is split on whitespace and its second and third fields are returned.
    Splitting on arbitrary whitespace (rather than on a single space) keeps
    the lookup working when fields are separated by several spaces or tabs,
    or when the line has trailing blanks.

    Args:
        frmwork: Substring identifying the credential line to use.

    Returns:
        Tuple ``(login, psw)`` taken from the matching line.

    Raises:
        FileNotFoundError: if ``cred_andrey.txt`` does not exist.
        ValueError: if no line mentions ``frmwork``, or the matching line has
            fewer than three whitespace-separated fields.
    """
    with open('cred_andrey.txt', 'r') as container:
        for line in container:
            if frmwork in line:
                fields = line.split()
                if len(fields) < 3:
                    # Never echo the line itself: it may contain a secret.
                    raise ValueError(
                        f"Credential line for '{frmwork}' must have at least 3 fields"
                    )
                return fields[1], fields[2]
    # Fail loudly here instead of returning None, which would only surface
    # later as an opaque "cannot unpack NoneType" error in the caller.
    raise ValueError(f"No credentials for '{frmwork}' found in cred_andrey.txt")

In [None]:
#@title Set API key for neptune.ai
set_api = True #@param {type:"boolean"}
# Define both names unconditionally so that later cells never hit a NameError
# when the checkbox is unticked. With api_token=None neptune falls back to the
# NEPTUNE_API_TOKEN environment variable.
username, api_key = None, None
if set_api:
    username, api_key = get_credential()

### Installations

In [None]:
!pip install spektral

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting spektral
  Downloading spektral-1.2.0-py3-none-any.whl (140 kB)
[K     |████████████████████████████████| 140 kB 5.1 MB/s 
Installing collected packages: spektral
Successfully installed spektral-1.2.0


### Downloads

In [None]:
data_version = 'data_graph_221019/'
validation_dataset_name = './prem_validation.csv'
dataset_version = 'data/dataset_val_prod_0818'
project = neptune.init_project(
    name="scomesse/football",
    api_token = api_key
    )

# Fetch every graph artifact of this data version into the working directory.
# The try/finally guarantees the neptune connection is closed even when one of
# the downloads fails, so no background sync thread is left running.
try:
    for artifact in ('nodes', 'edges', 'edges_rev', 'conversion'):
        project[data_version + artifact].download(f'./{artifact}.csv.gz')
finally:
    project.stop()

https://app.neptune.ai/scomesse/football/
Remember to stop your project once you’ve finished logging your metadata (https://docs.neptune.ai/api/project#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
All 0 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/metadata


### Imports

In [None]:
import pandas as pd
import numpy as np
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100
print(pd.__version__)
print(np.__version__)
import subprocess
from tqdm import tqdm
from scipy import sparse as sp

1.3.5
1.21.6


In [None]:
import tensorflow as tf
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.layers import Dropout, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

In [None]:
from spektral.data.loaders import SingleLoader, DisjointLoader
from spektral.datasets.citation import Citation
from spektral.layers import GCNConv
from spektral.transforms import LayerPreprocess

In [None]:
from spektral.data import Dataset, Graph
from spektral.transforms.normalize_adj import NormalizeAdj
from spektral.layers import ARMAConv
from spektral.transforms import LayerPreprocess

### Code

##### Functions

In [None]:
def run_bash(bashCommand:str, nameCommand = ''):
    """Run a shell command and report any failure on stdout.

    The command's stdout is inherited from the current process; stderr is
    captured so that it can be printed together with ``nameCommand``.

    Args:
        bashCommand: Command line passed to the shell as-is. It is executed
            with ``shell=True``, so it must not contain untrusted input.
        nameCommand: Label used as a prefix in the error report.

    Returns:
        None. A report is printed when the command writes to stderr or exits
        with a non-zero status.
    """
    process = subprocess.Popen(
        bashCommand,
        shell=True,
        # Without a pipe communicate() always returns None for stderr and
        # failures would go unnoticed.
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    _, error = process.communicate()
    if error or process.returncode != 0:
        print(f'{nameCommand} error:\n', error or f'exit code {process.returncode}')

##### Neural model architectures

In [None]:
class ARMAConv_net(Model):
    '''
    Two-layer ARMA graph convolutional network for node-level prediction.

    The layer stack (ARMAConv -> Dropout -> ARMAConv) mirrors the citation
    network example from the paper:
    Graph Neural Networks with convolutional ARMA filters (https://arxiv.org/abs/1901.01343)
    Filippo Maria Bianchi, Daniele Grattarola, Cesare Alippi, Lorenzo Livi

    All hyper-parameters are read from the module-level ``params`` dict at
    construction time. Required keys: 'channels', 'iterations', 'order',
    'share_weights', 'dropout_skip', 'dropout', 'l2_reg' and 'n_out'.
    '''
    def __init__(self):
        super().__init__()
        # Hidden ARMA layer.
        self.ARMAConv1 = ARMAConv(
                params['channels'],
                iterations=params['iterations'],
                order=params['order'],
                share_weights=params['share_weights'],
                dropout_rate=params['dropout_skip'],
                activation="elu",
                gcn_activation="elu",
                kernel_regularizer=l2(params['l2_reg'])
                        )
        # Dropout applied to the hidden node features.
        self.Dropout = Dropout(params['dropout'])
        # Output ARMA layer: a single stack and iteration with sigmoid outputs.
        self.ARMAConv2 = ARMAConv(
                params['n_out'],
                iterations=1,
                order=1,
                share_weights=params['share_weights'],
                dropout_rate=params['dropout_skip'],
                activation="sigmoid",
                gcn_activation=None,
                kernel_regularizer=l2(params['l2_reg'])
                        )

    def call(self, inputs):
        '''
        Forward pass.

        inputs: pair ``(x, a)`` of node features and adjacency matrix.
        Returns the output of the second ARMA layer.
        '''
        x, a = inputs
        x1 = self.ARMAConv1([x, a])
        # self.Dropout is already a layer instance, so it is applied directly.
        # The previous ``self.Dropout()(x1)`` called the layer with no input
        # and raised a TypeError.
        x2 = self.Dropout(x1)
        output = self.ARMAConv2([x2, a])
        return output

In [None]:
class GCNConv_net(Model):
    '''
    Simple GCNConv architecture: a single graph convolution with sigmoid
    outputs and no bias. Hyper-parameters ('n_out', 'l2_reg') are read from
    the module-level ``params`` dict when the model is created.
    '''
    def __init__(self):
        super().__init__()
        weight_penalty = l2(params['l2_reg'])
        self.GCNConv = GCNConv(
            params['n_out'],
            activation="sigmoid",
            kernel_regularizer=weight_penalty,
            use_bias=False,
        )

    def call(self, inputs):
        # inputs is the pair (node features, adjacency matrix).
        features, adjacency = inputs
        return self.GCNConv([features, adjacency])

##### Training

In [None]:
class calcio_graph(Dataset):
    """Single-graph dataset built from ``./nodes.csv.gz`` and ``./edges_rev.csv.gz``.

    A window of ``amount`` rows is cut from the node table, ending ``left``
    rows before its end. Each node gets a one-hot feature vector derived from
    its ``feat`` column and a single label ``y``. Edges whose two endpoints
    both fall inside the window form the adjacency matrix.

    After ``read()`` has run, three boolean node masks are available:

    * ``mask_tr`` - the first ``train`` nodes,
    * ``mask_te`` - the last ``validation`` nodes,
    * ``mask_va`` - everything in between.

    Args:
        amount: Number of node rows to keep.
        left: Number of trailing node rows to skip (0 keeps the tail).
        train: Number of leading nodes in the training mask.
        validation: Number of trailing nodes in the test mask.
        **kwargs: Forwarded to the base ``Dataset`` constructor
            (for example ``transforms``).
    """
    def __init__(self, amount = 700_000, left = 0, train = 500_000, validation = 100_000, **kwargs):
        self.amount = amount
        self.left = left
        self.train = train
        self.validation = validation
        # The masks are filled in by read(); they are defined up front so the
        # attributes always exist.
        self.mask_tr = self.mask_va = self.mask_te = None
        super().__init__(**kwargs)

    def read(self):
        """Load nodes and edges from disk and return ``[Graph]``."""
        print('Load nodes...')
        x_df = pd.read_csv('./nodes.csv.gz', usecols = [0, 1, 2], header = 0, names = ['idx','feat', 'y'])
        # A stop of -0 would select nothing, so use None when nothing is skipped.
        upper_bound = -self.left if self.left else None
        x_df = x_df.iloc[-self.amount - self.left:upper_bound, :].set_index('idx').sort_index()
        n_nodes = len(x_df)

        # One-hot encode ``feat``. Allocating float32 directly avoids a
        # temporary float64 matrix of twice the size, and fancy indexing
        # replaces the per-row Python loop.
        x = np.zeros((n_nodes, x_df.feat.max() + 1), dtype=np.float32)
        x[np.arange(n_nodes), x_df.feat.to_numpy()] = 1

        print('Load edges...')
        start_row = x_df.index.min()
        end_row = x_df.index.max()
        # './edges.csv.gz' holds an alternative edge list; the reversed one is used here.
        edges_df = pd.read_csv('./edges_rev.csv.gz')
        inside = (edges_df.source.between(start_row, end_row) &
                  edges_df.target.between(start_row, end_row)).to_numpy()
        # Node ids are turned into row positions by subtracting the smallest
        # id, which assumes the ids inside the window are contiguous. If they
        # are not, csr_matrix raises because of the explicit shape below.
        rows = edges_df.source.to_numpy()[inside] - start_row
        cols = edges_df.target.to_numpy()[inside] - start_row
        # The explicit shape keeps the matrix N x N even when the last nodes
        # of the window have no edges; an inferred shape would be too small.
        a = sp.csr_matrix(
            (np.ones(len(rows), dtype=np.float32), (rows, cols)),
            shape=(n_nodes, n_nodes),
        )

        y = x_df.y.values.reshape(-1,1).astype(np.float32)

        # Masks are sized by the number of nodes actually loaded, so they
        # always line up with x and y.
        position = np.arange(n_nodes)
        test_start = int(n_nodes - self.validation)
        self.mask_tr = position < int(self.train)
        self.mask_va = (position >= int(self.train)) & (position < test_start)
        self.mask_te = position >= test_start
        return [Graph(x = x, a = a, y = y)]

In [None]:
class SGCN:
    """Graph transform that replaces the adjacency matrix ``a`` with ``a`` to the K-th power.

    Args:
        K: Exponent. Values of 1 or less leave the matrix unchanged apart
            from sorting its indices.
    """
    def __init__(self, K):
        self.K = K

    def __call__(self, graph):
        """Overwrite ``graph.a`` with its K-th power and return the same graph."""
        base = graph.a
        out = base
        for _ in range(self.K - 1):
            # Multiply by the original matrix each time. Squaring the running
            # product (out.dot(out)) would give a^(2^(K-1)), which only
            # equals a^K for K <= 2.
            out = out.dot(base)
        out.sort_indices()
        graph.a = out
        return graph

In [None]:
# Window of the node table used for the experiment: nodes_amount rows that
# end nodes_left rows before the end of nodes.csv.gz.
nodes_amount = 700_000
nodes_left = 10_000
nodes_start = nodes_amount + nodes_left

# Alternatives tried earlier (kept for reference):
#   transforms=[LayerPreprocess(GCNConv)]  (optionally followed by SGCN(K))
#   transforms=[SGCN(K)]
#   transforms=[LayerPreprocess(GCNConv), AdjToSpTensor()]
#   transforms=[NormalizeAdj()]
dataset = calcio_graph(
    amount=nodes_amount,
    left=nodes_left,
    transforms=[LayerPreprocess(ARMAConv)],
)

Load nodes...
Load edges...


In [None]:
dataset[0]

Graph(n_nodes=700000, n_node_features=793, n_edge_features=None, n_labels=1)

In [None]:
print(dataset[0].a)

In [None]:
mask_tr, mask_va, mask_te = dataset.mask_tr, dataset.mask_va, dataset.mask_te

In [None]:
mask_tr.sum(), mask_va.sum(), mask_te.sum()

(500000, 100000, 100000)

In [None]:
# Hyper-parameter dictionary read by the model classes at construction time.
params = {
    'l2_reg': 5e-5,             # L2 regularization rate
    'n_out': dataset.n_labels,  # Number of classes
}

In [None]:
# ARMA architecture and training hyper-parameters.
params.update({
    'channels': 16,          # Number of channels in the first layer
    'iterations': 1,         # Number of iterations to approximate each ARMA(1)
    'order': 2,              # Order of the ARMA filter (number of parallel stacks)
    'share_weights': True,   # Share weights in each ARMA stack
    'dropout_skip': 0.75,    # Dropout rate for the internal skip connection of ARMA
    'dropout': 0.5,          # Dropout rate for the features
    'learning_rate': 1e-2,   # Learning rate
    'epochs': 20000,         # Number of training epochs
    'patience': 100,         # Patience for early stopping
})

In [None]:
# Set the learning rate actually used and bundle the keyword arguments that
# are later unpacked into model.compile().
params['learning_rate'] = 0.005
params['compile'] = {
    'optimizer': Adam(learning_rate=params['learning_rate']),
    'loss': BinaryCrossentropy(),
    'weighted_metrics': ["acc"],
}

In [None]:
# Build and compile the ARMA model with the arguments stored in
# params['compile']. To train the single-layer baseline instead, replace
# ARMAConv_net() with GCNConv_net() and compile it the same way.
compile_kwargs = params['compile']
model = ARMAConv_net()
model.compile(**compile_kwargs)

In [None]:
# Train model
loader_tr = SingleLoader(dataset, sample_weights=mask_tr)
loader_va = SingleLoader(dataset, sample_weights=mask_va)

In [None]:
def scheduler(epoch, lr):
    """Return the learning rate for ``epoch`` from a fixed decay table.

    Epochs 0-12 use the hand-picked values below; every other epoch uses
    1e-4. The current rate ``lr`` is accepted to match the callback signature
    but is not used.
    """
    # A much smaller schedule tried previously:
    # 1e-5, 6e-6, 1e-6, 9e-7, 8e-7, 7e-7, 6e-7, 5e-7, 4e-7, 1e-7
    table = dict(enumerate((
        5e-2, 4e-2, 2e-2, 1e-2, 8e-3, 4e-3, 2e-3,
        1e-3, 8e-4, 4e-4, 2e-4, 1e-4, 1e-4,
    )))
    return table.get(epoch, 1e-4)
LRS_callback = LearningRateScheduler(scheduler)

In [None]:
# Checkpoint settings: keep only the best full model (by 'val_acc') under
# ./models_weights. They are stored in params so other cells can look them up.
params['MCh_params'] = {
    'filepath': './models_weights',
    'monitor': 'val_acc',
    'verbose': 1,
    'save_best_only': True,
    'save_weights_only': False,
}
ModelCheckpoint_callback = ModelCheckpoint(**params['MCh_params'])

In [None]:
# Parameters
# Module-level hyper-parameters read by the functional-API model built in the
# following cells. They are separate from the `params` dict used by the
# subclassed models, so changing one set does not change the other.
channels = 16  # Number of channels in the first layer
iterations = 1  # Number of iterations to approximate each ARMA(1)
order = 2  # Order of the ARMA filter (number of parallel stacks)
share_weights = True  # Share weights in each ARMA stack
dropout_skip = 0.75  # Dropout rate for the internal skip connection of ARMA
dropout = 0.5  # Dropout rate for the features
l2_reg = 5e-5  # L2 regularization rate
learning_rate = 1e-2  # Learning rate
epochs = 20000  # Number of training epochs
patience = 100  # Patience for early stopping
a_dtype = dataset[0].a.dtype  # Only needed for TF 2.1

In [None]:
# Functional-API version of the two-layer ARMA network, built from the
# module-level hyper-parameters (channels, iterations, order, ...).
a_dtype = dataset[0].a.dtype  # Only needed for TF 2.1

N = dataset.n_nodes  # Number of nodes in the graph
F = dataset.n_node_features  # Original size of node features
n_out = dataset.n_labels  # Number of classes

# Model definition
# Two inputs: dense node features of width F and a sparse adjacency of width N.
x_in = Input(shape=(F,))
a_in = Input((N,), sparse=True, dtype=a_dtype)

# Hidden ARMA layer.
gc_1 = ARMAConv(
    channels,
    iterations=iterations,
    order=order,
    share_weights=share_weights,
    dropout_rate=dropout_skip,
    activation="elu",
    gcn_activation="elu",
    kernel_regularizer=l2(l2_reg),
)([x_in, a_in])
# Dropout on the hidden features; the name gc_2 is then reused for the output
# layer, which is what the Model(...) cell consumes.
gc_2 = Dropout(dropout)(gc_1)
gc_2 = ARMAConv(
    n_out,
    iterations=1,
    order=1,
    share_weights=share_weights,
    dropout_rate=dropout_skip,
    activation="sigmoid",
    gcn_activation=None,
    kernel_regularizer=l2(l2_reg),
)([gc_2, a_in])


In [None]:
# Build model
model = Model(inputs=[x_in, a_in], outputs=gc_2)
optimizer = Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer, loss=BinaryCrossentropy(), weighted_metrics=["acc"]
)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 793)]        0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 700000)]     0           []                               
                                                                                                  
 arma_conv (ARMAConv)           (None, 16)           50784       ['input_1[0][0]',                
                                                                  'input_2[0][0]']                
                                                                                                  
 dropout (Dropout)              (None, 16)           0           ['arma_conv[0][0]']          

In [None]:
# Short training run: `epochs` is overridden to 2 here, replacing the value
# set in the parameter cell.
epochs = 2
history = model.fit(
    loader_tr.load(),
    steps_per_epoch=loader_tr.steps_per_epoch,
    validation_data=loader_va.load(),
    validation_steps=loader_va.steps_per_epoch,
    epochs=epochs,
    # Callbacks are currently disabled. NOTE(review): EarlyStopping is not
    # among the imports visible in this notebook - import it from
    # tensorflow.keras.callbacks before re-enabling this block.
    #callbacks=[
        #EarlyStopping(patience=patience, restore_best_weights=True),
        #ModelCheckpoint_callback,
        #LRS_callback
        #],
)

Epoch 1/2
Epoch 2/2


In [None]:
# Unpack a previously archived model into the working directory.
# NOTE(review): the archive is only created by the `tar -czvf` cell further
# down, so on a fresh kernel it presumably has to be supplied beforehand.
PATH_TO_MODEL = './model.tar.gz'
bashCommand = f"\ntar -zxvf {PATH_TO_MODEL}\n"
run_bash(bashCommand, 'tar_model')

In [None]:
model = load_model('./models_weights')

In [None]:
# Report loss and accuracy on the training nodes first, then on the
# validation nodes. The first report is labelled "Train" because it is
# computed with loader_tr; it was previously printed as "Val".
eval_results = model.evaluate(loader_tr.load(), steps=loader_tr.steps_per_epoch)
print("Done.\n" "Train loss: {}\n" "Train accuracy: {}".format(*eval_results))
eval_results = model.evaluate(loader_va.load(), steps=loader_va.steps_per_epoch)
print("Done.\n" "Val loss: {}\n" "Val accuracy: {}".format(*eval_results))

Done.
Val loss: 0.4904237985610962
Val accuracy: 0.5597839951515198
Done.
Val loss: 0.09960535913705826
Val accuracy: 0.543150007724762


In [None]:
# Evaluate model
print("Evaluating model.")
loader_te = SingleLoader(dataset, sample_weights=mask_te)
eval_results = model.evaluate(loader_te.load(), steps=loader_te.steps_per_epoch)
print("Done.\n" "Test loss: {}\n" "Test accuracy: {}".format(*eval_results))

Evaluating model.
Done.
Test loss: 0.09933696687221527
Test accuracy: 0.5528200268745422


In [None]:
predict_production = model.predict(loader_tr.load(), steps=loader_tr.steps_per_epoch)



In [None]:
print('nodes_start = ', nodes_start, 'nodes_left = ', nodes_left)
# Re-read the same window of the node table that the dataset was built from,
# this time keeping the label and match id of every node.
# A stop of -0 would select no rows at all, so use None when no trailing rows
# are skipped (the same rule calcio_graph.read applies).
nodes_stop = -nodes_left if nodes_left else None
nodes_df = pd.read_csv(
    './nodes.csv.gz',
    usecols = [0, 2, 6],
    header = 0,
    names = ['idx', 'y', 'match_id']
    ).iloc[-nodes_start:nodes_stop, :].set_index('idx').sort_index()

nodes_start =  710000 nodes_left =  10000


In [None]:
mask_tr_match = mask_tr[::2]
mask_va_match = mask_va[::2]
mask_te_match = mask_te[::2]

In [None]:
# Pair consecutive nodes into one row per match: even positions go to the
# 'home' columns and odd positions to the 'not_home' columns.
# NOTE(review): assumes nodes alternate home / not-home within each match -
# confirm against how nodes.csv.gz is generated.
predict_df = pd.DataFrame(columns = ['home', 'not_home'])
predict_df['home'] = nodes_df.y.values[::2]
predict_df['not_home'] = nodes_df.y.values[1::2]
predict_df['pred_home'] = predict_production[::2]
predict_df['pred_not_home'] = predict_production[1::2]
# result is 1 when the home node received the higher score, else 0.
predict_df['result'] = (predict_df['pred_home'] > predict_df['pred_not_home']).astype(int)

In [None]:
# A match counts as a "win" when the predicted side (result) equals the
# actual home label.
match_hits = predict_df['home'] == predict_df['result']
last_hits = match_hits[-1000:]
train_wins = match_hits[mask_tr_match].sum()
val_wins = match_hits[mask_va_match].sum()
test_wins = match_hits[mask_te_match].sum()
last_day_wins = last_hits.sum()
# Rates are multiplied by 100 so the printed number matches its '%' suffix;
# previously the raw fraction (e.g. 0.56) was shown as a percentage.
print('Train wins: ', train_wins, ' ',  round(100 * train_wins / mask_tr_match.sum(), 2), '%')
print('Val wins: ', val_wins, ' ',  round(100 * val_wins / mask_va_match.sum(), 2), '%')
print('Test wins: ', test_wins, ' ',  round(100 * test_wins / mask_te_match.sum(), 2), '%')
# Divide by the real slice length in case fewer than 1000 matches exist.
print('Last day wins: ', last_day_wins, ' ',  round(100 * last_day_wins / len(last_hits), 2), '%')

Train wins:  140611   0.56 %
Val wins:  27175   0.54 %
Test wins:  27907   0.56 %
Last day wins:  563   0.56 %


In [None]:
# Same hit rate over the most recent 5000 matches. The rate is scaled to a
# real percentage and divided by the actual slice length.
last_hits = (predict_df['home'] == predict_df['result'])[-5000:]
last_day_wins = last_hits.sum()
print('Last day wins: ', last_day_wins, ' ',  round(100 * last_day_wins / len(last_hits), 2), '%')

Last day wins:  2754   0.55 %


In [None]:
predict_df.tail(50)

Unnamed: 0,home,not_home,pred_home,pred_not_home,result
349950,0,1,0.460829,0.572726,0
349951,1,0,0.431599,0.556822,0
349952,1,0,0.461068,0.559404,0
349953,1,0,0.469211,0.532149,0
349954,1,0,0.480084,0.519506,0
349955,1,0,0.472737,0.536153,0
349956,0,1,0.487741,0.509877,0
349957,1,0,0.461005,0.549776,0
349958,1,0,0.486183,0.559036,0
349959,1,0,0.523636,0.523059,1


In [None]:
pd.read_csv('./edges.csv.gz')

Unnamed: 0,source,target
0,0,1
1,2,3
2,4,5
3,6,7
4,8,9
...,...,...
10680340,5352195,5364729
10680341,5352196,5364683
10680342,5352459,5354673
10680343,5352625,5359158


In [None]:
eval_results = model.evaluate(loader_va.load(), steps=loader_va.steps_per_epoch)
print("Done.\n" "Val loss: {}\n" "Val accuracy: {}".format(*eval_results))

Done.
Val loss: 0.08375976234674454
Val accuracy: 0.7184399962425232


In [None]:
# Evaluate model
print("Evaluating model.")
loader_te = SingleLoader(dataset, sample_weights=mask_te)
eval_results = model.evaluate(loader_te.load(), steps=loader_te.steps_per_epoch)
print("Done.\n" "Test loss: {}\n" "Test accuracy: {}".format(*eval_results))

Evaluating model.
Done.
Test loss: 0.08535648137331009
Test accuracy: 0.6975100040435791


In [None]:
predict_production = model.predict(loader_va.load(), steps=loader_va.steps_per_epoch)



In [None]:
predict_production.shape

(700000, 1)

In [None]:
# Reload labels and match ids for the node window used by the dataset.
# A stop of -0 would select no rows, so use None when no trailing rows are
# skipped (the same rule calcio_graph.read applies).
nodes_stop = -nodes_left if nodes_left else None
nodes_df = pd.read_csv(
    './nodes.csv.gz',
    usecols = [0, 2, 6],
    header = 0,
    names = ['idx', 'y', 'match_id']
    ).iloc[-nodes_start:nodes_stop, :].set_index('idx').sort_index()

#### Calculate validation

In [None]:
# Restrict labels and predictions to the validation nodes (mask_va), then
# pair consecutive nodes into one row per match: even positions -> 'home',
# odd positions -> 'not_home'.
# NOTE(review): assumes the validation slice starts on a home node and that
# nodes alternate home / not-home - confirm.
predict_df = pd.DataFrame(columns = ['home', 'not_home'])
predict_df['home'] = nodes_df[mask_va].y.values[::2]
predict_df['not_home'] = nodes_df[mask_va].y.values[1::2]
predict_df['pred_home'] = predict_production[mask_va][::2]
predict_df['pred_not_home'] = predict_production[mask_va][1::2]
# result is 1 when the home node received the higher score, else 0.
predict_df['result'] = (predict_df['pred_home'] > predict_df['pred_not_home']).astype(int)
# Displayed value: number of matches where result equals the home label.
(predict_df['home'] == predict_df['result']).sum()

42219

In [None]:
predict_df

Unnamed: 0,home,not_home,pred_home,pred_not_home,result
0,0,1,0.402605,0.493420,0
1,1,0,0.467355,0.509141,0
2,0,1,0.476668,0.546723,0
3,0,1,0.516562,0.605974,0
4,1,0,0.595249,0.442725,1
...,...,...,...,...,...
49995,0,1,0.492508,0.513929,0
49996,0,1,0.406367,0.561839,0
49997,0,1,0.482784,0.573863,0
49998,0,1,0.499893,0.516207,0


#### Calculate test

In [None]:
#predict_production = model.predict(loader_te.load(), steps=loader_te.steps_per_epoch)



In [None]:
# Same per-match pairing as for validation, restricted to the test nodes
# (mask_te). predict_production is reused from the earlier predict cell; the
# dedicated test prediction above is commented out.
predict_df = pd.DataFrame(columns = ['home', 'not_home'])
predict_df['home'] = nodes_df[mask_te].y.values[::2]
predict_df['not_home'] = nodes_df[mask_te].y.values[1::2]
predict_df['pred_home'] = predict_production[mask_te][::2]
predict_df['pred_not_home'] = predict_production[mask_te][1::2]
# result is 1 when the home node received the higher score, else 0.
predict_df['result'] = (predict_df['pred_home'] > predict_df['pred_not_home']).astype(int)
# Displayed value: number of matches where result equals the home label.
(predict_df['home'] == predict_df['result']).sum()

40222

In [None]:
# Archive the checkpoint directory configured in params['MCh_params'] into a
# single gzip-compressed tarball.
PATH_TO_MODEL = './model.tar.gz'
checkpoint_path = params['MCh_params']['filepath']
bashCommand = f"\ntar -czvf {PATH_TO_MODEL} {checkpoint_path}\n"
run_bash(bashCommand, 'tar_model')

42073

In [None]:
predict_df['idx_home'] = nodes_df.index[mask_te][::2]
predict_df['idx_not_home'] = nodes_df.index[mask_te][1::2]

In [None]:
predict_df['match_id_home'] = nodes_df[mask_te].match_id.values[::2]
predict_df['match_id_not_home'] = nodes_df[mask_te].match_id.values[1::2]

In [None]:
predict_df[predict_df.match_id_home != predict_df.match_id_not_home]

Unnamed: 0,home,not_home,pred_home,pred_not_home,result,idx_home,idx_not_home,match_id_home,match_id_not_home


In [None]:
predict_df

Unnamed: 0,home,not_home,pred_home,pred_not_home,result
0,0,1,0.817898,0.496136,1
1,0,1,0.181242,0.487676,0
2,1,0,0.478412,0.515747,0
3,1,0,0.153708,0.828872,0
4,1,0,0.509116,0.504974,1
...,...,...,...,...,...
69995,1,0,0.314261,0.688905,0
69996,1,0,0.689033,0.312998,1
69997,0,1,0.310236,0.325164,0
69998,1,0,0.326383,0.326463,0


In [None]:
validation_dataset_name = './prem_validation.csv'
project = neptune.init_project(
    name="scomesse/football",
    api_token = api_key
    )
# Close the neptune connection even if the download fails, so no background
# sync thread is left running in the kernel.
try:
    project['data/validation_prem_220818'].download(validation_dataset_name)
finally:
    project.stop()

In [None]:
prem_df = pd.read_csv('./prem_validation.csv')

In [None]:
# Keep only the matches that also appear in predict_df. The explicit copy
# makes prem_df an independent frame, so adding columns to it later does not
# trigger pandas' SettingWithCopyWarning.
prem_df = prem_df[prem_df.Id.isin(list(predict_df.match_id_home))].copy()

In [None]:
predict_dict = predict_df[predict_df.match_id_home.isin(list(prem_df.Id))].set_index('match_id_home')[['home', 'result']].to_dict(orient = 'index')

In [None]:
prem_df[['result', 'predict']] = [[predict_dict[key]['home'], predict_dict[key]['result']] for key in prem_df.Id.values]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[col] = igetitem(value, i)


In [None]:
prem_df

Unnamed: 0,Id,Idbook,W1,X,W2,W1live,Xlive,W2live,result,predict
7614,2eqn3i74041ragwpuzow7gbo4,13812880,3.600,4.65,1.700,3.600,4.65,1.700,0,0
7615,59i869it1synl0z3di62wss9g,13812877,2.880,4.15,2.030,2.880,4.15,2.030,1,1
7616,dj1uww92e3x26gg2gw79r6g44,13812875,1.116,8.80,14.000,1.116,8.80,14.000,1,0
7618,4hkznpu6g79a8h32ionlfagpg,13813781,1.760,4.10,3.720,1.760,4.10,3.720,1,1
7619,b232tm66r4b7jsi881a6mo6qc,13813779,2.550,3.92,2.300,2.420,3.94,2.410,0,0
...,...,...,...,...,...,...,...,...,...,...
7987,dgtvqq4bwqkgnal8dx3zwccgk,13857861,3.700,3.42,1.930,3.700,3.42,1.930,1,1
7988,1nb3ab0vbihddp9qh2qviix3o,13857924,2.240,4.25,2.510,2.240,4.25,2.510,0,0
7989,9q0423juf175stwpppkxi9kic,13857923,1.250,5.80,9.200,1.250,5.80,9.200,1,1
7990,3y4e7iany0lgbj75cp30g8t90,13857930,8.700,7.10,1.210,8.700,7.10,1.210,0,0


In [None]:
# Simulate a unit stake on W1live for every match where predict == 1.
# A correct prediction earns k1 - 1, a wrong one loses the stake (-1), and
# matches without a bet contribute 0.
k1_win_list = []
bet_cnt = 0
bet_win = 0
for k1, res, pred in zip(prem_df.W1live, prem_df.result, prem_df.predict):
    if pred == 1:
        bet_cnt += 1
        if res == 1:
            k1_win_list.append(k1 - 1)
            bet_win += 1
        else:
            k1_win_list.append(-1)
    else:
        k1_win_list.append(0)
print('Sum: ', round(sum(k1_win_list), 2))
print('Bet count:', bet_cnt)
# Guard against a division by zero when no bet was placed at all.
win_share = round(bet_win / bet_cnt, 2) if bet_cnt else float('nan')
print('Bet wins:', bet_win, ' - bet/win = ', win_share)

Sum:  164.69
Bet count: 177
Bet wins: 147  - bet/win =  0.83


In [None]:
# Simulate a bet for every match where predict == 0. A correct prediction
# earns 0.95 / (k1 - 1), a wrong one loses 1, and matches without a bet
# contribute 0.
# NOTE(review): the payoff looks like a lay bet on W1live with a 5%
# commission - confirm. Odds of exactly 1.0 would divide by zero here.
k1_win_list = []
bet_cnt = 0
bet_win = 0
for k1, res, pred in zip(prem_df.W1live, prem_df.result, prem_df.predict):
    if pred == 0:
        bet_cnt += 1
        if res == 0:
            k1_win_list.append(0.95 / (k1 - 1))
            bet_win += 1
        else:
            k1_win_list.append(-1)
    else:
        k1_win_list.append(0)
print('Sum: ', round(sum(k1_win_list), 2))
print('Bet count:', bet_cnt)
# Guard against a division by zero when no bet was placed at all.
win_share = round(bet_win / bet_cnt, 2) if bet_cnt else float('nan')
print('Bet wins:', bet_win, ' - bet/win = ', win_share)

Sum:  104.81
Bet count: 198
Bet wins: 166  - bet/win =  0.84


In [None]:
loader_te = SingleLoader(dataset, sample_weights=mask_te)

In [None]:
for x in loader_te.load():
    print(x)
    break

((<tf.Tensor: shape=(500100, 793), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7fe6f118a290>), <tf.Tensor: shape=(500100, 1), dtype=float32, numpy=
array([[1.],
       [0.],
       [0.],
       ...,
       [0.],
       [1.],
       [0.]], dtype=float32)>, <tf.Tensor: shape=(500100,), dtype=bool, numpy=array([ True,  True,  True, ..., False, False, False])>)
