<a href="https://colab.research.google.com/github/cappelchi/calcio_notebooks/blob/main/draft/football_live_validation_binary_heft_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Project config

In [3]:
try:
    import neptune.new as neptune
except:
    !pip install neptune-client >> None
    import neptune.new as neptune
#from neptune.new.integrations.tensorflow_keras import NeptuneCallback
def get_credential(frmwork = 'neptune_team'):
    with open('credential.txt', 'r') as container:
        for line in container:
            if frmwork in line:
                login, psw = line.split(' ')[1], line.split(' ')[2].split('\n')[0]
                return login, psw
     

In [4]:
#@title Set API key for neptune.ai
set_api = True #@param {type:"boolean"}
if set_api:
    username, api_key = get_credential()

### Installations

In [5]:
!pip install catboost >> None

### Downloads

In [6]:
data_version = 'football_live_npz_230131/'
project = neptune.init_project(
    project="scomesse/football", 
    api_token = api_key
    )
project[data_version + 'dataset'].download('./dataset.npz')
project[data_version + 'description'].download('./save_discription.txt')
project[data_version + 'additional_data'].download('./additional_data.npz')
project[data_version + 'time'].download('./time.csv')
params = project[data_version + 'params'].fetch()
project.stop()

https://app.neptune.ai/scomesse/football/
Remember to stop your project once you’ve finished logging your metadata (https://docs.neptune.ai/api/project#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
All 0 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/metadata


### Imports

In [7]:
import pandas as pd
import numpy as np
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100
print(pd.__version__)
print(np.__version__)

1.3.5
1.21.6


In [8]:
import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots as sp
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

In [9]:
from catboost import CatBoost
from catboost import utils
from catboost import CatBoostClassifier, CatBoostRegressor
from catboost import Pool, cv
from catboost.utils import eval_metric
np.random.seed(147)

In [10]:
from tqdm import tqdm
from scipy.stats import poisson
import os, psutil

In [11]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve
from sklearn.metrics import multilabel_confusion_matrix

### Code

#### Functions

In [43]:
def calculate_multiclass(probability_2x:np.array, line_2x:np.array):
    '''
    input : 0 - over, 1 - under
    output: 0 - over, 1 - under
    '''
    probline = probability_2x * line_2x
    best_odd_result = np.argmax(probline, axis = 1)
    best_odd_float = np.take_along_axis(probline, best_odd_result.reshape(-1, 1), axis = 1)
    return {
        'argmax':best_odd_result,
        'float':best_odd_float[:,0],
    }

In [129]:
def get_profit_curve2(prob_dict, y_true, Line_production, bet_type = 'fixed'):
    '''
    Line_production -> line[W1, X2]
    y_true -> data_npz['y_train_multi'] == kf_res
    '''
    #fixed, divk, divk-1
    #simple, complex
    # считаем прдикты по исходу и вероятности умноженной на кэф (over, under)
    preds_int, preds_float = calculate_multiclass(prob_dict, Line_production).values()
    #preds_int = preds_int[:,0] 
    preds_vec = preds_int * (-1) + 1 # -1:over(home), 1:under(away+)
    # вектор результатов
    win_vec, lose_vec = (preds_vec == y_true), (preds_vec != y_true)
    line_vec = np.take_along_axis(Line_production, preds_int.reshape(-1, 1), axis = 1)[:, 0]
    threshold = []
    profit = []
    bet_qty_list = []
    for th in np.linspace(0.,1.0,1001):
        threshold.append(th)
        vector_th = (preds_float / 10) > th
        bet_qty_list.append(vector_th.sum())
        if bet_type == 'divk':
            #                       размер ставки                                   кэф
            profit.append(np.sum((1/line_vec[win_vec & vector_th].astype(np.float32)) * \
                                (line_vec[win_vec & vector_th] - 1).astype(np.float32)) -\
                                np.sum((1/line_vec[lose_vec & vector_th].astype(np.float32))))
        else:
            profit.append(np.sum((line_vec[win_vec & vector_th] - 1).astype(np.float32)) - np.sum(lose_vec & vector_th))

    return threshold, profit, bet_qty_list

In [130]:
def get_profit_validation2(prob_dict, final_goal_diff, Line_production,  model_name):
    '''
    y_true - numpy вектор, shape:(x,) истинные значения в формате 0|1
    x_predicted - numpy вектор, shape:(x,) предикт (probability (float)) в формате 0.
    Line_production - numpy вектор, shape:(x,) вектор коэфициентов в формате float 1.
    '''
    scatters_dicts = dict(
        scatter1 = dict(x = [], y = [], name = '', fill = 'tozeroy', yaxis = '', xaxis = ''),
        scatter2 = dict(x = [], y = [], name = '', line = dict(color='rgb(33,113,181)', dash='dash'), yaxis = '', xaxis = ''),
        scatter3 = dict(x = [], y = [], name = '', line = dict(color='rgb(107,174,214)', dash='dash'), yaxis = '', xaxis = '')
                    )
    qty_color = 'blue'
    prof_qty_color = 'rgb(8,48,107)'
    bet_type_list = ['fixed', 'divk']
    bet_size_list = ['1', '1/K']
    strategy_name_list = ['threshold', 'pred*k']
    #domain_list = [[0.55, 1], [0., 0.5]]
    layout_dict = {}
    data_list = []
    title_text = f'Handicap profit & bet qty for validation model in neptune.ai: {model_name}'
    for cnt_bet, bet_type in enumerate(bet_type_list):
        y_anchor = str((cnt_bet*3) + 1)
        threshold, profit, bet_qty_list = get_profit_curve2(
            prob_dict, final_goal_diff, Line_production, bet_type = bet_type
            )
        xaxis_num = str((cnt_bet + 1))
        layout_dict.update({
                'xaxis' + xaxis_num:{
                    'domain':[0.5 * cnt_bet, 0.5 * cnt_bet + 0.5],
                    'title':f'bet={bet_size_list[cnt_bet]}',
                    'anchor':'y' + y_anchor}
                            })
        for cnt_scatter in range(1, 4):
            yaxis_num = str((cnt_bet*3) + cnt_scatter)
            scatter_num = 'scatter' + str(cnt_scatter)
            scatters_dicts[scatter_num]['x'] = threshold
            if cnt_scatter == 1:
                scatters_dicts[scatter_num]['y'] = profit
                scatters_dicts[scatter_num]['name'] = 'profit_' +xaxis_num
                scatters_dicts[scatter_num]['xaxis'] = 'x' + xaxis_num
                scatters_dicts[scatter_num]['yaxis'] = 'y' + yaxis_num
                layout_dict.update({
                    'yaxis' + yaxis_num:{
                        #'domain':domain_list[cnt_str],
                        'title':'', #'Profit',
                        'range':[-10,int(max(profit) * 1.1)],
                        'anchor':'x' + xaxis_num
                }})
            elif cnt_scatter == 2:
                scatters_dicts[scatter_num]['y'] = np.array(profit) / np.array(bet_qty_list)
                scatters_dicts[scatter_num]['name'] = 'profit_' + xaxis_num
                scatters_dicts[scatter_num]['xaxis'] = 'x' + xaxis_num
                scatters_dicts[scatter_num]['yaxis'] = 'y' + yaxis_num
                layout_dict.update({
                    'yaxis' + yaxis_num:{
                        #'domain':domain_list[cnt_str],
                        'title':'', 'zeroline':True,
                        'side':'right', 'anchor':'x' + xaxis_num,
                        'overlaying':'y' + y_anchor}})
            elif cnt_scatter == 3:
                scatters_dicts[scatter_num]['y'] = np.array(bet_qty_list) / 1000
                scatters_dicts[scatter_num]['name'] = 'profit_' + xaxis_num
                scatters_dicts[scatter_num]['xaxis'] = 'x' + xaxis_num
                scatters_dicts[scatter_num]['yaxis'] = 'y' + yaxis_num
                layout_dict.update({
                    'yaxis' + yaxis_num:{
                        #'domain':domain_list[cnt_str],
                        'visible':False, 'showgrid':True,
                        'side':'right', 'anchor':'x' + xaxis_num,
                        'overlaying':'y' + y_anchor}})
            data_list += [go.Scatter(**scatters_dicts['scatter' + str(cnt_scatter)])]
    layout_dict.update({
        'width':1400,
        'height':800,
        'title_x':0.5,
        'title_text':title_text,
        'paper_bgcolor':'rgb(229, 237, 247)',
        'plot_bgcolor':'rgb(229, 237, 247)',
        'showlegend':False 
                        })
    layout = go.Layout(**layout_dict)
    return go.Figure(data=data_list, layout=layout)
     

In [46]:
def plot_equity2(prob_dict, final_goal_diff, Line_production, th, model_name, x_date = np.empty(()), bet_type = 'fixed', data_split = 'validation'):
    #fixed, divk, divk-1
    #simple, complex
    preds_int, preds_float, preds_float_adj = calculate_multiclass(prob_dict, Line_production[:,1:3]).values()
    preds_float_adj = preds_float_adj[:,0] 
    preds_vec = preds_int * (2) - 1 # -1:over, 1:under
    res_vec = ((final_goal_diff + Line_production[:, 0]) > 0) * (-1) + \
             ((final_goal_diff + Line_production[:, 0]) < 0) * (1)
    win_vec, lose_vec = (preds_vec * res_vec) == 1, (preds_vec * res_vec) == -1
    line_vec = np.take_along_axis(Line_production[:,1:3], preds_int.reshape(-1, 1), axis = 1)[:, 0]
    vector_th = (preds_float_adj / 10) > th
    if bet_type == 'divk':
        bet_sum = np.sum(1/line_vec[win_vec & vector_th].astype(np.float32))
        mean_bet = np.mean(1/line_vec[win_vec & vector_th].astype(np.float32))
        #                       размер ставки                                   кэф
        y = np.cumsum(((line_vec - 1) / line_vec) * (win_vec & vector_th).astype(np.float32)) -\
                            np.cumsum((1/line_vec) * (lose_vec & vector_th).astype(np.float32))
    else:
        mean_bet = 1
        bet_sum = np.sum(win_vec & vector_th)
        y = np.cumsum((line_vec - 1) * (win_vec & vector_th).astype(np.float32)) - np.cumsum(lose_vec & vector_th)
    bet_qty = vector_th.sum()
    title_text = f'Handicap Equity Curve {data_split} | threshold={th} | bet_type:{bet_type}<br>' + \
                f'bet_mean: {np.round(mean_bet, 2)} | ROI: {np.round(y[-1] * 100 / bet_sum, 4)}%' + \
                f' | Bet quantity: {bet_qty}<br>' + f'Model in neptune.ai: FOOT-{model_name}'
    
    layout_dict = {}
    if x_date.shape:
        trace_equity = go.Scatter(y=y[(win_vec|lose_vec)&vector_th], x= x_date[(win_vec|lose_vec)&vector_th], fill='tozeroy', xaxis = 'x1') #'toself'
        layout_dict.update({'xaxis1':{'showgrid':True, 'ticklabelmode':'period', 'tickformat':'%d\n%b\n%Y'}})
        data_list = [trace_equity]
    else:
        trace_equity = go.Scatter(y = y, fill='tozeroy', xaxis = 'x1', yaxis = 'y1') #'toself'
        layout_dict.update({'xaxis1':{'anchor':'y1'}, 'yaxis1':{'domain':[0.4, 1]}})
        max_profit = np.maximum.accumulate(y)
        trace_drawdown =  go.Scatter(y = (y - max_profit) / mean_bet, fill='tozeroy', xaxis = 'x2', yaxis = 'y2')
        layout_dict.update({'xaxis2':{'anchor':'y2'}, 'yaxis2':{'domain':[0., 0.35], 'title':'drawdown inmean(bet)'}})
        data_list = [trace_equity, trace_drawdown]

    layout_dict.update({
    'width':1400,
    'height':800,
    'title_x':0.5,
    'title_text':title_text,
    'paper_bgcolor':'rgb(229, 237, 247)',
    'plot_bgcolor':'rgb(229, 237, 247)',
    'showlegend':False 
                    })
     #, trace_time_dd]
    layout = go.Layout(**layout_dict)
    return go.Figure(data=data_list, layout=layout)

In [119]:
def get_profit_validation(y_true, x_predicted, Line_production, model_name, reverse_bet = False):
    '''
    y_true - numpy вектор, shape:(x,) истинные значения в формате 0|1
    x_predicted - numpy вектор, shape:(x,) предикт (probability (float)) в формате 0.
    Line_production - numpy вектор, shape:(x,) вектор коэфициентов в формате float 1.
    '''
    scatters_dicts = dict(
        scatter1 = dict(x = [], y = [], name = '', fill = 'tozeroy', yaxis = '', xaxis = ''),
        scatter2 = dict(x = [], y = [], name = '', line = dict(color='rgb(33,113,181)', dash='dash'), yaxis = '', xaxis = ''),
        scatter3 = dict(x = [], y = [], name = '', line = dict(color='rgb(107,174,214)', dash='dash'), yaxis = '', xaxis = '')
                    )
    qty_color = 'blue'
    prof_qty_color = 'rgb(8,48,107)'
    bet_type_list = ['fixed', 'divk']
    bet_size_list = ['1', '1/K']
    strategy_list = ['simple', 'complex']
    strategy_name_list = ['threshold', 'pred*k']
    domain_list = [[0.55, 1], [0., 0.5]]
    layout_dict = {}
    data_list = []
    title_text = f'Profit & bet qty for validation model in neptune.ai: {model_name}'
    for cnt_str, strategy in enumerate(strategy_list):
        for cnt_bet, bet_type in enumerate(bet_type_list):
            y_anchor = str((9 * cnt_str) + (cnt_bet*3) + 1)
            threshold, profit, bet_qty_list = get_profit_curve(
                y_true, x_predicted, Line_production,
                bet_type = bet_type, strategy = strategy, reverse_bet = reverse_bet)
            xaxis_num = str((2 * cnt_str) + (cnt_bet + 1))
            layout_dict.update({
                    'xaxis' + xaxis_num:{
                        'domain':[0.5 * cnt_bet, 0.5 * cnt_bet + 0.5],
                        'title':f'bet={bet_size_list[cnt_bet]}, strategy:{strategy_name_list[cnt_str]}',
                        'anchor':'y' + y_anchor}
                                })
            for cnt_scatter in range(1, 4):
                yaxis_num = str((9 * cnt_str) + (cnt_bet*3) + cnt_scatter)
                scatter_num = 'scatter' + str(cnt_scatter)
                scatters_dicts[scatter_num]['x'] = threshold
                if cnt_scatter == 1:
                    scatters_dicts[scatter_num]['y'] = profit
                    scatters_dicts[scatter_num]['name'] = 'profit_' +xaxis_num
                    scatters_dicts[scatter_num]['xaxis'] = 'x' + xaxis_num
                    scatters_dicts[scatter_num]['yaxis'] = 'y' + yaxis_num
                    layout_dict.update({
                        'yaxis' + yaxis_num:{
                            'domain':domain_list[cnt_str],
                            'title':'', #'Profit',
                            'range':[-10,int(max(profit) * 1.1)],
                            'anchor':'x' + xaxis_num
                    }})
                elif cnt_scatter == 2:
                    scatters_dicts[scatter_num]['y'] = np.array(profit) / np.array(bet_qty_list)
                    scatters_dicts[scatter_num]['name'] = 'profit_' + xaxis_num
                    scatters_dicts[scatter_num]['xaxis'] = 'x' + xaxis_num
                    scatters_dicts[scatter_num]['yaxis'] = 'y' + yaxis_num
                    layout_dict.update({
                        'yaxis' + yaxis_num:{
                            'domain':domain_list[cnt_str],
                            'title':'', 'zeroline':True,
                            'side':'right', 'anchor':'x' + xaxis_num,
                            'overlaying':'y' + y_anchor}})
                elif cnt_scatter == 3:
                    scatters_dicts[scatter_num]['y'] = np.array(bet_qty_list) / 1000
                    scatters_dicts[scatter_num]['name'] = 'profit_' + xaxis_num
                    scatters_dicts[scatter_num]['xaxis'] = 'x' + xaxis_num
                    scatters_dicts[scatter_num]['yaxis'] = 'y' + yaxis_num
                    layout_dict.update({
                        'yaxis' + yaxis_num:{
                            'domain':domain_list[cnt_str],
                            'visible':False, 'showgrid':True,
                            'side':'right', 'anchor':'x' + xaxis_num,
                            'overlaying':'y' + y_anchor}})
                data_list += [go.Scatter(**scatters_dicts['scatter' + str(cnt_scatter)])]
    layout_dict.update({
        'width':1400,
        'height':800,
        'title_x':0.5,
        'title_text':title_text,
        'paper_bgcolor':'rgb(229, 237, 247)',
        'plot_bgcolor':'rgb(229, 237, 247)',
        'showlegend':False 
                        })
    layout = go.Layout(**layout_dict)
    return go.Figure(data=data_list, layout=layout)

In [120]:
def get_profit_curve(y, y_pred, Line_production, bet_type = 'fixed', strategy = 'simple', reverse_bet = False):
    #fixed, divk, divk-1
    #simple, complex
    threshold = []
    profit = []
    bet_qty_list = []
    if bet_type == 'divk':
        profit_size = (Line_production - 1) / Line_production
        bet_size = Line_production
    else:
        profit_size = (Line_production - 1)
        bet_size = Line_production /Line_production
    for th in np.linspace(0.,0.2,1001):
        threshold.append(th)
        if reverse_bet:
            if strategy == 'simple':
                vector_th = y_pred < th
            if strategy == 'complex':
                vector_th = (y_pred * Line_production / 10) < th
            bet_qty_list.append(vector_th.sum())
            profit.append(((-1) * (y[vector_th] - 1) * profit_size[vector_th] + ((-1) * y[vector_th]) / bet_size[vector_th]).sum())
        else:            
            if strategy == 'simple':
                vector_th = y_pred > th
            if strategy == 'complex':
                vector_th = (y_pred * Line_production / 10) > th
            bet_qty_list.append(vector_th.sum())
            profit.append((y[vector_th] * profit_size[vector_th] + (y[vector_th] - 1) / bet_size[vector_th]).sum())

    return threshold, profit, bet_qty_list

In [121]:
def plot_equity(y_true, y_pred, Line_production, th, model_name, bet_type = 'fixed', 
                strategy = 'simple', data_split = 'validation', reverse_bet = False):\
    #fixed, divk, divk-1
    #simple, complex
    if bet_type == 'divk':
        profit_size = (Line_production - 1) / Line_production
        bet_size = Line_production
    else:
        profit_size = (Line_production - 1)
        bet_size = Line_production /Line_production
    if strategy == 'simple':
        vector_th = y_pred > th
    if strategy == 'complex':
        vector_th = (y_pred * Line_production / 10) > th
    mean_bet = np.mean(1/bet_size[vector_th])
    y = (y_true[vector_th] * profit_size[vector_th] + (y_true[vector_th] - 1) / bet_size[vector_th]).cumsum()
    bet_qty = vector_th.sum()
    title_text = f'Equity Curve {data_split} | threshold={th} | bet_type:{bet_type} | strategy:{strategy}<br>' + \
                f'bet_mean: {np.round(mean_bet, 2)} | ROI: {np.round(y[-1] * 100 / np.sum(1/bet_size[vector_th]), 4)}%' + \
                f' | Bet quantity: {bet_qty}<br>' + f'Model in neptune.ai: FOOT-{model_name}'
    layout_dict = {}
    #fig = go.Figure()
    trace_equity = go.Scatter(y = y, fill='tozeroy', xaxis = 'x1', yaxis = 'y1') #'toself'
    layout_dict.update({'xaxis1':{'anchor':'y1'}, 'yaxis1':{'domain':[0.4, 1]}})
    max_profit = np.maximum.accumulate(y)
    trace_drawdown =  go.Scatter(y = (y - max_profit) / mean_bet, fill='tozeroy', xaxis = 'x2', yaxis = 'y2')
    layout_dict.update({'xaxis2':{'anchor':'y2'}, 'yaxis2':{'domain':[0., 0.35], 'title':'drawdown inmean(bet)'}})

    layout_dict.update({
    'width':1400,
    'height':800,
    'title_x':0.5,
    'title_text':title_text,
    'paper_bgcolor':'rgb(229, 237, 247)',
    'plot_bgcolor':'rgb(229, 237, 247)',
    'showlegend':False 
                    })
    data_list = [trace_equity, trace_drawdown] #, trace_time_dd]
    layout = go.Layout(**layout_dict)
    return go.Figure(data=data_list, layout=layout)

#### Load Data

In [16]:
dataset_name = './dataset.npz'
data_npz = np.load(dataset_name)
X_train, X_test, X_holdout = data_npz['X_train'], data_npz['X_test'], data_npz['X_holdout']

In [124]:
#@title Выбор таргета
target_type = "binary_away" #@param ["binary_home", "binary_draw", "binary_away"]
k_vec = [False for _ in range(13)]
if target_type == 'binary_home':
    kf_col = 1
    kf_res = 2
    k_vec[1], k_vec[5] = (True, True)
    y_train, line_train, active_train  =  1 * (data_npz['y_train_multi'] == kf_res), data_npz['K_train'][:, k_vec], data_npz['K_train'][:,0] == 1
    y_test, line_test, active_test  =  1 * (data_npz['y_test_multi'] == kf_res), data_npz['K_test'][:, k_vec], data_npz['K_test'][:,0] == 1
    y_holdout, line_holdout, active_holdout = 1 * (data_npz['y_holdout_multi'] == kf_res), data_npz['K_holdout'][:, k_vec], data_npz['K_holdout'][:,0] == 1
    #zero_train, zero_test, zero_holdout = line_train > 1, line_test > 1, line_holdout > 1
    model_head = 'FOOT-LIVEBC'
elif target_type == 'binary_draw':
    kf_col = 2
    kf_res = 1
    k_vec[2], k_vec[6] = (True, True)
    y_train, line_train, active_train  =  1 * (data_npz['y_train_multi'] == kf_res), data_npz['K_train'][:, k_vec], data_npz['K_train'][:,0] == 1
    y_test, line_test, active_test  =  1 * (data_npz['y_test_multi'] == kf_res), data_npz['K_test'][:, k_vec], data_npz['K_test'][:,0] == 1
    y_holdout, line_holdout, active_holdout = 1 * (data_npz['y_holdout_multi'] == kf_res), data_npz['K_holdout'][:, k_vec], data_npz['K_holdout'][:,0] == 1
    zero_train, zero_test, zero_holdout = line_train > 1, line_test > 1, line_holdout > 1
    model_head = 'FOOT-LIVEBCDRAW'
elif target_type == 'binary_away':
    kf_col = 3
    kf_res = 0
    k_vec[3], k_vec[4] = (True, True)
    y_train, line_train, active_train  =  1 * (data_npz['y_train_multi'] == kf_res), data_npz['K_train'][:, k_vec], data_npz['K_train'][:,0] == 1
    y_test, line_test, active_test  =  1 * (data_npz['y_test_multi'] == kf_res), data_npz['K_test'][:, k_vec], data_npz['K_test'][:,0] == 1
    y_holdout, line_holdout, active_holdout = 1 * (data_npz['y_holdout_multi'] == kf_res), data_npz['K_holdout'][:, k_vec], data_npz['K_holdout'][:,0] == 1
    zero_train, zero_test, zero_holdout = line_train > 1, line_test > 1, line_holdout > 1
    model_head = 'FOOT-LIVEBCAWAY'
#---------------
process = psutil.Process(os.getpid())
print('Mem usage: ', round(process.memory_info().rss / 1024 ** 3, 2), 'GiB')  # in bytes 
#---------------

Mem usage:  3.33 GiB


#### Download models and restore predicts

In [125]:
# Модель для 1-ой команды
model_num = 7 # Указываем номер модели
neptune_model = model_head
neptune_model_version = neptune_model + f'-{model_num}'
model_version_params = dict(
    project = 'scomesse/football',
    model = neptune_model,
    api_token = api_key,
    with_id = neptune_model_version
)
PATH_TO_MODEL = './booster.model'
model_version = neptune.init_model_version(**model_version_params)
model_version['model'].download(PATH_TO_MODEL)
params1 = model_version['team_parameters'].fetch()
model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
All 0 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [126]:
if target_type == 'binary_home':
    booster = CatBoost()
    booster.load_model('./booster.model')
    train_preds = booster.predict(X_train, prediction_type="Probability")
    test_preds = booster.predict(X_test, prediction_type="Probability")
    holdout_preds = booster.predict(X_holdout, prediction_type="Probability")
    print("Train Accuracy : % 4f"% eval_metric(y_train, train_preds, "Accuracy")[0])
    print("Test  Accuracy : %.4f"%eval_metric(y_test, test_preds, "Accuracy")[0])
    print("Holdout  Accuracy : %.4f"%eval_metric(y_holdout, holdout_preds, "Accuracy")[0])
    train_preds = train_preds[:,1]
    test_preds = test_preds[:,1]
    holdout_preds = holdout_preds[:,1]
elif target_type == 'binary_draw':
    booster = CatBoost()
    booster.load_model('./booster.model')
    train_preds = booster.predict(X_train, prediction_type="Probability")
    test_preds = booster.predict(X_test, prediction_type="Probability")
    holdout_preds = booster.predict(X_holdout, prediction_type="Probability")
    print("Train Accuracy : % 4f"% eval_metric(y_train, train_preds, "Accuracy")[0])
    print("Test  Accuracy : %.4f"%eval_metric(y_test, test_preds, "Accuracy")[0])
    print("Holdout  Accuracy : %.4f"%eval_metric(y_holdout, holdout_preds, "Accuracy")[0])
    train_preds = train_preds[:,1]
    test_preds = test_preds[:,1]
    holdout_preds = holdout_preds[:,1]
elif target_type == 'binary_away':
    booster = CatBoost()
    booster.load_model('./booster.model')
    train_preds = booster.predict(X_train, prediction_type="Probability")
    test_preds = booster.predict(X_test, prediction_type="Probability")
    holdout_preds = booster.predict(X_holdout, prediction_type="Probability")
    print("Train Accuracy : % 4f"% eval_metric(y_train, train_preds, "Accuracy")[0])
    print("Test  Accuracy : %.4f"%eval_metric(y_test, test_preds, "Accuracy")[0])
    print("Holdout  Accuracy : %.4f"%eval_metric(y_holdout, holdout_preds, "Accuracy")[0])
    train_preds = train_preds[:,1]
    test_preds = test_preds[:,1]
    holdout_preds = holdout_preds[:,1]
#---------------
process = psutil.Process(os.getpid())
print('Mem usage: ', round(process.memory_info().rss / 1024 ** 3, 2), 'GiB')  # in bytes 
#---------------

Train Accuracy :  0.761436
Test  Accuracy : 0.7598
Holdout  Accuracy : 0.7627
Mem usage:  3.38 GiB


In [21]:
active_rows_dict ={}
active_rows_dict['train'] = (active_train & (line_train.sum(axis = 1) > 0))
active_rows_dict['test'] = (active_test & (line_test.sum(axis = 1) > 0))
active_rows_dict['holdout'] = (active_holdout & (line_holdout.sum(axis = 1) > 0))

In [127]:
prob_dict = {} #clmn - 0:over(home_win) ||| clmn - 1:under (away_win)
prob_dict['train'] = np.hstack((
    train_preds[active_rows_dict['train']].reshape(-1, 1), 
    (1 - train_preds)[active_rows_dict['train']].reshape(-1, 1)
                                    ))
prob_dict['test'] = np.hstack((
    test_preds[active_rows_dict['test']].reshape(-1, 1), 
    (1 - test_preds)[active_rows_dict['test']].reshape(-1, 1)
                                    ))
prob_dict['holdout'] = np.hstack((
    holdout_preds[active_rows_dict['holdout']].reshape(-1, 1), 
    (1 - holdout_preds)[active_rows_dict['holdout']].reshape(-1, 1)
                                    ))

#### Plot Profit validation

In [112]:
fig = get_profit_validation(
    y_test[active_rows_dict['test']], 
    test_preds[active_rows_dict['test']], 
    line_test[active_rows_dict['test'], 0], 
    'test: ' + neptune_model + f'-{model_num}'
    )
fig.show()

In [107]:
fig.write_html(f'test: {neptune_model}-{model_num}_profit_report.html') #neptune_model + f'-{model_num}'
model_version = neptune.init_model_version(**model_version_params)
model_version[f'profit_validation_test'].upload(neptune.types.File.as_html(fig))
model_version.stop()

In [123]:
threshold = 0.1076
bet_type = 'divk'
strategy = 'complex'
fig = plot_equity(
    y_test[active_rows_dict['test']], 
    test_preds[active_rows_dict['test']], 
    line_test[active_rows_dict['test'], 0], 
    threshold, neptune_model + f'-{model_num}',
    bet_type = bet_type, 
    strategy = strategy, 
    data_split = 'holdout', 
    reverse_bet = False
    )
fig.show()

In [131]:
fig = get_profit_validation2(
    prob_dict['test'], 
    y_test, 
    line_test[active_rows_dict['test']],  
    'test: ' + neptune_model + f'-{model_num}')
fig.show()


elementwise comparison failed; this will raise an error in the future.


invalid value encountered in true_divide

