<a href="https://colab.research.google.com/github/cappelchi/calcio_notebooks/blob/main/draft/football_live_validation_experimental_heft_3_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[CatBoost - An In-Depth Guide [Python API]](https://coderzcolumn.com/tutorials/machine-learning/catboost-an-in-depth-guide-python#9)<br>
[Catboost](https://catboost.ai/en/docs/concepts/python-reference_pool)<br>
[Cross-Validation Techniques](https://medium.com/geekculture/cross-validation-techniques-33d389897878)

### Project config

In [None]:
try:
    import neptune.new as neptune
except:
    !pip install neptune-client >> None
    import neptune.new as neptune
#from neptune.new.integrations.tensorflow_keras import NeptuneCallback
def get_credential(frmwork = 'neptune_team'):
    """Look up the login and secret stored for ``frmwork`` in ``credential.txt``.

    The file is read from the current working directory. The first line that
    contains ``frmwork`` and has at least three whitespace-separated fields
    wins; its second and third fields are returned.

    Args:
        frmwork: Text identifying the wanted line (matched as a substring).

    Returns:
        tuple[str, str]: ``(login, psw)`` taken from the matching line.

    Raises:
        ValueError: If no usable line for ``frmwork`` exists. Previously the
            function fell through and returned ``None``, which only surfaced
            as an unpacking error at the call site.
    """
    with open('credential.txt', 'r') as container:
        for line in container:
            if frmwork not in line:
                continue
            # split() without an argument tolerates repeated spaces and tabs
            # and drops the trailing newline, so no blank field or stray
            # whitespace can end up in the returned values.
            fields = line.split()
            if len(fields) >= 3:
                return fields[1], fields[2]
    raise ValueError(f"no usable '{frmwork}' entry found in credential.txt")
     

In [None]:
#@title Set API key for neptune.ai
set_api = True #@param {type:"boolean"}
# Bind both names unconditionally so later cells that pass `api_token = api_key`
# do not hit a NameError when the form box is unchecked.
# NOTE(review): neptune is documented to fall back to the NEPTUNE_API_TOKEN
# environment variable when api_token is None - confirm for the installed version.
username, api_key = None, None
if set_api:
    username, api_key = get_credential()

### Installations

In [None]:
!pip install catboost >> None

### Downloads

In [None]:
# Fetch the dataset archive, its description and the stored parameters from
# the Neptune project namespace identified by `data_version`.
data_version = 'football_live_npz_230131/'
project = neptune.init_project(
    project="scomesse/football", 
    api_token = api_key
    )
try:
    project[data_version + 'dataset'].download('./dataset.npz')
    project[data_version + 'description'].download('./save_discription.txt')
    params = project[data_version + 'params'].fetch()
finally:
    # Release the connection even when one of the downloads fails.
    project.stop()

https://app.neptune.ai/scomesse/football/
Remember to stop your project once you’ve finished logging your metadata (https://docs.neptune.ai/api/project#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
All 0 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/metadata


### Imports

In [None]:
import pandas as pd
import numpy as np
pd.options.display.max_columns = 50
pd.options.display.max_rows = 100
print(pd.__version__)
print(np.__version__)

1.3.5
1.21.6


In [None]:
import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots as sp
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

In [None]:
from catboost import CatBoost
from catboost import utils
from catboost import CatBoostClassifier, CatBoostRegressor
from catboost import Pool, cv
from catboost.utils import eval_metric
np.random.seed(147)

In [None]:
from tqdm import tqdm
from scipy.stats import poisson

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve

### Code

#### Functions

In [None]:
def get_f1_curve(y, y_pred):
    """Score thresholded predictions with F1 on a fixed grid of cut-offs.

    The grid holds 24 evenly spaced thresholds from 0 to 1 inclusive; a
    prediction counts as positive when it is strictly greater than the
    threshold.

    Args:
        y: True binary labels.
        y_pred: Array of predicted scores, compared element-wise to each threshold.

    Returns:
        tuple[list, list]: The thresholds and the F1 score obtained at each.
    """
    threshold = list(np.linspace(0, 1, 24))
    f1 = [f1_score(y, (y_pred > cut).astype(int)) for cut in threshold]
    return threshold, f1

In [None]:
def plot_f1(y_true, x_predicted, data_split = 'train'):
    """Plot F1 against the decision threshold, display the figure and return it.

    Args:
        y_true: True binary labels.
        x_predicted: Predicted scores, forwarded to ``get_f1_curve``.
        data_split: Name of the data split, used only in the figure title.

    Returns:
        The plotly figure that was shown.
    """
    cuts, scores = get_f1_curve(y_true, x_predicted)
    background = 'rgb(229, 237, 247)'
    fig = px.area(
        x = cuts,
        y = scores,
        title = f'F1 Curve {data_split}',
        labels = {'x': 'threshold', 'y': 'F1'},
    )
    # Both axes span 0..1, so lock them to the same scale.
    fig.update_yaxes(scaleanchor = 'x', scaleratio = 1)
    fig.update_xaxes(constrain = 'domain')
    fig.update_layout(
        width = 600,
        title_x = 0.5,
        paper_bgcolor = background,
        plot_bgcolor = background,
    )
    fig.show()
    return fig

In [None]:
def plot_confusion_matrix(cfm, data_split = 'train', threshold = '', labels = None):
    """Render a confusion matrix as a heatmap, display it and return the figure.

    Args:
        cfm: Confusion matrix; rows are drawn on the 'True Label' axis and
            columns on the 'Predicted Label' axis.
        data_split: Name of the data split, used only in the title.
        threshold: Decision threshold to mention in the title; the default
            empty string leaves it out.
        labels: Class names in class-index order, used for both axes.
            Defaults to ``['Away', 'Home']`` for backward compatibility.
            For the one-vs-rest targets in this notebook class 1 is the
            selected outcome, so e.g. ``['Not away', 'Away']`` should be
            passed for the ``binary_away`` target.

    Returns:
        The plotly figure that was shown.
    """
    axis_labels = ['Away', 'Home'] if labels is None else list(labels)
    title_text = 'confusion matrix ' + data_split
    if threshold != '':
        title_text = title_text +' | ' + f'threshold = {threshold}'
    fig = px.imshow(cfm, x=axis_labels, y=axis_labels, color_continuous_scale='Purples', text_auto=True)
    fig.update_xaxes(title_text = 'Predicted Label')
    fig.update_yaxes(title_text = 'True Label')
    fig.update_layout(
        height = 400,
        width = 600,
        title_text = title_text,
        title_font_size=20,
        title_x=0.5,
        paper_bgcolor='rgb(229, 237, 247)',
        plot_bgcolor='rgb(229, 237, 247)',    
        )
    # The cell annotations already carry the counts; hide the colour bar.
    fig.update_coloraxes(showscale=False)
    fig.show()
    return fig

In [None]:
def get_profit_validation(y_true, x_predicted, Line_production, model_name, reverse_bet = False):
    '''
    Build a 2x2 grid of profit-versus-threshold panels.

    Rows are the selection strategies ('simple' on top, 'complex' below) and
    columns are the bet sizings ('fixed' on the left, 'divk' on the right).
    Every panel overlays three curves computed by get_profit_curve:
    total profit (filled), profit per placed bet (dashed) and the number of
    placed bets in thousands (dashed, on a hidden axis).

    y_true - numpy vector, shape (x,): true outcomes encoded as 0|1
    x_predicted - numpy vector, shape (x,): predicted probabilities (float)
    Line_production - numpy vector, shape (x,): bookmaker odds (float)
    model_name - text appended to the figure title
    reverse_bet - forwarded unchanged to get_profit_curve

    Returns the assembled plotly Figure; it is not shown here.
    '''
    bet_type_list = ['fixed', 'divk']
    bet_size_list = ['1', '1/K']
    strategy_list = ['simple', 'complex']
    strategy_name_list = ['threshold', 'pred*k']
    domain_list = [[0.55, 1], [0., 0.5]]
    layout_dict = {}
    data_list = []
    title_text = f'Profit & bet qty for validation model in neptune.ai: {model_name}'
    for cnt_str, strategy in enumerate(strategy_list):
        for cnt_bet, bet_type in enumerate(bet_type_list):
            threshold, profit, bet_qty_list = get_profit_curve(
                y_true, x_predicted, Line_production,
                bet_type = bet_type, strategy = strategy, reverse_bet = reverse_bet)
            profit_arr = np.asarray(profit, dtype = float)
            qty_arr = np.asarray(bet_qty_list, dtype = float)
            # Thresholds that select no bets have no per-bet profit: leave NaN
            # there (drawn as a gap) instead of dividing 0 by 0, which raised
            # "invalid value encountered in true_divide".
            profit_per_bet = np.divide(
                profit_arr, qty_arr,
                out = np.full_like(profit_arr, np.nan), where = qty_arr > 0)

            # Each panel owns one x axis and three consecutive y axes; the
            # second and third y axes overlay the first one.
            xaxis_num = str((2 * cnt_str) + (cnt_bet + 1))
            first_yaxis = (9 * cnt_str) + (cnt_bet * 3) + 1
            x_ref = 'x' + xaxis_num
            y_anchor = 'y' + str(first_yaxis)
            y_domain = domain_list[cnt_str]

            layout_dict['xaxis' + xaxis_num] = {
                'domain':[0.5 * cnt_bet, 0.5 * cnt_bet + 0.5],
                'title':f'bet={bet_size_list[cnt_bet]}, strategy:{strategy_name_list[cnt_str]}',
                'anchor':y_anchor}
            layout_dict['yaxis' + str(first_yaxis)] = {
                'domain':y_domain,
                'title':'',
                'range':[-10,int(max(profit) * 1.1)],
                'anchor':x_ref}
            layout_dict['yaxis' + str(first_yaxis + 1)] = {
                'domain':y_domain,
                'title':'', 'zeroline':True,
                'side':'right', 'anchor':x_ref,
                'overlaying':y_anchor}
            layout_dict['yaxis' + str(first_yaxis + 2)] = {
                'domain':y_domain,
                'visible':False, 'showgrid':True,
                'side':'right', 'anchor':x_ref,
                'overlaying':y_anchor}

            # Trace names show up on hover; each curve gets its own name
            # (all three used to be labelled 'profit_<n>').
            data_list += [
                go.Scatter(
                    x = threshold, y = profit,
                    name = 'profit_' + xaxis_num, fill = 'tozeroy',
                    xaxis = x_ref, yaxis = y_anchor),
                go.Scatter(
                    x = threshold, y = profit_per_bet,
                    name = 'profit_per_bet_' + xaxis_num,
                    line = dict(color='rgb(33,113,181)', dash='dash'),
                    xaxis = x_ref, yaxis = 'y' + str(first_yaxis + 1)),
                go.Scatter(
                    x = threshold, y = qty_arr / 1000,
                    name = 'bet_qty_k_' + xaxis_num,
                    line = dict(color='rgb(107,174,214)', dash='dash'),
                    xaxis = x_ref, yaxis = 'y' + str(first_yaxis + 2)),
            ]
    layout_dict.update({
        'width':1400,
        'height':800,
        'title_x':0.5,
        'title_text':title_text,
        'paper_bgcolor':'rgb(229, 237, 247)',
        'plot_bgcolor':'rgb(229, 237, 247)',
        'showlegend':False 
                        })
    layout = go.Layout(**layout_dict)
    return go.Figure(data=data_list, layout=layout)
     

In [None]:
def get_profit_curve(y, y_pred, Line_production, bet_type = 'fixed', strategy = 'simple', reverse_bet = False):
    """Simulate total profit and bet count for 1001 thresholds between 0 and 1.

    Args:
        y: Array of true outcomes encoded as 0/1.
        y_pred: Array of predicted probabilities, same length as ``y``.
        Line_production: Array of odds K, same length as ``y``.
        bet_type: ``'divk'`` stakes 1/K (a win pays (K-1)/K, a loss costs
            1/K); any other value is treated as ``'fixed'``, i.e. a unit
            stake (a win pays K-1, a loss costs 1).
        strategy: ``'simple'`` compares ``y_pred`` with the threshold,
            ``'complex'`` compares ``y_pred * K / 10``.
        reverse_bet: When true, rows scoring *below* the threshold are
            selected and the payoff is mirrored (outcome 0 wins, 1 loses).

    Returns:
        tuple[list, list, list]: thresholds, total profit at each threshold
        and the number of selected bets at each threshold.

    Raises:
        ValueError: If ``strategy`` is neither ``'simple'`` nor ``'complex'``
            (this used to fail with a NameError inside the loop).
    """
    y = np.asarray(y)
    y_pred = np.asarray(y_pred)
    Line_production = np.asarray(Line_production)

    if strategy == 'simple':
        score = y_pred
    elif strategy == 'complex':
        score = y_pred * Line_production / 10
    else:
        raise ValueError(f"unknown strategy {strategy!r}; expected 'simple' or 'complex'")

    if bet_type == 'divk':
        profit_size = (Line_production - 1) / Line_production
        bet_size = Line_production
    else:
        profit_size = (Line_production - 1)
        bet_size = Line_production /Line_production

    # The per-row payoff does not depend on the threshold, so compute it once
    # and only re-select rows inside the loop.
    if reverse_bet:
        payoff = (-1) * (y - 1) * profit_size + ((-1) * y) / bet_size
    else:
        payoff = y * profit_size + (y - 1) / bet_size

    threshold = []
    profit = []
    bet_qty_list = []
    for th in np.linspace(0,1,1001):
        vector_th = (score < th) if reverse_bet else (score > th)
        threshold.append(th)
        bet_qty_list.append(vector_th.sum())
        profit.append(payoff[vector_th].sum())

    return threshold, profit, bet_qty_list

In [None]:
def plot_equity(y_true, y_pred, Line_production, th, model_name, bet_type = 'fixed', 
                strategy = 'simple', data_split = 'validation', reverse_bet = False):
    """Build the equity curve and drawdown chart for one threshold.

    Rows are selected and paid out with the same rules as get_profit_curve;
    the upper panel shows cumulative profit over the selected bets in input
    order, the lower panel the drawdown from the running maximum expressed
    in units of the mean stake.

    Args:
        y_true: Array of true outcomes encoded as 0/1.
        y_pred: Array of predicted probabilities.
        Line_production: Array of odds K.
        th: Threshold applied to the selection score.
        model_name: Model id shown in the title; a ``FOOT-`` prefix is added
            only when the id does not already start with it.
        bet_type: ``'divk'`` stakes 1/K; any other value means a unit stake.
        strategy: ``'simple'`` scores by ``y_pred``, ``'complex'`` by
            ``y_pred * K / 10``.
        data_split: Name of the data split, used only in the title.
        reverse_bet: When true, rows scoring below ``th`` are selected and
            the payoff is mirrored, matching get_profit_curve (the flag was
            previously accepted but ignored).

    Returns:
        The assembled plotly Figure; it is not shown here.

    Raises:
        ValueError: If ``strategy`` is unknown or no row passes the threshold.
    """
    if strategy == 'simple':
        score = y_pred
    elif strategy == 'complex':
        score = y_pred * Line_production / 10
    else:
        raise ValueError(f"unknown strategy {strategy!r}; expected 'simple' or 'complex'")

    if bet_type == 'divk':
        profit_size = (Line_production - 1) / Line_production
        bet_size = Line_production
    else:
        profit_size = (Line_production - 1)
        bet_size = Line_production /Line_production

    if reverse_bet:
        vector_th = score < th
        payoff = (-1) * (y_true - 1) * profit_size + ((-1) * y_true) / bet_size
    else:
        vector_th = score > th
        payoff = y_true * profit_size + (y_true - 1) / bet_size

    bet_qty = vector_th.sum()
    if bet_qty == 0:
        # Without any bet there is no final equity value and no ROI.
        raise ValueError(f'no bets selected at threshold={th}; nothing to plot')

    stakes = 1 / bet_size[vector_th]
    mean_bet = np.mean(stakes)
    y = payoff[vector_th].cumsum()
    roi = y[-1] * 100 / np.sum(stakes)

    # Callers in this notebook pass the full id (e.g. 'FOOT-LIVEBCAWAY-7');
    # avoid rendering it as 'FOOT-FOOT-...'.
    model_label = model_name if str(model_name).startswith('FOOT-') else f'FOOT-{model_name}'
    title_text = f'Equity Curve {data_split} | threshold={th} | bet_type:{bet_type} | strategy:{strategy}<br>' + \
                f'bet_mean: {np.round(mean_bet, 2)} | ROI: {np.round(roi, 4)}%' + \
                f' | Bet quantity: {bet_qty}<br>' + f'Model in neptune.ai: {model_label}'

    layout_dict = {}
    trace_equity = go.Scatter(y = y, fill='tozeroy', xaxis = 'x1', yaxis = 'y1')
    layout_dict.update({'xaxis1':{'anchor':'y1'}, 'yaxis1':{'domain':[0.4, 1]}})
    max_profit = np.maximum.accumulate(y)
    trace_drawdown =  go.Scatter(y = (y - max_profit) / mean_bet, fill='tozeroy', xaxis = 'x2', yaxis = 'y2')
    layout_dict.update({'xaxis2':{'anchor':'y2'}, 'yaxis2':{'domain':[0., 0.35], 'title':'drawdown inmean(bet)'}})

    layout_dict.update({
    'width':1400,
    'height':800,
    'title_x':0.5,
    'title_text':title_text,
    'paper_bgcolor':'rgb(229, 237, 247)',
    'plot_bgcolor':'rgb(229, 237, 247)',
    'showlegend':False 
                    })
    data_list = [trace_equity, trace_drawdown]
    layout = go.Layout(**layout_dict)
    return go.Figure(data=data_list, layout=layout)

#### Load Data

1. регрессия
2. мультиклассовая классификация {AWAY:0, DRAW:1, HOME:2} 
3. бинарная классификация: <br>
    a. HOME vs (DRAW & AWAY)<br>
    б. DRAW vs (HOME & AWAY)<br>
    в. AWAY vs (HOME & DRAW)<br>

In [None]:
# Open the downloaded archive and pull out the three feature matrices.
dataset_name = './dataset.npz'
data_npz = np.load(dataset_name)
X_train, X_test, X_holdout = (
    data_npz[key] for key in ('X_train', 'X_test', 'X_holdout')
)

In [None]:
#@title Выбор таргета
target_type = "binary_away" #@param ["regression1", "regression2", "multiclass", "binary_home", "binary_draw", "binary_away"]

def _one_vs_rest_splits(class_id):
    """Return ``(y, line, active)`` for train, test and holdout.

    ``y`` is 1 where the multiclass label equals ``class_id`` and 0 elsewhere,
    ``line`` is column 1 of the split's K array and ``active`` is true where
    column 0 of K equals 1.
    """
    per_split = []
    for split in ('train', 'test', 'holdout'):
        # Read each K array once instead of once per derived vector.
        odds = data_npz[f'K_{split}']
        per_split.append((
            1 * (data_npz[f'y_{split}_multi'] == class_id),
            odds[:, 1],
            odds[:, 0] == 1,
        ))
    return per_split

if target_type == 'regression1':
    y_train, y_test = data_npz['y_train_regression1'], data_npz['y_test_regression1']
    model_head = 'FOOT-LIVEBST1'
elif target_type == 'regression2':
    y_train, y_test = data_npz['y_train_regression2'], data_npz['y_test_regression2']
    model_head = 'FOOT-LIVEBST2'
elif target_type == 'multiclass':
    y_train, y_test =  data_npz['y_train_multi'], data_npz['y_test_multi']
    model_head = 'FOOT-LIVEMC'
elif target_type == 'binary_draw':
    y_train, y_test =  1 * (data_npz['y_train_multi'] == 1), 1 * (data_npz['y_test_multi'] == 1)
    model_head = 'FOOT-LIVEBCDRAW'
elif target_type in ('binary_home', 'binary_away'):
    # Multiclass encoding is {AWAY: 0, DRAW: 1, HOME: 2}.
    # NOTE(review): both targets read the odds from K column 1 - confirm that
    # this column holds the odds of the selected outcome in each case.
    positive_class = 2 if target_type == 'binary_home' else 0
    (
        (y_train, line_train, active_train),
        (y_test, line_test, active_test),
        (y_holdout, line_holdout, active_holdout),
    ) = _one_vs_rest_splits(positive_class)
    zero_train, zero_test, zero_holdout = line_train > 1, line_test > 1, line_holdout > 1
    model_head = 'FOOT-LIVEBC' if target_type == 'binary_home' else 'FOOT-LIVEBCAWAY'
else:
    # Fail here rather than with a NameError in some later cell.
    raise ValueError(f'unknown target_type: {target_type!r}')

In [None]:
#dataset_name = './dataset.npz'
#data_npz = np.load(dataset_name)
#X_train, X_test = data_npz['X_train'], data_npz['X_test']
#y_train1, y_test1 = data_npz['y_train_regression1'], data_npz['y_test_regression1']
#y_train2, y_test2 = data_npz['y_train_regression2'], data_npz['y_test_regression2']

In [None]:
# Shapes of the train, test and holdout feature matrices, in that order.
tuple(split.shape for split in (X_train, X_test, X_holdout))

((11197708, 41), (2798988, 41), (144925, 41))

In [None]:
# The stored feature list arrives as text: delete brackets, quotes and spaces,
# then split the remainder on commas.
cols = params['features'].translate(str.maketrans('', '', "[] '")).split(',')

#### Download models and restore predicts

In [None]:
# Model for the first team
model_num = 7 # version number of the model to fetch
neptune_model = model_head
neptune_model_version = neptune_model + f'-{model_num}'
model_version_params = dict(
    project = 'scomesse/football',
    model = neptune_model,
    api_token = api_key,
    with_id = neptune_model_version
)
PATH_TO_MODEL = './booster.model'
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['model'].download(PATH_TO_MODEL)
    params1 = model_version['team_parameters'].fetch()
finally:
    # Release the connection even when the download or fetch fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
All 0 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Restore the downloaded model from disk; the value of the last line is what
# the cell displays.
booster = CatBoost()
booster.load_model('./booster.model')

<catboost.core.CatBoost at 0x7f62abf5fd30>

In [None]:
# Only the two binary targets with odds have restore logic; for every other
# target this cell intentionally does nothing.
if target_type in ('binary_home', 'binary_away'):
    booster = CatBoost()
    booster.load_model('./booster.model')
    train_preds = booster.predict(X_train, prediction_type="Probability")
    test_preds = booster.predict(X_test, prediction_type="Probability")
    holdout_preds = booster.predict(X_holdout, prediction_type="Probability")
    # "%.4f" on all three lines (the train line used the mistyped "% 4f",
    # which printed six decimals with a leading space).
    print("Train Accuracy : %.4f" % eval_metric(y_train, train_preds, "Accuracy")[0])
    print("Test  Accuracy : %.4f" % eval_metric(y_test, test_preds, "Accuracy")[0])
    print("Holdout  Accuracy : %.4f" % eval_metric(y_holdout, holdout_preds, "Accuracy")[0])
    # Keep only the second probability column for the cells below.
    train_preds = train_preds[:,1]
    test_preds = test_preds[:,1]
    holdout_preds = holdout_preds[:,1]

Train Accuracy :  0.761436
Test  Accuracy : 0.7598
Holdout  Accuracy : 0.7627


In [None]:
# Train-set rates at the 0.5 cut-off, in order:
#   share of y == 1 among predictions above 0.5,
#   share of y == 0 among predictions below 0.5,
#   share of y == 1 among predictions below 0.5,
#   share of y == 0 among predictions above 0.5.
# Both comparisons are strict, so predictions equal to 0.5 are in neither group.
np.sum((y_train == 1) & (train_preds > 0.5)) / np.sum(train_preds > 0.5), \
np.sum((y_train == 0) & (train_preds < 0.5)) / np.sum(train_preds < 0.5), \
np.sum((y_train == 1) & (train_preds < 0.5)) / np.sum(train_preds < 0.5), \
np.sum((y_train == 0) & (train_preds > 0.5)) / np.sum(train_preds > 0.5)

(0.6978642587356652,
 0.778210715768321,
 0.22178928423167904,
 0.3021357412643349)

In [None]:
# Train-set counts at the 0.5 cut-off, in order: true positives, true
# negatives, false negatives, false positives (strict comparisons).
np.sum((y_train == 1) & (train_preds > 0.5)), \
np.sum((y_train == 0) & (train_preds < 0.5)), \
np.sum((y_train == 1) & (train_preds < 0.5)), \
np.sum((y_train == 0) & (train_preds > 0.5))

(1631488, 6894852, 1965026, 706342)

#### F1 score

##### F1 score train

In [None]:
# F1-versus-threshold curve on the train split, stored with the model version.
fig = plot_f1(
    y_train,
    train_preds,
    data_split = 'train'
)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['f1_train'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


##### F1 score test

In [None]:
# F1-versus-threshold curve on the test split, stored with the model version.
fig = plot_f1(
    y_test,
    test_preds,
    data_split = 'test'
)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['f1_test'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


#### Confusion Matrix

In [None]:
# Alternative decision threshold used by the confusion-matrix cells below
# (the spelling is kept because those cells reference this name).
treshold = 0.3

In [None]:
# Confusion matrix on train at the 0.5 cut-off. The boolean mask converts
# straight to 0/1, so the extra .round() was dropped.
cfm_train = confusion_matrix(y_train, (train_preds > 0.5).astype(int))
fig = plot_confusion_matrix(cfm_train, data_split = 'train', threshold = 0.5)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['confusion_matrix_train_0.5'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Confusion matrix on train at the custom cut-off. The boolean mask converts
# straight to 0/1, so the extra .round() was dropped.
cfm_train = confusion_matrix(y_train, (train_preds > treshold).astype(int))
fig = plot_confusion_matrix(cfm_train, data_split = 'train', threshold = treshold)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version[f'confusion_matrix_train_{treshold}'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Confusion matrix on test at the 0.5 cut-off. The boolean mask converts
# straight to 0/1, so the extra .round() was dropped.
cfm_test = confusion_matrix(y_test, (test_preds > 0.5).astype(int))
fig = plot_confusion_matrix(cfm_test, data_split = 'test', threshold = 0.5)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['confusion_matrix_test_0.5'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Confusion matrix on test at the custom cut-off. The boolean mask converts
# straight to 0/1, so the extra .round() was dropped.
cfm_test = confusion_matrix(y_test, (test_preds > treshold).astype(int))
fig = plot_confusion_matrix(cfm_test, data_split = 'test', threshold = treshold)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version[f'confusion_matrix_test_{treshold}'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Confusion matrix on holdout at the 0.5 cut-off. The boolean mask converts
# straight to 0/1, so the extra .round() was dropped.
cfm_holdout = confusion_matrix(y_holdout, (holdout_preds > 0.5).astype(int))
fig = plot_confusion_matrix(cfm_holdout, data_split = 'holdout', threshold = 0.5)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['confusion_matrix_holdout_0.5'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Confusion matrix on holdout at the custom cut-off. The boolean mask converts
# straight to 0/1, so the extra .round() was dropped.
cfm_holdout = confusion_matrix(y_holdout, (holdout_preds > treshold).astype(int))
fig = plot_confusion_matrix(cfm_holdout, data_split = 'holdout', threshold = treshold)
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version[f'confusion_matrix_holdout_{treshold}'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


#### Profit validation

##### TRAIN Profit validation

In [None]:
# Profit report on the train rows with odds above 1 and K column 0 equal to 1.
# The mask is built once instead of three times, and the title now carries the
# split name like the test and holdout reports do.
train_mask = zero_train & active_train
fig = get_profit_validation(
    y_train[train_mask], 
    train_preds[train_mask], 
    line_train[train_mask], 
    'train: ' + neptune_model + f'-{model_num}'
    )
fig.show()


invalid value encountered in true_divide



In [None]:
# Save the train profit report locally and attach it to the model version.
fig.write_html(f'train: {neptune_model}-{model_num}_profit_report.html') #neptune_model + f'-{model_num}'
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['profit_validation_train'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Number of train rows that enter the profit simulation.
(zero_train & active_train).sum()

3210863

In [None]:
# Equity curve on train for one fixed operating point.
threshold, bet_type, strategy = 0.104, 'divk', 'complex'
fig = plot_equity(
    *(values[zero_train & active_train] for values in (y_train, train_preds, line_train)),
    threshold,
    f'{neptune_model}-{model_num}',
    bet_type = bet_type,
    strategy = strategy,
    data_split = 'train',
    reverse_bet = False,
)
fig.show()

In [None]:
# Attach the train equity figure, keyed by the operating point it was drawn for.
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version[f'equity_train_{bet_type}_{strategy}_th_{threshold}'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


##### TEST Profit validation

In [None]:
# Number of test rows that enter the profit simulation.
(zero_test & active_test).sum()

808201

In [None]:
# Profit report on the test rows with odds above 1 and K column 0 equal to 1.
fig = get_profit_validation(
    *(values[zero_test & active_test] for values in (y_test, test_preds, line_test)),
    f'test: {neptune_model}-{model_num}',
)
fig.show()

In [None]:
# Save the test profit report locally and attach it to the model version.
fig.write_html(f'test: {neptune_model}-{model_num}_profit_report.html') #neptune_model + f'-{model_num}'
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['profit_validation_test'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Equity curve on test for one fixed operating point.
# Other accepted values: bet_type 'fixed', strategy 'simple'.
threshold, bet_type, strategy = 0.105, 'divk', 'complex'
fig = plot_equity(
    *(values[zero_test & active_test] for values in (y_test, test_preds, line_test)),
    threshold,
    f'{neptune_model}-{model_num}',
    bet_type = bet_type,
    strategy = strategy,
    data_split = 'test',
    reverse_bet = False,
)
fig.show()

In [None]:
# Attach the test equity figure, keyed by the operating point it was drawn for.
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version[f'equity_test_{bet_type}_{strategy}_th_{threshold}'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
# Profit report on the holdout rows with odds above 1 and K column 0 equal to 1.
fig = get_profit_validation(
    *(values[zero_holdout & active_holdout] for values in (y_holdout, holdout_preds, line_holdout)),
    f'holdout: {neptune_model}-{model_num}',
)
fig.show()

In [None]:
# Save the holdout profit report locally and attach it to the model version.
fig.write_html(f'holdout: {neptune_model}-{model_num}_profit_report.html') #neptune_model + f'-{model_num}'
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['profit_validation_holdout'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBC/v/FOOT-LIVEBC-2
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBC/v/FOOT-LIVEBC-2/metadata


In [None]:
# Number of holdout rows that enter the profit simulation.
(zero_holdout & active_holdout).sum()

117272

In [87]:
# Equity curve on holdout for one fixed operating point.
threshold, bet_type, strategy = 0.105, 'divk', 'complex'
fig = plot_equity(
    *(values[zero_holdout & active_holdout] for values in (y_holdout, holdout_preds, line_holdout)),
    threshold,
    f'{neptune_model}-{model_num}',
    bet_type = bet_type,
    strategy = strategy,
    data_split = 'holdout',
    reverse_bet = False,
)
fig.show()

In [None]:
# Attach the holdout equity figure, keyed by the operating point it was drawn for.
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version[f'equity_holdout_{bet_type}_{strategy}_th_{threshold}'].upload(neptune.types.File.as_html(fig))
finally:
    # Release the connection even when the upload fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [96]:
# Housekeeping: remove one equity figure (train, divk, complex, th 0.094)
# from the model version.
model_version = neptune.init_model_version(**model_version_params)
try:
    del model_version['equity_train_divk_complex_th_0.094']
finally:
    # Release the connection even when the deletion fails.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBCAWAY/v/FOOT-LIVEBCAWAY-7/metadata


In [None]:
#print("Test  RMSE : %.4f"%eval_metric(y_test, test_preds, "Accuracy")[0])
#print("Train RMSE : % 4f"% eval_metric(y_train1, train_preds1, "RMSE")[0])
#print("Test  R2 : %.4f"%eval_metric(y_test1, test_preds1, "R2")[0])
#print("Train R2 : % 4f"%eval_metric(y_train1, train_preds1, "R2")[0])

Test  RMSE : 0.7164


In [None]:
# Team-2 model: download the stored booster file and fetch its parameters from Neptune.
model_num = 1  # model version number to load
neptune_model = 'FOOT-LIVEBST2'
neptune_model_version = neptune_model + f'-{model_num}'
model_version_params = dict(
    project = 'scomesse/football',
    model = neptune_model,
    api_token = api_key,
    with_id = neptune_model_version
)
PATH_TO_MODEL = './booster_team2.model'
model_version = neptune.init_model_version(**model_version_params)
try:
    model_version['team2_model'].download(PATH_TO_MODEL)
    params2 = model_version['team_parameters'].fetch()
finally:
    # Stop the connection even when the download or fetch raises.
    model_version.stop()

https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBST2/v/FOOT-LIVEBST2-1
Remember to stop your model_version once you’ve finished logging your metadata (https://docs.neptune.ai/api/model_version#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
Shutting down background jobs, please wait a moment...
Done!
All 0 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/scomesse/football/m/FOOT-LIVEBST2/v/FOOT-LIVEBST2-1/metadata


In [None]:
# Create an empty CatBoost model and load the team-2 booster from the local model file.
booster_team2 = CatBoost()
# Kept as the cell's last expression, so the value returned by load_model is displayed.
booster_team2.load_model('./booster_team2.model')

<catboost.core.CatBoost at 0x7fbec4bf1790>

In [None]:
# Team-2 booster predictions for the test split, then the train split.
test_preds2, train_preds2 = (
    booster_team2.predict(features) for features in (X_test, X_train)
)

In [None]:
# Report RMSE and R2 of the team-2 predictions on both splits.
# All four values use the same "%.4f" format (the Train lines previously used "% 4f",
# which printed six decimals instead of four).
print("Test  RMSE : %.4f" % eval_metric(y_test2, test_preds2, "RMSE")[0])
print("Train RMSE : %.4f" % eval_metric(y_train2, train_preds2, "RMSE")[0])
print("Test  R2 : %.4f" % eval_metric(y_test2, test_preds2, "R2")[0])
print("Train R2 : %.4f" % eval_metric(y_train2, train_preds2, "R2")[0])

Test  RMSE : 0.0497
Train RMSE :  0.049284
Test  R2 : 0.1868
Train R2 :  0.193117


#### Load Current Score & Final Results

In [None]:
# Pull the per-team score and result arrays for both splits out of the dataset archive.
# Train split
score1_train = data_npz['score1_train']
score2_train = data_npz['score2_train']
result1_train = data_npz['result1_train']
result2_train = data_npz['result2_train']
# Test split
score1_test = data_npz['score1_test']
score2_test = data_npz['score2_test']
result1_test = data_npz['result1_test']
result2_test = data_npz['result2_test']

In [None]:
# Poisson PMF evaluated at 0..6 for every test row; after the transpose each row
# belongs to one prediction and each column to one count.
# NOTE(review): the factor 21 presumably undoes a scaling of the training target — confirm.
sc1_test_array = np.array(
    [poisson.pmf(goals, mu=test_preds1 * 21, loc=0) for goals in range(7)]
).T
sc2_test_array = np.array(
    [poisson.pmf(goals, mu=test_preds2 * 21, loc=0) for goals in range(7)]
).T

In [None]:
# Probability of each difference between the team-1 and team-2 counts, keyed by the
# signed difference (positive: team 1 higher). Key 0 pairs equal counts.
prob_dict = {0: (sc1_test_array * sc2_test_array).sum(axis=1)}
for diff in range(1, 7):
    # Shift one matrix by `diff` columns so that paired columns differ by exactly `diff`.
    team1_ahead = sc1_test_array[:, diff:] * sc2_test_array[:, :-diff]
    team2_ahead = sc1_test_array[:, :-diff] * sc2_test_array[:, diff:]
    prob_dict[diff] = team1_ahead.sum(axis=1)
    prob_dict[-diff] = team2_ahead.sum(axis=1)

In [None]:
# One column per difference, ordered from +6 (column 0) down to -6 (column 12).
diff_prob_arr = np.column_stack([prob_dict[diff] for diff in range(6, -7, -1)])

In [None]:
# Score difference on the test split (team 1 minus team 2), limited to [-6, 6].
curdiff = np.clip(score1_test - score2_test, -6, 6)
#curdiff = curdiff + 6

In [None]:
# Accumulate three probabilities per row from the difference distribution.
# Column `score_diff` of diff_prob_arr holds the probability of difference d = 6 - score_diff,
# so comparing curdiff with (score_diff - 6) == -d is the same as testing curdiff + d against 0:
#   line_prob[:, 0] -> curdiff + d > 0
#   line_prob[:, 1] -> curdiff + d == 0
#   line_prob[:, 2] -> curdiff + d < 0
# NOTE(review): presumably team-1 win / draw / team-2 win at full time — confirm.
line_prob = np.zeros((curdiff.shape[0], 3))
for score_diff in range(13):
    # Boolean masks multiply as 0/1, so each term adds the column's probability
    # only on rows where the comparison holds.
    line_prob[:, 0] += diff_prob_arr[:, score_diff] * (curdiff > score_diff - 6)
    line_prob[:, 1] += diff_prob_arr[:, score_diff] * (curdiff == score_diff - 6)
    line_prob[:, 2] += diff_prob_arr[:, score_diff] * (curdiff < score_diff - 6)

In [None]:
# Share of rows whose first line_prob column exceeds 0.5.
np.mean(line_prob[:, 0] > 0.5)

0.3724400980120842

In [None]:
# Number of rows whose largest line_prob entry is column 0, 1 and 2 respectively.
# The argmax is computed once and tallied in a single pass; minlength=3 keeps a
# zero count for any column that never wins.
tuple(np.bincount(np.argmax(line_prob, axis=1), minlength=3))

(1555138, 90997, 930282)

In [None]:
# Number of test rows where the first model's prediction exceeds the second's.
(test_preds1 > test_preds2).sum()

1670851