In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
# Stop the interpreter from writing .pyc bytecode files when modules are imported.
sys.dont_write_bytecode = True

In [2]:
#imports
import os
import yaml
import json
import warnings
from itertools import product

import mlflow

import pandas as pd
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier
from sklearn.metrics import f1_score, recall_score, roc_auc_score
import numpy as np
from datetime import datetime

from callables import parse_json
from callables import generate_features
from callables import process_features
from callables import model_train
from utils import plot_roc_curve, plot_confusion_matrix, plot_feature_importnaces, find_max_recall 

# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings from the
# libraries used below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [3]:
#params
# Read the run configuration from params.yml in the working directory.
with open("params.yml") as file:
    params = yaml.safe_load(file)

# Input location: the configured input path appended verbatim to the root path.
root_path = params['root_path']
filepath_input = root_path + params['filepath_input']

# Remaining settings are taken from the config unchanged.
# The backward/forward shift values are not configured here; they are swept
# by the grid-search loop further down.
model_name = params['model_name']
feature_cols = params['feature_cols']
anomaly_crtiretion = params['anomaly_crtiretion']
cb_params = params['cb_params']

In [4]:
# Point MLflow at the tracking server on localhost:5000 and select the
# experiment that the runs below are recorded under. The set_experiment call
# stays last so the cell still displays the returned Experiment object.
mlflow.set_tracking_uri(uri='http://127.0.0.1:5000')
mlflow.set_experiment(experiment_name="trading_model_b_shift_f_shift_recall_v2")

2022/06/17 13:58:36 INFO mlflow.tracking.fluent: Experiment with name 'trading_model_b_shift_f_shift_recall' does not exist. Creating a new experiment.


<Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='trading_model_b_shift_f_shift_recall', tags={}>

In [5]:
# Candidate shift sizes for the grid search: the integers 1 through 50,
# as two independent lists (one per shift direction).
shift_backwards_values = [*range(1, 51)]
shift_forward_values = [*range(1, 51)]

In [None]:
# Grid search over every (backward shift, forward shift) pair, backward shift
# varying slowest. Each pair gets its own MLflow run: rebuild the features,
# fit a CatBoost classifier on an unshuffled 80/20 split, and log the
# train/test recall at the selected probability cutoff.
for shift_backwrd, shift_frwrd in product(shift_backwards_values, shift_forward_values):

    # Using start_run() as a context manager guarantees the run is closed when
    # an iteration raises or the cell is interrupted. With a bare
    # start_run()/end_run() pair the run stays active after a failure, and the
    # next start_run() call is rejected because a run is already active.
    with mlflow.start_run(run_name=f'b_{shift_backwrd}_f_{shift_frwrd}'):

        #read
        # NOTE(review): the input file is re-parsed for every pair. If
        # generate_features does not modify its argument, parse once before
        # the loop instead — TODO confirm against callables.
        df = parse_json(filepath_input)
        df = generate_features(
            df, feature_cols,
            shift_backwrd, shift_frwrd,
            anomaly_crtiretion
        )
        # Only df_model is used below; df_event and df_period are not read in this cell.
        df_event, df_period, df_model = process_features(df, feature_cols)

        #split
        # shuffle=False keeps the row order: the first 80% of rows become the
        # training set and the last 20% the test set. random_state has no
        # effect without shuffling; it is kept for parity with earlier runs.
        train, test = train_test_split(df_model, test_size=0.2, random_state=42, shuffle=False)

        x_train = train.drop('target', axis=1)
        y_train = train.target.astype(int)

        x_test = test.drop('target', axis=1)
        y_test = test.target.astype(int)

        #fit best model
        model = CatBoostClassifier(**cb_params)
        model.fit(x_train, y_train)

        # predict_proba returns one column per class; column 1 is the
        # probability of the positive class (target == 1).
        y_train_pred_proba = model.predict_proba(x_train)[:, 1]
        y_test_pred_proba = model.predict_proba(x_test)[:, 1]

        #opt_cutoff
        # NOTE(review): the cutoff is selected from the test labels and then
        # used to score the same test set, so recall_score_test is an
        # optimistic estimate. Consider choosing the cutoff on the training
        # data or a separate validation split.
        opt_cutoff = find_max_recall(y_test, y_test_pred_proba)

        #calculate metrics
        # A row is predicted positive when its probability is strictly above the cutoff.
        recall_score_train = recall_score(y_train, (y_train_pred_proba > opt_cutoff), pos_label=1, average='binary')
        recall_score_test = recall_score(y_test, (y_test_pred_proba > opt_cutoff), pos_label=1, average='binary')

        # mlflow log params (one batched call; same keys and values as before)
        mlflow.log_params({
            "shift_bckwrd": shift_backwrd,
            "shift_frwrd": shift_frwrd,
            "anomaly_crtiretion": anomaly_crtiretion,
        })

        # mlflow log metrics
        mlflow.log_metrics({
            "recall_score_train": recall_score_train,
            "recall_score_test": recall_score_test,
        })

    # Printed only after the run has been closed successfully.
    print(f'success_run_b_{shift_backwrd}_f_{shift_frwrd}')

success_run_b_1_f_1
success_run_b_1_f_2
success_run_b_1_f_3
success_run_b_1_f_4
success_run_b_1_f_5
success_run_b_1_f_6
success_run_b_1_f_7
success_run_b_1_f_8
success_run_b_1_f_9
success_run_b_1_f_10
success_run_b_1_f_11
success_run_b_1_f_12
success_run_b_1_f_13
success_run_b_1_f_14
success_run_b_1_f_15
success_run_b_1_f_16
success_run_b_1_f_17
success_run_b_1_f_18
success_run_b_1_f_19
success_run_b_1_f_20
success_run_b_1_f_21
success_run_b_1_f_22
success_run_b_1_f_23
success_run_b_1_f_24
success_run_b_1_f_25
success_run_b_1_f_26
success_run_b_1_f_27
success_run_b_1_f_28
success_run_b_1_f_29
success_run_b_1_f_30
success_run_b_1_f_31
success_run_b_1_f_32
success_run_b_1_f_33
success_run_b_1_f_34
success_run_b_1_f_35
success_run_b_1_f_36
success_run_b_1_f_37
success_run_b_1_f_38
success_run_b_1_f_39
success_run_b_1_f_40
success_run_b_1_f_41
success_run_b_1_f_42
success_run_b_1_f_43
success_run_b_1_f_44
success_run_b_1_f_45
success_run_b_1_f_46
success_run_b_1_f_47
success_run_b_1_f_48
s