In [8]:
from utils_csa import show_model_accuracy, save_obj, load_obj, evaluate_model, evaluate_model_formated, get_value_count

import pandas.io.sql as psql
import pandas as pd
import numpy as np

from sqlalchemy import create_engine

from pytz import timezone
from datetime import datetime, timedelta, date

from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import make_scorer, precision_score

from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier

# Lib to import ipynb : https://pypi.org/project/import-ipynb/
import import_ipynb
import algocryptos_preprocessing as alg_preproc
import algocryptos_gridsearch as alg_gridsearch
import algocryptos_backtesting_new as alg_backtesting

#from pprint import pprint
import sys

#pd.set_option('display.max_rows', 150)
#pd.set_option('display.max_columns', 150)

#np.set_printoptions(threshold='nan')
import os

# Horizon in hours -> term label ('3h', '1d', ...); handed to alg_preproc.get_preprocessed_data.
dict_hours_labels  = {3:'3h', 6:'6h', 12:'12h', 24:'1d', 24*2:'2d', 24*3:'3d', 24*7:'7d', 24*15:'15d', 24*30:'30d'}

# Database URL. Set the ALGOCRYPTOS_DB_URL environment variable to supply real credentials
# without writing them in the notebook; the literal below is only the historical default.
str_sql = os.environ.get('ALGOCRYPTOS_DB_URL', 'postgresql://dbuser:algocryptos@localhost:9091/algocryptos')
connection = create_engine(str_sql)

# Names of the available systematization configurations (see get_params / get_classifiers).
CONF_1 = 'conf_1'
CONF_2 = 'conf_2'


# SQL queries
# ===========
# select * from logging_trt_3 
# where status = 'Success'
# order by backtest_pct_change_portfolio desc

# select co.symbol, lo.param_id_crypto_to_predict, max(lo.backtest_pct_change_portfolio) as pct_change_portfolio_maxi, max(backtest_pct_change_market) as pct_change_market
# from logging_trt_3 lo
# inner join coins co on (co.id_cryptocompare = lo.param_id_crypto_to_predict)
# group by co.symbol, lo.param_id_crypto_to_predict
# order by pct_change_portfolio_maxi desc

In [10]:
# dynamic params
def get_params(conf):
    """Return the parameter grid explored by the systematization for ``conf``.

    Parameters
    ----------
    conf : str
        Configuration name. ``CONF_1`` and ``CONF_2`` are recognised; any
        other value yields empty collections for every grid dimension.

    Returns
    -------
    tuple
        ``(param_datasets_nb_cryptos, params_y, param_crypto_learning,
        param_crypto_predicting, thresholds, factors,
        predict_only_one_crypto, do_scale, do_pca)`` where

        * ``param_datasets_nb_cryptos`` - list of dataset sizes (number of
          cryptos taken from the head of the id lists),
        * ``params_y`` - dict mapping a term label (``'3h'``, ``'1d'``, ...)
          to the list of targeted price changes in percent,
        * ``param_crypto_learning`` / ``param_crypto_predicting`` - lists of
          crypto ids,
        * ``thresholds`` - decision thresholds to evaluate,
        * ``factors`` - sign multipliers applied to the targeted change,
        * ``predict_only_one_crypto``, ``do_scale``, ``do_pca`` - booleans.
    """
    param_datasets_nb_cryptos = []
    params_y = {}
    param_crypto_learning = []
    param_crypto_predicting = []
    thresholds = []
    # This default used to be bound to `factor`, which left `factors`
    # undefined (UnboundLocalError at return) for an unrecognised conf.
    factors = []
    predict_only_one_crypto = True

    do_scale = True
    do_pca = True

    if conf == CONF_1:
        param_datasets_nb_cryptos = [5, 10, 20, 30, 100] # 100 = max

        params_y = {'3h':[2.5, 5, 7.5, 10],
                    '6h':[2.5, 5, 7.5, 10],
                    '12h':[2.5, 5, 7.5, 10],
                    '1d':[2.5, 5, 7.5, 10],
                    '2d':[10, 15, 20],
                    '3d':[10, 20, 30],
                    '7d':[10, 20, 30, 50],
                    '15d':[10, 20, 30, 50, 100],
                    '30d':[10, 20, 30, 50, 100, 200]}

        # See the XLS sheet for how these cryptos were chosen
        param_crypto_learning = [1182, 7605, 5031, 202330, 4614, 166503, 3808, 321992, 5038, 310829, 127356, 3807, 204788, 27368, 5324,
                           5285, 166390, 24854, 236131, 41192, 347235,187440, 186277, 16713, 4432, 112392, 808414, 19745, 107672,
                           716725, 324068, 5039, 5280, 172091, 309621, 4430, 13072, 20333, 4433, 20131, 33022, 17778, 24294, 890645]

        # See the XLS sheet for how these cryptos were chosen
        param_crypto_predicting = [1182, 7605, 5031, 4614, 166503, 3808, 321992, 5038, 310829, 127356, 3807, 204788, 27368, 5324,
                             5285, 166390, 236131, 41192, 187440, 186277, 112392, 808414, 19745, 107672, 716725, 324068, 172091,
                             309621, 13072, 20333, 20131, 33022, 17778, 24294, 890645]

        thresholds = [0.5, 0.7, 0.8, 0.9]

        # Try both a decrease (-1) and an increase (+1) of the close price
        factors = [-1, +1]

        predict_only_one_crypto = False

    elif conf == CONF_2:
        param_datasets_nb_cryptos = [5, 10, 15, 20]

        params_y = {'1d':[2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5]}

        # Without BTC: same ids as the CONF_1 learning list minus its first entry (1182).
        # See the XLS sheet for how these cryptos were chosen.
        param_crypto_learning = [7605, 5031, 202330, 4614, 166503, 3808, 321992, 5038, 310829, 127356, 3807, 204788, 27368, 5324,
                           5285, 166390, 24854, 236131, 41192, 347235,187440, 186277, 16713, 4432, 112392, 808414, 19745, 107672,
                           716725, 324068, 5039, 5280, 172091, 309621, 4430, 13072, 20333, 4433, 20131, 33022, 17778, 24294, 890645]

        # Predict on exactly the cryptos used for learning (same list object)
        param_crypto_predicting = param_crypto_learning

        # 36 evenly spaced thresholds: 0.50, 0.51, ..., 0.85
        thresholds = np.linspace(0.5, 0.85, num=36)

        factors = [+1]

    return param_datasets_nb_cryptos, params_y, param_crypto_learning, param_crypto_predicting, thresholds, factors, \
        predict_only_one_crypto, do_scale, do_pca

In [11]:
def get_XGBClassifier(true_count, false_count):
    """Build an XGBoost classifier weighted for the class imbalance of the target.

    Parameters
    ----------
    true_count : int
        Number of positive samples in the training target.
    false_count : int
        Number of negative samples in the training target.

    Returns
    -------
    XGBClassifier
        Unfitted classifier with ``scale_pos_weight = false_count / true_count``.
        When ``true_count`` is zero the ratio is undefined, so the library
        default is kept instead of raising ``ZeroDivisionError``.
    """
    if not true_count:
        # No positive sample at all: nothing to re-weight.
        return XGBClassifier()
    # The ratio used to be computed and then dropped; actually hand it to the model.
    return XGBClassifier(scale_pos_weight=false_count / true_count)

def get_MLPClassifier():
    """Return an unfitted MLP classifier: two hidden layers of 130 units, L-BFGS solver, fixed seed."""
    mlp_settings = {
        'hidden_layer_sizes': [130, 130],
        'solver': 'lbfgs',
        'random_state': 0,
    }
    return MLPClassifier(**mlp_settings)

def get_RandomForestClassifier():
    """Return an unfitted random forest: 80 trees of depth <= 5, balanced class weights, fixed seed."""
    forest_settings = dict(
        random_state=0,
        class_weight="balanced",
        max_depth=5,
        n_estimators=80,
    )
    return RandomForestClassifier(**forest_settings)

def get_classifiers(conf, true_count, false_count):
    """Return the ``{name: unfitted classifier}`` mapping to evaluate for ``conf``.

    ``CONF_1`` yields XGBoost, MLP and random forest (in that order),
    ``CONF_2`` yields XGBoost only, and any other value yields an empty dict.
    ``true_count`` / ``false_count`` are forwarded to ``get_XGBClassifier``.
    """
    if conf == CONF_1:
        models = {}
        models['XGBClassifier'] = get_XGBClassifier(true_count, false_count)
        models['MLPClassifier'] = get_MLPClassifier()
        models['RandomForestClassifier'] = get_RandomForestClassifier()
        return models

    if conf == CONF_2:
        return {'XGBClassifier': get_XGBClassifier(true_count, false_count)}

    return {}

def start_systematization(conf=CONF_2):
    """Run the whole fit / backtest / evaluate grid for ``conf`` and log it to the database.

    The grid comes from ``get_params(conf)``: for each dataset size, each
    crypto to predict, each term, each targeted price change and each sign
    factor, every classifier from ``get_classifiers`` is fitted, then
    back-tested and evaluated (train and test) for every threshold.

    One 'Success' row is recorded per (classifier, threshold). If anything
    raises while fitting / back-testing / evaluating a classifier, a single
    'Error' row carrying the exception text is recorded for it and the run
    continues with the next classifier. Rows are written to table
    ``logging_trt_4`` through the module-level ``connection`` once per
    (crypto to predict, term).

    Parameters
    ----------
    conf : str, optional
        Configuration name forwarded to ``get_params`` and
        ``get_classifiers``. Defaults to ``CONF_2``.
    """
    log_columns = ['timestamp', 'status', 'error_message', 'param_crypto_learning', 'param_term', 'param_close_price_targeted',
                   'param_Algo', 'param_do_scale', 'param_do_pca', 'param_threshold', 'param_id_crypto_to_predict',
                   'backtest_simulation_time', 'backtest_final_amount', 'backtest_all_fees_paid', 'backtest_pct_change_portfolio', 'backtest_pct_change_market', 'backtest_nb_trades',
                   'train_confusion_TN', 'train_confusion_FP', 'train_confusion_FN',
                   'train_confusion_TP', 'train_precision_score', 'train_recall_score', 'train_f1_score', 'train_support_True', 'train_support_False',
                   'train_feat_importance_1', 'train_feat_importance_2', 'train_feat_importance_3',
                   'test_confusion_TN', 'test_confusion_FP', 'test_confusion_FN', 'test_confusion_TP',
                   'test_precision_score', 'test_recall_score', 'test_f1_score', 'test_support_True',
                   'test_support_False', 'test_feat_importance_1', 'test_feat_importance_2', 'test_feat_importance_3']
    nb_run_columns = 11   # timestamp .. param_id_crypto_to_predict
    nb_metrics = 12       # values expected from one evaluate_model_formated call
    # Columns left empty on an 'Error' row (6 backtest + 12 train + 12 test).
    error_padding = [None] * (len(log_columns) - nb_run_columns)

    # ------------------ PARAMETERS for SYSTEMATIZATION ------------------ #

    param_datasets_nb_cryptos, params_y, param_crypto_learning, param_crypto_predicting, thresholds, factors, \
        predict_only_one_crypto, do_scale, do_pca = get_params(conf)

    # Trading pair handed to the backtest; identical for every iteration.
    base_asset = 'XXX' # to be replaced
    quote_asset = 'USDT'
    trading_pair = base_asset + quote_asset

    # loop on different dataset sizes (number of cryptos used for learning, cf. params)
    for nb_cryptos in param_datasets_nb_cryptos:
        # get n first cryptos ids
        ids_cryptos = param_crypto_learning[:nb_cryptos]
        ids_cryptos_to_predict = param_crypto_predicting[:nb_cryptos]
        dict_df = alg_preproc.get_global_datasets_for_cryptos(ids_cryptos)

        for id_crypto_to_predict in ids_cryptos_to_predict:
            # loop on different y (prediction in future +3h, +6h, +1d, etc.)
            for term, arr_pct_change in params_y.items():
                # Collect plain rows and build the frame once: DataFrame.append was
                # quadratic and no longer exists in pandas >= 2.0.
                log_rows = []
                y_to_be_considered = 'y_+XXX_classif'.replace('XXX', term)
                # loop on different price change in percentage for target
                for close_price_targeted in arr_pct_change:
                    # do +/- (increase or decrease close price %)
                    for factor in factors:
                        X_train, X_test, y_train, y_test, X_train_close_price, X_test_close_price = alg_preproc.get_preprocessed_data(
                            dict_df, dict_hours_labels,
                            close_price_targeted * factor,
                            predict_only_one_crypto,
                            do_scale=do_scale,
                            do_pca=do_pca,
                            id_cryptocompare=str(id_crypto_to_predict))

                        true_count, false_count = get_value_count(y_train[y_to_be_considered].values)
                        for clf_name, clf in get_classifiers(conf, true_count, false_count).items():
                            # Run parameters shared by the success and error rows of this classifier.
                            run_params = [len(ids_cryptos), term, close_price_targeted * factor, clf_name, do_scale, do_pca]
                            try:
                                clf = clf.fit(X_train, y_train[y_to_be_considered])
                                not_mlp = type(clf) != MLPClassifier

                                for threshold in thresholds:
                                    # backtesting
                                    # NOTE(review): the unsigned close_price_targeted (no * factor) is passed here - confirm intended.
                                    backtest = alg_backtesting.Backtesting_strategy(clf, X_test, {trading_pair: X_test_close_price}, close_price_targeted, threshold, base_asset, quote_asset, trading_pair, trace=False)
                                    simulation_time, final_amount, all_fees_paid, pct_change_portfolio, pct_change_market, nb_trades = backtest.do_backtest()

                                    # evaluation on train and test sets
                                    train_metrics = tuple(evaluate_model_formated(clf, X_train, y_train[y_to_be_considered].values, threshold, not_mlp))
                                    test_metrics = tuple(evaluate_model_formated(clf, X_test, y_test[y_to_be_considered].values, threshold, not_mlp))
                                    if len(train_metrics) != nb_metrics or len(test_metrics) != nb_metrics:
                                        # Keep a malformed result inside the try so it is logged as an 'Error' row.
                                        raise ValueError('evaluate_model_formated returned %d (train) / %d (test) values, expected %d'
                                                         % (len(train_metrics), len(test_metrics), nb_metrics))

                                    log_rows.append([datetime.now(), 'Success', ''] + run_params
                                                    + [threshold, id_crypto_to_predict,
                                                       simulation_time, final_amount, all_fees_paid, pct_change_portfolio, pct_change_market, nb_trades]
                                                    + list(train_metrics) + list(test_metrics))

                            except Exception as e:
                                # Deliberate best effort: record the failure and carry on with the next classifier.
                                log_rows.append([datetime.now(), 'Error', str(e)] + run_params
                                                + [None, id_crypto_to_predict] + error_padding)

                # insert data into database (nothing to insert when the grid produced no row)
                if not log_rows:
                    continue
                df_logging = pd.DataFrame(log_rows, columns=log_columns)
                df_logging.to_sql(name='logging_trt_4', con=connection, if_exists = 'append', index=False)

In [12]:
# Launch the grid with the default configuration (CONF_2); result rows are appended to table logging_trt_4.
start_systematization(conf=CONF_2)

Crypto : 7605
Crypto : 5031
Crypto : 202330
Crypto : 4614
Crypto : 166503


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


KeyboardInterrupt: 