In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_union, make_pipeline, Pipeline

from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import RFE, RFECV

from fenginering.indicator import *
from fenginering.transformer import *
from fenginering.preprocessing import Crossover
from data.get_data import *

from fenginering.function import *

import warnings
warnings.filterwarnings('ignore')

%load_ext autoreload
%autoreload 2

In [2]:
# Bounds of the label-based slice taken from the loaded price history.
start = "2022"
end = "2023"

# Load the BTC series at the '1d' interval, keep only the requested window and
# discard the 'symbol' column in one chained expression.
data = get_data('BTC', '1d').loc[start:end].drop(columns=['symbol'])
# Force every column label to str.
data.columns = data.columns.astype(str)
data.shape

(449, 5)

In [None]:
# Split in row order (no shuffling): the first 70% of the rows form the
# training set, the remaining rows form the test set.
N = len(data)
n = int(0.7 * N)
Train, Test = data.iloc[:n], data.iloc[n:]

# Labels are computed independently on each split; the raw frames serve as the
# initial feature sets.
target, features = get_classification_target(Train), Train
target_t, features_t = get_classification_target(Test), Test

# Class balance of the training labels.
target.value_counts().plot(color = 'red', kind = 'barh')

# Moving-average (MA) combinations

In [None]:
# Hand-picked lengths handed to the moving-average crossover builder.
sma_list = [3, 7, 10, 14, 21, 28, 30, 40, 45, 60, 75, 80, 96]
# Denser grid: every odd length from 3 up to 199.
# range() takes (start, stop, step); the previous range(3, 2, 200) had a stop
# below its start and therefore produced an empty sequence.
sma_list_h = range(3, 200, 2)

In [None]:
# Features for the training split, built by Crossover.ma from `sma` and the
# lengths in `sma_list`.
features = Crossover(Train).ma(sma, sma_list)

# Same construction on the held-out split; `cross` stays bound to this builder.
cross = Crossover(Test)
features_test = cross.ma(sma, sma_list)

# Train / test sets

In [None]:
# Train: join the labels onto the feature rows by index, then split the joined
# frame back into a feature matrix and a label vector that share the same rows.
train = features.join(target)
features_train = train.drop(columns = 'target')
target_train = train['target']

# Test: same procedure on the held-out split. The test labels are stored in
# `target_t`; `target_test` does not exist until the last line of this cell, so
# joining on it raised NameError on a fresh kernel.
test = features_test.join(target_t)
features_test = test.drop(columns = 'target')
target_test = test['target']

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fixed seed: random forests bootstrap rows and sub-sample features at random,
# so without it every run (and every candidate cloned from this estimator by a
# later hyper-parameter search) yields different scores.
SEED = 42

#model = DecisionTreeClassifier()
model = RandomForestClassifier(random_state = SEED)
model.fit(features_train, target_train)

In [None]:
# Score the held-out features with the baseline model: hard class labels and
# per-class probabilities.
y_pred = model.predict(features_test)
y_pred_proba = model.predict_proba(features_test)

# NOTE(review): star import in the middle of the notebook; it provides
# `Evaluation` and may shadow earlier names — consider an explicit import.
from evaluation.classification import *

# The second column of predict_proba is passed as the predicted probability
# (presumably the positive class — confirm against the label encoding).
evaluation = Evaluation(y_true = target_test, y_pred = y_pred, proba_pred = y_pred_proba[:,1])
evaluation.metrics()
evaluation.show()

In [None]:
def feature_importances(model):
    """Draw a horizontal bar chart of a fitted model's feature importances.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``feature_importances_`` and
        ``feature_names_in_``.

    Returns
    -------
    None
        The chart is produced as a side effect. Importances are multiplied by
        100 and the bars are sorted in ascending order.
    """
    importance_pct = pd.DataFrame(
        {'Importance': model.feature_importances_ * 100},
        index=model.feature_names_in_,
    )
    ranked = importance_pct.sort_values('Importance', axis=0, ascending=True)
    ranked.plot(kind='barh', color='r', figsize=(10, 20))

# Chart the importances of the baseline `model`.
feature_importances(model)

In [None]:
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

# Hyper-parameter candidates explored exhaustively by the grid search.
param_grid = {'max_depth' : np.arange(1, 20),
              'criterion' : ['gini', 'entropy', 'log_loss'],
              'min_samples_leaf' : [15, 20, 30, 40, 55, 60, 80, 100, 200],
              }
score = 'accuracy'

# The rows are in time order, so validate on folds that always come after
# their training window. A plain integer `cv` gives K-fold splits that train on
# later bars and validate on earlier ones, which leaks future information into
# the score used to pick the hyper-parameters.
time_cv = TimeSeriesSplit(n_splits = 5)

grid = GridSearchCV(model, param_grid, scoring = score, cv = time_cv)
grid.fit(features_train, target_train)

In [None]:
# GridSearchCV is built with its default refit=True, so best_estimator_ has
# already been refitted on the whole training set with the winning parameters.
# Fitting it again only repeats the work (and re-randomises an unseeded forest).
model_g = grid.best_estimator_
model_g

In [None]:
# Score the held-out features with the tuned model; this overwrites the
# baseline `y_pred` / `y_pred_proba`.
y_pred = model_g.predict(features_test)
y_pred_proba = model_g.predict_proba(features_test)

# NOTE(review): repeated star import; it provides `Evaluation` and may shadow
# earlier names — consider a single explicit import.
from evaluation.classification import *

# The second column of predict_proba is passed as the predicted probability
# (presumably the positive class — confirm against the label encoding).
evaluation = Evaluation(y_true = target_test, y_pred = y_pred, proba_pred = y_pred_proba[:,1])
evaluation.metrics()
evaluation.show()

In [None]:
# Leverage multiplier applied to the highest-conviction signals.
# NOTE(review): with lev = 100, `1 + strategy2` turns negative whenever a
# levered position loses more than 1% in one bar, which makes the cumulative
# product below meaningless — consider a smaller value or flooring at -100%.
lev = 100

# Probability thresholds: long above `up`, short below `down`, flat in between.
up = 0.7
down = 0.3
# Stricter thresholds beyond which the position is levered by `lev`.
lev_up = 0.8
lev_down = 0.2

backtest = pd.DataFrame()

backtest['close'] = Test.close
backtest['returns'] = Test.close.pct_change()
backtest['target'] = target_test
# The model outputs are bare arrays; wrap them in Series carrying the index of
# the rows they were predicted from so that they are aligned by label instead
# of by position (a positional assignment raises, or silently misaligns, when
# the feature rows are not exactly the rows of `Test`).
backtest['pred'] = pd.Series(y_pred, index = features_test.index)
# Column 1 of predict_proba is the column scored in the evaluation cells.
# Column 0 is the probability of the other class, which inverted the
# long/short signal derived below.
backtest['pred_proba'] = pd.Series(y_pred_proba[:, 1], index = features_test.index)

# Three-state signal: +1 long, -1 short, 0 flat (rows without a probability
# compare False on both sides and therefore stay flat).
backtest['pred2'] = np.where(backtest['pred_proba'] > up, 1,
                             np.where(backtest['pred_proba'] < down, -1, 0)
                             )

# Buy-and-hold benchmark.
backtest['cum_ret'] = (1 + backtest['returns']).cumprod()

# NOTE(review): each prediction is multiplied by the return of the same row.
# If the target describes the next bar's move, `returns` must be shifted so a
# prediction earns the return it forecasts — confirm against
# get_classification_target.
backtest['strategy'] = backtest['returns'] * backtest['pred']
backtest['strategy_cum_ret'] = (1 + backtest['strategy']).cumprod()

# Lever the position only when the probability is beyond the stricter bounds.
backtest['leverage'] = np.where(backtest['pred_proba'] > lev_up, lev,
                                np.where(backtest['pred_proba'] < lev_down, lev, 1)
                                )
backtest['strategy2'] = backtest['returns'] * backtest['pred2'] * backtest['leverage']

backtest['strategy_cum_ret2'] = (1 + backtest['strategy2']).cumprod()

#backtest

#backtest[['cum_ret', 'strategy_cum_ret', 'strategy_cum_ret2']].plot(figsize = (15, 10))
# NOTE(review): `px` is not imported in the visible cells; presumably
# plotly.express arriving through one of the star imports — confirm.
px.line(backtest[['cum_ret', 'strategy_cum_ret', 'strategy_cum_ret2']])