In [None]:
# Not originally written by Hansen1015; I only made small edits to make the code more reliable.
# The original author is https://github.com/Bturan19
import os
from pathlib import Path

# Move the working directory to the project root (the folder that contains
# LICENSE) so the relative "user_data/..." paths used below resolve.
project_root = "./freqtrade"
i = 0
try:
    # Fixed: the original called the non-existent `os.chdirdir`; the resulting
    # AttributeError was hidden by a bare `except`, so this step never ran.
    os.chdir(project_root)
except OSError:
    # No usable ./freqtrade below the current directory; rely on the upward
    # search below instead.
    pass

# Walk up at most four levels until a directory containing LICENSE is found.
while i < 4 and not Path('LICENSE').is_file():
    os.chdir(Path(Path.cwd(), '../'))
    i += 1
project_root = Path.cwd()
print(Path.cwd())

In [None]:

import json
from freqtrade.configuration import Configuration

# Load config from multiple files
# NOTE(review): the name `config` is rebound to a plain dict of split dates in
# the train/test split cell further down, which shadows this object.
config = Configuration.from_files(["config.json"])
#print(config)

# Show the config in memory
#print(json.dumps(config['original_config'], indent=2))

## Start From Here

In [None]:
import pandas as pd
pd.options.display.max_columns = 999
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# import jtplot submodule from jupyterthemes
from jupyterthemes import jtplot
jtplot.style()
from tqdm import tqdm

import sklearn
import xgboost
import catboost
import scipy
import talib.abstract as ta
from scipy import signal
from sklearn.preprocessing import StandardScaler, MinMaxScaler

#### Read Files 

In [None]:
### Helper Functions

def add_features(df):
    """
    Add rolling price features and the forward-looking target to an OHLC
    dataframe. The dataframe is modified in place and also returned.

    Columns added (one per window for the first five groups):
    ``smadiff_*``, ``maxdiff_*``, ``mindiff_*``, ``std_*``, ``ma_*``,
    plus ``z_score_120`` and ``target``.

    :params: pandas.DataFrame with at least a ``close`` column

    :output: pandas.DataFrame (the same object that was passed in)
    """
    windows = [3, 5, 8, 13, 21, 34, 55, 89, 120, 240]
    close = df['close']

    #sma diff
    for i in windows:
        df[f"smadiff_{i}"] = close.rolling(i).mean() - close
    #max diff
    for i in windows:
        df[f"maxdiff_{i}"] = close.rolling(i).max() - close
    #min diff
    # Fixed: this loop used to write to "maxdiff_{i}" and silently replaced
    # the rolling-max features computed just above.
    for i in windows:
        df[f"mindiff_{i}"] = close.rolling(i).min() - close
    #volatility
    for i in windows:
        df[f"std_{i}"] = close.rolling(i).std()

    #Return: rolling mean of the i-step percentage change
    for i in windows:
        df[f"ma_{i}"] = close.pct_change(i).rolling(i).mean()

    # NOTE: despite the "_120" suffix this is derived from `ma_13` over a
    # 21-row window; the column name is kept because it is a model feature.
    df['z_score_120'] = ((df.ma_13 - df.ma_13.rolling(21).mean() + 1e-9)
                         / (df.ma_13.rolling(21).std() + 1e-9))

    #Target: mean of the 7 one-step returns that follow the current row
    # (uses future rows, so the last 7 rows are NaN).
    df["target"] = close.shift(-7).pct_change(1).rolling(7).mean()

    return df
    

In [None]:
headers = ["date", "open", "high", "low", "close", "volume"]
data_dir = "user_data/data/binance/"

# One feature-enriched frame per 1h candle file. A separate list name is used
# so that `df` only ever refers to the final DataFrame.
frames = []

# sorted(): os.listdir returns entries in arbitrary order, which would make
# the row order of the concatenated frame differ between runs/machines.
for file in sorted(os.listdir(data_dir)):
    if file.endswith("1h.json"):
        filename = os.path.join(data_dir, file)
        print(f"Starting read: {filename}")
        d1 = pd.read_json(filename)
        d1.columns = headers
        # Drop the last 8 characters of the file name (presumably the
        # "-1h.json" suffix) to obtain the pair name.
        d1["stock"] = file[:-8]
        d1 = add_features(d1)
        frames.append(d1)

if not frames:
    # pd.concat([]) would raise the same exception type with a vaguer message.
    raise ValueError(f"No *1h.json files found in {data_dir}")

df = pd.concat(frames)
print("Shape: ", df.shape)

In [None]:
# Convert the millisecond-epoch `date` column to datetimes and renumber the
# rows so the index no longer repeats across the concatenated frames.
df = (
    df
    .assign(date=pd.to_datetime(df["date"], unit='ms'))
    .reset_index(drop=True)
)

In [None]:
# BTC close with its 30-row rolling standard deviation on a secondary axis.
btc_close = df.loc[df["stock"] == 'BTC_USDT', 'close']
ax = btc_close.plot(figsize=(12,8))
btc_close.rolling(30).std().plot(ax=ax, secondary_y=True)

### Feature Engineering 

In [None]:
# Drop new cryptos: pairs with fewer than 241 rows cannot fill the longest
# (240-row) rolling window used by the features.

stock_size = (
    df.groupby("stock")
    .size()
    .sort_values()
    .reset_index(name="size")
)
too_short = stock_size["size"] < 241
uneligible_stocks = stock_size.loc[too_short, "stock"].to_list()

df = df[~df["stock"].isin(uneligible_stocks)]

In [None]:
# Calendar features derived from the candle timestamp.
candle_time = df["date"].dt
df = df.assign(
    time_hourmin=candle_time.hour * 60 + candle_time.minute,
    time_dayofweek=candle_time.dayofweek,
    time_hour=candle_time.hour,
)

In [None]:
#df["target"] = df.target / df.meanp_tar
#HOLD

In [None]:
# Keep only candles strictly before the cutoff timestamp.
cutoff = pd.Timestamp('2021-04-10 05:00:00')
df = df[df["date"] < cutoff]
print(df.shape)

In [None]:
# Overlay the last 80 target values of the first ten pairs.
for symbol in df["stock"].unique()[:10]:
    per_symbol = df[df["stock"] == symbol].set_index("date")
    per_symbol["target"].tail(80).plot()

In [None]:
# Histogram of the raw target, clipped to +/-5% so outliers do not stretch the bins.
df["target"].clip(lower=-0.05, upper=0.05).hist()

In [None]:
# Drop every row containing a NaN; this includes the rolling-window warm-up
# rows and the trailing rows whose forward-looking target is undefined.
df = df.dropna()

In [None]:
# Replace the continuous target with its quintile index (0 = lowest, 4 = highest).
# NOTE(review): the quantile edges are computed on the full frame, i.e. before
# the train/val/test split below - confirm this is acceptable.
# NOTE: not idempotent - re-running this cell bins the already-binned labels.
df = df.assign(target=pd.qcut(df["target"], 5, labels=False))

### Train-Test Split 

In [None]:
# First and last timestamp left in the data, to pick the split dates.
df["date"].agg(['min', 'max'])

In [None]:

# Chronological split boundaries: train = [train, val), val = [val, test),
# test = [test, end).
config = dict(
    train_split_date=pd.to_datetime('2019-04-09'),
    val_split_date=pd.to_datetime('2020-11-01'),
    test_split_date=pd.to_datetime('2021-03-01'),
)

train_start = config['train_split_date']
val_start = config['val_split_date']
test_start = config['test_split_date']

in_train = (df["date"] >= train_start) & (df["date"] < val_start)
in_val = (df["date"] >= val_start) & (df["date"] < test_start)
in_test = df["date"] >= test_start

train = df[in_train]

val   = df[in_val]

test  = df[in_test]

In [None]:
# Size and mean target class of each split, in train / val / test order.
for part in (train, val, test):
    print(part.shape, part["target"].mean())

In [None]:
# Show the available columns; the feature list in the next cell is derived from them.
df.columns

In [None]:
# Columns that must not be fed to the model (raw prices, identifiers, target).
col_not_use = [
    "date", 'open', 'high', 'low', 'close',
    "target", "meanp_tar",
    "logclose", "sqrtclose", "stock",
]
# Every remaining column, in frame order, is a model feature.
excluded = set(col_not_use)
col_use = [name for name in df.columns if name not in excluded]

In [None]:
from catboost import CatBoostClassifier

In [None]:
# Train the quintile classifier on `train`, tracking the loss on `val`.
# Options left disabled by the original author:
#   constructor: boosting_type='dart', random_state=42, num_leaves=41, max_bin=10
#   fit:         eval_metric='binary_error', early_stopping_rounds=200
catboost_params = {
    "iterations": 10000,
    "learning_rate": 0.004,
    "task_type": 'GPU',
}
model_return = CatBoostClassifier(**catboost_params)

validation_set = [(val[col_use], val['target'])]
model_return.fit(
    train[col_use],
    train['target'],
    verbose=100,
    eval_set=validation_set,
)

In [None]:
def plot_feature_importance(importance,names,model_type):
    """
    Draw a horizontal bar chart of feature importances, most important on top.

    :param importance: sequence of importance values, one per feature
    :param names: sequence of feature names, aligned with ``importance``
    :param model_type: label used as the prefix of the chart title
    :return: None (the chart is drawn on a new matplotlib figure)
    """
    #Create a DataFrame from the feature names and their importances
    fi_df = pd.DataFrame({
        'feature_names': np.array(names),
        'feature_importance': np.array(importance),
    })

    #barh draws the first row at the bottom of the axis, so sort ascending to
    #end up with the most important feature at the top of the chart
    fi_df = fi_df.sort_values(by=['feature_importance'], ascending=True)

    #Define size of bar plot
    plt.figure(figsize=(10,14))
    #Plot matplotlib horizontal bar chart
    plt.barh(width=fi_df['feature_importance'], y=fi_df['feature_names'])
    #Add chart labels (fixed: the title was missing the space after the model name)
    plt.title(model_type + ' FEATURE IMPORTANCE')
    plt.xlabel('FEATURE IMPORTANCE')
    plt.ylabel('FEATURE NAMES')
plot_feature_importance(model_return.get_feature_importance(),col_use,'CATBOOST')

In [None]:
from sklearn.metrics import log_loss

# Class probabilities for the test rows, one column per target quintile.
class_proba = model_return.predict_proba(test[col_use])
preds = pd.DataFrame(class_proba)
print(log_loss(test.target, preds))

pred_names = [f"pred{k}" for k in range(5)]
preds.columns = pred_names
preds

In [None]:
import pickle

# Persist the fitted model, wrapped in a one-element list, for later use.
model_path = Path('user_data/notebooks/model_portfolio.pkl')
with model_path.open('wb') as f:
    pickle.dump([model_return], f)

In [None]:
# Put the probability columns next to the test rows; the index is reset first
# so both frames align on 0..n-1.
# NOTE: not idempotent - re-running appends a second set of pred columns.
test = pd.concat([test.reset_index(drop=True), preds], axis=1)

In [None]:
# Distribution of each class probability over the last row of every timestamp.
last_row_per_date = test.groupby("date").tail(1)
for k in range(5):
    last_row_per_date[f"pred{k}"].hist(alpha=.5, label=f"pred{k}")
plt.legend()

In [None]:
# ADA close with the top-quintile probability on a secondary axis.
is_ada = test["stock"] == 'ADA_USDT'
ax = test.loc[is_ada, 'close'].plot(figsize=(12,8))
test.loc[is_ada, 'pred4'].plot(ax=ax, secondary_y=True)

In [None]:
# Pairs that ever receive a top-quintile probability above 0.25.
test.loc[test["pred4"] > .25, "stock"].unique()

In [None]:
# Earliest timestamp in the test split.
test["date"].min()

In [None]:
# Ranking of all pairs at one timestamp, best top-quintile probability first.
snapshot_time = pd.Timestamp('2021-03-01 00:00:00')
test[test["date"] == snapshot_time].sort_values(by='pred4', ascending=False)

In [None]:
# DOGE close with the top-quintile probability on a secondary axis.
# Fixed: `stock` values are taken from the data file names and use an
# underscore (like 'BTC_USDT' / 'ADA_USDT'), so 'DOGE/USDT' never matched.
is_doge = test.stock == 'DOGE_USDT'
ax = test.loc[is_doge, 'close'].plot(figsize=(12,8))
test.loc[is_doge, 'pred4'].plot(ax=ax, secondary_y=True)

In [None]:
# SELECT FIRST N ASSET
# For every timestamp, flag the ten pairs with the highest `pred4`.

test["buy"] = 0
for ts in test["date"].unique():
    at_ts = test["date"] == ts
    ranked = test.loc[at_ts].sort_values(by=['pred4'], ascending=False)
    top_stocks = ranked["stock"][:10]
    test.loc[at_ts & test["stock"].isin(top_stocks), "buy"] = 1

In [None]:
# Delay the signal by one row so a position is only taken on the row after the
# prediction was made.
# Fixed: the frame is stacked pair after pair, so a plain shift(1) carried the
# last signal of one pair into the first row of the next; shift within each pair.
test["buy"] = test.groupby("stock")["buy"].shift(1)

In [None]:
# Gross one-row-ahead return of the same pair: close[t+1] / close[t].
# Fixed: shifting the whole stacked column compared the last close of one pair
# with the first close of the next pair, producing bogus returns at every
# pair boundary; the shift is now done within each pair.
test["return"] = test.groupby("stock")["close"].shift(-1) / test["close"]

## Re-Diversified Portfolio Every 3 Hours

In [None]:
# Gross return of holding a pair over the next 5 rows: close[t+5] / close[t].
# This equals the product of the five consecutive one-row returns the original
# multiplied together.
# Fixed: the original shifted the whole stacked column, so returns near every
# pair boundary mixed prices of two different pairs; shift within each pair.
# NOTE(review): the backtest below rebalances every 3 rows while this is a
# 5-row return, so consecutive holding periods overlap - confirm the horizon.
test["return"] = test.groupby("stock")["close"].shift(-5) / test["close"]

In [None]:
# Rows without a forward price (end of each series) are treated as flat (x1.0).
# Fixed: `test["return"].fillna(..., inplace=True)` is chained assignment on a
# column selection; it is deprecated and does not update `test` under pandas
# copy-on-write, so assign the result back explicitly.
test["return"] = test["return"].fillna(1.0)

In [None]:
# Equal-weight backtest: every third timestamp, invest the whole balance in the
# flagged pairs (at most ten) and compare with the mean return of all pairs.
amount = [10000]
benchmark = [10000]

# Skip the last 5 timestamps, then keep every third one as a rebalance time.
rebalance_times = sorted(test.date.unique())[:-5][::3]
for ts in rebalance_times:
    at_ts = test.date == ts
    picked = test.loc[at_ts & (test.buy == 1), "return"]

    returns = picked.iloc[:10].mean()
    # NOTE(review): 0.1% is deducted here and another 0.1% of the balance is
    # deducted when appending below - confirm both costs are intended.
    returns -= returns * 0.001
    if np.isnan(returns):
        # Nothing was flagged at this timestamp: stay flat.
        returns = 1.
    if returns >= 1.5:
        # Cap a single-period gain at +50%.
        returns = 1.5
    print(len(picked))

    amount.append((amount[-1] * returns) - amount[-1]*0.001)
    benchmark.append((benchmark[-1] * test.loc[at_ts, "return"].mean()))

In [None]:
# Equity curves of the equal-weight run: strategy first, benchmark second
# (the last point of each list is left out).
plt.plot(amount[:-1])
plt.plot(benchmark[:-1])

In [None]:
# Weighted portfolio
# Same schedule as the equal-weight run, but the flagged pairs are weighted by
# their rank: the best-ranked pick gets 0.30, the next two 0.15, the rest 0.05.

amount = [10000]
benchmark = [10000]

# Hoisted out of the loop: the weights never change.
weights = np.array([.3, 0.15, 0.15, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05])

for time in sorted(test.date.unique())[:-5][::3]:
    at_time = test.date == time
    # Fixed: sort best-first so the largest weight goes to the highest `pred4`
    # (the ascending sort gave 0.30 to the weakest pick).
    picked = (test.loc[at_time & (test.buy == 1)]
              .sort_values(by='pred4', ascending=False)["return"]
              .to_numpy()[:len(weights)])
    if len(picked) == 0:
        # Fixed: nothing flagged at this timestamp - stay flat instead of failing.
        returns = 1.
    else:
        # Fixed: with fewer than ten picks the multiplication raised a length
        # mismatch; use only as many weights as there are picks.
        used = weights[:len(picked)]
        returns = (picked * used).sum() / used.sum()
    #returns -= returns * 0.001
    if returns >= 1.5:
        # Cap a single-period gain at +50%.
        returns = 1.5
    amount.append((amount[-1] * returns) - amount[-1]*0.001)
    benchmark.append((benchmark[-1] * test.loc[at_time, "return"].mean()))

In [None]:
# Equity curves of the weighted run: strategy first, benchmark second
# (the last point of each list is left out).
plt.plot(amount[:-1])
plt.plot(benchmark[:-1])

In [None]:
# Approach 2
# Annualised Sharpe ratio of the simulated equity curve.
# Fixed: use period-over-period percentage returns; dollar differences of a
# compounding balance grow with the balance and distort the ratio.
r = pd.Series(amount).pct_change()

# 252 * 24 / 3 = number of 3-hour periods per year assumed by the notebook.
# NOTE(review): this market presumably trades every day of the year - confirm
# whether 365 should replace 252.
sr = r.mean()/r.std() * np.sqrt(252 * 24 /3)
print(sr)

In [None]:
# One-column frame holding the equity curve (the column is named "returns"
# but contains balances).
result = pd.DataFrame({"returns": pd.Series(amount)})

In [None]:
# Rolling one-week Sharpe ratio (8 three-hour periods per day * 7 days).
# Fixed: the ratio was computed on the balance itself (`result.returns` holds
# equity levels) with 0.02 subtracted from a per-period mean. It is now
# computed on period-over-period returns and annualised.
# NOTE(review): 0.02 is assumed to be an annual risk-free rate - confirm.
periods_per_year = 252 * 24 / 3
risk_free_per_period = 0.02 / periods_per_year
period_returns = result["returns"].pct_change()


def _window_sharpe(x):
    """Per-period Sharpe ratio of one window; NaN when the window is flat."""
    spread = x.std()
    if not spread > 0:
        return np.nan
    return (x.mean() - risk_free_per_period) / spread


result['rolling_SR'] = (period_returns.rolling(8*7).apply(_window_sharpe, raw = True)
                        * np.sqrt(periods_per_year))
# Assign back instead of using inplace=True so re-runs behave predictably.
result = result.fillna(0)

# Only positive values are plotted; the dashed line marks a ratio of 4.
ax = result.loc[result['rolling_SR'] > 0, 'rolling_SR'].plot(
    style='-', lw=3, color='orange', label='Sharpe', figsize = (10,7))
ax.axhline(y = 4., color = "blue", lw = 3, linestyle = '--')

plt.ylabel('Sharpe ratio')
plt.legend(loc='best')
plt.title('Rolling Sharpe ratio (1 week)')
plt.show()

print('---------------------------------------------------------------')
print('In case you want to check the result data\n')
print(result.tail()) # tail is used because the first rows of the window are empty (filled with 0).