In [1]:
#!/usr/bin/python3
from binance.client import Client
from time import time
import pickle as pickle
from datetime import datetime
import pandas as pd
import pandas_ta as ta
from pycaret.classification import load_model, predict_model
from pycaret.classification import *
from pycaret.classification import ClassificationExperiment
from lightgbm import LGBMClassifier
import random


def get_client():
    """Build a Binance ``Client`` from credentials stored in a pickle file.

    The pickle at ``../../key/binance-key.pickle`` (relative to the current
    working directory) is indexed with the keys ``'API_KEY'`` and
    ``'API_SECRET'``.

    Returns:
        Client: a client constructed from the stored key and secret.
    """
    # Alternative key location:
    # key_path = '/home/era/key/binance-key.pickle'
    key_path = '../../key/binance-key.pickle'
    with open(key_path, 'rb') as key_file:
        credentials = pickle.load(key_file)
    api_key = credentials['API_KEY']
    api_secret = credentials['API_SECRET']
    return Client(api_key, api_secret)


# Module-level Binance client, created as soon as this cell/module runs (so
# the key file must be readable at that point). The kline download helpers
# below (get_historical_data, sim_historical_data) use this global.
client = get_client()


def get_unix_timestamp(date_string):
    """
    Parse a "dd/mm/yyyy hh:mm:ss" date string into a Unix timestamp.

    The parsed datetime is naive, so ``timestamp()`` interprets it in the
    local timezone of the machine running the code.

    Parameters:
        date_string (str): Date in the format "dd/mm/yyyy hh:mm:ss".

    Returns:
        int | None: Whole-second Unix timestamp, or None (after printing a
        hint) when the string does not match the expected format.
    """
    expected_format = "%d/%m/%Y %H:%M:%S"
    try:
        return int(datetime.strptime(date_string, expected_format).timestamp())
    except ValueError:
        print("Invalid date format. Please use the format 'dd/mm/yyyy hh:mm:ss'.")
    return None

def get_historical_data(start_timestamp, end_timestamp, coin_pair):
    """Download 15-minute futures klines for ``coin_pair`` into a DataFrame.

    The requested range is walked in windows of ``900 * 500`` seconds and each
    window is fetched with ``client.futures_historical_klines`` (timestamps
    are passed to the API in milliseconds). Progress is printed per window.

    Parameters:
        start_timestamp (int): Range start, Unix seconds.
        end_timestamp (int): Range end, Unix seconds.
        coin_pair (str): Symbol passed to the client, e.g. ``'GALAUSDT'``.

    Returns:
        pandas.DataFrame: columns ``time`` (open time in seconds), ``open``,
        ``high``, ``low``, ``close``, ``volume`` (all floats).
    """
    window = 900 * 500
    total_windows = (end_timestamp - start_timestamp) / window
    rows = []

    window_starts = range(start_timestamp, end_timestamp + 1, window)
    for window_no, window_start in enumerate(window_starts):
        window_end = min(window_start + window, end_timestamp)

        # The last window is always reported as 100 %.
        if window_end == end_timestamp:
            progress = '100% completed'
        else:
            progress = f'{round(window_no*100/total_windows, 1)}% completed'
        print(window_start, window_end, progress)

        # Futures market
        klines = client.futures_historical_klines(
            coin_pair, '15m', window_start*1000, window_end*1000, limit=500)
        # Spot market
        # klines = client.get_historical_klines(coin_pair, interval, window_start*1000, window_end*1000, limit=500)

        rows.extend(
            [
                kline[0]/1000,    # open time: ms -> s
                float(kline[1]),  # open
                float(kline[2]),  # high
                float(kline[3]),  # low
                float(kline[4]),  # close
                float(kline[5]),  # volume
            ]
            for kline in klines
        )

    # df.to_csv(f'/home/ubuntu/data/{coin_pair}-{interval}.csv', index=False)
    # print('Data Exported')
    df = pd.DataFrame(rows, columns=['time', 'open', 'high', 'low', 'close', 'volume'])
    print(f'Historical Data of {coin_pair} Downloaded')
    return df

def _label_signals(open_prices, highs, lows, candlestick_frame, pnl_threshold):
    """Return one 'long' / 'short' / 'dont_trade' label per candle."""
    total = len(open_prices)
    labels = []
    for idx in range(total):
        # Only rows with candlestick_frame <= idx <= total - candlestick_frame
        # can be labelled as a trade; everything else is 'dont_trade'.
        if not (candlestick_frame <= idx <= total - candlestick_frame):
            labels.append('dont_trade')
            continue

        # Look ahead over candles idx+1 .. idx+candlestick_frame-1, entering
        # at the open of the next candle.
        look_ahead = slice(idx + 1, idx + candlestick_frame)
        max_high = max(highs[look_ahead])
        min_low = min(lows[look_ahead])
        entry_price = open_prices[idx + 1]
        long_runup = round((max_high * 100 / entry_price) - 100, 6)
        short_runup = round((entry_price * 100 / min_low) - 100, 6)

        # 'long' takes precedence when both directions reach the threshold.
        if long_runup >= pnl_threshold:
            labels.append('long')
        elif short_runup >= pnl_threshold:
            labels.append('short')
        else:
            labels.append('dont_trade')
    return labels


def generate_features(df, coin_pair, fn, drop):
    """Add indicator features and a trade label to ``df`` and save it as CSV.

    Steps:
      1. Run the pandas_ta "all" strategy on ``df`` (best effort).
      2. Label every row 'long', 'short' or 'dont_trade' from the price
         run-up over the following candles (frame of 12, threshold 3 %).
      3. Add the ``coin`` and ``signal`` columns to ``df`` in place.
      4. If ``drop`` is true, randomly discard 'dont_trade' rows until as many
         remain as the smaller of the 'long'/'short' classes, then drop the
         raw ``time``/OHLCV columns.
      5. Write the result to ``../../data/{fn}.csv``.

    Parameters:
        df (pandas.DataFrame): Needs ``time``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns. New columns are added in place.
        coin_pair (str): Value stored in the ``coin`` column.
        fn (str): Output file name without directory or extension.
        drop (bool): Whether to balance classes and drop the raw columns.

    Returns:
        pandas.DataFrame: the labelled (and, with ``drop``, reduced) frame.
    """
    candlestick_frame = 12
    pnl_threshold = 3

    try:
        df.ta.strategy("all")
    except Exception as e:
        # Best effort: report the failure and carry on with the frame as it
        # is. Previously the labelling below ran ONLY when this call raised,
        # so a successful run returned None and wrote no file.
        print(e)

    signal = _label_signals(
        list(df['open']), list(df['high']), list(df['low']),
        candlestick_frame, pnl_threshold,
    )
    df['coin'] = [coin_pair] * len(signal)
    df['signal'] = signal

    if drop:
        labels = df['signal']
        long_count = int((labels == 'long').sum())
        short_count = int((labels == 'short').sum())
        dont_trade_indices = list(df.index[labels == 'dont_trade'])
        # Clamp at zero: when 'dont_trade' is already the smallest class there
        # is nothing to discard (a negative size makes random.sample raise).
        num_indices_to_pick = max(
            0, len(dont_trade_indices) - min(long_count, short_count))
        random_indices = random.sample(dont_trade_indices, num_indices_to_pick)
        df = df.drop(random_indices)

        df.reset_index(drop=True, inplace=True)

        df = df.drop(columns=['time', 'open', 'high', 'low', 'close', 'volume'])

    dataset_fn = f'../../data/{fn}.csv'
    df.to_csv(dataset_fn, index=False)
    print(f'{coin_pair} Features Generated and saved')
    return df
    
def generate_training_and_testing_features(coin_pair, split_timestamp=1661558400):
    """Create the full, testing and training feature files for ``coin_pair``.

    Reads ``../../data/{coin_pair}_binace_all.csv`` and produces:
      * ``{coin_pair}_all_features.csv`` - features for every row,
      * ``{coin_pair}_testing_dataset.csv`` - rows from the split onwards,
      * ``{coin_pair}_training_features.csv`` - features generated from the
        raw rows before the split, with class balancing (``drop=True``).

    Parameters:
        coin_pair (str): Symbol used in the file names and ``coin`` column.
        split_timestamp (int): Value of the ``time`` column at which the
            testing set starts. Defaults to the previously hard-coded
            1661558400.

    Raises:
        ValueError: if ``split_timestamp`` is not present in the ``time``
            column.
    """
    raw_path = f'../../data/{coin_pair}_binace_all.csv'

    all_features = generate_features(
        pd.read_csv(raw_path), coin_pair, f'{coin_pair}_all_features', False)

    # Locate the split row once and reuse it for both datasets.
    try:
        split_idx = list(all_features['time']).index(split_timestamp)
    except ValueError:
        raise ValueError(
            f'split timestamp {split_timestamp} not found in the time column '
            f'of {raw_path}'
        ) from None

    all_features[split_idx:].to_csv(
        f'../../data/{coin_pair}_testing_dataset.csv', index=False)

    # Re-read the raw candles so the training features are generated from
    # the pre-split rows only; copy the slice because generate_features adds
    # columns to the frame it receives.
    training_raw = pd.read_csv(raw_path)[0:split_idx].copy()
    generate_features(
        training_raw, coin_pair, f'{coin_pair}_training_features', True)

def train_model(coin_pair):
    """Train an LGBM classifier on the coin's training features and save it.

    Reads ``../../data/{coin_pair}_training_features.csv``, runs the pycaret
    ``setup`` / ``create_model`` calls with ``signal`` as the target, saves
    the result under ``../../models/{coin_pair}`` and plots the confusion
    matrix.

    Parameters:
        coin_pair (str): Symbol used in the data and model file names.
    """
    # Instantiated as in the original flow; the calls below use pycaret's
    # module-level functions rather than this object.
    experiment = ClassificationExperiment()

    training_frame = pd.read_csv(f'../../data/{coin_pair}_training_features.csv')
    setup_result = setup(training_frame, target='signal', session_id=123, use_gpu=True)
    classifier = create_model(LGBMClassifier())
    cv_scores = pull()
    # accuracy_mean = cv_scores['Accuracy']['CV-Val']['Mean']

    # save pipeline
    model_name = f'{coin_pair}'
    model_path = f'../../models/{model_name}'
    save_model(classifier, model_path)
    print(f'{model_name} model saved.')

    plot_model(classifier, plot='confusion_matrix', plot_kwargs={'percent': True})
    # plot_model(classifier, plot = 'feature_all')

def validate_model(coin_pair):
    """Score the testing dataset with the saved model and store the result.

    Loads ``../../models/{coin_pair}``, predicts on
    ``../../data/{coin_pair}_testing_dataset.csv`` (without the raw candle
    columns and the label), then writes the predictions together with the
    true label (``correct_signal``) and ``time`` to
    ``../../data/{coin_pair}_predictions.csv``.

    Parameters:
        coin_pair (str): Symbol used in the model and data file names.
    """
    # Load trained Pipeline
    pipeline = load_model(f'../../models/{coin_pair}')

    testing = pd.read_csv(f'../../data/{coin_pair}_testing_dataset.csv')
    true_labels = list(testing['signal'])
    candle_times = list(testing['time'])

    # The model only sees the feature columns.
    excluded = ['time', 'open', 'high', 'low', 'close', 'volume', 'signal']
    features = testing.drop(columns=excluded)

    scored = predict_model(pipeline, features)
    scored['correct_signal'] = true_labels
    scored['time'] = candle_times
    scored.to_csv(f'../../data/{coin_pair}_predictions.csv', index=False)



def ts_to_local_time(ts, utc_offset=14400):
    """Format a Unix timestamp as 'dd-mm-YYYY HH:MM:SS' at a fixed UTC offset.

    Parameters:
        ts (int | float): Unix timestamp in seconds.
        utc_offset (int): Seconds added to ``ts`` before formatting.
            Defaults to 14400 (UTC+4), the value that used to be hard-coded.

    Returns:
        str: the shifted time formatted as ``'%d-%m-%Y %H:%M:%S'``.
    """
    # Function-scope import: the file itself only imports `datetime`.
    from datetime import timezone

    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp;
    # the formatted wall-clock fields are the same.
    shifted = datetime.fromtimestamp(ts + utc_offset, tz=timezone.utc)
    return shifted.strftime('%d-%m-%Y %H:%M:%S')

def calc_compounded_pnl(pnls):
    """Compound a sequence of percentage PnLs into one overall percentage.

    Starts from a balance of 100 and applies each percentage in order, e.g.
    ``[10, 10]`` -> ``21.0``.

    Parameters:
        pnls (iterable of numbers): Per-trade PnL values in percent.

    Returns:
        float | int: Overall gain/loss in percent, rounded to one decimal.
        Returns 0 when the input cannot be processed (e.g. ``None`` or
        non-numeric items).
    """
    balance = 100.0
    try:
        for pct in pnls:
            balance = (1 + (pct / 100)) * balance
        return round(balance - 100.0, 1)
    except (TypeError, ValueError, ArithmeticError):
        # Deliberate best effort for unusable input; unlike a bare `except`,
        # this lets KeyboardInterrupt / SystemExit propagate.
        return 0

def sim_historical_data(coin_pair, interval, start_timestamp, end_timestamp):
    """Download futures klines at ``interval`` for use by the trade simulator.

    The range is requested in windows of ``900 * 500`` seconds via
    ``client.futures_historical_klines`` (timestamps are sent in
    milliseconds); progress is printed for each window.

    Parameters:
        coin_pair (str): Symbol passed to the client.
        interval (str): Kline interval passed to the client, e.g. ``'1m'``.
        start_timestamp (int): Range start, Unix seconds.
        end_timestamp (int): Range end, Unix seconds.

    Returns:
        pandas.DataFrame: columns ``time`` (open time in seconds), ``open``,
        ``high``, ``low``, ``close``, ``volume`` (all floats).
    """
    step = 900 * 500
    span_in_steps = (end_timestamp - start_timestamp) / step
    records = []
    request_no = 0

    for window_start in range(start_timestamp, end_timestamp + 1, step):
        window_end = window_start + step
        if window_end >= end_timestamp:
            window_end = end_timestamp

        if window_end == end_timestamp:
            print(window_start, window_end, '100% completed')
        else:
            print(window_start, window_end, f'{round(request_no*100/span_in_steps, 1)}% completed')
        request_no += 1

        # Futures market
        klines = client.futures_historical_klines(
            coin_pair, interval, window_start*1000, window_end*1000, limit=500)
        # Spot market
        # klines = client.get_historical_klines(coin_pair, interval, window_start*1000, window_end*1000, limit=500)

        for kline in klines:
            # Field 0 is the open time in ms; fields 1-5 are open, high, low,
            # close and volume.
            record = [kline[0]/1000] + [float(kline[pos]) for pos in range(1, 6)]
            records.append(record)

    # df.to_csv(f'../../data/{coin_pair}-{interval}.csv', index=False)
    return pd.DataFrame(records, columns=['time', 'open', 'high', 'low', 'close', 'volume'])

def sim_trade(coin_pair, sig, start_timestamp, end_timestamp):
    """Simulate one trade on 1-minute candles with a 3 % take-profit.

    The entry price is the open of the first candle. Candles are scanned in
    order; the trade exits at the first candle whose low (short) or high
    (long) reaches the take-profit, otherwise at the last candle.

    Parameters:
        coin_pair (str): Symbol to simulate.
        sig (str): ``'long'`` or ``'short'``. Any other value leaves the PnL
            at 0.0, and the run-up is then computed with the long formula.
        start_timestamp (int): Simulation start, Unix seconds.
        end_timestamp (int): Simulation end, Unix seconds.

    Returns:
        tuple: ``(pnl, exit_ts, run_up)``: PnL in percent at exit, the open
        time of the exit candle (int seconds), and the best excursion in
        percent over the whole window.

    Raises:
        IndexError: if no candles are returned for the window.
    """
    hd = sim_historical_data(coin_pair, '1m', start_timestamp, end_timestamp)
    tm = list(hd['time'])
    high = list(hd['high'])
    low = list(hd['low'])
    entry_price = float(list(hd['open'])[0])
    tp = 3.0

    # The run-up depends only on the whole window, so compute it once rather
    # than on every loop iteration.
    if sig == 'short':
        run_up = round((entry_price*100/min(low))-100, 2)
    else:
        run_up = round((max(high)*100/entry_price)-100, 2)

    # NOTE(review): when the take-profit is never hit, the returned PnL is
    # based on the last candle's low/high rather than its close - confirm
    # that this is intended.
    pnl = 0.0
    for candle_ts, candle_high, candle_low in zip(tm, high, low):
        if sig == 'short':
            pnl = round((entry_price*100/candle_low)-100, 2)
        elif sig == 'long':
            pnl = round((candle_high*100/entry_price)-100, 2)
        if pnl >= tp:
            return pnl, int(candle_ts), run_up
    return pnl, int(tm[-1]), run_up


def update_csv(fn, val):
    """Append the text ``val`` to the file ``fn`` (created if missing).

    Parameters:
        fn (str): Path of the file to append to.
        val (str): Text written as-is; no newline is added.
    """
    with open(fn, "a") as sink:
        sink.write(val)

def sim_and_log_trade(coin_pair):
    """Simulate trades for every qualifying prediction and log the results.

    Reads ``../../data/{coin_pair}_predictions.csv``, writes a signal log to
    ``../../data/{coin_pair}_signal_log.csv``, then simulates a 3-hour trade
    for each signal that
      * starts at or after the exit of the previous simulated trade,
      * has a confidence score of at least 0.5, and
      * is not 'dont_trade'.
    Per-trade PnL is capped at 3. Results go to
    ``../../data/{coin_pair}_sim_trade.csv`` and the compounded PnL is
    appended to ``../../data/compounded_pnls.csv``.

    Parameters:
        coin_pair (str): Symbol used in the input and output file names.
    """
    predictions = pd.read_csv(f'../../data/{coin_pair}_predictions.csv')
    signal_log = pd.DataFrame()
    signal_log['Signal'] = list(predictions['prediction_label'])
    signal_log['Confidence_Score'] = list(predictions['prediction_score'])
    signal_log['Coin'] = list(predictions['coin'])
    signal_log['Log_Time'] = list(predictions['time'])
    signal_log['LogTS'] = list(predictions['time'])
    signal_log.to_csv(f'../../data/{coin_pair}_signal_log.csv', index=False)

    # Read the log back so the simulation works on exactly what was stored.
    signal_log_df = pd.read_csv(f'../../data/{coin_pair}_signal_log.csv')
    signals = list(signal_log_df['Signal'])
    confidence_scores = list(signal_log_df['Confidence_Score'])
    coins = list(signal_log_df['Coin'])
    log_ts = list(signal_log_df['LogTS'])

    last_ts = 0
    dta = []
    tot = len(log_ts)
    for idx, start_timestamp in enumerate(log_ts):
        sig = signals[idx]
        cof_score = confidence_scores[idx]
        if (start_timestamp >= last_ts) and (cof_score >= 0.5) and (sig != 'dont_trade'):
            end_timestamp = start_timestamp + 10800
            # Separate name: do not overwrite the `coin_pair` argument, which
            # also names the output files below.
            trade_pair = coins[idx]
            pnl, last_ts, run_up = sim_trade(f'{trade_pair}', sig, int(start_timestamp), int(end_timestamp))
            pnl = 3 if pnl >= 3 else pnl
            entry_time = ts_to_local_time(start_timestamp)
            exit_time = ts_to_local_time(last_ts)
            dta.append({'coin_pair': trade_pair, 'pnl': pnl, 'cof_score': cof_score, 'run_up': run_up, 'type': sig, 'entry_time': entry_time, 'exit_time': exit_time, 'entry_ts': start_timestamp, 'exit_ts': last_ts})
            print('PnL:', pnl)
            print(f'{round((idx*100/tot), 1) }% completed')

    print('100% completed')

    # Explicit columns keep the frame well-formed when no trade was taken
    # (previously that case failed with a KeyError on 'pnl').
    trade_columns = ['coin_pair', 'pnl', 'cof_score', 'run_up', 'type',
                     'entry_time', 'exit_time', 'entry_ts', 'exit_ts']
    exp_df = pd.DataFrame(dta, columns=trade_columns)
    pnls = list(exp_df['pnl'])
    c_pnl = calc_compounded_pnl(pnls)
    update_csv('../../data/compounded_pnls.csv', f'{coin_pair}, {c_pnl}\n')

    # The compounded PnL is shown on the first and last row only. Built so
    # that it also has the right length for zero or one trade (the old
    # expression always produced at least two entries).
    compounded_column = [''] * len(pnls)
    if compounded_column:
        compounded_column[0] = c_pnl
        compounded_column[-1] = c_pnl
    exp_df['compounded_pnl'] = compounded_column
    exp_df.to_csv(f'../../data/{coin_pair}_sim_trade.csv', index=False)


In [None]:
# Driver: for every coin, download candles, build features, train, validate
# and simulate trades.
# Full coin universe (disabled):
# coins_list = ['BTC', 'ETH', 'BNB', 'TRX', 'TON', 'MATIC', 'EOS', 'COMP', 'SHIB', 'DOGE', 'MKR', 'VET', 'FIL', 'GALA', 'LTC', 'BCH', 'SAND', 'SOL', 'DYDX', 'FTM', 'XLM', 'UNI', 'GRT', 'LINK', 'XTZ', 'SNX', 'ZEC', 'AXS', 'AVAX', 'ADA', 'EGLD', 'ATOM', 'MANA', 'KAVA', 'XRP', 'NEO', 'CHZ', 'ETC', 'DOT', 'RUNE', 'ALGO', 'AAVE', 'NEAR', 'CRV', 'THETA', 'HBAR']
coins_list = ['GALA']
for coin in coins_list:
    coin_pair = f'{coin}USDT'
    print('working on', coin_pair)

    # Download window: from 1 June 2022 until now.
    # start_timestamp = get_unix_timestamp('1/1/2016 00:00:00')
    start_timestamp = get_unix_timestamp('1/6/2022 00:00:00')
    end_timestamp = int(time())
    # end_timestamp = 1661558400

    binace_all = get_historical_data(start_timestamp, end_timestamp, coin_pair)
    binace_all.to_csv(f'../../data/{coin_pair}_binace_all.csv', index=False)

    generate_training_and_testing_features(coin_pair)
    train_model(coin_pair)
    validate_model(coin_pair)
    sim_and_log_trade(coin_pair)

In [5]:
# Column names of the feature dataset: 'coin' and 'signal' followed by
# indicator / candlestick-pattern column names.
# NOTE(review): the ordering criterion is not visible here - presumably a
# feature ranking; confirm before relying on it.
sorted_feat = ['coin', 'signal', 'DPO_20', 'PVT', 'PVI_1', 'BEARP_13', 'SUPERTl_7_3.0', 'ADOSC_3_10', 'UO_7_14_28', 'SQZPRO_ON_WIDE', 'CMF_20', 'D_9_3', 'K_9_3', 'PSARl_0.02_0.2', 'ACCBL_20', 'SQZPRO_ON_NARROW', 'DCL_20_20', 'CHOP_14_1_100', 'HWL', 'KCLe_20_2', 'STCstoch_10_12_26_0.5', 'BBL_5_2.0', 'J_9_3', 'THERMOl_20_2_0.5', 'STOCHd_14_3_3', 'STOCHk_14_3_3', 'INERTIA_20_14', 'QQEs_14_5_4.236', 'HILOl_13_21', 'RVI_14', 'SQZ_NO', 'SQZPRO_NO', 'STC_10_12_26_0.5', 'EBSW_40_10', 'ABER_XG_5_15', 'STOCHRSId_14_14_3_3', 'HA_low', 'RSX_14', 'ADX_14', 'AD', 'SUPERT_7_3.0', 'AOBV_LR_2', 'CTI_12', 'HWM', 'HWMA_0.2_0.1_0.1', 'AROONOSC_14', 'STOCHRSIk_14_14_3_3', 'KVOs_34_55_13', 'MCGD_10', 'SSF_10_2', 'HA_open', 'BBM_5_2.0', 'MIDPOINT_2', 'ABER_ZG_5_15', 'OHLC4', 'HA_close', 'HL2', 'MIDPRICE_2', 'HLC3', 'NVI_1', 'LDECAY_5', 'JMA_7_0', 'CDL_SHORTLINE', 'FWMA_10', 'EMA_10', 'HMA_10', 'PPOs_12_26_9', 'PWMA_10', 'TRIMA_10', 'SWMA_10', 'SMA_10', 'RMA_10', 'HILO_13_21', 'ITS_9', 'LR_14', 'SINWMA_14', 'CDL_ENGULFING', 'CDL_3INSIDE', 'TTM_TRND_6', 'SMI_5_20_5', 'ALMA_10_6.0_0.85', 'TEMA_10', 'DEMA_10', 'ACCBM_20', 'CDL_PIERCING', 'KCBe_20_2', 'QTL_30_0.5', 'MEDIAN_30', 'low_Z_30_1', 'SMIs_5_20_5', 'CDL_LONGLEGGEDDOJI', 'T3_10_0.7', 'CDL_RICKSHAWMAN', 'KVO_34_55_13', 'AO_5_34', 'CDL_HIKKAKE', 'CDL_INVERTEDHAMMER', 'DCM_20_20', 'OBV_min_2', 'MFI_14', 'CKSPs_10_3_20', 'CDL_ADVANCEBLOCK', 'CCI_14_0.015', 'HA_high', 'CDL_DOJI_10_0.1', 'HILOs_13_21', 'QQE_14_5_4.236_RSIMA', 'PVR', 'HWU', 'AMATe_LR_8_21_2', 'KAMA_10_2_30', 'IKS_26', 'CG_10', 'CDL_STALLEDPATTERN', 'CDL_EVENINGSTAR', 'CKSPl_10_3_20', 'OBV', 'OBVe_4', 'ISB_26', 'RSI_14', 'CMO_14', 'ISA_9', 'RVGI_14_4', 'VIDYA_14', 'PPO_12_26_9', 'open_Z_30_1', 'ABER_SG_5_15', 'CDL_HIGHWAVE', 'CDL_DOJISTAR', 'APO_12_26', 'TSI_13_25_13', 'PSL_12', 'CDL_SPINNINGTOP', 'BBU_5_2.0', 'PSARaf_0.02_0.2', 'CDL_MORNINGDOJISTAR', 'CDL_IDENTICAL3CROWS', 'OBV_max_2', 'SMIo_5_20_5', 'close_Z_30_1', 'RVGIs_14_4', 'PGO_14', 'OBVe_12', 
'KST_10_15_20_30_10_10_10_15', 'STCmacd_10_12_26_0.5', 'MACD_12_26_9', 'CDL_GRAVESTONEDOJI', 'BIAS_SMA_26', 'MACDs_12_26_9', 'KSTs_9', 'CDL_HARAMICROSS', 'MACDh_12_26_9', 'CDL_MORNINGSTAR', 'CDL_DRAGONFLYDOJI', 'CDL_HAMMER', 'high_Z_30_1', 'QQE_14_5_4.236', 'TSIs_13_25_13', 'MOM_10', 'QS_10', 'CDL_HARAMI', 'PSARs_0.02_0.2', 'ENTP_10', 'CDL_GAPSIDESIDEWHITE', 'CDL_HIKKAKEMOD', 'INC_1', 'CDL_TAKURI', 'ROC_10', 'EOM_14_100000000', 'SQZ_ON', 'SQZPRO_ON_NORMAL', 'KCUe_20_2', 'BOP', 'SQZ_OFF', 'COPC_11_14_10', 'SQZPRO_20_2.0_20_2_1.5_1', 'SQZ_20_2.0_20_1.5', 'CDL_BELTHOLD', 'AROONU_14', 'DEC_1', 'BBP_5_2.0', 'CDL_INSIDE', 'KURT_30', 'ACCBU_20', 'DCU_20_20', 'BR_26', 'LOGRET_1', 'CDL_3OUTSIDE', 'PCTRET_1', 'AR_26', 'CDL_3LINESTRIKE', 'CDL_LONGLINE', 'SLOPE_1', 'CDL_SEPARATINGLINES', 'CDL_SHOOTINGSTAR', 'SUPERTs_7_3.0', 'EFI_13', 'CDL_RISEFALL3METHODS', 'CDL_3WHITESOLDIERS', 'CFO_9', 'CDL_XSIDEGAP3METHODS', 'AMATe_SR_8_21_2', 'THERMOs_20_2_0.5', 'CDL_CLOSINGMARUBOZU', 'PPOh_12_26_9', 'VHF_28', 'ER_10', 'CDL_MARUBOZU', 'TRIX_30_9', 'CDL_MATCHINGLOW', 'QQEl_14_5_4.236', 'FISHERT_9_1', 'FISHERTs_9_1', 'PSARr_0.02_0.2', 'TRIXs_30_9', 'CDL_HANGINGMAN', 'AOBV_SR_2', 'SKEW_30', 'MASSI_9_25', 'ICS_26', 'PVOs_12_26_9', 'SUPERTd_7_3.0', 'AROOND_14', 'BULLP_13', 'SQZPRO_OFF', 'PVOh_12_26_9', 'PVO_12_26_9', 'TOS_STDEVALL_L_2', 'TOS_STDEVALL_U_3', 'TOS_STDEVALL_L_1', 'TOS_STDEVALL_LR', 'TOS_STDEVALL_U_1', 'TOS_STDEVALL_U_2', 'TOS_STDEVALL_L_3', 'THERMO_20_2_0.5', 'VAR_30', 'PVOL', 'UI_14', 'MAD_30', 'STDEV_30', 'TRUERANGE_1', 'PDIST', 'DMP_14', 'DMN_14', 'BBB_5_2.0', 'THERMOma_20_2_0.5', 'ATRr_14', 'ABER_ATR_5_15', 'NATR_14', 'CDL_2CROWS', 'CDL_3BLACKCROWS', 'CDL_3STARSINSOUTH', 'CDL_ABANDONEDBABY', 'CDL_BREAKAWAY', 'CDL_CONCEALBABYSWALL', 'CDL_COUNTERATTACK', 'CDL_DARKCLOUDCOVER', 'CDL_EVENINGDOJISTAR', 'CDL_HOMINGPIGEON', 'CDL_INNECK', 'CDL_KICKING', 'CDL_KICKINGBYLENGTH', 'CDL_LADDERBOTTOM', 'CDL_MATHOLD', 'CDL_ONNECK', 'CDL_STICKSANDWICH', 'CDL_TASUKIGAP', 'CDL_THRUSTING', 
'CDL_TRISTAR', 'CDL_UNIQUE3RIVER', 'CDL_UPSIDEGAP2CROWS']
# First 20 entries of the list above. Because the list starts with 'coin' and
# 'signal', this selection holds those two plus 18 indicator names.
best_feat = sorted_feat[:20]