In [10]:
#!/usr/bin/python3
from binance.client import Client
from time import time
import pickle as pickle
from datetime import datetime
import pandas as pd
import pandas_ta as ta
from pycaret.classification import load_model, predict_model

import random


def get_client():
    """Build a Binance ``Client`` from the API credentials stored in a pickle file.

    The pickle is expected to hold a mapping with the keys 'API_KEY' and
    'API_SECRET'; both values are passed straight to ``Client``.

    Returns:
        Client: a client constructed with the stored key and secret.
    """
    key_path = '../../key/binance-key.pickle'
    # key_path = '/home/era/key/binance-key.pickle'
    with open(key_path, 'rb') as key_file:
        credentials = pickle.load(key_file)
    api_key = credentials['API_KEY']
    api_secret = credentials['API_SECRET']
    return Client(api_key, api_secret)


# Shared Binance client, created once when this cell runs; get_historical_data reads this global.
client = get_client()


def get_unix_timestamp(date_string):
    """
    Parse a "dd/mm/yyyy hh:mm:ss" string and return it as whole Unix seconds.

    The parsed datetime carries no timezone, so ``timestamp()`` interprets it
    in the machine's local timezone.

    Parameters:
        date_string (str): Date/time text such as "27/08/2022 00:00:00".

    Returns:
        int: Seconds since the epoch, or None (after printing a hint) when
        the conversion raises ValueError, e.g. the text does not match the
        expected format.
    """
    expected_format = "%d/%m/%Y %H:%M:%S"
    try:
        return int(datetime.strptime(date_string, expected_format).timestamp())
    except ValueError:
        print("Invalid date format. Please use the format 'dd/mm/yyyy hh:mm:ss'.")
        return None

def get_historical_data(start_timestamp, end_timestamp, coin_pair):
    """Download 15-minute futures candles for ``coin_pair`` in fixed-size time chunks.

    The [start, end] range is walked in steps of 900*500 seconds (500 candles
    of 900 seconds each); every step issues one ``futures_historical_klines``
    request on the module-level ``client`` and prints a progress line.

    Parameters:
        start_timestamp (int): range start, in Unix seconds.
        end_timestamp (int): range end, in Unix seconds.
        coin_pair (str): symbol passed to the client, e.g. 'ETHUSDT'.

    Returns:
        pandas.DataFrame: columns 'time', 'open', 'high', 'low', 'close',
        'volume'. 'time' is the kline's first field divided by 1000
        (presumably open time in ms converted to seconds); the rest are floats.
    """
    chunk_seconds = 900*500
    total_chunks = (end_timestamp - start_timestamp)/chunk_seconds
    rows = []
    chunk_starts = range(start_timestamp, end_timestamp+1, chunk_seconds)
    for chunk_no, current_sts in enumerate(chunk_starts):
        # The last chunk is clipped to the requested end.
        next_ets = min(current_sts + chunk_seconds, end_timestamp)
        if next_ets == end_timestamp:
            print(current_sts, next_ets, '100% completed')
        else:
            print(current_sts, next_ets, f'{round(chunk_no*100/total_chunks, 1)}% completed')

        # Futures market (the client is given millisecond timestamps)
        klines = client.futures_historical_klines(coin_pair, '15m', current_sts*1000, next_ets*1000, limit=500)
        # Spot market
        # klines = client.get_historical_klines(coin_pair, interval, current_sts*1000, next_ets*1000, limit=500)

        rows.extend(
            [
                kline[0]/1000,
                float(kline[1]),
                float(kline[2]),
                float(kline[3]),
                float(kline[4]),
                float(kline[5]),
            ]
            for kline in klines
        )

    df = pd.DataFrame(rows, columns=['time', 'open', 'high', 'low', 'close', 'volume'])
    # df.to_csv(f'/home/ubuntu/data/{coin_pair}-{interval}.csv', index=False)
    # print('Data Exported')
    print(f'Historical Data of {coin_pair} Downloaded')
    return df

def generate_features(df, coin_pair, fn, drop):
    """
    Add indicator features and a trade label to a candle table, then save it as CSV.

    Each row ``idx`` is labelled from the candles that follow it, using the
    open of candle ``idx+1`` as the entry price and candles
    ``idx+1 .. idx+candlestick_frame-1`` as the look-ahead window:
      * 'long'       - highest high in the window is >= ``pnl_threshold`` percent
                       above the entry price;
      * 'short'      - otherwise, the entry price is >= ``pnl_threshold`` percent
                       above the lowest low in the window;
      * 'dont_trade' - everything else, including the first ``candlestick_frame``
                       rows and the trailing rows with ``idx > len(df) - candlestick_frame``.

    Parameters:
        df (pandas.DataFrame): candles with 'open', 'high' and 'low' columns
            ('time', 'close' and 'volume' are also needed when ``drop`` is True).
            It is modified in place: indicator columns (when they can be
            computed), 'coin' and 'signal' are added.
        coin_pair (str): value written to every row of the 'coin' column.
        fn (str): file stem; the result is written to '../../data/{fn}-dataset.csv'.
        drop (bool): when True, randomly remove 'dont_trade' rows until as many
            remain as the smaller of the 'long'/'short' classes, then drop the
            time/OHLCV columns.

    Returns:
        pandas.DataFrame: the labelled (and optionally balanced) table that was saved.
    """
    candlestick_frame = 12
    pnl_threshold = 3

    # df = pd.read_csv('/home/ubuntu/data/ETHUSDT-15m.csv')
    try:
        df.ta.strategy("all")
    except Exception as e:
        # Best effort: the indicator run can fail part-way (the notebook output
        # shows "'RangeIndex' object has no attribute 'to_period'"). Report it
        # and keep going with whatever columns were added.
        print(e)

    # Labelling and saving used to live inside the ``except`` branch above, so a
    # *successful* indicator run returned None without labelling or saving.
    open_prices = list(df['open'])
    highs = list(df['high'])
    lows = list(df['low'])
    tot = len(df)

    signal = []
    for idx in range(tot):
        label = 'dont_trade'
        if candlestick_frame <= idx <= tot - candlestick_frame:
            # NOTE(review): the slice covers candlestick_frame - 1 candles
            # (idx+1 .. idx+candlestick_frame-1) - confirm this is intended.
            window = slice(idx + 1, idx + candlestick_frame)
            max_high = max(highs[window])
            min_low = min(lows[window])
            entry_price = open_prices[idx + 1]
            long_runup = round((max_high*100/entry_price)-100, 6)
            short_runup = round((entry_price*100/min_low)-100, 6)
            if long_runup >= pnl_threshold:
                label = 'long'
            elif short_runup >= pnl_threshold:
                label = 'short'
        signal.append(label)

    df['coin'] = coin_pair
    df['signal'] = signal

    if drop:
        long_count = int((df['signal'] == 'long').sum())
        short_count = int((df['signal'] == 'short').sum())
        dont_trade_indices = list(df.index[df['signal'] == 'dont_trade'])
        # Clamp at zero: when 'dont_trade' is already the smallest class there
        # is nothing to remove, and random.sample rejects a negative count.
        num_indices_to_pick = max(0, len(dont_trade_indices) - min(long_count, short_count))
        random_indices = random.sample(dont_trade_indices, num_indices_to_pick)
        df = df.drop(random_indices)

        df.reset_index(drop=True, inplace=True)

        df = df.drop(columns=['time', 'open', 'high', 'low', 'close', 'volume'])

    dataset_fn = f'../../data/{fn}-dataset.csv'
    df.to_csv(dataset_fn, index=False)
    print(f'{coin_pair} Features Generated and saved')
    return df

In [None]:
# Download the full available history for every coin in the list
# (paired against USDT), from 1 Jan 2016 up to "now".
coins_list = ['ETH']
for coin in coins_list:
    coin_pair = f'{coin}USDT'
    print('working on', coin_pair)
    start_timestamp = get_unix_timestamp('1/1/2016 00:00:00')
    end_timestamp = int(time())
    # end_timestamp = 1661558400
    # Overwritten on each pass, so only the last coin's candles are kept.
    binace_all = get_historical_data(start_timestamp, end_timestamp, coin_pair)

In [102]:
# Save the downloaded candles to CSV so they can be reloaded without hitting the API again.
binace_all.to_csv('../../data/ETHUSDT_binace_all.csv', index=False)

In [103]:
# Label the full history; drop=False skips the class-balancing / column-dropping branch of generate_features.
all_features = generate_features(binace_all, 'ETH', 'ETH_all_features', False)

0it [00:00, ?it/s]

[!] VWAP volume series is not datetime ordered. Results may not be as expected.
[!] VWAP price series is not datetime ordered. Results may not be as expected.


123it [00:21,  5.74it/s]


'RangeIndex' object has no attribute 'to_period'
ETH Features Generated and saved


In [106]:
# Export every row from the candle stamped 1661558400 onward as the testing dataset.
# DataFrame.to_csv returns None when given a path, so the slice is kept in
# `testing_features` first and saved in a separate statement.
test_start_idx = list(all_features['time']).index(1661558400)
testing_features = all_features[test_start_idx:]
testing_features.to_csv('../../data/ETHUSDT_testing_dataset.csv', index=False)

In [None]:
# Reload the raw candles and build the training set; drop=True makes generate_features
# down-sample 'dont_trade' rows and remove the time/OHLCV columns.
# NOTE(review): this input still contains the rows from timestamp 1661558400 onward that
# were exported as the testing dataset - confirm the train/test overlap is intended.
binace_all = pd.read_csv('../../data/ETHUSDT_binace_all.csv')
training_features = generate_features(binace_all, 'ETH', 'ETH_training_features', True)

In [None]:
import pandas as pd
from pycaret.classification import *
from pycaret.classification import ClassificationExperiment
from lightgbm import LGBMClassifier


# Train a LightGBM classifier on the saved training features and persist it.
# NOTE(review): `exp` is created but not referenced again in this cell; the
# module-level pycaret functions (setup/create_model/...) are used instead.
exp = ClassificationExperiment()
dataset_path = '../../data/'
data = pd.read_csv(f'{dataset_path}ETH_training_features-dataset.csv')
# 'signal' (long / short / dont_trade) is the column to predict.
s = setup(data, target = 'signal', session_id = 123, use_gpu=True)
model = create_model(LGBMClassifier())
# Must follow create_model: presumably pulls the score grid it just produced - TODO confirm.
validation_scores = pull()
# accuracy_mean = validation_scores['Accuracy']['CV-Val']['Mean']

# save pipeline
model_name = 'ETHUSDT'
save_model(model, f'../../models/{model_name}')
print(f'{model_name} model saved.')
plot_model(model, plot = 'confusion_matrix', plot_kwargs = {'percent': True})
# plot_model(model, plot = 'feature_all')

In [None]:
import pandas as pd
from pycaret.classification import load_model, predict_model



# Load trained Pipeline
model_name = 'ETHUSDT'
model = load_model(f'../../models/{model_name}')

data = pd.read_csv('../../data/ETHUSDT_testing_dataset.csv')
# Keep the ground-truth labels and timestamps aside; they are removed from the
# model input on the next line and re-attached to the predictions later.
correct_signals = list(data['signal'])
tm = list(data['time'])
# Drop the raw time/OHLCV columns and the label so only the remaining columns are fed to the model.
data = data.drop(columns = ['time', 'open', 'high', 'low', 'close', 'volume', 'signal'], axis=1)
predictions = predict_model(model, data)

# predictions = predict_model(model, data)

# for i in range(1,10,1):
#     data=df[i:i+1]
# #     correct_predition = list(data['signal'])[0]
#     predictions = predict_model(model, data)
#     result = {"prediction": predictions["prediction_label"].iloc[0], "prediction_score": predictions["prediction_score"].iloc[0]}
#     print('correct_predition', 'correct_predition', 'bot predictions', result['prediction'], 'prediction_score', result['prediction_score'])

In [8]:
# Re-attach the ground-truth label and candle timestamp that were set aside before prediction.
predictions['correct_signal'] = correct_signals
predictions['time'] = tm


In [None]:
# Persist the predictions table (without the index) for the later cells that read predictions.csv.
predictions.to_csv('../../data/predictions.csv', index=False)

In [2]:
import pandas as pd

# Reshape the saved predictions into the signal-log layout and write it out.
predictions = pd.read_csv('../../data/predictions.csv')
signal_log = pd.DataFrame({
    'Signal': list(predictions['prediction_label']),
    'Confidence_Score': list(predictions['prediction_score']),
    'Coin': list(predictions['coin']),
    # Both time columns carry the same raw 'time' value.
    'Log_Time': list(predictions['time']),
    'LogTS': list(predictions['time']),
})

signal_log.to_csv('../../data/signal_log.csv', index=False)

In [None]:
from binance.client import Client
from binance.enums import *
from time import time
import pickle as pickle
from datetime import datetime
import pandas as pd



def get_client():
    """Build a Binance ``Client`` from the API credentials stored in a pickle file.

    The pickle is expected to hold a mapping with the keys 'API_KEY' and
    'API_SECRET'; both values are passed straight to ``Client``.

    Returns:
        Client: a client constructed with the stored key and secret.
    """
    key_path = '../../key/binance-key.pickle'
    # key_path = '/home/era/key/binance-key.pickle'
    with open(key_path, 'rb') as key_file:
        credentials = pickle.load(key_file)
    api_key = credentials['API_KEY']
    api_secret = credentials['API_SECRET']
    return Client(api_key, api_secret)


# Shared Binance client, created once when this cell runs; get_historical_data reads this global.
client = get_client()



def get_unix_timestamp(date_string):
    """
    Parse a "dd/mm/yyyy hh:mm:ss" string and return it as whole Unix seconds.

    The parsed datetime carries no timezone, so ``timestamp()`` interprets it
    in the machine's local timezone.

    Parameters:
        date_string (str): Date/time text such as "27/08/2022 00:00:00".

    Returns:
        int: Seconds since the epoch, or None (after printing a hint) when
        the conversion raises ValueError, e.g. the text does not match the
        expected format.
    """
    expected_format = "%d/%m/%Y %H:%M:%S"
    try:
        return int(datetime.strptime(date_string, expected_format).timestamp())
    except ValueError:
        print("Invalid date format. Please use the format 'dd/mm/yyyy hh:mm:ss'.")
        return None
def ts_to_local_time(ts):
    """Format a Unix timestamp as 'dd-mm-YYYY HH:MM:SS', shifted by a fixed +4 hour offset.

    The offset is hard-coded (14400 seconds); the machine's timezone setting
    is not consulted.

    Parameters:
        ts (int | float): Unix timestamp in seconds.

    Returns:
        str: the shifted time, e.g. '01-01-1970 04:00:00' for ``ts == 0``.
    """
    # Function-scope import: the file only imports `datetime` from this module.
    from datetime import timezone

    UTC_OFFSET = 14400
    # datetime.utcfromtimestamp is deprecated since Python 3.12; an aware UTC
    # datetime produces the same wall-clock fields for strftime.
    shifted = datetime.fromtimestamp(ts + UTC_OFFSET, tz=timezone.utc)
    return shifted.strftime('%d-%m-%Y %H:%M:%S')

def calc_compounded_pnl(pnls):
    """Compound a sequence of percentage returns into one overall percentage.

    Starts from a notional balance of 100 and applies each return in order,
    so the result is the total percentage gain or loss.

    Parameters:
        pnls (iterable of numbers): per-trade returns in percent (3 means +3%).

    Returns:
        float: compounded return in percent, rounded to one decimal place;
        0 when the input cannot be processed (not iterable or non-numeric items).
    """
    try:
        balance = 100.0
        for pct in pnls:
            balance = (1 + (pct / 100)) * balance
        return round(balance - 100.0, 1)
    except (TypeError, ValueError, OverflowError):
        # Best effort for bad input, without also swallowing
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` does.
        return 0

def get_historical_data(coin_pair, interval, start_timestamp, end_timestamp):
    """Download futures candles for ``coin_pair`` at ``interval`` in fixed-size time chunks.

    The [start, end] range is walked in steps of 900*500 seconds regardless of
    ``interval``; every step issues one ``futures_historical_klines`` request
    on the module-level ``client`` and prints a progress line.

    Parameters:
        coin_pair (str): symbol passed to the client, e.g. 'ETHUSDT'.
        interval (str): kline interval passed to the client, e.g. '1m'.
        start_timestamp (int): range start, in Unix seconds.
        end_timestamp (int): range end, in Unix seconds.

    Returns:
        pandas.DataFrame: columns 'time', 'open', 'high', 'low', 'close',
        'volume'. 'time' is the kline's first field divided by 1000
        (presumably open time in ms converted to seconds); the rest are floats.
    """
    chunk_seconds = 900*500
    total_chunks = (end_timestamp - start_timestamp)/chunk_seconds
    rows = []
    chunk_starts = range(start_timestamp, end_timestamp+1, chunk_seconds)
    for chunk_no, current_sts in enumerate(chunk_starts):
        # The last chunk is clipped to the requested end.
        next_ets = min(current_sts + chunk_seconds, end_timestamp)
        if next_ets == end_timestamp:
            print(current_sts, next_ets, '100% completed')
        else:
            print(current_sts, next_ets, f'{round(chunk_no*100/total_chunks, 1)}% completed')

        # Futures market (the client is given millisecond timestamps)
        klines = client.futures_historical_klines(coin_pair, interval, current_sts*1000, next_ets*1000, limit=500)
        # Spot market
        # klines = client.get_historical_klines(coin_pair, interval, current_sts*1000, next_ets*1000, limit=500)

        rows.extend(
            [
                kline[0]/1000,
                float(kline[1]),
                float(kline[2]),
                float(kline[3]),
                float(kline[4]),
                float(kline[5]),
            ]
            for kline in klines
        )

    df = pd.DataFrame(rows, columns=['time', 'open', 'high', 'low', 'close', 'volume'])
    # df.to_csv(f'../../data/{coin_pair}-{interval}.csv', index=False)
    return df

def sim_trade(coin_pair, sig, start_timestamp, end_timestamp):
    """Simulate one trade over 1-minute candles and report its outcome.

    The entry price is the open of the first candle returned for the range.
    Candles are scanned in order; the trade closes at the first candle whose
    best price (low for 'short', high for 'long') reaches the 3% take-profit.
    For any other ``sig`` the pnl stays 0.0 and the scan runs to the end.

    Parameters:
        coin_pair (str): symbol to fetch, e.g. 'ETHUSDT'.
        sig (str): 'long' or 'short'.
        start_timestamp (int): start of the trade window, Unix seconds.
        end_timestamp (int): end of the trade window, Unix seconds.

    Returns:
        tuple: ``(pnl, exit_ts, run_up)``. ``pnl`` is the percent move at the
        exit candle, ``exit_ts`` that candle's 'time' as int, and ``run_up``
        the best percent move over the whole window (the short formula for
        'short', the long formula for every other ``sig``).

    Raises:
        IndexError: if no candles are returned for the range.
    """
    hd = get_historical_data(coin_pair, '1m', start_timestamp, end_timestamp)
    tm = list(hd['time'])
    high = list(hd['high'])
    low = list(hd['low'])
    entry_price = float(list(hd['open'])[0])
    tp = 3.0

    # run_up depends only on the whole window, so compute it once instead of
    # rescanning every candle on each loop iteration.
    if sig == 'short':
        run_up = round((entry_price*100/min(low))-100, 2)
    else:
        run_up = round((max(high)*100/entry_price)-100, 2)

    pnl = 0.0
    for candle_ts, candle_high, candle_low in zip(tm, high, low):
        if sig == 'short':
            pnl = round((entry_price*100/candle_low)-100, 2)
        if sig == 'long':
            pnl = round((candle_high*100/entry_price)-100, 2)
        if pnl >= tp:
            return pnl, int(candle_ts), run_up

    # Take-profit never hit: report the last candle.
    # NOTE(review): pnl here comes from the last candle's high/low, not its
    # close - confirm that is the intended exit price.
    return pnl, int(tm[-1]), run_up


# Replay the logged signals as simulated trades and export the results.
signal_log_df = pd.read_csv('../../data/signal_log.csv')
signal = list(signal_log_df['Signal'])
confidence_Score = list(signal_log_df['Confidence_Score'])
coin = list(signal_log_df['Coin'])
log_Time = list(signal_log_df['Log_Time'])
logTS = list(signal_log_df['LogTS'])
last_ts = 0  # exit time of the most recent simulated trade
dta = []
tot = len(logTS)
for idx, start_timestamp in enumerate(logTS):
    print(f'{round((idx*100/tot), 1)}% completed')
    sig = signal[idx]
    cof_score = confidence_Score[idx]
    # Trade only when the previous trade has already exited, the model is at
    # least 50% confident, and the signal is an actual direction.
    tradable = (start_timestamp >= last_ts) and (cof_score >= 0.5) and (sig != 'dont_trade')
    if not tradable:
        continue
    end_timestamp = start_timestamp + 10800  # 3-hour trade window
    coin_pair = coin[idx]
    pnl, last_ts, run_up = sim_trade(f'{coin_pair}USDT', sig, int(start_timestamp), int(end_timestamp))
    pnl = 3 if pnl > 3 else pnl  # cap the booked profit at the 3% take-profit
    entry_time = ts_to_local_time(start_timestamp)
    exit_time = ts_to_local_time(last_ts)
    dta.append({'coin_pair': coin_pair, 'pnl': pnl, 'cof_score':cof_score, 'run_up': run_up, 'type': sig, 'entry_time': entry_time, 'exit_time': exit_time, 'entry_ts': start_timestamp, 'exit_ts': last_ts})
    print('PnL:', pnl)

exp_df = pd.DataFrame(dta)
# Read the pnls from the records so an empty run does not fail on a missing 'pnl' column.
pnls = [trade['pnl'] for trade in dta]
c_pnl = calc_compounded_pnl(pnls)
if pnls:
    # Show the compounded result on the first and last rows only. Building the
    # column by position keeps its length correct when there is a single trade.
    compounded_col = [''] * len(pnls)
    compounded_col[0] = c_pnl
    compounded_col[-1] = c_pnl
    exp_df['compounded_pnl'] = compounded_col
exp_df.to_csv('../../data/sim_trade.csv', index=False)