# Data preperation notebook

In [1]:
import requests
import time
import pandas as pd

# Define the endpoint and parameters
base_url = "https://api-testnet.bybit.com"
endpoint = "/v5/market/kline"
symbol = "BTCUSD"  # Example trading pair
interval = "15"    # Kline interval, e.g., "1", "5", "15", "60", "240", etc.
limit = 10        # Number of klines to fetch
start_time = int(time.time()) - 3600 * 24  # Start time in seconds (e.g., 24 hours ago)

# Construct the URL
url = f"{base_url}{endpoint}?category=linear&symbol={symbol}&interval={interval}&limit={limit}"#&start_time={start_time}"

# Make the GET request
response = requests.get(url,headers={}, data={})
data = response.json() 

print(data)
print(pd.Timestamp(data["time"],unit="ms"))
print(data["result"]["list"])



{'retCode': 0, 'retMsg': 'OK', 'result': {'symbol': 'BTCUSD', 'category': 'linear', 'list': [['1717155000000', '68307', '68390', '68290', '68390', '804949', '11.78445278'], ['1717154100000', '68374', '68376', '68307', '68307', '3248163', '47.53099732'], ['1717153200000', '68260.5', '68374', '68259', '68374', '2723421', '39.87751514'], ['1717152300000', '68214', '68264.5', '68154', '68260.5', '2229925', '32.70486497'], ['1717151400000', '68103.5', '68219.5', '68083', '68214', '1117495', '16.40528511'], ['1717150500000', '68135', '68139', '68098', '68103.5', '2800305', '41.10975304'], ['1717149600000', '67894.5', '68135', '67893.5', '68135', '1392218', '20.47693368'], ['1717148700000', '67940', '67946', '67844', '67894.5', '2758784', '40.6267141'], ['1717147800000', '67877.5', '67941', '67865', '67940', '2101264', '30.95606874'], ['1717146900000', '68099', '68109', '67874', '67877.5', '2389131', '35.17834913']]}, 'retExtInfo': {}, 'time': 1717155605559}
2024-05-31 11:40:05.559000
[['1717

In [2]:
#def prepare_data(data):
print(pd.to_datetime(int(data["result"]["list"][0][0]),unit='ms'))
print(pd.to_datetime(int(data["result"]["list"][1][0]),unit='ms'))
print(pd.to_datetime(int(data["result"]["list"][2][0]),unit='ms'))
print(pd.to_datetime(int(data["result"]["list"][3][0]),unit='ms'))



2024-05-31 11:30:00
2024-05-31 11:15:00
2024-05-31 11:00:00
2024-05-31 10:45:00


In [3]:
import pandas as pd
import numpy as np

def prepare_data(data):
    
    # define pandas DF 
    cols = ['time','open', 'high', 'low', 'close','vol','vol_coin']
    df = pd.DataFrame(data["result"]["list"], columns=cols)
    
    df['time'] = df['time'].astype(int)
    
    df["time"] = pd.to_datetime(df["time"],unit="ms")
    df["day"] = df["time"].dt.day
    df["hour"] = df["time"].dt.hour

    # the 10-day moving average
    df["10MA"] = df["close"].astype('float64').mean()
    
    # Calculate the standard deviation of the closing prices over the same 20-day period
    df["10STD"] = df["close"].astype('float64').std()
    
    # volume weighted close price
    df["vwap"] = df.close.astype('float64')/df.vol.astype('float64')
    
    # Bollinger Band: 
    # simple_moving_average(20) + std x 2 | sma - std x 2
    df["bb_upper_band"] = df["10MA"].astype('float64') + 2 * df["10STD"].astype('float64')
    df["bb_lower_band"] = df["10MA"].astype('float64') - 2 * df["10STD"].astype('float64')

    # Calculate On-Balance Volume (OBV)
    df["change"] = df["close"].astype('float64').diff(periods=-1)#.shift(-1)
    df["direction"] = df["change"].astype('float64').apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))
    df["obv"] = df["vol"].astype('float64') * df["direction"].astype('float64')
    df["obv"] = df["obv"].astype('float64').cumsum()

    # Stack the DataFrame for all non-constant rows of the period (OHLC,vol,vwap,obv)
    stacked_df_cols = ["open","high","low","close","vol","vol_coin","vwap","obv","change","direction"]
    stacked_df = df.loc[0:len(df)-2,stacked_df_cols].stack() #cut the last row 0:8=9, (10-2) bc of diff = NAN
    
    # Create a new DataFrame from the stacked series and transpose it
    # this creates a multi index data frame with tuples as indices, like [(0,'ts'),...]
    df_single_row = stacked_df.to_frame().T
    
    # now change the multiindex col to a single index col by replacing it witht a list of concatenated strings 
    df_single_row.columns = [f'{col[1]}_{col[0]}' for col in df_single_row.columns]

    # get the first row of the bollinger band metrics that are a constant for the 10 observations
    bb_cols = ["10MA","10STD","bb_upper_band","bb_lower_band"]
    df_bb_metrics = df[bb_cols].head(1)

    # collect meta data
    meta_cols = ["day","hour"]
    df_meta = df[meta_cols].head(1)

    # put cols side by side
    df_concat = pd.concat([df_meta,df_bb_metrics,df_single_row],axis=1)
    
    return df,stacked_df,df_single_row,df_concat
    
df,stack_df,single_df,concat_df = prepare_data(data)

In [12]:
len(prep_df.columns)

97

In [4]:
df

Unnamed: 0,time,open,high,low,close,vol,vol_coin,day,hour,10MA,10STD,vwap,bb_upper_band,bb_lower_band,change,direction,obv
0,2024-05-31 11:30:00,68307.0,68390.0,68290.0,68390.0,804949,11.78445278,31,11,68149.6,192.859592,0.084962,68535.319184,67763.880816,83.0,1,804949.0
1,2024-05-31 11:15:00,68374.0,68376.0,68307.0,68307.0,3248163,47.53099732,31,11,68149.6,192.859592,0.021029,68535.319184,67763.880816,-67.0,-1,-2443214.0
2,2024-05-31 11:00:00,68260.5,68374.0,68259.0,68374.0,2723421,39.87751514,31,11,68149.6,192.859592,0.025106,68535.319184,67763.880816,113.5,1,280207.0
3,2024-05-31 10:45:00,68214.0,68264.5,68154.0,68260.5,2229925,32.70486497,31,10,68149.6,192.859592,0.030611,68535.319184,67763.880816,46.5,1,2510132.0
4,2024-05-31 10:30:00,68103.5,68219.5,68083.0,68214.0,1117495,16.40528511,31,10,68149.6,192.859592,0.061042,68535.319184,67763.880816,110.5,1,3627627.0
5,2024-05-31 10:15:00,68135.0,68139.0,68098.0,68103.5,2800305,41.10975304,31,10,68149.6,192.859592,0.02432,68535.319184,67763.880816,-31.5,-1,827322.0
6,2024-05-31 10:00:00,67894.5,68135.0,67893.5,68135.0,1392218,20.47693368,31,10,68149.6,192.859592,0.04894,68535.319184,67763.880816,240.5,1,2219540.0
7,2024-05-31 09:45:00,67940.0,67946.0,67844.0,67894.5,2758784,40.6267141,31,9,68149.6,192.859592,0.02461,68535.319184,67763.880816,-45.5,-1,-539244.0
8,2024-05-31 09:30:00,67877.5,67941.0,67865.0,67940.0,2101264,30.95606874,31,9,68149.6,192.859592,0.032333,68535.319184,67763.880816,62.5,1,1562020.0
9,2024-05-31 09:15:00,68099.0,68109.0,67874.0,67877.5,2389131,35.17834913,31,9,68149.6,192.859592,0.028411,68535.319184,67763.880816,,0,1562020.0


In [5]:
stack_df[8]

open             67877.5
high               67941
low                67865
close              67940
vol              2101264
vol_coin     30.95606874
vwap            0.032333
obv            1562020.0
change              62.5
direction              1
dtype: object

In [6]:
stacked_df_cols = ["open","high","low","close","vol","vol_coin","vwap","obv","change","direction"]
print(len(stacked_df_cols))
print(len(df)-1)
df.loc[0:len(df)-2,stacked_df_cols]

10
9


Unnamed: 0,open,high,low,close,vol,vol_coin,vwap,obv,change,direction
0,68307.0,68390.0,68290.0,68390.0,804949,11.78445278,0.084962,804949.0,83.0,1
1,68374.0,68376.0,68307.0,68307.0,3248163,47.53099732,0.021029,-2443214.0,-67.0,-1
2,68260.5,68374.0,68259.0,68374.0,2723421,39.87751514,0.025106,280207.0,113.5,1
3,68214.0,68264.5,68154.0,68260.5,2229925,32.70486497,0.030611,2510132.0,46.5,1
4,68103.5,68219.5,68083.0,68214.0,1117495,16.40528511,0.061042,3627627.0,110.5,1
5,68135.0,68139.0,68098.0,68103.5,2800305,41.10975304,0.02432,827322.0,-31.5,-1
6,67894.5,68135.0,67893.5,68135.0,1392218,20.47693368,0.04894,2219540.0,240.5,1
7,67940.0,67946.0,67844.0,67894.5,2758784,40.6267141,0.02461,-539244.0,-45.5,-1
8,67877.5,67941.0,67865.0,67940.0,2101264,30.95606874,0.032333,1562020.0,62.5,1


In [7]:
len(prep_df.columns)

97

In [8]:
prep_df.head()

Unnamed: 0,time,day,hour,10MA,10STD,bb_upper_band,bb_lower_band,open_0,high_0,low_0,...,open_8,high_8,low_8,close_8,vol_8,vol_coin_8,vwap_8,obv_8,change_8,direction_8
0,2024-05-31 11:30:00,31,11,68149.6,192.859592,68535.319184,67763.880816,68307,68390,68290,...,67877.5,67941,67865,67940,2101264,30.95606874,0.032333,1562020.0,62.5,1


In essence the function calls would look like this: 

response = fetch_kline()
pred_df = prepare_data(response)

entry,stop_loss,take_profit = make_prediction(prep_df) # add column if trade or not 

write_db(prep_df)

place_order(entry,stop_loss,take_profit)

