In [33]:
import pandas as pd
# Make the project's own modules importable from this notebook by appending the
# project directory to the module search path.
# NOTE(review): this is a machine-specific absolute path, so the notebook will
# not run unchanged on another machine. Presumably it is what lets
# `from tickcomp import TickBar` resolve further down -- confirm, and consider
# deriving the path from the notebook's location instead.
import sys
sys.path.append('/Users/baobach/Algo-Trading-Binance')

In [2]:
# Read the stream dump with pandas' JSON reader, without any renaming or type
# conversion, so the raw record layout can be inspected.
df = pd.read_json('../data/btc_stream.json')

In [3]:
# Display the raw frame; its columns are still the stream's single-letter keys.
df

Unnamed: 0,e,E,s,c,o,h,l,v,q
0,24hrMiniTicker,1707039324547,BTCUSDT,42960,43084.99,43270,42720.07,12584.33753,541221200.0


In [30]:
import json
def json_import(json_file_path):
    """Load a JSON dump of ticker records into a timestamp-indexed OHLCV frame.

    The file must contain a JSON list of dictionaries that carry at least the
    keys ``E`` (event time in epoch milliseconds), ``o``, ``h``, ``l``, ``c``
    and ``v``. Columns are renamed *by key*, not by position, so the result
    does not depend on the key order inside the file and is not broken by
    additional keys in the records.

    Parameters
    ----------
    json_file_path : str or path-like
        Path of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``timestamp`` (datetime) with the numeric columns
        ``open``, ``high``, ``low``, ``close`` and ``volume``, in that order.
        Values that cannot be parsed as numbers become ``NaN``.

    Raises
    ------
    KeyError
        If a required key (``E``, ``o``, ``h``, ``l``, ``c`` or ``v``) is
        missing from the records.
    """
    # Load JSON data from file
    with open(json_file_path, 'r') as json_file:
        data_list = json.load(json_file)

    # Map the stream's single-letter keys to readable names. Renaming through a
    # mapping (instead of assigning `df.columns` positionally) keeps every
    # value attached to the right label regardless of key order.
    readable_names = {
        'E': 'timestamp',
        'e': 'event',
        's': 'symbol',
        'c': 'close',
        'o': 'open',
        'h': 'high',
        'l': 'low',
        'v': 'volume',
        'q': 'quoteVolume',
    }
    df = pd.DataFrame(data_list).rename(columns=readable_names)

    # Event time arrives as epoch milliseconds; use it as the index
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df = df.set_index('timestamp')

    # Keep only the OHLCV columns, in the conventional order, as numbers
    ohlcv_columns = ['open', 'high', 'low', 'close', 'volume']
    return df[ohlcv_columns].apply(pd.to_numeric, errors='coerce')

In [43]:
# Load the same stream file through json_import to get a timestamp-indexed
# open/high/low/close/volume frame, then display it.
# NOTE: this rebinds `df`, replacing whatever frame the name held before.
df = json_import('../data/btc_stream.json')
df

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-02-04 09:35:24.547,43084.99,43270.0,42720.07,42960.0,12584.33753


In [50]:
# Load the 1-minute BTCUSDT bars and cast every column to float in one step,
# then print the frame summary (index range, dtypes, memory usage).
df = pd.read_parquet('../data/BTCUSDT_1Min.parq').astype('float')
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2621433 entries, 2018-01-01 00:00:00 to 2023-01-01 00:00:00
Data columns (total 5 columns):
 #   Column  Dtype  
---  ------  -----  
 0   open    float64
 1   high    float64
 2   low     float64
 3   close   float64
 4   volume  float64
dtypes: float64(5)
memory usage: 120.0 MB


In [73]:
# Technical analysis
import pandas_ta as ta 
from tickcomp import TickBar
# Preprocessing
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.compose import ColumnTransformer

def data_wrangling(df, dollar_threshold=100_000_000):
    """Turn OHLCV bars into a scaled technical-indicator feature frame.

    Steps, in order:

    1. Add ``v`` (copy of ``volume``) and ``dv`` (``volume * close``) columns
       on a copy of the input, so the caller's frame is left untouched.
    2. Resample with ``TickBar(...).dollar_tick(dollar_threshold)``.
       NOTE(review): ``TickBar`` is a project helper not visible here;
       presumably it aggregates rows into dollar bars of roughly
       ``dollar_threshold`` traded value -- confirm.
    3. Run ``df.ta.strategy()`` (pandas_ta) to append indicator columns.
    4. Keep a fixed list of indicator columns.
    5. Scale them with a ``ColumnTransformer``: ``MinMaxScaler`` on four
       columns, ``RobustScaler`` on a second list, everything else passed
       through unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Input bars; must contain at least ``volume`` and ``close`` plus
        whatever columns ``TickBar`` and pandas_ta require.
    dollar_threshold : int or float, default 100_000_000
        Threshold forwarded to ``TickBar.dollar_tick``.

    Returns
    -------
    pandas.DataFrame
        Transformed features indexed like the resampled bars. Column names
        come from ``ColumnTransformer.get_feature_names_out()`` and are
        therefore prefixed with ``MinMax__``, ``Robust__`` or ``remainder__``.

    Notes
    -----
    The scalers are fitted on the full frame passed in.
    NOTE(review): if the result feeds a train/test split, fitting on all rows
    leaks information from the test period -- confirm this is acceptable.
    """
    # assign() returns a new frame, so the volume / dollar-value helper columns
    # are not written into the caller's DataFrame.
    bars_input = df.assign(
        v=df['volume'],                   # Volume column
        dv=df['volume'] * df['close'],    # Dollar value column
    )

    # Generate tick bar
    bars = TickBar(bars_input).dollar_tick(dollar_threshold)

    # Create 285 technical indicators (appended to `bars` by pandas_ta)
    bars.ta.strategy()

    # Select the important features
    selected_features = [
        'AD', 'AMATe_LR_8_21_2', 'OBV', 'AOBV_LR_2', 'BOP', 'CDL_2CROWS',
        'CDL_3BLACKCROWS', 'CDL_3INSIDE', 'CDL_3LINESTRIKE', 'CDL_3OUTSIDE',
        'CDL_3STARSINSOUTH', 'CDL_3WHITESOLDIERS', 'CDL_ABANDONEDBABY',
        'CDL_ADVANCEBLOCK', 'CDL_BELTHOLD', 'CDL_BREAKAWAY',
        'CDL_CLOSINGMARUBOZU', 'CDL_CONCEALBABYSWALL', 'CDL_COUNTERATTACK',
        'CDL_DARKCLOUDCOVER', 'CDL_DOJI_10_0.1', 'CDL_DOJISTAR',
        'CDL_DRAGONFLYDOJI', 'CDL_ENGULFING', 'CDL_EVENINGDOJISTAR',
        'CDL_EVENINGSTAR', 'CDL_GAPSIDESIDEWHITE', 'CDL_GRAVESTONEDOJI',
        'CDL_HAMMER', 'CDL_HANGINGMAN', 'CDL_HARAMI', 'CDL_HARAMICROSS',
        'CDL_HIGHWAVE', 'CDL_HIKKAKE', 'CDL_HIKKAKEMOD', 'CDL_HOMINGPIGEON',
        'CDL_IDENTICAL3CROWS', 'CDL_INNECK', 'CDL_INSIDE', 'CDL_INVERTEDHAMMER',
        'CDL_KICKING', 'CDL_KICKINGBYLENGTH', 'CDL_LADDERBOTTOM',
        'CDL_LONGLINE', 'CDL_MARUBOZU', 'CDL_MATCHINGLOW', 'CDL_MATHOLD',
        'CDL_MORNINGDOJISTAR', 'CDL_MORNINGSTAR', 'CDL_ONNECK', 'CDL_PIERCING',
        'CDL_RICKSHAWMAN', 'CDL_RISEFALL3METHODS', 'CDL_SEPARATINGLINES',
        'CDL_SHOOTINGSTAR', 'CDL_SHORTLINE', 'CDL_SPINNINGTOP',
        'CDL_STALLEDPATTERN', 'CDL_STICKSANDWICH', 'CDL_TAKURI',
        'CDL_TASUKIGAP', 'CDL_THRUSTING', 'CDL_TRISTAR', 'CDL_UNIQUE3RIVER',
        'CDL_UPSIDEGAP2CROWS', 'CDL_XSIDEGAP3METHODS', 'LDECAY_5', 'DEC_1',
        'PSARr_0.02_0.2', 'PVOL', 'PVR', 'SQZ_ON', 'SQZ_OFF', 'SQZ_NO',
        'SQZPRO_ON_WIDE', 'SQZPRO_ON_NARROW', 'STC_10_12_26_0.5',
        'STCstoch_10_12_26_0.5', 'SUPERTd_7_3.0', 'THERMOl_20_2_0.5',
        'THERMOs_20_2_0.5', 'TTM_TRND_6',
    ]
    features = bars[selected_features]

    # Columns rescaled to the [0, 1] range
    minmax_columns = ['AD', 'OBV', 'PVOL', 'LDECAY_5']

    # Columns rescaled with RobustScaler.
    # NOTE(review): 'PVOL' is listed here as well as under MinMax, so it is
    # emitted twice (MinMax__PVOL and Robust__PVOL). Kept as-is to preserve the
    # output columns -- confirm whether the duplicate is intended.
    robust_columns = [
        'CDL_2CROWS', 'CDL_3INSIDE', 'CDL_3OUTSIDE', 'CDL_ABANDONEDBABY', 'CDL_BELTHOLD',
        'CDL_BREAKAWAY', 'CDL_CLOSINGMARUBOZU', 'CDL_COUNTERATTACK', 'CDL_DARKCLOUDCOVER',
        'CDL_DOJI_10_0.1', 'CDL_DOJISTAR', 'CDL_DRAGONFLYDOJI', 'CDL_ENGULFING',
        'CDL_EVENINGDOJISTAR', 'CDL_EVENINGSTAR', 'CDL_GAPSIDESIDEWHITE', 'CDL_GRAVESTONEDOJI',
        'CDL_HAMMER', 'CDL_HANGINGMAN', 'CDL_HARAMI', 'CDL_HARAMICROSS', 'CDL_HIGHWAVE',
        'CDL_HIKKAKE', 'CDL_HIKKAKEMOD', 'CDL_HOMINGPIGEON', 'CDL_INNECK', 'CDL_INSIDE',
        'CDL_INVERTEDHAMMER', 'CDL_KICKING', 'CDL_KICKINGBYLENGTH', 'CDL_LADDERBOTTOM',
        'CDL_LONGLINE', 'CDL_MARUBOZU', 'CDL_MATCHINGLOW', 'CDL_MORNINGDOJISTAR',
        'CDL_MORNINGSTAR', 'CDL_ONNECK', 'CDL_PIERCING', 'CDL_RICKSHAWMAN',
        'CDL_SEPARATINGLINES', 'CDL_SHOOTINGSTAR', 'CDL_SHORTLINE', 'CDL_SPINNINGTOP',
        'CDL_STALLEDPATTERN', 'CDL_STICKSANDWICH', 'CDL_TAKURI', 'CDL_TASUKIGAP',
        'CDL_THRUSTING', 'CDL_TRISTAR', 'CDL_UNIQUE3RIVER', 'CDL_UPSIDEGAP2CROWS',
        'CDL_XSIDEGAP3METHODS', 'PSARr_0.02_0.2', 'PVOL', 'SQZ_ON', 'SQZ_OFF', 'SQZ_NO',
        'SQZPRO_ON_WIDE', 'SQZPRO_ON_NARROW', 'STC_10_12_26_0.5', 'STCstoch_10_12_26_0.5',
        'THERMOl_20_2_0.5',
    ]

    # Instantiate transformer; columns in neither list pass through unscaled
    preprocessing = ColumnTransformer([
        ('MinMax', MinMaxScaler(), minmax_columns),
        ('Robust', RobustScaler(), robust_columns),
    ], remainder='passthrough')

    # Transform the data and restore labels (fit_transform returns a bare array)
    transformed = preprocessing.fit_transform(features)
    return pd.DataFrame(
        transformed,
        columns=preprocessing.get_feature_names_out(),
        index=features.index,
    )

In [74]:
# Build the scaled feature frame from the bars currently bound to `df`; the
# returned frame is displayed as this cell's output.
data_wrangling(df)

  return list(map(*args))
131it [00:07, 17.53it/s]


Unnamed: 0_level_0,MinMax__AD,MinMax__OBV,MinMax__PVOL,MinMax__LDECAY_5,Robust__CDL_2CROWS,Robust__CDL_3INSIDE,Robust__CDL_3OUTSIDE,Robust__CDL_ABANDONEDBABY,Robust__CDL_BELTHOLD,Robust__CDL_BREAKAWAY,...,remainder__CDL_ADVANCEBLOCK,remainder__CDL_CONCEALBABYSWALL,remainder__CDL_IDENTICAL3CROWS,remainder__CDL_MATHOLD,remainder__CDL_RISEFALL3METHODS,remainder__DEC_1,remainder__PVR,remainder__SUPERTd_7_3.0,remainder__THERMOs_20_2_0.5,remainder__TTM_TRND_6
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01 20:36:00,0.381301,0.998830,0.000398,0.155883,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,-1.0
2018-01-02 09:25:00,0.381310,0.998938,0.001980,0.156159,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,-1.0
2018-01-02 18:25:00,0.381640,0.999051,0.002205,0.168377,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,-1.0
2018-01-03 00:36:00,0.381990,0.999171,0.002465,0.177843,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,-1.0
2018-01-03 08:32:00,0.381933,0.999256,0.001723,0.180542,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-31 17:51:00,0.592709,0.089163,0.014044,0.204564,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,4.0,1.0,1.0,-1.0
2022-12-31 19:36:00,0.594492,0.088378,0.018803,0.204382,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,0.0,-1.0
2022-12-31 21:29:00,0.594117,0.088178,0.004683,0.204376,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,4.0,-1.0,1.0,-1.0
2022-12-31 22:41:00,0.592512,0.087108,0.025576,0.204047,0.0,0.0,0.0,0.0,-100.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,3.0,-1.0,1.0,-1.0
