## create data 

In [1]:
import pandas as pd

import os
# A non-empty KAGGLE_KERNEL_RUN_TYPE environment variable is taken to mean the
# notebook runs on Kaggle; otherwise the local copy of the data set is used.
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')

csv_path = (
    "/kaggle/input/btcusdt-2023-6-9/btcusdt-2023-6_9.csv"
    if iskaggle
    else "../lesson5-random-forests/btc-data/btcusdt-2023-6_9.csv"
)

# The first CSV column is read as the index and then thrown away in favour of
# a fresh 0..n-1 index, so only the candle columns remain.
df = pd.read_csv(csv_path, index_col=0).reset_index(drop=True)

print(df.shape)
df.head(3)

(11716, 6)


Unnamed: 0,time,open,high,low,close,vol
0,2023.06.01 00:00,27103.1,27108.1,27080.6,27096.9,386.675
1,2023.06.01 00:15,27096.9,27096.9,27036.7,27047.0,408.68
2,2023.06.01 00:30,27047.0,27077.4,27041.0,27054.9,275.08


In [2]:
# Show the column labels and their order; the log-transform cell below selects
# the price columns by position (df.columns[1:5]).
df.columns

Index(['time', 'open', 'high', 'low', 'close', 'vol'], dtype='object')

#### add log prices and normalized volume

In [3]:
import numpy as np

# Columns at positions 1-4 are the price columns (open, high, low, close);
# each gets a natural-log companion column named "log_<column>".
price_columns = df.columns[1:5]
for price_col in price_columns:
    df[f"log_{price_col}"] = np.log(df[price_col])

# Scale the volume by its maximum so the largest value becomes 1.
df["norm_vol"] = df["vol"] / df["vol"].max()

print(df.shape)
df.head(3)

(11716, 11)


Unnamed: 0,time,open,high,low,close,vol,log_open,log_high,log_low,log_close,norm_vol
0,2023.06.01 00:00,27103.1,27108.1,27080.6,27096.9,386.675,10.207403,10.207588,10.206573,10.207175,0.006003
1,2023.06.01 00:15,27096.9,27096.9,27036.7,27047.0,408.68,10.207175,10.207175,10.20495,10.205331,0.006344
2,2023.06.01 00:30,27047.0,27077.4,27041.0,27054.9,275.08,10.205331,10.206455,10.20511,10.205623,0.00427


In [4]:
# Build three lagged copies of the candle table. After shift(k), row i holds
# the candle from row i-k, and every column gets the suffix "_s<k>". Placed
# side by side with df, one row then carries the three preceding candles as
# inputs, while the unshifted candle in df is the target.
# The first k rows of each lagged frame are NaN because no earlier candle exists.

df_s1 = df.shift(1).add_suffix("_s1")
df_s2 = df.shift(2).add_suffix("_s2")
df_s3 = df.shift(3).add_suffix("_s3")

print(df_s3.shape)
df_s3.head(3)

(11716, 11)


Unnamed: 0,time_s3,open_s3,high_s3,low_s3,close_s3,vol_s3,log_open_s3,log_high_s3,log_low_s3,log_close_s3,norm_vol_s3
0,,,,,,,,,,,
1,,,,,,,,,,,
2,,,,,,,,,,,


In [5]:
# Place the lagged frames and the current candles side by side, oldest lag
# first, so every row lists candle t-3, t-2, t-1 and finally candle t.
frames_oldest_first = [df_s3, df_s2, df_s1, df]
df_merge = pd.concat(frames_oldest_first, axis="columns")

print(df_merge.shape)
df_merge.head()

(11716, 44)


Unnamed: 0,time_s3,open_s3,high_s3,low_s3,close_s3,vol_s3,log_open_s3,log_high_s3,log_low_s3,log_close_s3,...,open,high,low,close,vol,log_open,log_high,log_low,log_close,norm_vol
0,,,,,,,,,,,...,27103.1,27108.1,27080.6,27096.9,386.675,10.207403,10.207588,10.206573,10.207175,0.006003
1,,,,,,,,,,,...,27096.9,27096.9,27036.7,27047.0,408.68,10.207175,10.207175,10.20495,10.205331,0.006344
2,,,,,,,,,,,...,27047.0,27077.4,27041.0,27054.9,275.08,10.205331,10.206455,10.20511,10.205623,0.00427
3,2023.06.01 00:00,27103.1,27108.1,27080.6,27096.9,386.675,10.207403,10.207588,10.206573,10.207175,...,27054.9,27084.0,27054.8,27084.0,218.143,10.205623,10.206698,10.20562,10.206698,0.003386
4,2023.06.01 00:15,27096.9,27096.9,27036.7,27047.0,408.68,10.207175,10.207175,10.20495,10.205331,...,27084.0,27113.9,27073.5,27100.0,329.412,10.206698,10.207802,10.206311,10.207289,0.005114


In [6]:
# List every column of the merged frame (three lagged copies plus the current
# candle) as a reference for choosing the training columns.
df_merge.columns

Index(['time_s3', 'open_s3', 'high_s3', 'low_s3', 'close_s3', 'vol_s3',
       'log_open_s3', 'log_high_s3', 'log_low_s3', 'log_close_s3',
       'norm_vol_s3', 'time_s2', 'open_s2', 'high_s2', 'low_s2', 'close_s2',
       'vol_s2', 'log_open_s2', 'log_high_s2', 'log_low_s2', 'log_close_s2',
       'norm_vol_s2', 'time_s1', 'open_s1', 'high_s1', 'low_s1', 'close_s1',
       'vol_s1', 'log_open_s1', 'log_high_s1', 'log_low_s1', 'log_close_s1',
       'norm_vol_s1', 'time', 'open', 'high', 'low', 'close', 'vol',
       'log_open', 'log_high', 'log_low', 'log_close', 'norm_vol'],
      dtype='object')

In [7]:
# Save only rows without any NaN (this removes the leading rows whose lagged
# columns are empty). The row index is written as the first CSV column.
rows_without_nan = df_merge.dropna()
rows_without_nan.to_csv("nnbasic-btc-data.csv")

In [8]:
# Training table: the raw open/high/low/close/vol values of the three lagged
# candles followed by the current candle. The time columns are not used, and
# the log-price and normalized-volume columns are left out as well.
candle_fields = ("open", "high", "low", "close", "vol")
lag_suffixes = ("_s3", "_s2", "_s1", "")  # oldest candle first, current candle last
train_columns = [field + suffix for suffix in lag_suffixes for field in candle_fields]

# filter(items=...) keeps the listed columns in this order; dropna() removes
# the rows that still contain NaN from the shifting.
df_train = df_merge.filter(items=train_columns).dropna()
print(df_train.shape)
df_train.head(3)

(11713, 20)


Unnamed: 0,open_s3,high_s3,low_s3,close_s3,vol_s3,open_s2,high_s2,low_s2,close_s2,vol_s2,open_s1,high_s1,low_s1,close_s1,vol_s1,open,high,low,close,vol
3,27103.1,27108.1,27080.6,27096.9,386.675,27096.9,27096.9,27036.7,27047.0,408.68,27047.0,27077.4,27041.0,27054.9,275.08,27054.9,27084.0,27054.8,27084.0,218.143
4,27096.9,27096.9,27036.7,27047.0,408.68,27047.0,27077.4,27041.0,27054.9,275.08,27054.9,27084.0,27054.8,27084.0,218.143,27084.0,27113.9,27073.5,27100.0,329.412
5,27047.0,27077.4,27041.0,27054.9,275.08,27054.9,27084.0,27054.8,27084.0,218.143,27084.0,27113.9,27073.5,27100.0,329.412,27100.0,27159.0,27100.0,27142.4,979.655
