### Load Packages

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from datetime import datetime

### Read Data

In [2]:
# Directory holding the raw CSV files. Set the ETH_DATA_DIR environment
# variable to read from another location; the original path stays the default.
cwd = os.path.abspath(os.environ.get('ETH_DATA_DIR', '/Users/jeff/Desktop/ETH/'))

# Keep regular, non-hidden files only: hidden entries (e.g. macOS .DS_Store)
# and sub-directories would make pd.read_csv fail. Sorting keeps the rows in
# filename order, exactly as before.
file_list = sorted(
    name for name in os.listdir(cwd)
    if not name.startswith('.') and os.path.isfile(os.path.join(cwd, name))
)

# Read every file first and concatenate once: calling pd.concat inside the
# loop re-copies all previously loaded rows on every iteration.
frames = [pd.read_csv(os.path.join(cwd, name)) for name in file_list]

# pd.concat raises on an empty list, so fall back to an empty frame, which is
# what the original loop produced when the directory had no files.
df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

### Convert the time variables to type 'datetime'
# The three timestamp columns are parsed column-by-column with pd.to_datetime.
time_cols = ['open_time', 'close_time', 'datatime']
df[time_cols] = df[time_cols].apply(pd.to_datetime)
df.dtypes

symbol                                  object
open_time                       datetime64[ns]
open                                   float64
high                                   float64
low                                    float64
close                                  float64
volume                                 float64
close_time                      datetime64[ns]
quote_asset_volume                     float64
number_of_trades                         int64
taker_buy_base_asset_volume            float64
taker_buy_quote_asset_volume           float64
datatime                        datetime64[ns]
dtype: object

### Visualization

In [3]:
### Candlestick
# Only the first 1440 rows are plotted
# (presumably one day of 1-minute bars - TODO confirm the bar interval).
tmp = df.iloc[:1440]

# Build the OHLC trace on its own, then wrap it in a figure.
candles = go.Candlestick(
    x=tmp['open_time'],
    open=tmp['open'],
    high=tmp['high'],
    low=tmp['low'],
    close=tmp['close'],
)
fig = go.Figure(data=[candles])
fig.show()

### Label Encoding

In [4]:
future_time = 60*4
threshold = 0.01


def _label_future_moves(frame, future_time, threshold):
    """Label each row with the largest up/down move inside its look-ahead window.

    For row ``i`` the window is rows ``i .. i + future_time - 1`` (the row
    itself is included). Relative moves are measured against the row's open.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the columns ``open_time``, ``open``, ``high`` and ``low``.
        Rows are addressed by position, so the index values are irrelevant.
    future_time : int
        Number of rows in the look-ahead window.
    threshold : float
        Relative move above which a window is labelled as an up/down move.

    Returns
    -------
    pandas.DataFrame
        One row per labelled input row, with the columns
        ``up_amount``   - (max high in window - open) / open
        ``down_amount`` - (min low in window - open) / open
        ``up_time``     - ``open_time`` of the first row holding the max high
        ``down_time``   - ``open_time`` of the first row holding the min low
        ``up``          - 1 if ``up_amount > threshold`` else 0
        ``down``        - 1 if ``down_amount < -threshold`` else 0
        ``time``        - ``open_time`` of the labelled row
    """
    # 'time' comes last because that is the column order the previous
    # row-by-row concat ended up with; kept so positional access still works.
    columns = ['up_amount', 'down_amount', 'up_time', 'down_time', 'up', 'down', 'time']

    # NOTE(review): this matches the previous loop bound
    # range(len(frame) - future_time), which leaves out the last complete
    # window (the one starting at len(frame) - future_time) - confirm whether
    # that is intended before changing it.
    n_labels = len(frame) - future_time
    if n_labels <= 0:
        return pd.DataFrame(columns=columns)

    open_time = frame['open_time'].to_numpy()
    open_price = frame['open'].to_numpy(dtype=float)[:n_labels]

    # Row i of each view is the window i .. i + future_time - 1; these are
    # strided views, so no (n x future_time) copy of the data is made.
    sliding = np.lib.stride_tricks.sliding_window_view
    high_windows = sliding(frame['high'].to_numpy(dtype=float), future_time)[:n_labels]
    low_windows = sliding(frame['low'].to_numpy(dtype=float), future_time)[:n_labels]

    up_amount = (high_windows.max(axis=1) - open_price) / open_price
    down_amount = (low_windows.min(axis=1) - open_price) / open_price

    # argmax/argmin give the offset of the first extreme inside each window;
    # adding the window start turns it into a row position in `frame`.
    starts = np.arange(n_labels)
    up_time = open_time[starts + high_windows.argmax(axis=1)]
    down_time = open_time[starts + low_windows.argmin(axis=1)]

    return pd.DataFrame(
        {
            'up_amount': up_amount,
            'down_amount': down_amount,
            'up_time': up_time,
            'down_time': down_time,
            'up': (up_amount > threshold).astype(int),
            'down': (down_amount < -threshold).astype(int),
            'time': open_time[:n_labels],
        },
        columns=columns,
    )


# Built in one vectorised pass: appending one row per iteration with pd.concat
# re-copies the whole result every time, and the old loop also left a global
# named `open` behind that shadowed the builtin for every later cell.
df_y = _label_future_moves(df, future_time, threshold)