In [None]:
import pandas as pd
import pyarrow.dataset as pqds
from pyarrow import fs
import pyarrow as pa
import numpy as np
import math
import os

In [None]:
# Show the kernel's working directory; the relative "target" output folder created further down resolves against it.
os.getcwd()

In [None]:
data_path = "/tmp/output/training"
# Open the parquet data at data_path as a pyarrow dataset.
# pqds.dataset(...) is a factory function; the object it returns is a pqds.Dataset,
# so that class (not the function) is the correct annotation.
dataset: pqds.Dataset = pqds.dataset(
    data_path,
    format="parquet",
)

# Materialise the dataset as an in-memory Arrow table, then convert it to pandas.
dt: pa.Table = dataset.to_table()
train_df: pd.DataFrame = dt.to_pandas()
print(f"total data {len(train_df)}")

In [None]:
# Largest scenario id present in the training set.
train_df["scenario"].max()

In [None]:
data_path = "/tmp/output/eval"
# Open the parquet data at data_path as a pyarrow dataset.
# pqds.dataset(...) is a factory function; the object it returns is a pqds.Dataset,
# so that class (not the function) is the correct annotation.
dataset: pqds.Dataset = pqds.dataset(
    data_path,
    format="parquet",
)

# Materialise the dataset as an in-memory Arrow table, then convert it to pandas.
dt: pa.Table = dataset.to_table()
eval_df: pd.DataFrame = dt.to_pandas()
print(f"total data {len(eval_df)}")

In [None]:
# Largest scenario id present in the evaluation set.
eval_df["scenario"].max()

## Plot a training scenario

In [None]:
import random

In [None]:
# Pick one scenario at random from the ids that actually occur in the training set.
# Drawing an integer from 0..max would assume the ids are contiguous and start at 0;
# an id that is absent from the data would leave pick_df empty.
pick_scenario = random.choice(train_df["scenario"].unique().tolist())

In [None]:
# Keep only the rows that belong to the randomly chosen scenario.
pick_df = train_df.loc[train_df["scenario"] == pick_scenario]

In [None]:
# Display the selected scenario's rows for a quick visual check.
pick_df

In [None]:
# Quick line plot of the close price using pandas' built-in plotting
# (the Plotly candlestick chart is built in the next cell).
pick_df["close"].plot()

In [None]:
import plotly.graph_objects as go

# One candlestick per row of the picked scenario, positioned by the frame's index.
ohlc_trace = go.Candlestick(
    x=pick_df.index,
    open=pick_df["open"],
    high=pick_df["high"],
    low=pick_df["low"],
    close=pick_df["close"],
)
figure = go.Figure(data=[ohlc_trace])


In [None]:
import pathlib
# Make sure the output folder exists (relative to the working directory) before writing.
pathlib.Path("target").mkdir(parents=True, exist_ok=True)
# auto_open=True asks Plotly to open the written HTML file in a browser.
figure.write_html('target/Candles_stick.html', auto_open=True)

## Try generating features

In [None]:
from crypto_feature_preprocess.port.features import (
    create_feature_from_close_price
)
from crypto_feature_preprocess.port.interfaces import (
    Feature_Definition,
    Feature_Enum, 
    Log_Price_Feature_Interface,
    SMA_Cross_Feature_Interface,
    RSI_Feature_Interface
)

In [None]:
LOG_PRICE_LOOKBACK: int = 10
LOOK_BACK: int = 3

# Feature specification. The order matters: later cells read the SMA settings from
# feature_spec_pools[1] and slice the feature matrix in this same order
# (log price, SMA cross, RSI).
feature_spec_pools = [
    Feature_Definition(
        meta={"name": Feature_Enum.LOG_PRICE},
        data=Log_Price_Feature_Interface(dimension=LOG_PRICE_LOOKBACK),
    ),
    Feature_Definition(
        meta={"name": Feature_Enum.SMA_CROSS},
        data=SMA_Cross_Feature_Interface(
            dimension=LOOK_BACK,
            sma_window_1=20,
            sma_window_2=50,
        ),
    ),
    Feature_Definition(
        meta={"name": Feature_Enum.RSI},
        data=RSI_Feature_Interface(dimension=LOOK_BACK, rsi_window=14),
    ),
]

In [None]:
# Build the feature matrix for the picked scenario. feature_breakdown is used by the
# following cells as the column boundaries between the individual feature groups.
features, feature_breakdown = create_feature_from_close_price(
    ohlcv_candles=pick_df,
    feature_pools=feature_spec_pools,
)

## Log Price change

In [None]:
# Histogram (100 bins) of the first column of the log-price feature segment, titled "log_price_change".
log_price_feature_segment = features[:,0:feature_breakdown[0]]
pd.Series(log_price_feature_segment[:,0]).hist(bins=100).set_title("log_price_change")

## SMA feature

In [None]:
from crypto_feature_preprocess.domains.indicators import calculate_simple_moving_average

# Recompute both moving averages directly from the close price, using the same
# window lengths that the SMA-cross feature definition was configured with.
sma_spec = feature_spec_pools[1].data
raw_sma_1 = calculate_simple_moving_average(pick_df["close"], sma_spec.sma_window_1)
raw_sma_2 = calculate_simple_moving_average(pick_df["close"], sma_spec.sma_window_2)

In [None]:
def padding_zeros_left(arr:np.ndarray, dim:int)->np.ndarray:
    """
    Left-pad ``arr`` with zeros until it is ``dim`` elements long.

    Args:
        arr: input array.
        dim: target length.

    Returns:
        ``arr`` itself (the same object) when it already has ``dim`` or more
        elements; otherwise a new array with ``dim - len(arr)`` zeros prepended.
    """
    missing = dim - len(arr)
    # Nothing to add when the input is already long enough (or longer).
    if missing <= 0:
        return arr
    return np.pad(arr, pad_width=(missing, 0))

In [None]:
# Columns between the first and second breakpoints of feature_breakdown: the SMA-cross feature block.
sma_cross_feature_segment = features[:, feature_breakdown[0]:feature_breakdown[1]]

In [None]:
# Work on a copy so the visualisation columns added below do not modify pick_df.
candles = pick_df.copy()

In [None]:
# Attach the raw SMAs and the SMA-cross feature columns to the candles for plotting.
# Anything shorter than the frame is left-padded with zeros so it lines up with the last rows.
frame_len = len(candles)
candles["sma1"] = padding_zeros_left(raw_sma_1, frame_len)
candles["sma2"] = padding_zeros_left(raw_sma_2, frame_len)
for col_idx, column in enumerate(("cross", "cross-1", "cross-2")):
    candles[column] = padding_zeros_left(sma_cross_feature_segment[:, col_idx], frame_len)

In [None]:
import plotly.graph_objects as go

# Close price with both moving averages overlaid.
fig = go.Figure()
fig.add_trace(go.Scatter(x=candles.index, y=candles["close"], name="close", mode="lines"))
fig.add_trace(go.Scatter(x=candles.index, y=candles["sma1"], name="sma_1", mode="lines", line_dash="dot"))
fig.add_trace(go.Scatter(x=candles.index, y=candles["sma2"], name="sma_2", mode="lines", line_dash="dash"))

# Mark every row where a cross flag is set with an "x" on the close price:
# red for "cross", blue for "cross-1", brown for "cross-2".
for cross_column, marker_color in (("cross", "red"), ("cross-1", "blue"), ("cross-2", "brown")):
    # np.where yields *positions*, so both the index and the close price must be
    # selected positionally. candles keeps the index of the filtered training frame,
    # so label-based candles["close"][positions] could raise KeyError or pick the wrong rows.
    # `== True` is kept as an equality test on purpose: it also matches numeric 1 flags.
    cross_indices = np.where(candles[cross_column] == True)[0]
    fig.add_trace(
        go.Scatter(
            x=candles.index[cross_indices],
            y=candles["close"].iloc[cross_indices],
            name=f"sma_{cross_column}",
            mode="markers",
            marker=dict(color=marker_color, symbol="x"),
        )
    )

fig.show()

## RSI

In [None]:
import talib

# Reference RSI computed by TA-Lib on the same close prices. The period is read from the
# RSI feature definition (feature_spec_pools[2]) instead of repeating the literal 14,
# so the reference cannot drift from the feature configuration; the SMA cell reads
# its windows from the spec in the same way.
rsi_talib = talib.RSI(pick_df["close"], timeperiod=feature_spec_pools[2].data.rsi_window)

In [None]:
# Everything after the second breakpoint of feature_breakdown: the RSI feature block.
rsi_feature_segment = features[:, feature_breakdown[1]:]

In [None]:
# Start again from a fresh copy of pick_df (this rebinds candles, dropping the SMA columns added earlier).
candles = pick_df.copy()

In [None]:
# Put the TA-Lib reference and the first RSI feature column side by side, left-padding with zeros
# whichever is shorter than the frame.
# NOTE(review): zero-padded rows count as real values in the correlation computed in the next cell — confirm this is intended.
candles["rsi_talib"] = padding_zeros_left(rsi_talib, len(candles))
candles["rsi_feature"] = padding_zeros_left(rsi_feature_segment[:,0], len(candles))

In [None]:
# Correlation between the TA-Lib RSI and the generated RSI feature (Pearson is the default for Series.corr).
candles['rsi_talib'].corr(candles['rsi_feature'])