In [1]:
import numpy as np
from numpy import linalg as LA
import pandas as pd
from datetime import datetime, timedelta, date
import concurrent.futures


from gtda.time_series import TakensEmbedding
from gtda.time_series import SlidingWindow
from gtda.time_series import SingleTakensEmbedding

from gtda.time_series import PearsonDissimilarity
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import Amplitude
from gtda.diagrams import PersistenceLandscape


from gtda.pipeline import make_pipeline


from sklearn import set_config
set_config(display='diagram')  # For HTML representations of pipelines

from sklearn.cluster import KMeans


import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import json
import time
from tqdm import tqdm

from binance import Client, ThreadedWebsocketManager, ThreadedDepthCacheManager
import config as c 


from binance_data import get_klines, get_klines_df

from ta.utils import dropna
from ta.volatility import BollingerBands

# Build the Binance client from credentials kept in the local `config` module
# (imported as `c`). The key is read from c.apis[1][0], the secret from c.apis[1][1].
# NOTE(review): the previous comment here described the layout as
# 'apis = [[# YOUR API KEY],[#YOUR API SECRET]]', which would put the key at
# apis[0][0] and the secret at apis[1][0] -- that does not match the indices
# read below. Confirm the real shape of `apis` in config.py.
key = c.apis[1][0]
secret = c.apis[1][1]
client = Client(key, secret)

In [2]:
def get_date(timestamp):
    """Format an epoch timestamp in milliseconds as 'month_day_year hour:minute'.

    The conversion uses ``datetime.fromtimestamp`` and therefore the machine's
    local time zone. Fields are not zero-padded.
    """
    seconds = timestamp / 1000.0
    moment = datetime.fromtimestamp(seconds)
    fields = (moment.month, moment.day, moment.year, moment.hour, moment.minute)
    return "%s_%s_%s %s:%s" % fields

def get_timestamp(date):
    """Parse a 'month_day_year hour:minute' string into epoch milliseconds.

    The string is parsed as a naive datetime, so ``.timestamp()`` interprets
    it in the machine's local time zone. The result is truncated to an int.
    """
    parsed = datetime.strptime(date, '%m_%d_%Y %H:%M')
    epoch_seconds = parsed.timestamp()
    return int(epoch_seconds * 1000)

In [20]:
#   [
#   {
#     "a": 26129,         // Aggregate tradeId
#     "p": "0.01633102",  // Price
#     "q": "4.70443515",  // Quantity
#     "f": 27781,         // First tradeId
#     "l": 27781,         // Last tradeId
#     "T": 1498793709153, // Timestamp
#     "m": true,          // Was the buyer the maker?
#     "M": true           // Was the trade the best price match?
#   }
# ]

def generate_volumebars(trades, frequency=200):
    """Aggregate a stream of trades into volume bars.

    A bar is closed on the first trade at which the quantity accumulated since
    the previous bar reaches ``frequency``.

    Parameters
    ----------
    trades : array-like of shape (n_trades, 3)
        Columns are read positionally as time, price, quantity.
        An empty input (e.g. ``np.array([])``) is accepted.
    frequency : float, default 200
        Cumulative quantity threshold that closes a bar.

    Returns
    -------
    numpy.ndarray of shape (n_bars, 6)
        Columns: time of the closing trade, open, high, low, close, volume.
        Trailing trades that never reach the threshold are not emitted.
    """
    trades = np.asarray(trades, dtype=float)
    if trades.size == 0:
        # np.array([]) is 1-D, so the column slicing below would raise IndexError.
        return np.zeros(shape=(0, 6))

    times = trades[:, 0]
    prices = trades[:, 1]
    volumes = trades[:, 2]

    # Upper bound on the number of bars is one per trade; trimmed on return.
    ans = np.zeros(shape=(len(prices), 6))
    candle_counter = 0
    vol = 0
    lasti = 0  # index of the first trade of the bar currently being built

    for i in range(len(prices)):
        vol += volumes[i]
        if vol >= frequency:
            window = slice(lasti, i + 1)
            ans[candle_counter][0] = times[i]                  # time
            ans[candle_counter][1] = prices[lasti]             # open
            ans[candle_counter][2] = np.max(prices[window])    # high
            ans[candle_counter][3] = np.min(prices[window])    # low
            ans[candle_counter][4] = prices[i]                 # close
            ans[candle_counter][5] = np.sum(volumes[window])   # volume
            candle_counter += 1
            lasti = i + 1
            vol = 0
    return ans[:candle_counter]

def gen_bars(bars_arr):
    """Turn a volume-bar array into a time-indexed OHLCV DataFrame.

    Parameters
    ----------
    bars_arr : array-like of shape (n_bars, 6)
        Column 0 is a timestamp in epoch milliseconds; columns 1-5 become
        "Open", "High", "Low", "Close", "Volume".

    Returns
    -------
    pandas.DataFrame
        One row per bar, indexed by the (adjusted) timestamp and sorted by it.
        Empty input yields an empty frame with the same columns.

    Notes
    -----
    The input is copied; the caller's array is not modified.
    A bar whose timestamp equals the previous bar's is moved to the midpoint
    between its neighbours. The pass is sequential, so each comparison sees
    the adjustments made to earlier rows.
    """
    columns = ["Open", "High", "Low", "Close", "Volume"]

    # Float copy: keeps the timestamp adjustments out of the caller's array.
    arr = np.array(bars_arr, dtype=float)
    n_bars = len(arr)
    if n_bars == 0:
        return pd.DataFrame(
            np.zeros((0, len(columns))), columns=columns, index=pd.DatetimeIndex([])
        )

    for i in range(1, n_bars - 1):
        if arr[i - 1][0] == arr[i][0]:
            arr[i][0] = (arr[i - 1][0] + arr[i + 1][0]) / 2

    # The last row has no right-hand neighbour. Requiring two rows keeps a
    # single bar from being compared with itself through index -1.
    # NOTE(review): this adds only the fractional part of the timestamp, which
    # is zero for whole-millisecond stamps, so a duplicated last timestamp is
    # left unchanged in that case -- confirm the intended nudge.
    if n_bars >= 2 and arr[-1][0] == arr[-2][0]:
        arr[-1][0] += arr[-1][0] - int(arr[-1][0])

    return pd.DataFrame(
        arr[:, 1:], columns=columns, index=pd.to_datetime(arr[:, 0], unit='ms')
    ).sort_index()

In [17]:
# Timestamp grid used to locate the order-flow files: 60 epoch-millisecond
# stamps starting at 1577854800000 (+100000 ms), spaced 3600000*60 ms
# (= 60 hours) apart.
# NOTE(review): if one file per calendar day was intended, the step should be
# 24 hours rather than 60 -- confirm against the contents of orderflow_data/.
dates = [1577854800000 + i*3600000*60 + 100000 for i in range(60)]

# Only the first three grid entries are loaded. The loop variable is not
# called `date`, so the `date` class imported from `datetime` stays usable.
orderflow = []
for day_ts in dates[:3]:
    # File names are "con_<month>_<day>_<year>", i.e. the part of get_date()'s
    # output before the space (get_date formats in local time).
    day_tag = get_date(day_ts).split(" ", 1)[0]
    with open("orderflow_data/con_" + day_tag, "r") as f:
        orderflow.extend(json.load(f))

In [21]:
# Keep only (timestamp "T", price "p", quantity "q") from each raw trade
# record, converted to float, as an (n_trades, 3) array.
trades = np.array(
    [[float(rec["T"]), float(rec["p"]), float(rec["q"])] for rec in orderflow]
)

# Trades -> volume bars -> time-indexed OHLCV frame.
# `bars` and `data_df` are two names for the same DataFrame.
bars_arr = generate_volumebars(trades)
bars = data_df = gen_bars(bars_arr)



In [23]:
# Input for the topological pipeline: one row per volume bar
# (Open, High, Low, Close, Volume) plus the bar timestamps.
x = data_df.to_numpy()
dates = data_df.index

# Cut the series into sliding windows of 50 bars; `y_sw` is the timestamp
# array resampled alongside the windows.
SW = SlidingWindow(size=50)
x_sw, y_sw = SW.fit_transform_resample(x, dates)

# Persistence diagrams (homology dimensions 0 and 1) for each window.
VR = VietorisRipsPersistence(homology_dimensions=[0, 1], n_jobs=6)
x_vr = VR.fit_transform(X=x_sw)

# Landscape amplitude of each diagram with p = 1.
# NOTE(review): downstream code indexes norm[:, 1], so the result is presumably
# one column per homology dimension -- confirm for the installed gtda version.
Ampl = Amplitude(metric="landscape", metric_params={"p": 1})
norm = Ampl.fit_transform(X=x_vr, y=y_sw)

In [25]:
# Put the volume-bar close price and the second amplitude column side by
# side on the window timestamps. The first window is dropped from both `norm`
# and `y_sw`; the close-price Series is aligned to that index by label.
calced_df = pd.DataFrame(
    {
        "btc_price_volumebars": data_df["Close"],
        "norm": norm[:, 1][1:],
    },
    index=y_sw[1:],
)

# One stacked subplot per column, sharing the time axis.
n_rows = len(calced_df.keys())
fig = make_subplots(rows=n_rows, cols=1, shared_xaxes=True, vertical_spacing=0.01)
fig.update_layout(height=750, hovermode="x")

for i, col in enumerate(calced_df.keys(), start=1):
    series = calced_df[col]
    y_values = series.values
    if col == "norm":
        # Scale the amplitude by its maximum before plotting.
        normed_norm = y_values / max(y_values)
        y_values = normed_norm
    fig.add_trace(go.Scatter(x=series.index, y=y_values, name=col), row=i, col=1)

fig.show()

In [26]:
# Same two series on a single panel: price on the primary y-axis, the
# max-scaled amplitude on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.update_layout(height=500, hovermode="x")

for col in calced_df.keys():
    series = calced_df[col]
    if col == "norm":
        normed_norm = series.values / max(series.values)
        trace = go.Scatter(x=series.index, y=normed_norm, name=col)
        fig.add_trace(trace, secondary_y=True)
    else:
        trace = go.Scatter(x=series.index, y=series.values, name=col)
        fig.add_trace(trace)

fig.show()

In [86]:
# Field names for one kline row as returned by client.get_historical_klines,
# in positional order.
columns = ["Open Time", "Open", "High", "Low", "Close", "Volume",
               "Close Time", "Quote Asset Volume", "Number of Trades",
               "Taker Buy Base Volume", "Taker Buy Quote Asset Volume",
               "Ignore"]

# Fetch hourly BTCUSDT candles covering the span of the volume bars.
start, end = data_df.index[0], data_df.index[-1]

klines = client.get_historical_klines(
    "BTCUSDT", "1h", start.strftime("%d %b, %Y"), end.strftime("%d %b, %Y")
)
klines = [[float(value) for value in line] for line in klines]

# Index every candle by its own "Open Time" instead of an independently built
# pd.date_range: a separate range must match the number of returned rows
# exactly (otherwise the DataFrame constructor raises) and mislabels every row
# after a missing candle.
# Assumes "Open Time" is in epoch milliseconds -- confirm against the Binance
# kline response format.
k_df = pd.DataFrame(klines, columns=columns)
k_df.index = pd.to_datetime(k_df["Open Time"].to_numpy(), unit="ms")
df = k_df
# Bollinger Bands on the hourly close (window=20, window_dev=2).
indicator_bb = BollingerBands(close=df["Close"], window=20, window_dev=2)

# Columns added to `df`, in order: moving average, upper band, lower band,
# then the high-band and low-band indicator columns.
bb_features = {
    'bb_bbm': indicator_bb.bollinger_mavg,
    'bb_bbh': indicator_bb.bollinger_hband,
    'bb_bbl': indicator_bb.bollinger_lband,
    'bb_bbhi': indicator_bb.bollinger_hband_indicator,
    'bb_bbli': indicator_bb.bollinger_lband_indicator,
}
for feature_name, compute_feature in bb_features.items():
    df[feature_name] = compute_feature()

# Candlestick chart with the three band lines drawn on top.
candles = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
)
fig = go.Figure(data=[candles])
for band in ('bb_bbm', 'bb_bbh', 'bb_bbl'):
    fig.add_trace(go.Scatter(x=df.index, y=df[band]))

fig.update_layout(xaxis_rangeslider_visible=False)
fig.update_layout(width=1300, hovermode="x")

fig.show()