In [22]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import time, datetime
import plotly.io as pio
from datetime import datetime
# Plotly renderer selection: the "notebook" renderer is kept below as a
# disabled alternative; the active setting makes "browser" the default
# renderer used when a figure is shown.
# pio.renderers.default = "notebook"
pio.renderers.default = "browser"


In [6]:
# pio.renderers

Renderers configuration
-----------------------
    Default renderer: 'vscode'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery', 'sphinx_gallery_png']

In [2]:
# Read the EUR/USD bars from CSV and convert the `ts` column to datetimes
# in the same step, so `data` is ready for time-based filtering/resampling.
filePath = './data/EURUSD_1.csv'
data = pd.read_csv(filePath).assign(ts=lambda frame: pd.to_datetime(frame['ts']))
print(f'data.shape = {data.shape}')

# Preview the first rows of the loaded frame.
data.head()

data.shape = (121674, 6)


Unnamed: 0,open,high,low,close,vol,ts
0,1.05135,1.05192,1.05134,1.05187,65,2017-01-02 09:00:00
1,1.05185,1.05198,1.05153,1.05168,58,2017-01-02 09:01:00
2,1.05175,1.05201,1.05149,1.05154,33,2017-01-02 09:02:00
3,1.05177,1.05202,1.05177,1.052,20,2017-01-02 09:03:00
4,1.05202,1.05202,1.05198,1.05202,7,2017-01-02 09:04:00


In [8]:
# Work on a copy so the frame loaded into `data` is left untouched.
df = data.copy()

# A single candlestick trace over the full series: `ts` on the x axis and
# the open/high/low/close columns as the candle prices.
ohlc_columns = {name: df[name] for name in ('open', 'high', 'low', 'close')}
fig = go.Figure(data=[go.Candlestick(x=df['ts'], **ohlc_columns)])

fig.show()

In [19]:
# Peek at the first two rows of the working frame.
df.head(n=2)

Unnamed: 0_level_0,open,high,low,close,vol
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-02 09:00:00,1.05135,1.05192,1.05134,1.05187,65
2017-01-02 09:01:00,1.05185,1.05198,1.05153,1.05168,58


In [21]:
def vol_count(value):
    """Return the midpoint between the median and the mean of ``value``.

    ``value`` must expose ``.median()`` and ``.mean()`` (e.g. a pandas
    Series); the two statistics are averaged with equal weight.
    """
    middle = value.median()
    average = value.mean()
    return (middle + average) / 2


# Per-column aggregation applied when rows are grouped into resample buckets:
# median of opens, mean of closes, extreme high/low, and the custom
# median/mean blend from `vol_count` for volume.
conversion = {
    "open": "median",
    "close": "mean",
    "vol": vol_count,
    "high": "max",
    "low": "min",
}

# Downsample to 8-hour buckets. The ts-indexed frame is only a temporary
# inside the chain, so `df` is never rebound: an interrupted or out-of-order
# run can no longer leave `df` indexed by `ts` for the other cells.
# Lowercase "8h" is used because the uppercase "H" alias is deprecated in
# recent pandas releases.
downsampled = df.set_index("ts").resample("8h").agg(conversion).reset_index()

# Spot checks: (bucket start, column, decimals to round to).
spot_checks = [
    ("2017-01-02 16:00:00", "open", 5),
    ("2017-01-02 16:00:00", "close", 5),
    ("2017-01-04 00:00:00", "high", 5),
    ("2017-01-04 00:00:00", "low", 5),
    ("2017-01-05 08:00:00", "vol", 1),
]
for bucket_start, column, decimals in spot_checks:
    print(downsampled.loc[downsampled["ts"] == bucket_start, column].round(decimals))


1    1.04649
Name: open, dtype: float64
1    1.04637
Name: close, dtype: float64
5    1.04236
Name: high, dtype: float64
5    1.03898
Name: low, dtype: float64
9    481.3
Name: vol, dtype: float64


In [28]:
# Keep only the rows up to and including 2017-01-03 00:00:00.
df_filtered = df[df.ts <= datetime(2017, 1, 3, 0, 0, 0)]

# Upsample to a 10-second grid with the same per-column rules as the
# downsampling step. Buckets that contain no source row come out as missing
# values, which the following cells fill by interpolation.
# The ts-indexed frame exists only inside the chain, so `df_filtered` is not
# mutated back and forth; lowercase "10s" replaces the deprecated "S" alias.
upsampled = df_filtered.set_index("ts").resample("10s").agg(conversion).reset_index()


In [30]:
# Number of 10-second buckets whose `open` is still missing after upsampling.
upsampled["open"].isna().sum()

4508

In [32]:
# Fill the missing `open` values by linear interpolation between neighbours.
# The result is assigned back to the column explicitly: calling
# `interpolate(..., inplace=True)` on `upsampled.open` mutates an intermediate
# Series and, with pandas Copy-on-Write, leaves `upsampled` unchanged.
upsampled["open"] = upsampled["open"].interpolate(method="linear")


In [33]:
# Interpolated `open` for the 2017-01-02 09:00:40 bucket.
upsampled.loc[upsampled["ts"] == datetime(2017, 1, 2, 9, 0, 40), "open"]

4    1.051683
Name: open, dtype: float64

In [None]:
# Fill the missing `close` values with the nearest known value.
# Assigned back explicitly for the same reason as any column update: an
# `inplace=True` call on `upsampled.close` acts on an intermediate Series and
# does not update `upsampled` under pandas Copy-on-Write.
# NOTE(review): method="nearest" is delegated to SciPy by pandas - confirm
# scipy is installed in the notebook environment.
upsampled["close"] = upsampled["close"].interpolate(method="nearest")


In [35]:
# Interpolated `close` for the 2017-01-02 09:00:40 bucket, rounded to 5 decimals.
upsampled.loc[upsampled["ts"] == datetime(2017, 1, 2, 9, 0, 40), "close"].round(5)


4    1.05168
Name: close, dtype: float64

In [40]:
# Narrow `df` to the one-hour window 12:00-13:00 on 2017-01-05 (both ends
# inclusive). The explicit .copy() makes the result an independent frame, so
# adding a column below is a plain assignment rather than a write onto a
# filtered slice of the previous `df` (which triggers SettingWithCopyWarning).
# NOTE: this rebinds `df`; the full-length frame is no longer reachable under
# that name after this cell.
in_window = (df.ts >= "2017-01-05 12:00:00") & (df.ts <= "2017-01-05 13:00:00")
df = df[in_window].copy()

# Integer form of the timestamps, used as the numeric x axis for np.interp.
df["unixtime"] = df.ts.astype(np.int64)

# Candlestick figure for the window; it is built but not displayed here
# (the show call is left disabled).
fig = go.Figure(
    data=[
        go.Candlestick(
            x=df["ts"],
            open=df["open"],
            high=df["high"],
            low=df["low"],
            close=df["close"],
        )
    ]
)

# fig.show()

df.head()

Unnamed: 0,ts,open,high,low,close,vol,unixtime
4491,2017-01-05 12:00:00,1.05044,1.05052,1.05032,1.05048,530,1483617600000000000
4492,2017-01-05 12:01:00,1.05048,1.05053,1.05031,1.05046,447,1483617660000000000
4493,2017-01-05 12:02:00,1.05046,1.0508,1.05046,1.05075,516,1483617720000000000
4494,2017-01-05 12:03:00,1.05075,1.05093,1.05063,1.05087,477,1483617780000000000
4495,2017-01-05 12:04:00,1.05087,1.05118,1.05087,1.05101,456,1483617840000000000


In [41]:
# 135 evenly spaced sample points spanning the window's first to last timestamp.
ti = np.linspace(start=df["unixtime"].min(), stop=df["unixtime"].max(), num=135)


In [45]:
# Linearly interpolate `close` onto the `ti` grid, then report the mean of the
# interpolated values rounded to 6 decimals.
np.interp(ti, df["unixtime"], df["close"]).mean().round(6)

1.050295