In [13]:
import os
import numpy as np
from MyKalmanFilter import MyKalmanFilter
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


Create dataframe with 'SnapshotTime','openPrices', 'highPrices', 'lowPrices', 'closedprices' series

In [21]:
data_dir = 'data'

# List all JSON files in the directory
json_files = [f for f in os.listdir(data_dir) if f.endswith('.json')]

# Initialize an empty list to store DataFrames
df_list = []

# Read each JSON file and append the DataFrame to the list
for json_file in json_files:
    file_path = os.path.join(data_dir, json_file)
    df = pd.read_json(file_path)
    df_list.append(df)

# Concatenate all DataFrames into a single DataFrame
df_org = pd.concat(df_list, ignore_index=True)


def compute_average_ask_bid(price_data):
    if( price_data['Ask'] is not None and price_data['Bid'] is not None):
        return (price_data['Ask'] + price_data['Bid']) / 2
    else:
        return None

df_org['openPrices'] = [compute_average_ask_bid(OpenPrice) for OpenPrice in df_org["OpenPrice"]];
df_org['highPrices'] = [compute_average_ask_bid(HighPrice) for HighPrice in df_org["HighPrice"]];
df_org['lowPrices'] = [compute_average_ask_bid(LowPrice) for LowPrice in df_org["LowPrice"]];
df_org['closedprices'] = [compute_average_ask_bid(closedprice) for closedprice in df_org["ClosePrice"]];

## Drop rows with None values
df_org.dropna(subset=['closedprices', 'highPrices', 'lowPrices', 'openPrices'], inplace=True)

data = df_org[['SnapshotTime','openPrices', 'highPrices', 'lowPrices', 'closedprices']].copy()
data.rename(columns={'SnapshotTime': 'snapshotTime'}, inplace=True)
data

Unnamed: 0,snapshotTime,openPrices,highPrices,lowPrices,closedprices
0,2024/11/18 08:00:00,7294.7,7295.5,7287.7,7289.0
1,2024/11/18 08:01:00,7288.0,7288.0,7281.7,7281.7
2,2024/11/18 08:02:00,7282.2,7282.2,7276.7,7277.0
3,2024/11/18 08:03:00,7277.2,7282.2,7277.2,7282.2
4,2024/11/18 08:04:00,7282.0,7282.0,7278.2,7278.7
...,...,...,...,...,...
48252,2025/02/14 21:56:00,8177.3,8177.8,8177.1,8177.8
48253,2025/02/14 21:57:00,8177.6,8177.6,8176.6,8176.6
48254,2025/02/14 21:58:00,8176.3,8177.1,8175.8,8176.8
48255,2025/02/14 21:59:00,8176.6,8176.8,8173.8,8173.8


Init kalman filter and compute slow kalman and speed kalman

In [22]:
kspeed = MyKalmanFilter(transition_matrix=1, 
                    observation_matrix=1, 
                    initial_state_mean=0, 
                    initial_state_covariance=1, 
                    observation_covariance=1, 
                    transition_covariance=0.01)

kslow = MyKalmanFilter(transition_matrix=1, 
                    observation_matrix=1, 
                    initial_state_mean=0, 
                    initial_state_covariance=1, 
                    observation_covariance=1, 
                    transition_covariance=0.001)

kspeed_filter, kspeed_prediction = kspeed.filter(df_org['closedprices']);
kslow_filter, kslow_prediction = kslow.filter(df_org['closedprices']);

data['kspeed_filter'] = kspeed_filter
data['kslow_filter'] = kslow_filter

In [23]:
data

Unnamed: 0,snapshotTime,openPrices,highPrices,lowPrices,closedprices,kspeed_filter,kslow_filter
0,2024/11/18 08:00:00,7294.7,7295.5,7287.7,7289.0,3662.631841,3646.321339
1,2024/11/18 08:01:00,7288.0,7288.0,7281.7,7281.7,4888.907987,4860.131998
2,2024/11/18 08:02:00,7282.2,7282.2,7276.7,7277.0,5506.518422,5466.460591
3,2024/11/18 08:03:00,7277.2,7282.2,7277.2,7282.2,5882.505561,5831.782578
4,2024/11/18 08:04:00,7282.0,7282.0,7278.2,7278.7,6135.910488,6075.139369
...,...,...,...,...,...,...,...
48252,2025/02/14 21:56:00,8177.3,8177.8,8177.1,8177.8,8176.013317,8176.481737
48253,2025/02/14 21:57:00,8177.6,8177.6,8176.6,8176.6,8176.069125,8176.485418
48254,2025/02/14 21:58:00,8176.3,8177.1,8175.8,8176.8,8176.138650,8176.495210
48255,2025/02/14 21:59:00,8176.6,8176.8,8173.8,8173.8,8175.916186,8176.411317
