In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

import os

In [2]:
from pathlib import Path

# --- User-tunable settings -------------------------------------------------
SYMBOL = "USDJPY"
START_DATE = "20210101"
END_DATE = "20241231"

# Sampling scheme: "dollar" selects the dollar-threshold name, anything else
# falls back to the minute-based name.
SAMPLE_TYPE = "time"
MINUTES = 1
DOLLAR_THRESHOLD = "115009542m"

# Event filter identity (name + threshold) and labeling scheme tag.
EVENT_NAME = "CUSUM"
EVENT_THRESHOLD = 2.52e-04
LABEL_NAME = "TB"

# --- Base name shared by every derived artifact ----------------------------
RESAMPLED_NAME = (
    f"{SYMBOL}-{DOLLAR_THRESHOLD}-dollar-{START_DATE}-{END_DATE}"
    if SAMPLE_TYPE == "dollar"
    else f"{SYMBOL}-{MINUTES}m-{START_DATE}-{END_DATE}"
)

# --- Directory layout (all relative to BASE_DIR) ---------------------------
BASE_DIR = Path("../data")
RESAMPLED_DIR = BASE_DIR.joinpath("resampled")
PROCESSED_DIR = BASE_DIR.joinpath("processed")
NORMALIZED_DIR = BASE_DIR.joinpath("normalized")
SCALER_DIR = BASE_DIR.joinpath("scalers")
EVENTS_DIR = BASE_DIR.joinpath("events")

# --- Concrete file locations -----------------------------------------------
RESAMPLED_FILE_PATH = RESAMPLED_DIR.joinpath(f"{RESAMPLED_NAME}.pkl")
PROCESSED_FILE_PATH = PROCESSED_DIR.joinpath(f"{RESAMPLED_NAME}-processed.pkl")
NORMALIZED_FILE_PATH = NORMALIZED_DIR.joinpath(f"{RESAMPLED_NAME}-normalized.pkl")
STD_SCALER_PATH = SCALER_DIR.joinpath(f"{RESAMPLED_NAME}_standard_scaler.pkl")
MINMAX_SCALER_PATH = SCALER_DIR.joinpath(f"{RESAMPLED_NAME}_minmax_scaler.pkl")
# The event threshold is embedded in scientific notation with two decimals.
EVENT_FILE_PATH = EVENTS_DIR.joinpath(
    f"{RESAMPLED_NAME}_{EVENT_NAME}_{EVENT_THRESHOLD:.2e}.pkl"
)

In [3]:
# Folder for direction-label pickles; it is created (with any missing
# parents) if absent, and left alone if it already exists.
DIRECTION_LABEL_DIR = BASE_DIR.joinpath("direction_labels")
DIRECTION_LABEL_DIR.mkdir(exist_ok=True, parents=True)

# The label file name combines the resampled base name, event name and label name.
# NOTE(review): unlike EVENT_FILE_PATH, this name does not include
# EVENT_THRESHOLD — confirm that one label file per event name is intended.
DIRECTION_LABEL_FILE_PATH = DIRECTION_LABEL_DIR.joinpath(
    f"{RESAMPLED_NAME}-{EVENT_NAME}-{LABEL_NAME}.pkl"
)

In [4]:
# Load the pickled frame stored at RESAMPLED_FILE_PATH into `df`.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# this project produced itself.
# NOTE(review): the column listing captured further down shows only
# timestamp/open/high/low/close/volume/spread, while later cells index
# 'close_above_ema50', 'macd_diff', 'rsi14' and 'dc20_upper'/'dc20_lower'.
# Confirm whether those cells expect a different frame (e.g. the one at
# PROCESSED_FILE_PATH); as written they would raise KeyError on this frame.
df = pd.read_pickle(RESAMPLED_FILE_PATH)

In [None]:
# Load the pickled object stored at DIRECTION_LABEL_FILE_PATH into `labels`.
# NOTE(review): presumably written by an upstream labeling step — confirm the
# file exists before running; only the containing directory is created here.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# this project produced itself.
labels = pd.read_pickle(DIRECTION_LABEL_FILE_PATH)

In [5]:
# Show the column names of the loaded frame as a plain Python list.
df.columns.to_list()

['timestamp', 'open', 'high', 'low', 'close', 'volume', 'spread']

In [None]:
# Frequency of each distinct value in the 'close_above_ema50' column.
# NOTE(review): this column is not in the column listing captured above for
# the frame read from RESAMPLED_FILE_PATH — confirm `df` holds the feature
# columns before running, otherwise this raises KeyError.
df['close_above_ema50'].value_counts()

In [None]:
# Summary statistics for the 'macd_diff' column.
# NOTE(review): this column is not in the column listing captured above for
# the frame read from RESAMPLED_FILE_PATH — confirm `df` holds the feature
# columns before running, otherwise this raises KeyError.
df['macd_diff'].describe()

In [None]:
import matplotlib.pyplot as plt

# Histogram of the 'rsi14' column (100 bins), drawn through the explicit
# Figure/Axes interface rather than the implicit pyplot state.
# NOTE(review): 'rsi14' is not in the column listing captured earlier for the
# frame read from RESAMPLED_FILE_PATH — confirm `df` holds it before running.
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(df['rsi14'], bins=100, alpha=0.7, color='skyblue', edgecolor='black')

ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Donchian window length. It selects the 'dc<N>_upper' / 'dc<N>_lower'
# columns and is echoed in the title, so the columns plotted and the number
# shown can no longer drift apart (it was previously hard-coded in three places).
DONCHIAN_WINDOW = 20
dc_upper_col = f"dc{DONCHIAN_WINDOW}_upper"
dc_lower_col = f"dc{DONCHIAN_WINDOW}_lower"

# Restrict the chart to the last 1000 rows of the frame.
df1000 = df.iloc[-1000:]

# NOTE(review): the x values are the frame's index while the axis is labelled
# "Time" — confirm the index is time-based (the captured column listing shows
# 'timestamp' as a regular column).
# NOTE(review): the 'dc20_*' columns are not in the column listing captured
# earlier for the frame read from RESAMPLED_FILE_PATH — confirm `df` holds them.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df1000.index, df1000['close'], label="Close", color="black", linewidth=1)
ax.plot(df1000.index, df1000[dc_upper_col], label="Donchian High", color="red", linestyle="--")
ax.plot(df1000.index, df1000[dc_lower_col], label="Donchian Low", color="red", linestyle="--")

# Shade the area between the lower and upper channel bounds.
ax.fill_between(df1000.index, df1000[dc_lower_col], df1000[dc_upper_col], color="lightblue", alpha=0.2)

ax.set_title(f"Close Price with Donchian Channel ({DONCHIAN_WINDOW})")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.legend()
ax.grid(True, linestyle="--", alpha=0.5)
plt.show()