In [1]:
#!pip install --upgrade pip
#!pip install pandas pyarrow scipy

In [2]:
import os
import requests
from tqdm import tqdm
import pandas as pd
import plotly.express as px

from data_downloader import download, get_filename
from pattern_analysis import get_alpha_lambda, get_rmse, create_window
from pattern_searcher import PatternSearcher

In [3]:
# Dataset selection: these three values are passed to get_filename() and download().
ticker = 'AAPL'
interval = '5s'  # presumably the bar interval (5 seconds) — TODO confirm against data_downloader
year = '2024'  # kept as a string; passed unchanged to the downloader helpers
m = 60 # window size; passed to create_window() and PatternSearcher below

In [4]:
# Resolve the local parquet path for the configured dataset and download the
# file only when it is not already on disk.
filename = get_filename(ticker, interval, year)
if not os.path.exists(filename):
    download(ticker, interval, year)

# The path is computed from the same (ticker, interval, year) arguments, so it
# does not need to be resolved a second time after the download.
df = pd.read_parquet(filename)

# Optional quick look at the most recent bars:
# fig = px.line(df[-1000:], y='open', title=f'{ticker} Open Prices')
# fig.show()

total_bars = df.shape[0]
print(f'Total bars: {total_bars} ({total_bars:,})')

Total bars: 1428355 (1,428,355)


In [5]:
# Two example windows used by the plots below. 42 and 285070 are hard-coded
# positions in df (presumably the start indices of m-bar windows — TODO confirm
# against pattern_analysis.create_window).
a0 = create_window(df, 42, m)
a = create_window(df, 285070, m)

In [6]:
# Plot the original data without rescaling.

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# 2x2 grid: the left panel spans both rows and holds both series on one
# Y-axis; the right column shows each series in its own panel.
fig = make_subplots(
    rows=2,
    cols=2,
    specs=[
        [{"rowspan": 2}, {"rowspan": 1}],
        [None, {}],
    ],
    subplot_titles=("Shared Y-axis", "a0 (Separate Y-axis)", "a (Separate Y-axis)"),
)

# (series, legend name, row, col) in the order the traces are added.
panel_traces = [
    (a0, 'a0', 1, 1),  # left panel, shared Y-axis
    (a, 'a', 1, 1),
    (a0, 'a0', 1, 2),  # right column, separate Y-axes
    (a, 'a', 2, 2),
]
for series, label, row, col in panel_traces:
    fig.add_trace(go.Scatter(y=series, name=label), row=row, col=col)

# Increase the bottom margin so the captions placed below the plot area fit.
fig.update_layout(margin=dict(b=100))

# (x position in paper coordinates, caption text); both captions sit at the
# same height below the plot area.
captions = [
    (0, "Fig. 1. Here a and a0 are plotted on the same Y scale."),
    (0.8, "Fig. 2. Here a and a0 are plotted each on it's own Y scale."),
]
for x_pos, caption in captions:
    fig.add_annotation(
        text=caption,
        xref="paper", yref="paper",
        x=x_pos, y=-0.20,
        showarrow=False,
        font=dict(size=16),
    )

fig.show()

In [7]:
# now rescale.

import numpy as np

def plot_rescaled(a0, a):
    """Overlay `a0` with an affinely rescaled copy of `a`.

    The scale `alpha` and offset `lambda` come from `get_alpha_lambda(a0, a)`.
    Both are printed, then a figure is shown with `a0` and `alpha * a + lambda`
    as two traces.

    Args:
        a0: Reference series, plotted unchanged.
        a: Series to rescale. It must support `alpha * a + lambda`
            (presumably a numpy array or pandas Series — TODO confirm against
            create_window).

    Returns:
        None. The figure is displayed via `fig.show()`.
    """
    alpha, lmbda = get_alpha_lambda(a0, a)
    for label, value in (('alpha', alpha), ('lambda', lmbda)):
        print(f'{label} = {value}')

    # Map `a` onto the scale of `a0`.
    rescaled = alpha * a + lmbda

    # Imported locally so the function does not depend on an earlier cell.
    import plotly.graph_objects as go

    overlay = go.Figure()
    for series, legend in ((a0, 'a0'), (rescaled, 'a (transformed)')):
        overlay.add_trace(go.Scatter(y=series, name=legend))

    # Caption placed below the plot area, in paper coordinates.
    caption = "Fig. 3. Here a is rescaled to match a0. NOTE: negative alpha flips a."
    overlay.add_annotation(
        text=caption,
        xref="paper", yref="paper",
        x=0.5, y=-0.20,
        showarrow=False,
        font=dict(size=16),
    )
    overlay.show()

# Rescale the second example window onto the first, print the fitted
# alpha/lambda, and show the overlay.
plot_rescaled(a0, a)


alpha = 0.2034997746541522
lambda = 154.70890889332452


In [None]:
from scipy import signal
import random
from tqdm import tqdm

# Build the PatternSearcher over the 'open' column of the DataFrame, using window size m
searcher = PatternSearcher(df['open'], m)
# Seed Python's `random` module so the sampled start indices are reproducible
random.seed(42)
# Accumulates one dict per sampled pattern; filled by the sampling loop
correlations = []

In [None]:
# Number of randomly sampled patterns (experiments).
N = 100

# Re-seed and reset the accumulator in this cell so that re-running it
# reproduces the same N experiments instead of appending another N entries
# drawn from a continued random stream.
random.seed(42)
correlations = []

# For each sampled start index, build the m-bar pattern and store the
# searcher's sorted result (requested size: 10000) alongside the start index.
for _ in tqdm(range(N)):
    start_index = random.randrange(0, len(df) - m)
    pattern = create_window(df, start_index, m)
    correlations.append({
        'start_index': start_index,
        'top_10k_r': searcher.get_sorted_r(pattern, 10000)
    })


In [None]:
# Take the slice_point-th entry of each experiment's sorted result, rank the
# experiments by the absolute value of that entry's r, and plot the ranked
# values. Element [1] of an entry is read as r (presumably the correlation —
# TODO confirm against PatternSearcher.get_sorted_r).
slice_point = 9900

# Sorted in place: the cells below index into `correlations` in this order.
correlations.sort(key=lambda x: abs(x['top_10k_r'][slice_point][1]), reverse=True)

y = [abs(corr['top_10k_r'][slice_point][1]) for corr in correlations]

fig = go.Figure()
# Interpolate the actual index into the trace name (the f-string previously
# had no placeholder and showed the literal text "slice_point").
fig.add_trace(go.Scatter(y=y, name=f'{slice_point}-th r values sorted'))
fig.show()

In [None]:
import plotly.express as px

# Extract the slice_point-th correlation value (absolute) from each experiment
y = [abs(corr['top_10k_r'][slice_point][1]) for corr in correlations]

# Create the histogram; the title is derived from slice_point so it always
# matches the index that is actually plotted.
fig = px.histogram(
    x=y,
    nbins=40,
    title=f'Distribution of the {slice_point}-th Correlation Value',
)
fig.update_layout(
    xaxis_title="Absolute Correlation Value",
    yaxis_title="Frequency",
)
fig.show()

In [None]:
# Plot the source pattern of the first two entries of `correlations`.
# Loop-local names are used so the notebook-level example window `a` is not
# overwritten, which would change what the earlier plot cells show on re-run.
for experiment in correlations[:2]:
    window = create_window(df, experiment['start_index'], m)
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=window, name='a'))
    fig.show()

In [None]:
# Print and plot the first 10 entries of the second experiment's sorted result.
# Element [0] of each entry is used as the window position in df (presumably
# the match's start index — TODO confirm against PatternSearcher.get_sorted_r).
# Loop-local names are used so the notebook-level example window `a` is not
# overwritten.
for match in correlations[1]['top_10k_r'][:10]:
    print(match)
    window = create_window(df, match[0], m)
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=window, name='a'))
    fig.show()