In [None]:
#!pip install pandas pyarrow scipy plotly tqdm

In [None]:
import numpy as np
import sys, os
import pandas as pd
import plotly.express as px
import random
from tqdm import tqdm

from data_downloader import download, get_filename
from pattern_analysis import get_alpha_lambda, get_rmse, create_window
from pattern_searcher import PatternSearcher

In [None]:
# Seed the stdlib RNG up front so the random sampling in this notebook is repeatable.
random.seed(42)

# Dataset selection: instrument symbol, bar interval and year passed to the data helpers.
ticker, interval, year = 'AAPL', '5s', '2024'

In [None]:
# Resolve the on-disk data file for this ticker/interval/year once, and only
# trigger a download when that file is not present yet.
# NOTE(review): assumes download() writes to the same path get_filename() returns — confirm.
filename = get_filename(ticker, interval, year)
if not os.path.exists(filename):
    download(ticker, interval, year)

# Read the parquet file into a DataFrame. The path resolved above is reused
# rather than recomputed, since the arguments have not changed.
df = pd.read_parquet(filename)

# show data.
# fig = px.line(df[-1000:], y='open', title=f'{ticker} Open Prices')
# fig.show()
# total_bars = df.shape[0]
# print(f'Total bars: {total_bars} ({total_bars:,})')

In [None]:
# Window size: used as the searcher's template length and as the length of each sampled window.
m = 60

In [None]:
# Build the PatternSearcher over the 'open' column, with the template length
# fixed to the window size m.
searcher = PatternSearcher(
    df['open'],
    template_length=m,
)

# # Print searcher statistics
# print("PatternSearcher Statistics:")
# stats = searcher.get_stats()
# for key, value in stats.items():
#     print(f"  {key}: {value}")

In [None]:
# Draw N random window start positions; for each one, build the window and
# record what the searcher returns for it at the 0.97 threshold.
N = 100
correlations = []
for _ in tqdm(range(N)):
    # Start positions are drawn from [0, len(df) - m).
    start_index = random.randrange(0, len(df) - m)
    pattern = create_window(df, start_index, m)
    correlations.append(
        dict(
            start_index=start_index,
            similar=searcher.get_rs_above(pattern, 0.97),
        )
    )


In [None]:
# total_analyzed = 0
# for corr in correlations:
#     print(f'index = {corr["start_index"]}, similar patterns: {len(corr["similar"])}')
#     total_analyzed += len(corr['similar'])
# print(f'Total analyzed points: {total_analyzed}')