In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import mom_trans.changepoint_detection as cpd_gp
from mom_trans.data_prep import calc_returns
from data.pull_data import pull_openbb_sample_data
import time

# Load sample data
ticker = "ES"
data = pull_openbb_sample_data(ticker)
# Restrict the demo to the first 500 rows. The explicit copy makes `data` an
# independent frame, so adding a column below never writes into a slice of the
# pulled frame (avoids pandas' chained-assignment / SettingWithCopy hazard).
data = data.iloc[:500].copy()
data["daily_returns"] = calc_returns(data["close"])

# Lookback window and per-implementation output file names.
# Only the GP implementation is run in this cell; `output_rupt` and
# `output_bayes` are defined here but not used below.
lookback_window = 21
output_gp = f"gp_results{lookback_window}.csv"
output_rupt = f"ruptures_results{lookback_window}.csv"
output_bayes = f"bayes_results{lookback_window}.csv"

# Time the GP implementation.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
print("Running GP implementation...")
start_time_gp = time.perf_counter()
cpd_gp.run_module(data, lookback_window, output_gp)
end_time_gp = time.perf_counter()
gp_runtime = end_time_gp - start_time_gp
print(f"GP Implementation runtime: {gp_runtime:.2f} seconds")


# Load the GP results from `output_gp` (presumably written by run_module above
# -- TODO confirm against mom_trans.changepoint_detection).
gp_results = pd.read_csv(output_gp)

# Timing summary (only the GP run is timed in this cell)
print(f"GP Implementation runtime: {gp_runtime:.2f} seconds")




No GPU found. Running on CPU
Running GP implementation...


processing windows:   0%|          | 0/48 [00:00<?, ?batch/s]

Processing batch 3/48


processing windows:   2%|▏         | 1/48 [00:20<15:59, 20.41s/batch]

Processing batch 4/48


processing windows:   4%|▍         | 2/48 [00:39<15:06, 19.72s/batch]

Processing batch 5/48


processing windows:   6%|▋         | 3/48 [00:59<14:56, 19.93s/batch]

Processing batch 6/48


processing windows:   8%|▊         | 4/48 [01:21<15:08, 20.65s/batch]

Processing batch 7/48


processing windows:  10%|█         | 5/48 [01:43<15:01, 20.97s/batch]

Processing batch 8/48


processing windows:  12%|█▎        | 6/48 [02:07<15:24, 22.02s/batch]

Processing batch 9/48


processing windows:  15%|█▍        | 7/48 [02:31<15:37, 22.88s/batch]

Processing batch 10/48


processing windows:  17%|█▋        | 8/48 [02:56<15:38, 23.47s/batch]

Processing batch 11/48


processing windows:  19%|█▉        | 9/48 [03:24<16:12, 24.92s/batch]

Processing batch 12/48


processing windows:  21%|██        | 10/48 [03:53<16:29, 26.04s/batch]

Processing batch 13/48


processing windows:  23%|██▎       | 11/48 [04:21<16:31, 26.79s/batch]

Processing batch 14/48


processing windows:  25%|██▌       | 12/48 [04:51<16:40, 27.79s/batch]

Processing batch 15/48


Exception ignored in: <function AtomicFunction.__del__ at 0x00000282BDC13910>
Traceback (most recent call last):
  File "c:\Users\cerva\miniconda3\lib\site-packages\tensorflow\python\eager\polymorphic_function\atomic_function.py", line 287, in __del__
    if self._generated_graph:
KeyboardInterrupt: 


In [1]:
import os


def read_processed_tickers(file_path, max_attempts=3, retry_delay=0.5):
    """
    Read the processed-tickers progress file, retrying on file-access errors.

    The file is opened read-only and is expected to hold one ticker symbol per
    line. Surrounding whitespace is stripped and blank lines are ignored.

    This is a best-effort reader: it never raises. If the file does not exist,
    cannot be read after ``max_attempts`` tries, or fails for any other reason
    (e.g. a decoding error), a message is printed and an empty set is returned.

    Args:
        file_path: Path to the processed-tickers text file.
        max_attempts: Maximum number of read attempts (default 3).
        retry_delay: Seconds to wait between attempts (default 0.5).

    Returns:
        A set of the non-empty, stripped lines found in the file; an empty set
        when the file is missing or could not be read.
    """
    # Imported locally so the retry path does not rely on some other notebook
    # cell having imported `time` into the kernel namespace.
    import time

    # Nothing to read yet: treat a missing file as "no tickers processed".
    if not os.path.exists(file_path):
        return set()

    for attempt in range(1, max_attempts + 1):
        try:
            with open(file_path, "r") as f:
                stripped_lines = (line.strip() for line in f)
                return {symbol for symbol in stripped_lines if symbol}
        except OSError as e:
            # OSError covers IOError and PermissionError (both are OSError in
            # Python 3). The file may be mid-write, so wait briefly and retry.
            if attempt < max_attempts:
                print(
                    f"Attempt {attempt} to read {file_path} failed: {e}. Retrying..."
                )
                time.sleep(retry_delay)
            else:
                print(
                    f"Failed to read processed tickers after {max_attempts} attempts: {e}"
                )
        except Exception as e:
            # Non-access errors (e.g. undecodable content) will not be fixed by
            # retrying; report once and fall through to the empty result.
            print(f"Error reading processed tickers file: {e}")
            break

    return set()

In [4]:
from settings.default import CPD_OPENBB_OUTPUT_FOLDER

# Example: report which tickers have already been processed for one lookback window.
lbw = 126  # lookback window length used to select the output folder
cpd_output_folder = CPD_OPENBB_OUTPUT_FOLDER(lbw)
progress_file = os.path.join(cpd_output_folder, "processed_tickers.txt")

# Read the progress file; the result supports len() and is printed as-is below.
processed_tickers = read_processed_tickers(progress_file)

print(f"Currently processed {len(processed_tickers)} tickers")
print(f"Processed tickers: {processed_tickers}")

Currently processed 2 tickers
Processed tickers: {'6J', '6C'}
