In [1]:
import os
import pandas as pd
import numpy as np
from math import ceil

In [2]:
def load_correlations(directory: str = "correlation") -> dict[str, pd.DataFrame]:
    """Load every ``.csv`` file in ``directory`` into a dict of DataFrames.

    Files are expected to be named ``correlation_matrix_<feature>.csv``; the
    ``<feature>`` part becomes the dictionary key. A ``.csv`` file without
    that prefix is still loaded and keyed by its name minus the extension.

    Args:
        directory: Folder to scan (non-recursive). Defaults to
            ``"correlation"``.

    Returns:
        Mapping of feature name to the DataFrame read from the file, with the
        first CSV column used as the index. Entries are inserted in sorted
        filename order. Files that fail to parse are reported via ``print``
        and left out of the result.

    Note:
        Errors from ``os.listdir`` itself (for example a missing directory)
        are not caught here and propagate to the caller.
    """
    correlations = {}

    # Sort so the dict order (and therefore downstream processing order) does
    # not depend on the filesystem's arbitrary listing order.
    for file in sorted(os.listdir(directory)):
        if not file.endswith(".csv"):
            continue
        # Strip only the trailing extension and the leading prefix;
        # str.replace would also remove these substrings from the middle of
        # a filename and silently change the feature name.
        feature = file.removesuffix(".csv").removeprefix("correlation_matrix_")
        path = os.path.join(directory, file)
        try:
            correlations[feature] = pd.read_csv(path, index_col=0)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"Error loading correlation {file}: {e}")

    return correlations


def load_fft(directory: str = "fft") -> dict[str, pd.DataFrame]:
    """Load every ``.csv`` file in ``directory`` into a dict of DataFrames.

    Files are expected to be named ``fft_matrix_<feature>.csv``; the
    ``<feature>`` part becomes the dictionary key. A ``.csv`` file without
    that prefix is still loaded and keyed by its name minus the extension.

    Args:
        directory: Folder to scan (non-recursive). Defaults to ``"fft"``.

    Returns:
        Mapping of feature name to the DataFrame read from the file, with the
        first CSV column used as the index. Entries are inserted in sorted
        filename order. Files that fail to parse are reported via ``print``
        and left out of the result.

    Note:
        Errors from ``os.listdir`` itself (for example a missing directory)
        are not caught here and propagate to the caller.
    """
    fft = {}

    # Sort so the dict order (and therefore downstream processing order) does
    # not depend on the filesystem's arbitrary listing order.
    for file in sorted(os.listdir(directory)):
        if not file.endswith(".csv"):
            continue
        # Strip only the trailing extension and the leading prefix;
        # str.replace would also remove these substrings from the middle of
        # a filename and silently change the feature name.
        feature = file.removesuffix(".csv").removeprefix("fft_matrix_")
        path = os.path.join(directory, file)
        try:
            fft[feature] = pd.read_csv(path, index_col=0)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"Error loading FFT {file}: {e}")

    return fft


In [3]:
def calculate_samples(corr_df: pd.DataFrame | None, fft_df: pd.DataFrame) -> pd.DataFrame:
    samples_df = pd.DataFrame(index=fft_df.index, columns=fft_df.columns, dtype=float)
    for col in fft_df.columns:
        for idx in fft_df.index:
            fft_val = fft_df.at[idx, col]
            if pd.isna(fft_val) or fft_val == 0:
                samples_df.at[idx, col] = np.nan
                continue
            nyquist_fft = ceil(2 / fft_val)
            if corr_df is None:
                samples_df.at[idx, col] = nyquist_fft
                continue
            corr_val = corr_df.at[idx, col] if col in corr_df.columns else np.nan
            if pd.isna(corr_val) or corr_val == 0:
                samples_df.at[idx, col] = nyquist_fft
                continue
            nyquist_corr = ceil(2 / corr_val)
            samples_df.at[idx, col] = max(nyquist_corr, nyquist_fft)
    return samples_df

def save_samples_files(corr_dict, fft_dict, output_dir="samples"):
    """Compute and write one ``samples_<feature>.csv`` per feature.

    The output directory is created if needed. Features are handled in two
    passes:

    1. Every feature in ``corr_dict`` is combined with its FFT matrix via
       ``calculate_samples``; features with no FFT matrix are reported and
       skipped.
    2. Every feature that appears only in ``fft_dict`` is computed from the
       FFT matrix alone (``corr_df=None``).

    Args:
        corr_dict: Mapping of feature name to correlation DataFrame.
        fft_dict: Mapping of feature name to FFT DataFrame.
        output_dir: Destination folder. Defaults to ``"samples"``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    os.makedirs(output_dir, exist_ok=True)

    def _write(name, frame):
        # Persist one result frame and hand back the path for logging.
        target = os.path.join(output_dir, f"samples_{name}.csv")
        frame.to_csv(target, index=True)
        return target

    # Pass 1: features that have a correlation matrix.
    for name in corr_dict:
        fft_frame = fft_dict.get(name)
        if fft_frame is None:
            print(f"Missing FFT data for feature '{name}', skipping.")
            continue
        file_path = _write(name, calculate_samples(corr_dict[name], fft_frame))
        print(f"Saved {file_path}")

    # Pass 2: FFT-only features, computed without correlation (corr_df=None).
    for name, fft_frame in fft_dict.items():
        if name in corr_dict:
            continue
        file_path = _write(name, calculate_samples(None, fft_frame))
        print(f"Saved (no corr) {file_path}")

In [4]:
# Load the per-feature matrices from the default "correlation" and "fft"
# directories (relative paths, so they resolve against the working directory).
corr_dict = load_correlations()
fft_dict = load_fft()


In [5]:
# Write one samples_<feature>.csv per feature into the default "samples"
# directory; a line is printed for every file saved or feature skipped.
save_samples_files(corr_dict, fft_dict)

Saved samples\samples_close.csv
Saved samples\samples_high.csv
Saved samples\samples_low.csv
Saved samples\samples_open.csv
Saved (no corr) samples\samples_turnover.csv
Saved (no corr) samples\samples_volume.csv
