In [1]:
import pandas as pd
import numpy as np
import warnings
import boto3
import re
from tqdm import tqdm
warnings.filterwarnings("ignore")

### data

In [2]:
client = boto3.client('s3')

In [3]:
s3_prefix = 's3://'
bucket = 'sisyphus-general-bucket'
primary_folder = 'AthenaInsights'

In [4]:
response = client.list_objects_v2(
    Bucket=bucket,
    Prefix=f'{primary_folder}/latest_data/feature_prep/')

In [30]:
paths = []
for content in response.get('Contents', []):
    # print(f"{s3_prefix}{bucket}/{content['Key']}")
    k = content['Key']
    if ('base' in k or 'diff' in k) and ('rsi' not in k and 'macd' not in k):
        paths.append(f"{s3_prefix}{bucket}/{content['Key']}")

In [31]:
paths

['s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_b

In [32]:
def calculate_macd(df, signal=14, ema_columns=[]):
    for i in range(len(ema_columns)):
        for j in range(i + 1, len(ema_columns)):
            fast_ema = ema_columns[i]
            slow_ema = ema_columns[j]

            # Calculate MACD
            # macd_col_name = f'MACD_{fast_ema}_{slow_ema}'
            # df[macd_col_name] = df[fast_ema] - df[slow_ema]

            # Calculate Signal line
            signal_col_name = f'Signal_{fast_ema}_{slow_ema}_signal{signal}'
            # df[signal_col_name] = df[macd_col_name].ewm(span=signal, adjust=False).mean()
            df[signal_col_name] = (df[fast_ema] - df[slow_ema]).ewm(span=signal, adjust=False).mean()
            

            # # Calculate Histogram
            # histogram_col_name = f'Histogram_{fast_ema}_{slow_ema}_signal{signal}'
            # df[histogram_col_name] = df[macd_col_name] - df[signal_col_name]
    return df


def read_and_calculate_macd(path, signals):
    print(f'Reading from {path}')
    df = pd.read_parquet(path)
    fields = [z for z in df.columns if 'close_ema' in z or 'close_diff_ema' in z]
    df = df[['symbol'] + fields]
    grouped = df.groupby('symbol')
    results = []

    for symbol, group in grouped:
        ema_columns = [z for z in group.columns if z!='symbol']
        ema_columns = sorted(ema_columns, key=lambda x: int(re.search(r'\d+', x).group()))
        for signal in signals:
            print(f'for signal = {signal}')
            group = calculate_macd(group, signal, ema_columns)
        results.append(group)

    # Concatenate all the grouped results back into a single DataFrame
    df = pd.concat(results)
    loc = path.replace('.parquet', '_macd.parquet')
    print(f'Saving to {loc}')
    df.to_parquet(loc)
    del df, group, results


for path in tqdm(paths):
    base_time_interval = path.split('/')[-1].split('.')[0].split('_')[-2]
    if 'min' in base_time_interval:
        if int(base_time_interval.replace('min', ''))<=10:
            signal = [11, 13, 17, 20, 26]
        elif int(base_time_interval.replace('min', ''))<=30:
            signal = [11, 13, 17, 20, 26]
        else:
            signal = [11, 13, 17, 20, 26, 30, 50]
    elif 'D' in base_time_interval:
        if int(base_time_interval.replace('D', '')) < 100:
            signal = [11, 13, 17, 20, 26]
        else: 
            signal = [11, 13, 17, 20, 26, 30, 50]
    read_and_calculate_macd(path, signal)

  0%|          | 0/50 [00:00<?, ?it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_base_macd.parquet


  2%|▏         | 1/50 [00:01<01:07,  1.37s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_diff_macd.parquet


  4%|▍         | 2/50 [00:02<01:01,  1.28s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_base_macd.parquet


  6%|▌         | 3/50 [00:04<01:07,  1.43s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_diff_macd.parquet


  8%|▊         | 4/50 [00:05<01:09,  1.50s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_base_macd.parquet


 10%|█         | 5/50 [00:18<04:12,  5.60s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_diff_macd.parquet


 12%|█▏        | 6/50 [00:30<05:34,  7.60s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_base_macd.parquet


 14%|█▍        | 7/50 [00:33<04:31,  6.32s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_diff_macd.parquet


 16%|█▌        | 8/50 [00:37<03:45,  5.36s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_base_macd.parquet


 18%|█▊        | 9/50 [00:38<02:46,  4.05s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_diff_macd.parquet


 20%|██        | 10/50 [00:39<02:04,  3.11s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_base_macd.parquet


 22%|██▏       | 11/50 [00:41<01:44,  2.67s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_diff_macd.parquet


 24%|██▍       | 12/50 [00:42<01:29,  2.36s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_base_macd.parquet


 26%|██▌       | 13/50 [00:50<02:27,  3.99s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_diff_macd.parquet


 28%|██▊       | 14/50 [00:58<03:03,  5.09s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_base_macd.parquet


 30%|███       | 15/50 [01:01<02:36,  4.48s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_diff_macd.parquet


 32%|███▏      | 16/50 [01:04<02:16,  4.01s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_base_macd.parquet


 34%|███▍      | 17/50 [01:05<01:52,  3.40s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_diff_macd.parquet


 36%|███▌      | 18/50 [01:08<01:35,  2.99s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base_macd.parquet


 38%|███▊      | 19/50 [01:32<04:51,  9.40s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_diff_macd.parquet


 40%|████      | 20/50 [01:53<06:24, 12.81s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_base_macd.parquet


 42%|████▏     | 21/50 [01:54<04:29,  9.29s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_diff_macd.parquet


 44%|████▍     | 22/50 [01:55<03:11,  6.84s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_base_macd.parquet


 46%|████▌     | 23/50 [01:57<02:23,  5.31s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_diff_macd.parquet


 48%|████▊     | 24/50 [01:58<01:50,  4.26s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_base_macd.parquet


 50%|█████     | 25/50 [02:07<02:21,  5.66s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_diff_macd.parquet


 52%|█████▏    | 26/50 [02:13<02:17,  5.75s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_base_macd.parquet


 54%|█████▍    | 27/50 [02:16<01:51,  4.85s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_diff_macd.parquet


 56%|█████▌    | 28/50 [02:18<01:30,  4.12s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_base_macd.parquet


 58%|█████▊    | 29/50 [02:26<01:46,  5.09s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_diff_macd.parquet


 60%|██████    | 30/50 [02:32<01:45,  5.29s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_base_macd.parquet


 62%|██████▏   | 31/50 [02:33<01:20,  4.21s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_diff_macd.parquet


 64%|██████▍   | 32/50 [02:35<01:02,  3.49s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_base_macd.parquet


 66%|██████▌   | 33/50 [03:06<03:19, 11.76s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_diff_macd.parquet


 68%|██████▊   | 34/50 [03:46<05:21, 20.08s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_base_macd.parquet


 70%|███████   | 35/50 [03:47<03:37, 14.53s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_diff_macd.parquet


 72%|███████▏  | 36/50 [03:49<02:30, 10.75s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_base_macd.parquet


 74%|███████▍  | 37/50 [03:55<02:00,  9.26s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_diff_macd.parquet


 76%|███████▌  | 38/50 [04:00<01:35,  7.92s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_base_macd.parquet


 78%|███████▊  | 39/50 [04:01<01:06,  6.06s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_diff_macd.parquet


 80%|████████  | 40/50 [04:03<00:47,  4.79s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_base_macd.parquet


 82%|████████▏ | 41/50 [04:27<01:35, 10.60s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_diff_macd.parquet


 84%|████████▍ | 42/50 [04:49<01:50, 13.86s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_base_macd.parquet


 86%|████████▌ | 43/50 [04:50<01:11, 10.19s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_diff_macd.parquet


 88%|████████▊ | 44/50 [04:53<00:46,  7.79s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_base_macd.parquet


 90%|█████████ | 45/50 [04:55<00:30,  6.03s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_diff_macd.parquet


 92%|█████████▏| 46/50 [04:56<00:19,  4.78s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_base_macd.parquet


 94%|█████████▍| 47/50 [05:13<00:24,  8.32s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_diff_macd.parquet


 96%|█████████▌| 48/50 [05:30<00:22, 11.05s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_base.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_base_macd.parquet


 98%|█████████▊| 49/50 [05:33<00:08,  8.64s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_diff.parquet
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
for signal = 11
for signal = 13
for signal = 17
for signal = 20
for signal = 26
for signal = 30
for signal = 50
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_diff_macd.parquet


100%|██████████| 50/50 [05:37<00:00,  6.76s/it]


### eda

In [None]:
# y = pd.read_parquet("s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base.parquet")
# y = pd.read_parquet("s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base.parquet")

In [2]:
x = pd.read_parquet("s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_base_macd.parquet")
x.loc['2024-11-19 16:50:00']
x.shape # (1160890, 149)
x = x.reset_index()[['us_eastern_timestamp', 'symbol']]

In [3]:
y = pd.read_parquet("s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base_macd.parquet")
y.loc['2024-11-19 16:50:00']
y.shape # (392417, 149)
y = y.reset_index()[['us_eastern_timestamp', 'symbol']]

In [4]:
x[x.symbol=='SPY']

Unnamed: 0,us_eastern_timestamp,symbol
696460,2024-01-02 04:00:00,SPY
696461,2024-01-02 04:02:00,SPY
696462,2024-01-02 04:04:00,SPY
696463,2024-01-02 04:06:00,SPY
696464,2024-01-02 04:08:00,SPY
...,...,...
928775,2024-11-19 19:50:00,SPY
928776,2024-11-19 19:52:00,SPY
928777,2024-11-19 19:54:00,SPY
928778,2024-11-19 19:56:00,SPY


In [5]:
y[y.symbol=='SPY']

Unnamed: 0,us_eastern_timestamp,symbol
198636,2024-01-02 04:00:00,SPY
198637,2024-01-02 04:01:00,SPY
198638,2024-01-02 04:02:00,SPY
198639,2024-01-02 04:03:00,SPY
198640,2024-01-02 04:05:00,SPY
...,...,...
374553,2024-11-19 19:54:00,SPY
374554,2024-11-19 19:56:00,SPY
374555,2024-11-19 19:57:00,SPY
374556,2024-11-19 19:58:00,SPY


In [None]:
del x
# 175922 232320 

In [8]:
# y[(y.symbol=='SPY')&(~y.us_eastern_timestamp.isin(x[x.symbol=='SPY'].us_eastern_timestamp))]
x[(x.symbol=='SPY')&(~x.us_eastern_timestamp.isin(y[y.symbol=='SPY'].us_eastern_timestamp))]

Unnamed: 0,us_eastern_timestamp,symbol
696462,2024-01-02 04:04:00,SPY
696467,2024-01-02 04:14:00,SPY
696469,2024-01-02 04:18:00,SPY
696470,2024-01-02 04:20:00,SPY
696471,2024-01-02 04:22:00,SPY
...,...,...
928766,2024-11-19 19:32:00,SPY
928767,2024-11-19 19:34:00,SPY
928768,2024-11-19 19:36:00,SPY
928769,2024-11-19 19:38:00,SPY


In [10]:
y[y.symbol=='SPY'].tail(30)

Unnamed: 0,us_eastern_timestamp,symbol
374528,2024-11-19 18:26:00,SPY
374529,2024-11-19 18:27:00,SPY
374530,2024-11-19 18:28:00,SPY
374531,2024-11-19 18:30:00,SPY
374532,2024-11-19 18:31:00,SPY
374533,2024-11-19 18:33:00,SPY
374534,2024-11-19 18:34:00,SPY
374535,2024-11-19 18:37:00,SPY
374536,2024-11-19 18:39:00,SPY
374537,2024-11-19 18:40:00,SPY
