In [1]:
import pandas as pd
import numpy as np
import warnings
import boto3
import re
from tqdm import tqdm
warnings.filterwarnings("ignore")

### data

In [2]:
# S3 client used by the listing cell below to enumerate objects in the bucket.
client = boto3.client('s3')

In [3]:
# Location of the project data on S3; later cells combine these into
# "s3://<bucket>/<key>" paths.
s3_prefix = 's3://'                 # URI scheme prepended to "<bucket>/<key>"
bucket = 'sisyphus-general-bucket'  # bucket that is listed and read from
primary_folder = 'AthenaInsights'   # top-level key prefix inside the bucket

In [4]:
# A single list_objects_v2 call returns at most 1,000 keys, so walk every page
# of the listing instead of trusting the first response to be complete.
paginator = client.get_paginator('list_objects_v2')
contents = []
for page in paginator.paginate(
        Bucket=bucket,
        Prefix=f'{primary_folder}/latest_data/feature_prep/'):
    # Pages of an empty listing carry no 'Contents' key at all.
    contents.extend(page.get('Contents', []))

# Keep the single-response shape that consumers read via response.get('Contents', []).
response = {'Contents': contents}

In [5]:
# Select the feature files to process: keys mentioning 'base' or 'diff', but
# not the derived 'rsi' / 'macd' outputs, each turned into a full s3:// URI.
paths = [
    f"{s3_prefix}{bucket}/{obj['Key']}"
    for obj in response.get('Contents', [])
    if any(tag in obj['Key'] for tag in ('base', 'diff'))
    and not any(tag in obj['Key'] for tag in ('rsi', 'macd'))
]

In [6]:
# Display the selected input paths (last expression of the cell, so it is rendered).
paths

['s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_diff.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_base.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_b

In [7]:
def calculate_macd(df, signal=14, ema_columns=None):
    """Add a MACD signal-line column for every pair of EMA columns.

    For each pair ``(fast, slow)`` in which ``fast`` comes before ``slow`` in
    ``ema_columns``, the difference ``df[fast] - df[slow]`` is smoothed with an
    exponentially weighted mean (``span=signal``, ``adjust=False``) and stored
    in a new column named ``Signal_{fast}_{slow}_signal{signal}``. Only the
    signal line is stored; the raw difference and a histogram are not.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns named in ``ema_columns``. It is modified
        in place.
    signal : int, default 14
        Span passed to ``ewm``.
    ema_columns : sequence of str, optional
        Column names; position decides which member of a pair is treated as
        the fast one. ``None`` (the default) or an empty sequence adds nothing.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new columns added.
    """
    # A None sentinel replaces the former shared mutable ``[]`` default.
    ema_columns = list(ema_columns) if ema_columns is not None else []

    for position, fast_ema in enumerate(ema_columns):
        # Pair each column only with the ones that follow it.
        for slow_ema in ema_columns[position + 1:]:
            signal_col_name = f'Signal_{fast_ema}_{slow_ema}_signal{signal}'
            macd_line = df[fast_ema] - df[slow_ema]
            df[signal_col_name] = macd_line.ewm(span=signal, adjust=False).mean()
    return df


def read_and_calculate_macd(path, signals):
    """Read one feature file, add MACD signal columns per symbol, and save it.

    The parquet file at ``path`` is reduced to ``symbol`` plus every column
    whose name contains ``'close_ema'`` or ``'close_diff_ema'``. Those columns
    are ordered by the first integer found in their name, and for each symbol
    ``calculate_macd`` is applied once per entry in ``signals``. The per-symbol
    frames are concatenated and written next to the input with a ``_macd``
    suffix inserted before ``.parquet``.

    Parameters
    ----------
    path : str
        Location of the input parquet file.
    signals : iterable of int
        Spans forwarded to ``calculate_macd`` one at a time.

    Returns
    -------
    None

    Raises
    ------
    AttributeError
        If a selected column name contains no digits (the sort key cannot be
        extracted).
    ValueError
        If grouping by ``symbol`` yields no groups, so there is nothing to save.
    """
    print(f'Reading from {path}')
    df = pd.read_parquet(path)
    fields = [z for z in df.columns if 'close_ema' in z or 'close_diff_ema' in z]
    df = df[['symbol'] + fields]

    # The column order is the same for every symbol, so sort once up front.
    ema_columns = sorted(fields, key=lambda x: int(re.search(r'\d+', x).group()))

    results = []
    for symbol, group in df.groupby('symbol'):
        print(f'symbol - {symbol} for signals = {signals}')
        # Work on an independent copy: adding columns to a groupby slice is
        # what triggers pandas' SettingWithCopyWarning.
        group = group.copy()
        for signal in signals:
            group = calculate_macd(group, signal, ema_columns)
        results.append(group)

    if not results:
        # pd.concat([]) would raise a ValueError anyway; say why instead.
        raise ValueError(f'No symbol groups found in {path}; nothing to save')

    # Concatenate all the grouped results back into a single DataFrame
    df = pd.concat(results)

    # Build the output name from the suffix only, so a path without
    # '.parquet' can never resolve to (and overwrite) the input itself.
    suffix = '.parquet'
    stem = path[:-len(suffix)] if path.endswith(suffix) else path
    loc = f'{stem}_macd{suffix}'
    print(f'Saving to {loc}')
    df.to_parquet(loc)


# Signal spans used for short bar intervals, and the extended set for long ones.
SHORT_SIGNAL_SPANS = [11, 13, 17, 20, 26]
LONG_SIGNAL_SPANS = SHORT_SIGNAL_SPANS + [30, 50]


def signal_spans_for_interval(base_time_interval):
    """Return the list of signal spans for a bar-interval token.

    Tokens containing ``'min'`` (e.g. ``'30min'``) get the short list up to
    and including 30 minutes and the long list above that. Tokens containing
    ``'D'`` (e.g. ``'100D'``) get the short list below 100 days and the long
    list from 100 days on. A fresh list is returned on every call.

    Raises
    ------
    ValueError
        If the token contains neither ``'min'`` nor ``'D'``. Previously this
        case left the spans unassigned, which either failed with a NameError
        or silently reused the previous file's spans.
    """
    if 'min' in base_time_interval:
        minutes = int(base_time_interval.replace('min', ''))
        return list(SHORT_SIGNAL_SPANS if minutes <= 30 else LONG_SIGNAL_SPANS)
    if 'D' in base_time_interval:
        days = int(base_time_interval.replace('D', ''))
        return list(SHORT_SIGNAL_SPANS if days < 100 else LONG_SIGNAL_SPANS)
    raise ValueError(f'Unrecognised bar interval {base_time_interval!r}')


for path in tqdm(paths):
    # File names look like ".../stock_bars_<interval>_<kind>.parquet";
    # the interval is the second-to-last underscore-separated token.
    base_time_interval = path.split('/')[-1].split('.')[0].split('_')[-2]
    signal = signal_spans_for_interval(base_time_interval)
    read_and_calculate_macd(path, signal)

  0%|          | 0/50 [00:00<?, ?it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_base_macd.parquet


  2%|▏         | 1/50 [00:01<01:02,  1.27s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_100D_diff_macd.parquet


  4%|▍         | 2/50 [00:02<00:49,  1.02s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_base_macd.parquet


  6%|▌         | 3/50 [00:03<00:51,  1.09s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10D_diff_macd.parquet


  8%|▊         | 4/50 [00:04<00:54,  1.18s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_base_macd.parquet


 10%|█         | 5/50 [00:12<02:36,  3.49s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_10min_diff_macd.parquet


 12%|█▏        | 6/50 [00:16<02:48,  3.83s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_base_macd.parquet


 14%|█▍        | 7/50 [00:18<02:18,  3.23s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_120min_diff_macd.parquet


 16%|█▌        | 8/50 [00:21<02:05,  3.00s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_base_macd.parquet


 18%|█▊        | 9/50 [00:22<01:36,  2.35s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_150D_diff_macd.parquet


 20%|██        | 10/50 [00:22<01:15,  1.88s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_base_macd.parquet


 22%|██▏       | 11/50 [00:24<01:05,  1.67s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15D_diff_macd.parquet


 24%|██▍       | 12/50 [00:25<00:59,  1.56s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_base_macd.parquet


 26%|██▌       | 13/50 [00:28<01:16,  2.07s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_15min_diff_macd.parquet


 28%|██▊       | 14/50 [00:31<01:22,  2.29s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_base_macd.parquet


 30%|███       | 15/50 [00:33<01:13,  2.11s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_180min_diff_macd.parquet


 32%|███▏      | 16/50 [00:34<01:05,  1.92s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_base_macd.parquet


 34%|███▍      | 17/50 [00:35<00:55,  1.69s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1D_diff_macd.parquet


 36%|███▌      | 18/50 [00:37<00:51,  1.60s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base_macd.parquet


 38%|███▊      | 19/50 [01:00<04:07,  7.99s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_diff_macd.parquet


 40%|████      | 20/50 [01:24<06:26, 12.87s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_base_macd.parquet


 42%|████▏     | 21/50 [01:25<04:29,  9.30s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_200D_diff_macd.parquet


 44%|████▍     | 22/50 [01:26<03:09,  6.78s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_base_macd.parquet


 46%|████▌     | 23/50 [01:27<02:17,  5.07s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20D_diff_macd.parquet


 48%|████▊     | 24/50 [01:28<01:43,  3.96s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_base_macd.parquet


 50%|█████     | 25/50 [01:31<01:30,  3.60s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_20min_diff_macd.parquet


 52%|█████▏    | 26/50 [01:34<01:21,  3.38s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_base_macd.parquet


 54%|█████▍    | 27/50 [01:35<01:03,  2.75s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_240min_diff_macd.parquet


 56%|█████▌    | 28/50 [01:37<00:53,  2.44s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_base_macd.parquet


 58%|█████▊    | 29/50 [01:40<00:54,  2.57s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_25min_diff_macd.parquet


 60%|██████    | 30/50 [01:43<00:58,  2.90s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_base_macd.parquet


 62%|██████▏   | 31/50 [01:45<00:46,  2.44s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2D_diff_macd.parquet


 64%|██████▍   | 32/50 [01:46<00:38,  2.12s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_base_macd.parquet


 66%|██████▌   | 33/50 [01:56<01:16,  4.48s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_diff_macd.parquet


 68%|██████▊   | 34/50 [02:08<01:48,  6.77s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_base_macd.parquet


 70%|███████   | 35/50 [02:09<01:16,  5.10s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30D_diff_macd.parquet


 72%|███████▏  | 36/50 [02:11<00:56,  4.02s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_base_macd.parquet


 74%|███████▍  | 37/50 [02:14<00:47,  3.63s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_30min_diff_macd.parquet


 76%|███████▌  | 38/50 [02:16<00:38,  3.24s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_base_macd.parquet


 78%|███████▊  | 39/50 [02:17<00:28,  2.63s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3D_diff_macd.parquet


 80%|████████  | 40/50 [02:19<00:22,  2.28s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_base_macd.parquet


 82%|████████▏ | 41/50 [02:28<00:39,  4.37s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_3min_diff_macd.parquet


 84%|████████▍ | 42/50 [02:37<00:46,  5.81s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_base_macd.parquet


 86%|████████▌ | 43/50 [02:38<00:30,  4.38s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_50D_diff_macd.parquet


 88%|████████▊ | 44/50 [02:39<00:20,  3.45s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_base_macd.parquet


 90%|█████████ | 45/50 [02:41<00:13,  2.77s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5D_diff_macd.parquet


 92%|█████████▏| 46/50 [02:42<00:09,  2.35s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_base_macd.parquet


 94%|█████████▍| 47/50 [02:49<00:11,  3.88s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26]
symbol - SPY for signals = [11, 13, 17, 20, 26]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_5min_diff_macd.parquet


 96%|█████████▌| 48/50 [02:57<00:09,  4.98s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_base.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_base_macd.parquet


 98%|█████████▊| 49/50 [02:59<00:04,  4.14s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_diff.parquet
symbol - QQQ for signals = [11, 13, 17, 20, 26, 30, 50]
symbol - SPY for signals = [11, 13, 17, 20, 26, 30, 50]
Saving to s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_60min_diff_macd.parquet


100%|██████████| 50/50 [03:02<00:00,  3.64s/it]


### eda

In [8]:
# y = pd.read_parquet("s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base.parquet")
# y = pd.read_parquet("s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_base.parquet")

In [9]:
# Load the 1-minute diff MACD file produced by the processing loop.
x = pd.read_parquet(
    "s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_1min_diff_macd.parquet"
)
# Bare lookup: the result is discarded because this is not the cell's last
# expression. NOTE(review): presumably a leftover spot check — confirm or remove.
x.loc['2024-11-19 16:50:00']
print(x.shape)
# Keep only the timestamp (moved out of the index) and the symbol.
x = x.reset_index().loc[:, ['us_eastern_timestamp', 'symbol']]

(429593, 149)


In [10]:
# Load the 2-minute diff MACD file produced by the processing loop.
y = pd.read_parquet(
    "s3://sisyphus-general-bucket/AthenaInsights/latest_data/feature_prep/stock_bars_2min_diff_macd.parquet"
)
# Bare lookup: the result is discarded because this is not the cell's last
# expression. NOTE(review): presumably a leftover spot check — confirm or remove.
y.loc['2024-11-19 16:50:00']
print(y.shape)
# Keep only the timestamp (moved out of the index) and the symbol.
y = y.reset_index().loc[:, ['us_eastern_timestamp', 'symbol']]

(214798, 149)


In [11]:
# Rows of the 1-minute frame that belong to SPY.
x.loc[x['symbol'] == 'SPY']

Unnamed: 0,us_eastern_timestamp,symbol
214799,2024-01-02 04:00:00,SPY
214800,2024-01-02 04:01:00,SPY
214801,2024-01-02 04:02:00,SPY
214802,2024-01-02 04:03:00,SPY
214803,2024-01-02 04:04:00,SPY
...,...,...
429588,2024-11-19 19:55:00,SPY
429589,2024-11-19 19:56:00,SPY
429590,2024-11-19 19:57:00,SPY
429591,2024-11-19 19:58:00,SPY


In [12]:
# Rows of the 2-minute frame that belong to SPY.
y.loc[y['symbol'] == 'SPY']

Unnamed: 0,us_eastern_timestamp,symbol
107401,2024-01-02 04:00:00,SPY
107402,2024-01-02 04:02:00,SPY
107403,2024-01-02 04:04:00,SPY
107404,2024-01-02 04:06:00,SPY
107405,2024-01-02 04:08:00,SPY
...,...,...
214793,2024-11-19 19:50:00,SPY
214794,2024-11-19 19:52:00,SPY
214795,2024-11-19 19:54:00,SPY
214796,2024-11-19 19:56:00,SPY


In [13]:
# Remove the name `x` (the 1-minute frame) from the kernel namespace.
del x
# NOTE(review): the two numbers below are not explained anywhere in the
# notebook — confirm what they refer to or remove them.
# 175922 232320 

In [15]:
# y[(y.symbol=='SPY')&(~y.us_eastern_timestamp.isin(x[x.symbol=='SPY'].us_eastern_timestamp))]
# x[(x.symbol=='SPY')&(~x.us_eastern_timestamp.isin(y[y.symbol=='SPY'].us_eastern_timestamp))]

In [17]:
# y[y.symbol=='SPY'].tail(30)