In [1]:
import pandas as pd
import numpy as np
import warnings
import boto3
import re
from tqdm import tqdm
warnings.filterwarnings("ignore")

### Data: list the feature-prep parquet files on S3

In [2]:
# Low-level S3 client used for the object listing in the next cells.
client = boto3.client(service_name='s3')

In [3]:
# Where the project's data lives on S3.
bucket = 'sisyphus-general-bucket'   # bucket name passed to the listing call
primary_folder = 'AthenaInsights'    # top-level key prefix inside the bucket
s3_prefix = 's3://'                  # URI scheme prepended when building full object paths

In [4]:
# List the objects stored under <primary_folder>/data/feature_prep/.
# NOTE(review): list_objects_v2 returns a single page (at most 1000 keys per
# call); continuation tokens are not followed here, so a larger folder would
# be listed only partially.
response = client.list_objects_v2(
    Prefix=f'{primary_folder}/data/feature_prep/',
    Bucket=bucket,
)

In [5]:
# Build one full s3:// URI per listed object; a listing without 'Contents'
# (nothing under the prefix) yields an empty list.
paths = [
    f"{s3_prefix}{bucket}/{content['Key']}"
    for content in response.get('Contents', [])
]

In [6]:
# Show the collected S3 URIs as this cell's output.
paths

['s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_100D.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_100D_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10D.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10D_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10min.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10min_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_120min.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_120min_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_150D.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_150D_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_1

In [None]:
def calculate_macd(df, signal=14, ema_columns=None):
    """Add a smoothed fast-minus-slow EMA spread column for every pair of EMA columns.

    For each pair of names in ``ema_columns`` (the earlier entry is treated as
    the "fast" EMA, the later one as the "slow" EMA) the difference
    ``df[fast] - df[slow]`` is smoothed with
    ``ewm(span=signal, adjust=False).mean()`` and stored in a column named
    ``Signal_{fast}_{slow}_signal{signal}``. Only this signal line is stored;
    the raw difference and a histogram column are not added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``ema_columns``. It is modified
        in place.
    signal : int, default 14
        Span of the exponential smoothing applied to each spread.
    ema_columns : iterable of str, optional
        Column names to pair up. ``None`` (the default) or an empty iterable
        adds no columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with the new columns added.

    Raises
    ------
    KeyError
        If a name in ``ema_columns`` is not a column of ``df``.
    """
    # None instead of a shared mutable default; list() also lets callers pass
    # any iterable (tuple, Index, generator), not just a list.
    ema_columns = [] if ema_columns is None else list(ema_columns)

    for position, fast_ema in enumerate(ema_columns):
        # Pair each column only with the ones after it, so every unordered
        # pair is visited exactly once.
        for slow_ema in ema_columns[position + 1:]:
            signal_col_name = f'Signal_{fast_ema}_{slow_ema}_signal{signal}'
            spread = df[fast_ema] - df[slow_ema]
            df[signal_col_name] = spread.ewm(span=signal, adjust=False).mean()
    return df


def read_and_calculate_macd(path, signals):
    """Compute per-symbol signal-line columns for one parquet file and save them.

    Reads ``path``, keeps ``symbol`` plus every column whose name contains
    ``close_ema``, runs ``calculate_macd`` once per span in ``signals`` on each
    symbol's rows separately, and writes the combined rows to a sibling file
    whose name ends in ``_macd.parquet``.

    Parameters
    ----------
    path : str
        Location of the input parquet file, as a string.
    signals : list of int
        Smoothing spans, forwarded one at a time to ``calculate_macd``. It is
        iterated once per symbol, so it must be re-iterable.

    Returns
    -------
    None
        The result is written to storage; nothing is returned. If the file
        contains no symbol groups, nothing is written.
    """
    print(f'Reading from {path}')
    df = pd.read_parquet(path)
    fields = [z for z in df.columns if 'close_ema' in z]
    df = df[['symbol'] + fields]

    # Order the EMA columns by the first integer in their name. The order is
    # the same for every symbol, so it is computed once instead of per group.
    # NOTE(review): a 'close_ema' column with no digits in its name makes
    # re.search return None and raises AttributeError here.
    ema_columns = sorted(fields, key=lambda x: int(re.search(r'\d+', x).group()))

    results = []
    for symbol, group in df.groupby('symbol'):
        # Work on an explicit copy: the new columns are assigned in place and
        # the group is otherwise a slice of df.
        group = group.copy()
        for signal in signals:
            print(f'for signal = {signal}')
            group = calculate_macd(group, signal, ema_columns)
        results.append(group)

    if not results:
        # pd.concat([]) raises ValueError; skip the file rather than crash.
        print(f'No symbol groups found in {path}; nothing to save')
        return

    # Swap only a trailing '.parquet' for '_macd.parquet'. str.replace would
    # rewrite every occurrence in the path, and would leave a path without the
    # suffix unchanged, so the output would overwrite the input file.
    suffix = '.parquet'
    stem = path[:-len(suffix)] if path.endswith(suffix) else path
    loc = f'{stem}_macd{suffix}'
    print(f'Saving to {loc}')
    pd.concat(results).to_parquet(loc)


# Smoothing spans passed to read_and_calculate_macd. Short timeframes use the
# base set; long timeframes additionally get the 30 and 50 spans.
BASE_SIGNAL_SPANS = [11, 13, 17, 20, 26]
EXTENDED_SIGNAL_SPANS = BASE_SIGNAL_SPANS + [30, 50]

for path in tqdm(paths):
    # Last underscore-separated token of the file stem, e.g. '10min', '100D', 'rsi'.
    name = path.split('/')[-1].split('.')[0].split('_')[-1]
    if 'rsi' in name or 'macd' in name:
        # Skip *_rsi / *_macd files; the latter are what read_and_calculate_macd writes.
        continue

    timeframe = re.fullmatch(r'(\d+)(min|D)', name)
    if timeframe is None:
        # Without this guard `signal` would be undefined on the first file or
        # silently carried over from the previous one (e.g. for a folder
        # placeholder key or an unexpected file name).
        print(f'Skipping {path}: unrecognised timeframe {name!r}')
        continue

    length, unit = int(timeframe.group(1)), timeframe.group(2)
    if unit == 'min':
        # Minute bars: up to 30 minutes use the base spans.
        spans = BASE_SIGNAL_SPANS if length <= 30 else EXTENDED_SIGNAL_SPANS
    else:
        # Daily bars: below 100 days use the base spans.
        spans = BASE_SIGNAL_SPANS if length < 100 else EXTENDED_SIGNAL_SPANS
    signal = list(spans)  # fresh list per file, as before
    read_and_calculate_macd(path, signal)

  0%|          | 0/50 [00:00<?, ?it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_100D.parquet
here
(18, 8)
here
(18, 71)
here
(18, 7295)
