In [1]:
import pandas as pd
import numpy as np
import warnings
import boto3
from tqdm import tqdm
warnings.filterwarnings("ignore")

### data

In [2]:
# S3 client used below to list the feature-prep objects.
# No explicit credentials or region are passed here.
client = boto3.client('s3')

In [3]:
# Pieces used to build object keys and full s3:// URIs.
s3_prefix = 's3://'  # URI scheme prepended to "<bucket>/<key>"
bucket = 'sisyphus-general-bucket'  # bucket that is listed and read from
primary_folder = 'AthenaInsights'  # top-level key prefix inside the bucket

In [4]:
# List the objects stored under "<primary_folder>/data/feature_prep/".
# NOTE(review): this is a single list_objects_v2 call with no pagination;
# presumably fine while the prefix holds few objects -- confirm, and switch
# to a paginator if the listing can be truncated.
response = client.list_objects_v2(
    Bucket=bucket,
    Prefix=f'{primary_folder}/data/feature_prep/')

In [5]:
# Turn every listed key into a full s3:// URI; a response without
# 'Contents' yields an empty list.
paths = [
    f"{s3_prefix}{bucket}/{entry['Key']}"
    for entry in response.get('Contents', [])
]

In [6]:
# Display the object URIs collected above.
paths

['s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_100D.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10D.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10min.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10min_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_120min.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_120min_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_150D.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_15D.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_15min.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_15min_rsi.parquet',
 's3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_180min.

In [None]:
def calculate_rsi(data, window=14):
    """Compute the Relative Strength Index (RSI) of a series.

    Parameters
    ----------
    data : pandas.Series
        Values whose period-over-period changes are measured.
    window : int, default 14
        Look-back length. It sets the smoothing factor (alpha = 1/window)
        and the minimum number of observations before a value is emitted.

    Returns
    -------
    pandas.Series
        RSI values aligned to ``data``. The first ``window - 1`` entries
        are NaN because of ``min_periods``.
    """
    change = data.diff()

    # Upward moves keep positive changes, downward moves keep the magnitude
    # of negative changes; everything else (including the leading NaN) is 0.
    ups = change.where(change > 0, 0).fillna(0)
    downs = (-change.where(change < 0, 0)).fillna(0)

    def _smooth(moves):
        # Recursive (adjust=False) exponential average with alpha = 1/window.
        return moves.ewm(alpha=1/window, min_periods=window, adjust=False).mean()

    relative_strength = _smooth(ups) / _smooth(downs)
    return 100 - (100 / (1 + relative_strength))

def read_and_calculate_rsi(path, windows, fields):
    """Read a parquet file, add per-symbol RSI columns, and save the result.

    For every symbol and every (field, window) pair, an RSI series is
    computed with ``calculate_rsi`` and stored in a column named
    ``"<field>_<window>window_rsi"``. The result is written next to the
    input with ``_rsi`` inserted before the ``.parquet`` suffix.

    Parameters
    ----------
    path : str
        Location of the input parquet file. It must contain a ``symbol``
        column and every column named in ``fields``.
    windows : sequence of int
        RSI look-back lengths to compute for each field.
    fields : sequence of str
        Columns to compute RSI on; they are also carried into the output.

    Returns
    -------
    None
        The function is used for its side effect of writing the output file.

    Notes
    -----
    The saved frame contains only ``fields`` and the RSI columns, grouped
    in symbol order; the ``symbol`` column itself is not written.
    """
    fields = list(fields)  # accept any sequence, not only lists
    print(f'Reading from {path}')
    df = pd.read_parquet(path)[fields + ['symbol']]

    results = []
    for _, group in df.groupby('symbol'):
        rsi_columns = {
            f'{col}_{window}window_rsi': calculate_rsi(group[col], window)
            for col in fields
            for window in windows
        }
        # assign() works on a fresh copy, so the groupby slice is never
        # mutated (avoids SettingWithCopy issues).
        results.append(group[fields].assign(**rsi_columns))

    if results:
        # Concatenate all the grouped results back into a single DataFrame
        df_concat = pd.concat(results)
    else:
        # No symbol groups (e.g. empty input): emit an empty frame that
        # still carries the expected columns instead of failing in concat.
        empty_rsi = {
            f'{col}_{window}window_rsi': float('nan')
            for col in fields
            for window in windows
        }
        df_concat = df.iloc[:0][fields].assign(**empty_rsi)

    # Only rewrite a trailing ".parquet"; never let the output path equal
    # the input path (which would overwrite the source file).
    suffix = '.parquet'
    stem = path[:-len(suffix)] if path.endswith(suffix) else path
    loc = f'{stem}_rsi{suffix}'
    print(f'Saving to {loc}')
    df_concat.to_parquet(loc)

# RSI look-back windows per bar family; daily bars additionally use a
# 5-period window.
intraday_windows = [7, 9, 11, 14, 17, 21]
daily_windows = [5, 7, 9, 11, 14, 17, 21]

# Price / moving-average columns to compute RSI on, by bar size.
short_fields = ['close', 'close_ema_2m', 'close_sma_2m']
medium_fields = short_fields + ['close_ema_3m', 'close_sma_3m', 'close_ema_5m', 'close_sma_5m']
long_fields = ['close', 'close_ema_5m', 'close_sma_5m', 'close_ema_10m', 'close_sma_10m',
               'close_ema_20m', 'close_sma_20m', 'close_ema_50m', 'close_sma_50m']

for path in tqdm(paths):
    # Last "_"-separated token of the file stem, e.g. "10min", "100D" or "rsi".
    name = path.split('/')[-1].split('.')[0].split('_')[-1]

    if name.endswith('min') and name[:-len('min')].isdigit():
        minutes = int(name[:-len('min')])
        windows = intraday_windows
        if minutes <= 10:
            fields = short_fields
        elif minutes <= 30:
            fields = medium_fields
        else:
            fields = long_fields
    elif name.endswith('D') and name[:-len('D')].isdigit():
        # All daily bar sizes share the same configuration.
        windows = daily_windows
        fields = long_fields
    else:
        # Not a raw bar file (e.g. a previously written *_rsi.parquet
        # output): skip it rather than reusing the previous file's settings.
        continue

    read_and_calculate_rsi(path, windows, fields)

  0%|          | 0/34 [00:00<?, ?it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_100D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_100D_rsi.parquet


  3%|▎         | 1/34 [00:01<00:45,  1.39s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_10D_rsi.parquet


  6%|▌         | 2/34 [00:02<00:46,  1.46s/it]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_150D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_150D_rsi.parquet


 21%|██        | 7/34 [00:04<00:12,  2.16it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_15D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_15D_rsi.parquet


 24%|██▎       | 8/34 [00:05<00:16,  1.57it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_1D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_1D_rsi.parquet


 38%|███▊      | 13/34 [00:07<00:10,  2.05it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_200D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_200D_rsi.parquet


 44%|████▍     | 15/34 [00:08<00:09,  1.98it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_20D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_20D_rsi.parquet


 47%|████▋     | 16/34 [00:09<00:11,  1.55it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_2D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_2D_rsi.parquet


 68%|██████▊   | 23/34 [00:11<00:04,  2.61it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_30D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_30D_rsi.parquet


 74%|███████▎  | 25/34 [00:12<00:03,  2.29it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_3D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_3D_rsi.parquet


 82%|████████▏ | 28/34 [00:14<00:02,  2.25it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_50D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_50D_rsi.parquet


 88%|████████▊ | 30/34 [00:15<00:02,  1.97it/s]

Reading from s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_5D.parquet
Saving to s3://sisyphus-general-bucket/AthenaInsights/data/feature_prep/stock_bars_5D_rsi.parquet


100%|██████████| 34/34 [00:16<00:00,  2.00it/s]
