In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import talib

ModuleNotFoundError: No module named 'talib'

In [None]:
# Data locations: the generated feature files are written to the same folder
# the processed market files are read from, so both names share one Path.
PROCESSED_DATA_DIR = FEATURES_DATA_DIR = Path("../data/processed")

In [None]:
# Helper function for features
def add_technical_indicators(df):
    """Return a date-sorted copy of ``df`` with technical-indicator columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data containing at least the columns ``Date``, ``High``,
        ``Low`` and ``Close``. The caller's frame is not modified; all new
        columns are written to the sorted copy.

    Returns
    -------
    pandas.DataFrame
        The input rows sorted by ``Date`` with a fresh 0..n-1 index, plus the
        columns ``Return``, ``Volatility_5d``, ``Volatility_21d``, ``RSI_14``,
        ``MACD``, ``MACD_signal``, ``MACD_hist``, ``ATR_14``, ``SMA_20``,
        ``SMA_50`` and ``SMA_200`` (in that order). Rolling-window columns are
        NaN until their window is full.

    Raises
    ------
    KeyError
        If any required column is absent. The message lists every missing
        column at once instead of failing on the first one encountered.
    """
    # Fail fast with a complete diagnosis rather than part-way through the
    # computation on whichever column happens to be accessed first.
    required = ("Date", "High", "Low", "Close")
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(
            f"Missing required column(s): {missing}; found {list(df.columns)}"
        )

    # Sort by date just in case; sort_values returns a new frame, so the
    # assignments below never touch the caller's object.
    # NOTE(review): if "Date" holds strings this is a lexical sort, which is
    # only chronological for ISO-style dates -- confirm the input format.
    df = df.sort_values("Date").reset_index(drop=True)

    # TA-Lib operates on double-precision data; cast once so integer-typed
    # price columns do not depend on the wrapper's own conversion.
    close = df["Close"].astype("float64")
    high = df["High"].astype("float64")
    low = df["Low"].astype("float64")

    # Daily returns
    df["Return"] = close.pct_change()

    # Historical volatility: rolling std of returns scaled by sqrt(252)
    # (252 is presumably the number of trading days per year).
    annualization = np.sqrt(252)
    df["Volatility_5d"] = df["Return"].rolling(window=5).std() * annualization
    df["Volatility_21d"] = df["Return"].rolling(window=21).std() * annualization

    # RSI (Relative Strength Index)
    df["RSI_14"] = talib.RSI(close, timeperiod=14)

    # MACD line, its signal line and their difference (histogram)
    macd, macd_signal, macd_hist = talib.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9
    )
    df["MACD"] = macd
    df["MACD_signal"] = macd_signal
    df["MACD_hist"] = macd_hist

    # ATR (Average True Range)
    df["ATR_14"] = talib.ATR(high, low, close, timeperiod=14)

    # Simple moving averages of the close
    df["SMA_20"] = close.rolling(window=20).mean()
    df["SMA_50"] = close.rolling(window=50).mean()
    df["SMA_200"] = close.rolling(window=200).mean()

    return df

In [None]:
# Build a features CSV for every market dataset listed here
market_files = [
    "AAPL_market.csv",
    "IBEX35_market.csv",
    "MSFT_market.csv",
    "NASDAQ_market.csv",
    "S&P500_market.csv",
]

for file in market_files:
    file_path = PROCESSED_DATA_DIR / file

    # Guard clause: report a missing input and move on to the next dataset
    if not file_path.exists():
        print(f"File not found: {file}")
        continue

    print(f"Processing features for {file}...")

    # Load the data and normalise the headers in one go: surrounding
    # whitespace is stripped and only the first letter is kept upper-case
    df = pd.read_csv(file_path).rename(
        columns=lambda col: col.strip().capitalize()
    )

    # Compute the indicators, then discard every row that still contains a
    # NaN (this removes the warm-up rows of the rolling calculations)
    df_features = add_technical_indicators(df).dropna()

    # Write the result next to the inputs, swapping the file-name suffix
    output_path = FEATURES_DATA_DIR / file.replace("_market.csv", "_features.csv")
    df_features.to_csv(output_path, index=False)
    print(f"Saved features to {output_path}")

In [None]:
# Sanity check: reload one generated feature file and show its last rows
sample_df = pd.read_csv(
    FEATURES_DATA_DIR / "AAPL_features.csv",
)
sample_df.tail(n=5)