In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Processed per-ticker CSV exports. Each file is read with its first column
# as a date-parsed index and stored under the file name minus ".csv".
files = ["MSFT_clean.csv", "AAPL_clean.csv", "SPY_clean.csv"]

dfs = {}
for name in files:
    dfs[name.replace(".csv", "")] = pd.read_csv(
        f"../data/processed/{name}", index_col=0, parse_dates=True
    )

# Sanity check: report the dimensions and the leading rows of every frame.
for name, df in dfs.items():
    print(f"\n{name} dataset shape: {df.shape}", df.head(), sep="\n")


MSFT_clean dataset shape: (501, 6)
              open     high       low   close      volume symbol
2023-08-21  317.93  322.770  317.0400  321.88  24039956.0   MSFT
2023-08-22  325.50  326.075  321.4600  322.46  16102024.0   MSFT
2023-08-23  323.82  329.200  323.4600  327.00  21166382.0   MSFT
2023-08-24  332.85  332.980  319.9600  319.97  23281434.0   MSFT
2023-08-25  321.47  325.360  318.8001  322.98  21684104.0   MSFT

AAPL_clean dataset shape: (501, 6)
               open     high      low   close      volume symbol
2023-08-21  175.070  176.130  173.735  175.84  46311879.0   AAPL
2023-08-22  177.060  177.680  176.250  177.23  42084245.0   AAPL
2023-08-23  178.520  181.550  178.325  181.12  52722752.0   AAPL
2023-08-24  180.674  181.104  176.010  176.38  54945798.0   AAPL
2023-08-25  177.380  179.150  175.820  178.61  51449594.0   AAPL

SPY_clean dataset shape: (501, 6)
              open    high      low   close       volume symbol
2023-08-21  437.55  440.11  435.320  439.34   687

In [6]:
# Look-back length, in rows, shared by the rolling volatility and momentum
# features. Defined once so both features (and their column names) stay in sync.
FEATURE_WINDOW = 10


def add_engineered_features(df, window=FEATURE_WINDOW):
    """Add volatility, price/volume and momentum columns to ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing ``close`` and ``volume`` columns. The rolling and
        shift arithmetic is positional, so rows are assumed to be ordered
        oldest-first -- TODO confirm against the cleaning step.
    window : int, default ``FEATURE_WINDOW``
        Number of rows used for the rolling standard deviation and for the
        momentum look-back.

    Returns
    -------
    list of str
        Names of the columns written, in insertion order.
    """
    close = df["close"]
    # Lagged close is needed twice by the momentum formula; compute it once.
    lagged_close = close.shift(window)

    volatility_col = f"volatility_{window}d"
    momentum_col = f"momentum_{window}d"

    # 1. Rolling volatility: std of the closing price over `window` rows.
    #    The first `window - 1` rows are NaN (incomplete window).
    df[volatility_col] = close.rolling(window=window).std()

    # 2. Price-to-volume ratio.
    #    NOTE(review): a row with zero volume would produce inf here.
    df["price_volume_ratio"] = close / df["volume"]

    # 3. Momentum: relative change in close versus `window` rows earlier.
    #    The first `window` rows are NaN (no lagged value available).
    df[momentum_col] = (close - lagged_close) / lagged_close

    return [volatility_col, "price_volume_ratio", momentum_col]


# Engineer the features on every loaded frame and preview the result.
for name, df in dfs.items():
    feature_cols = add_engineered_features(df)
    print(f"\n{name} with new features:")
    print(df[["close", *feature_cols]].head(15))



MSFT_clean with new features:
             close  volatility_10d  price_volume_ratio  momentum_10d
2023-08-21  321.88             NaN            0.000013           NaN
2023-08-22  322.46             NaN            0.000020           NaN
2023-08-23  327.00             NaN            0.000015           NaN
2023-08-24  319.97             NaN            0.000014           NaN
2023-08-25  322.98             NaN            0.000015           NaN
2023-08-28  323.70             NaN            0.000022           NaN
2023-08-29  328.41             NaN            0.000017           NaN
2023-08-30  328.79             NaN            0.000022           NaN
2023-08-31  327.76             NaN            0.000012           NaN
2023-09-01  328.66        3.299890            0.000022           NaN
2023-09-05  333.55        3.999930            0.000018      0.036256
2023-09-06  332.88        4.230900            0.000019      0.032314
2023-09-07  329.91        4.302099            0.000018      0.008899
202

### Engineered Features

1. **Volatility (10-day)**  
   Rolling standard deviation of closing prices over a 10-row window (measured in price units, not returns).  
   - Captures short-term uncertainty and market risk.  

2. **Price-to-Volume Ratio**  
   Ratio of closing price to trading volume.  
   - Relates the closing price level (not the day's price change) to trading activity.  
   - Low values may indicate high liquidity, while high values may suggest thin trading.  

3. **Momentum (10-day)**  
   Relative change in closing price compared to 10 trading days (rows) earlier.  
   - Captures recent performance trends.  
