In [1]:
from pathlib import Path
import pandas as pd

import sys, os
sys.path.append(os.path.abspath("..")) 
from src.features import add_technical_features, select_model_dataset

# Directory holding the processed price files, relative to this notebook.
PROC = Path("../data/processed")

# Load the most recently modified preprocessed prices file.
# max(..., default=None) picks the newest file without sorting the whole list
# and lets us fail with a clear message (instead of a bare IndexError) when
# the preprocessing step has not produced any file yet.
prices_path = max(
    PROC.glob("prices_preprocessed_*.csv"),
    key=lambda p: p.stat().st_mtime,
    default=None,
)
if prices_path is None:
    raise FileNotFoundError(
        f"No 'prices_preprocessed_*.csv' file found in {PROC.resolve()}; "
        "run the preprocessing step first."
    )
df = pd.read_csv(prices_path)

# Add features
df_f = add_technical_features(df)

# Save a full features file (keeps NaNs at the start of windows)
full_out = PROC / "prices_with_tech_features_full.csv"
df_f.to_csv(full_out, index=False)
print("Saved:", full_out)
df_f.head(30)

Saved: ../data/processed/prices_with_tech_features_full.csv


Unnamed: 0,date,open,high,low,close,volume,Dividends,Stock Splits,daily_range,gap,...,gap_pct,daily_range_pct,ma_5,ma_20,ma_ratio_5_20,ret_vol_10,volume_z20,rsi_14,macd,macd_signal
0,2024-08-16 04:00:00+00:00,222.8827,225.779224,222.613947,225.002838,44340200,0.0,0.0,3.165277,,...,,0.014068,,,,,,,0.0,0.0
1,2024-08-19 04:00:00+00:00,224.674371,224.943125,222.006778,224.843582,40687800,0.0,0.0,2.936347,-0.001459833,...,-0.001459833,0.01306,,,,,,,-0.012704,-0.002541
2,2024-08-20 04:00:00+00:00,224.724146,226.117655,224.405621,225.460709,30299000,0.0,0.0,1.712033,-0.0005311952,...,-0.0005311952,0.007593,,,,,,,0.026717,0.003311
3,2024-08-21 04:00:00+00:00,225.470651,226.923879,224.007459,225.351196,34765500,0.0,0.0,2.916419,4.409681e-05,...,4.409681e-05,0.012942,,,,,,,0.048561,0.012361
4,2024-08-22 04:00:00+00:00,226.734776,227.282232,222.862797,223.489883,43695300,0.0,0.0,4.419435,0.006139661,...,0.006139661,0.019775,224.829642,,,,,,-0.083358,-0.006783
5,2024-08-23 04:00:00+00:00,224.614643,227.162781,223.290802,225.789169,38677300,0.0,0.0,3.871979,0.005032709,...,0.005032709,0.017149,224.986908,,,,,,-0.002345,-0.005895
6,2024-08-26 04:00:00+00:00,225.709526,226.227122,222.852826,226.127579,30602200,0.0,0.0,3.374295,-0.0003527321,...,-0.0003527321,0.014922,225.243707,,,,,,0.088149,0.012914
7,2024-08-27 04:00:00+00:00,224.953053,227.789857,223.848195,226.973648,35934600,0.0,0.0,3.941662,-0.005194083,...,-0.005194083,0.017366,225.546295,,,,,,0.225538,0.055438
8,2024-08-28 04:00:00+00:00,226.864149,228.795164,224.63452,225.440781,38052200,0.0,0.0,4.160644,-0.0004824316,...,-0.0004824316,0.018456,225.564212,,,,,,0.208328,0.086016
9,2024-08-29 04:00:00+00:00,229.034086,231.841015,227.819737,228.72551,51906300,0.0,0.0,4.021278,0.01593902,...,0.01593902,0.017581,226.611337,,,,,,0.4545,0.159713


In [2]:
# Build the model-ready table from the full feature frame and persist it
# next to the other processed files (the selection step drops NaNs).
df_model = select_model_dataset(df_f)
model_out = PROC.joinpath("prices_with_tech_features_model.csv")
df_model.to_csv(path_or_buf=model_out, index=False)
print("Saved:", model_out)

# Preview the first rows of the model-ready table.
df_model.head(30)

Saved: ../data/processed/prices_with_tech_features_model.csv


Unnamed: 0,date,ret_1d,ret_1d_z,gap_pct,daily_range_pct,ma_ratio_5_20,ret_vol_10,volume_z20,rsi_14,macd,macd_signal
0,2024-09-13 04:00:00+00:00,-0.121196,-0.07569,0.003636,0.009573,-0.012617,1.039324,-0.690057,41.810465,-1.172826,-0.867443
1,2024-09-16 04:00:00+00:00,-2.777533,-1.390166,-0.026787,0.015255,-0.014569,1.298852,1.623123,29.133306,-1.623287,-1.018612
2,2024-09-17 04:00:00+00:00,0.217262,0.091795,-0.002635,0.011071,-0.015533,1.06762,0.165649,30.850433,-1.920395,-1.198968
3,2024-09-18 04:00:00+00:00,1.798988,0.874505,0.003506,0.023427,-0.016014,1.223036,1.492577,43.158671,-1.821615,-1.323498
4,2024-09-19 04:00:00+00:00,3.706556,1.818456,0.019484,0.022677,-0.0111,1.693632,1.888495,59.458334,-1.073952,-1.273588
5,2024-09-20 04:00:00+00:00,-0.292745,-0.16058,0.004806,0.02397,-0.006815,1.67154,4.187023,57.991477,-0.529137,-1.124698
6,2024-09-23 04:00:00+00:00,-0.758107,-0.390862,-0.003769,0.016073,0.002343,1.706254,-0.120178,54.268618,-0.233624,-0.946483
7,2024-09-24 04:00:00+00:00,0.397403,0.180937,0.009626,0.015921,0.01176,1.692236,-0.304547,55.856292,0.072028,-0.742781
8,2024-09-25 04:00:00+00:00,-0.439809,-0.233354,-0.010731,0.014445,0.017216,1.68156,-0.330856,53.628457,0.231276,-0.54797
9,2024-09-26 04:00:00+00:00,0.508016,0.235673,0.004108,0.013581,0.015774,1.683915,-0.422462,55.81122,0.44472,-0.349432
