# 03_signal_and_strategy_design

**Goal:** Turn features into trading signals and positions:

- Pure TA strategy (EMA5 + Stochastic + Candlestick confirmation)
- Pure Fundamentals strategy (rank + rebalance)
- Hybrid strategy (fundamentals for selection, TA for timing)

Outputs are saved to `data/processed/signals.parquet`.

### Setup

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Widen pandas' display limits so the wide feature table prints without truncation.
pd.set_option("display.max_columns", 250)
pd.set_option("display.width", 150)

# The repository root is taken to be the parent of the current working directory.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
DATA_PROCESSED = os.path.join(REPO_ROOT, "data", "processed")

features_path = os.path.join(DATA_PROCESSED, "features.parquet")
# Raise explicitly rather than `assert`: assertions are skipped under `python -O`,
# which would hide this hint about the missing upstream step.
if not os.path.exists(features_path):
    raise FileNotFoundError(f"Missing {features_path}. Run Notebook 02 first.")

df = pd.read_parquet(features_path)
df["date"] = pd.to_datetime(df["date"])
# Sort by ticker, then date, so the per-ticker shifts and the row-by-row position
# tracking further down see each ticker's rows in chronological order.
df = df.sort_values(["ticker","date"]).reset_index(drop=True)

df.head()


### Convenience helpers

In [None]:
def clip01(x):
    """Return 1 where ``x`` is strictly positive and 0 elsewhere.

    The comparison ``x > 0`` is evaluated element-wise and the result is
    assembled with ``np.where``, so the output is a NumPy array of ones and
    zeros.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def shift_by_ticker(df, col, n=1):
    """Shift column ``col`` by ``n`` rows independently within each ticker.

    Rows are grouped on the ``"ticker"`` column, so a shifted value never
    comes from a different ticker; positions with no source row become NaN.
    The result is aligned to ``df``'s index.
    """
    per_ticker = df.groupby("ticker")
    return per_ticker[col].shift(n)

def safe_div(a, b):
    """Divide ``a`` by ``b`` with zero denominators treated as missing.

    Zeros in ``b`` are replaced by NaN before dividing, so those positions
    yield NaN instead of +/-inf. ``b`` must provide a ``.replace`` method
    (e.g. a pandas Series); the object passed in by the caller is left
    unchanged.
    """
    denominator = b.replace(0, np.nan)
    return a / denominator

def ensure_cols(df, cols):
    """Validate that every name in ``cols`` is a column of ``df``.

    Returns None when all requested columns are present.

    Raises:
        ValueError: if any are absent; the message lists the missing names
            in the order they appear in ``cols``.
    """
    available = df.columns
    missing = [name for name in cols if name not in available]
    if not missing:
        return
    raise ValueError(f"Missing columns: {missing}")


### Strategy 1 — Pure Technical (EMA5 + Stochastic + Candles)

Idea (simple but legit):
- Regime filter: price above EMA20 (uptrend)
- Momentum trigger: stochastic crosses up from oversold zone
- Confirmation: bullish candle pattern (hammer or bullish engulfing); optional, toggled by `USE_CANDLE_CONFIRM` (enabled by default)

This produces a daily long-only signal.

In [None]:
# Every column the technical rules read must already exist in the feature table.
ensure_cols(
    df,
    [
        "adj_close", "ema5", "ema20",
        "stoch_k_14", "stoch_d_14_3",
        "is_hammer", "is_bull_engulf",
        "regime_up_ema20",
    ],
)

# Current stochastic values and the previous row's values for the same ticker.
k, d = df["stoch_k_14"], df["stoch_d_14_3"]
k_prev = shift_by_ticker(df, "stoch_k_14", 1)
d_prev = shift_by_ticker(df, "stoch_d_14_3", 1)

# Bullish cross: %K was below %D on the previous row and is above it now.
stoch_cross_up = (k > d) & (k_prev < d_prev)
# The cross has to start in the oversold zone (previous %K under 20).
oversold = k_prev < 20
trend_ok = df["regime_up_ema20"] == 1

# Either candlestick pattern counts as bullish confirmation.
bull_candle = (df["is_hammer"] == 1) | (df["is_bull_engulf"] == 1)

# Entry = uptrend regime + stochastic cross-up out of oversold. With
# USE_CANDLE_CONFIRM set to True a bullish candle on the same row is required
# as well, which makes entries stricter.
USE_CANDLE_CONFIRM = True
entry_mask = trend_ok & stoch_cross_up & oversold
if USE_CANDLE_CONFIRM:
    entry_mask = entry_mask & bull_candle
df["sig_ta_entry"] = entry_mask.astype(int)

# Exit when %K is in the overbought zone (above 80) or the adjusted close is
# below EMA20.
# NOTE(review): this fires on every overbought row, not only when %K falls back
# out of the zone — confirm which behaviour is intended.
overbought = k > 80
trend_break = df["adj_close"] < df["ema20"]
df["sig_ta_exit"] = (overbought | trend_break).astype(int)

df[["date","ticker","sig_ta_entry","sig_ta_exit"]].head(30)


### Convert TA entries/exits into positions

We’ll build a long-only position: 1 = in position, 0 = out.

In [None]:
def build_position_from_entries_exits(df, entry_col, exit_col):
    """Turn entry/exit flags into a long-only position series (1 = in, 0 = out).

    Each ticker is tracked independently as a two-state machine, walking its
    rows in the order they appear in ``df`` (the caller is expected to have
    put each ticker's rows in chronological order):

    * flat and ``entry_col == 1``  -> long from that row on;
    * long and ``exit_col == 1``   -> flat from that row on;
    * otherwise the previous state carries over.

    Entry is only evaluated while flat and exit only while long, so a row with
    both flags set opens a position when flat and closes it when long.

    Args:
        df: Frame with a ``"ticker"`` column plus the two flag columns.
        entry_col: Name of the column holding entry flags (1 = enter).
        exit_col: Name of the column holding exit flags (1 = exit).

    Returns:
        Integer Series of 0/1 values aligned row-for-row with ``df`` (same
        index). Rows whose ticker is missing stay at 0.
    """
    # Integer code per ticker; missing tickers are coded as -1.
    codes, uniques = pd.factorize(df["ticker"])
    entries = df[entry_col].to_numpy()
    exits = df[exit_col].to_numpy()

    # State is kept per ticker and results are written by row position, so the
    # output lines up with df even when tickers are interleaved or df is not
    # sorted by ticker (collecting results in groupby order would not).
    state = [0] * len(uniques)
    positions = np.zeros(len(df), dtype="int64")
    for i, (code, is_entry, is_exit) in enumerate(zip(codes, entries, exits)):
        if code < 0:
            continue
        in_pos = state[code]
        if in_pos == 0 and is_entry == 1:
            in_pos = 1
        elif in_pos == 1 and is_exit == 1:
            in_pos = 0
        state[code] = in_pos
        positions[i] = in_pos
    return pd.Series(positions, index=df.index)

# Run the entry/exit state machine over the TA signals and store the resulting
# 0/1 position next to them.
ta_position = build_position_from_entries_exits(df, "sig_ta_entry", "sig_ta_exit")
df["pos_ta"] = ta_position
df[["date","ticker","sig_ta_entry","sig_ta_exit","pos_ta"]].head(40)


### Strategy 2 — Pure Fundamentals (Rank + rebalance)

Idea:
- Monthly rebalance into Top N by fundamental_score
- Equal weight across selected tickers

This produces a portfolio membership signal (1 = selected).

In [None]:
ensure_cols(df, ["fundamental_score"])

# Flag rows that carry a fundamental score; rows without one cannot be ranked.
df["has_fund"] = df["fundamental_score"].notna().astype(int)

# Calendar month of each row; this is the rebalance bucket.
df["month"] = df["date"].dt.to_period("M")

# One snapshot per (ticker, month): unscored rows are filtered out and the rest
# sorted by date, so `.first()` yields the score from the earliest scored date
# of that month. (`.first()` takes the first non-null value column by column,
# so the other columns of this frame are not guaranteed to come from one row.)
first_of_month = (
    df[df["has_fund"] == 1]
    .sort_values(["ticker","date"])
    .groupby(["ticker","month"], as_index=False)
    .first()
)

# Cross-sectional rank within each month, best score = rank 1. Ties are broken
# by row order (method="first").
first_of_month["fund_rank"] = first_of_month.groupby("month")["fundamental_score"].rank(ascending=False, method="first")

TOP_N = 10
first_of_month["sig_fund_select"] = (first_of_month["fund_rank"] <= TOP_N).astype(int)

# Broadcast the monthly decision onto every daily row of the same (ticker, month).
# A `sig_fund_select` column left over from an earlier run of this cell is
# dropped first; otherwise the merge would emit `_x`/`_y` suffixed columns and
# the lookup below would raise KeyError. `validate` asserts that the monthly
# table has a single row per key, so the merge cannot duplicate daily rows.
# NOTE(review): a ticker whose first score of the month appears mid-month is
# flagged for the days before that score too — confirm this is acceptable.
df = df.drop(columns=["sig_fund_select"], errors="ignore").merge(
    first_of_month[["ticker","month","sig_fund_select"]],
    on=["ticker","month"],
    how="left",
    validate="many_to_one",
)

# (ticker, month) pairs with no scored row were never ranked: not selected.
df["sig_fund_select"] = df["sig_fund_select"].fillna(0).astype(int)

df[["date","ticker","month","fundamental_score","sig_fund_select"]].head(25)


### Convert fund selection into daily “position weights”

For now: equal weight among selected names each day.

In [None]:
# Equal-weight among selected tickers per day
selected_count = df.groupby("date")["sig_fund_select"].transform("sum").replace(0, np.nan)
df["w_fund"] = np.where(df["sig_fund_select"] == 1, 1.0 / selected_count, 0.0)
df["w_fund"] = df["w_fund"].fillna(0.0)

df[["date","ticker","sig_fund_select","w_fund"]].head(30)


### Strategy 3 — Hybrid (Fundamentals select, TA times entries/exits)

Idea:

- Only consider tickers selected by fundamentals (top N)
- Within those, enter/exit using TA position pos_ta
- Weight equal among “fundamental-selected AND TA-in-position”

In [None]:
# Hybrid membership = selected fundamentally AND currently in TA position
df["sig_hybrid_member"] = ((df["sig_fund_select"] == 1) & (df["pos_ta"] == 1)).astype(int)

hyb_count = df.groupby("date")["sig_hybrid_member"].transform("sum").replace(0, np.nan)
df["w_hybrid"] = np.where(df["sig_hybrid_member"] == 1, 1.0 / hyb_count, 0.0)
df["w_hybrid"] = df["w_hybrid"].fillna(0.0)

df[["date","ticker","sig_fund_select","pos_ta","sig_hybrid_member","w_hybrid"]].head(40)


### Quick sanity checks

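Check that each day's weights sum to 1 when at least one ticker is selected (fully invested) and to 0 otherwise; the total should never exceed 1 (up to floating-point rounding).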

In [None]:
# Total weight held per date for each strategy, followed by summary statistics
# of those daily totals.
weight_cols = ["w_fund","w_hybrid"]
daily = df.groupby("date")[weight_cols].sum()
daily.describe()

Also inspect a few rows where hybrid is active:

In [None]:
# The last five dates on which the hybrid book holds any weight.
active_days = daily[daily["w_hybrid"] > 0].tail(5).index

# Rows with a non-zero hybrid weight on those dates, ordered by date and then
# by descending weight (at most 50 rows shown).
hybrid_rows = df[df["date"].isin(active_days) & (df["w_hybrid"] > 0)]
inspect_cols = ["date","ticker","w_hybrid","fundamental_score","stoch_k_14","stoch_d_14_3","pos_ta"]
hybrid_rows[inspect_cols].sort_values(["date","w_hybrid"], ascending=[True, False]).head(50)


### Save signals/weights for backtesting

In [None]:
# Columns handed to the backtester: raw prices, TA features, candle flags,
# fundamental scores, and the signals/weights built in this notebook.
out_cols = [
    "date","ticker","adj_close","open","high","low","close","volume",
    "ema5","ema20","stoch_k_14","stoch_d_14_3",
    "is_doji","is_hammer","is_bull_engulf","is_bear_engulf",
    "fundamental_score","value_score","quality_score","growth_score","income_score",
    "sig_ta_entry","sig_ta_exit","pos_ta",
    "sig_fund_select","w_fund",
    "sig_hybrid_member","w_hybrid"
]

# Several of these columns are never checked earlier in the notebook; validate
# them the same way the other cells do, before selecting and writing.
ensure_cols(df, out_cols)

signals_path = os.path.join(DATA_PROCESSED, "signals.parquet")
df[out_cols].to_parquet(signals_path, index=False)

print("Saved:", signals_path)
