In [1]:
%load_ext autoreload
%autoreload 2

In [10]:
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from data_preprocessing.merge import df

In [11]:
feature_columns = [
    # Price/technical:
    'momentum_3m', 'momentum_6m', 'momentum_12m', 'volatility_12m',

    # Basic YTD fundamentals (optional—tree can split on scale):
    'revty', 'saley', 'capxy', 'oibdpy', 'rdipay', 'xsgay', 'txpdy', 'epsfxy', 'cshfdy', 'xoptepsy',

    # Engineered ratios:
    'EBIT_margin', 'R&D_intensity', 'SGA_intensity', 'Tax_rate', 'Capex_to_Revenue',

    # QoQ growth rates:
    'revty_QoQ_growth', 'oibdpy_QoQ_growth', 'rdipay_QoQ_growth', 'xsgay_QoQ_growth'
]

df = df.dropna(subset=feature_columns + ['y']).copy()

In [12]:
X = df[feature_columns]
y = df['y']

# Instead of a fixed 80/20 cutoff, build an expanding‐window cross‐validation
# but keep a final out‐of‐sample test set (last 20% of months).
n_obs = len(df)
cutpoint = int(n_obs * 0.8)

X_train = X.iloc[:cutpoint]
y_train = y.iloc[:cutpoint]

X_test  = X.iloc[cutpoint:]
y_test  = y.iloc[cutpoint:]

In [13]:
from models.rf_classifier import build_rf_pipeline

pipe, param_grid = build_rf_pipeline()

tscv = TimeSeriesSplit(n_splits=5)

grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=tscv,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=1
)

grid.fit(X_train, y_train)

print("Best parameters:", grid.best_params_)
best_model = grid.best_estimator_


Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameters: {'clf__max_depth': 7, 'clf__max_features': 'sqrt', 'clf__n_estimators': 100}


In [14]:
from utils import report

report(y_test, X_test, best_model, feature_columns)


Classification Report on Test Set:
              precision    recall  f1-score   support

           0       0.41      0.42      0.41       344
           1       0.62      0.61      0.61       531

    accuracy                           0.53       875
   macro avg       0.51      0.51      0.51       875
weighted avg       0.54      0.53      0.54       875

Test ROC AUC:  0.5098
Test PR AUC:   0.6283
Confusion Matrix (low values = better balance):
 [[143 201]
 [206 325]]

Top 10 Feature Importances:
momentum_12m         0.080030
Capex_to_Revenue     0.057543
volatility_12m       0.054945
momentum_6m          0.053983
momentum_3m          0.052132
oibdpy_QoQ_growth    0.050244
Tax_rate             0.047652
xsgay_QoQ_growth     0.046024
revty_QoQ_growth     0.044452
cshfdy               0.043844
dtype: float64


In [15]:
from models.xgb_classifier import build_xgb_pipeline

pipe, param_grid = build_xgb_pipeline()


grid = GridSearchCV(pipe, param_grid, cv=TimeSeriesSplit(n_splits=5), scoring='roc_auc', n_jobs=-1, verbose=1)
grid.fit(X_train, y_train)
best_model = grid.best_estimator_

Fitting 5 folds for each of 12 candidates, totalling 60 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


In [16]:
report(y_test, X_test, best_model, feature_columns)


Classification Report on Test Set:
              precision    recall  f1-score   support

           0       0.41      0.20      0.27       344
           1       0.61      0.82      0.70       531

    accuracy                           0.57       875
   macro avg       0.51      0.51      0.48       875
weighted avg       0.53      0.57      0.53       875

Test ROC AUC:  0.5141
Test PR AUC:   0.6358
Confusion Matrix (low values = better balance):
 [[ 68 276]
 [ 96 435]]

Top 10 Feature Importances:
oibdpy_QoQ_growth    0.074323
xsgay_QoQ_growth     0.059595
momentum_12m         0.054282
momentum_3m          0.052295
Tax_rate             0.050510
Capex_to_Revenue     0.049910
EBIT_margin          0.048552
oibdpy               0.048518
cshfdy               0.048081
xoptepsy             0.047111
dtype: float32


## DEEP LEARNING 

In [17]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from models.nn_models import EnhancedLSTM, LargeLSTM, SmallLSTM, StockTransformer, InceptionTime, train
from utils import eval_model

# Device
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [18]:
from utils import build_sequences, split_data, make_dataloaders, make_loss

# Scale features
scaler = StandardScaler()
df_scaled = df.copy()
df_scaled[feature_columns] = scaler.fit_transform(df[feature_columns])

# Group key
group_key = 'PERMNO' if 'PERMNO' in df_scaled.columns else 'CUSIP'

### "EnhancedLSTM"

In [None]:
# Build sequences with 24-month window
WINDOW     = 24
BATCH_SIZE = 32

X, y = build_sequences(
    df            = df_scaled,
    feature_columns = feature_columns,
    label_column  = 'y',
    group_key     = group_key,
    window        = WINDOW
)

# Split
splits = split_data(X, y, train_frac=0.8, val_frac=0.2)

# DataLoaders
dl_train, dl_val, dl_test = make_dataloaders(splits, BATCH_SIZE)

# Loss
(X_train, y_train), (X_val, y_val), (X_test, y_test) = splits
criterion = make_loss(splits[0][1])

In [None]:
model = EnhancedLSTM(in_dim=len(feature_columns)).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=3)

# Training 
train(model, dl_train, dl_val, optimizer, scheduler, criterion, device, patience=10)

eval_model(model, dl_test, device)

Epoch 01 | Loss 0.6839 | Val AUC 0.4226
Epoch 02 | Loss 0.6642 | Val AUC 0.4065
Epoch 03 | Loss 0.6548 | Val AUC 0.4071
Epoch 04 | Loss 0.6459 | Val AUC 0.4131
Epoch 05 | Loss 0.6326 | Val AUC 0.4214
Epoch 06 | Loss 0.6315 | Val AUC 0.4208
Epoch 07 | Loss 0.6214 | Val AUC 0.4202
Epoch 08 | Loss 0.6313 | Val AUC 0.4238
Epoch 09 | Loss 0.6321 | Val AUC 0.4220
Epoch 10 | Loss 0.6258 | Val AUC 0.4250
Epoch 11 | Loss 0.6130 | Val AUC 0.4268
Epoch 12 | Loss 0.6126 | Val AUC 0.4286
Epoch 13 | Loss 0.6111 | Val AUC 0.4292
Epoch 14 | Loss 0.6230 | Val AUC 0.4310
Epoch 15 | Loss 0.6214 | Val AUC 0.4298
Epoch 16 | Loss 0.6225 | Val AUC 0.4310
Epoch 17 | Loss 0.6152 | Val AUC 0.4286
Epoch 18 | Loss 0.6114 | Val AUC 0.4286
Epoch 19 | Loss 0.6050 | Val AUC 0.4268
Epoch 20 | Loss 0.6235 | Val AUC 0.4268
Epoch 21 | Loss 0.6089 | Val AUC 0.4268
Epoch 22 | Loss 0.6029 | Val AUC 0.4268
Epoch 23 | Loss 0.6147 | Val AUC 0.4268
Epoch 24 | Loss 0.6182 | Val AUC 0.4256
Epoch 25 | Loss 0.6091 | Val AUC 0.4256


In [19]:
WINDOW = 6
BATCH_SIZE = 32

X, y = build_sequences(
    df            = df_scaled,
    feature_columns = feature_columns,
    label_column  = 'y',
    group_key     = group_key,
    window        = WINDOW
)

# Split
splits = split_data(X, y, train_frac=0.8, val_frac=0.2)

# DataLoaders
dl_train, dl_val, dl_test = make_dataloaders(splits, BATCH_SIZE)

# Loss
(X_train, y_train), (X_val, y_val), (X_test, y_test) = splits
criterion = make_loss(splits[0][1])

In [22]:
from models.nn_models import SmallLSTM


model = SmallLSTM(len(feature_columns)).to(device)

pos_w = (y_train == 0).sum() / (y_train == 1).sum()
criterion = nn.BCEWithLogitsLoss(
    pos_weight=torch.tensor(pos_w, device=device, dtype=torch.float32)
)

model = EnhancedLSTM(in_dim=len(feature_columns)).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=3)


train(model, dl_train, dl_val, optimizer, scheduler, criterion, device, patience=10)

eval_model(model, dl_test, device)

Epoch 01 | Train Loss: 0.6145 | Val AUC: 0.4997
Epoch 02 | Train Loss: 0.5955 | Val AUC: 0.5188
Epoch 03 | Train Loss: 0.5921 | Val AUC: 0.4975
Epoch 04 | Train Loss: 0.5931 | Val AUC: 0.4527
Epoch 05 | Train Loss: 0.5907 | Val AUC: 0.5050
Epoch 06 | Train Loss: 0.5876 | Val AUC: 0.5045
Epoch 07 | Train Loss: 0.5842 | Val AUC: 0.4982
Epoch 08 | Train Loss: 0.5822 | Val AUC: 0.5015
Epoch 09 | Train Loss: 0.5798 | Val AUC: 0.4970
Epoch 10 | Train Loss: 0.5784 | Val AUC: 0.4949
Epoch 11 | Train Loss: 0.5753 | Val AUC: 0.4967
Epoch 12 | Train Loss: 0.5792 | Val AUC: 0.4971
Early stopping at epoch 12

Classification Report on Test Set:
              precision    recall  f1-score   support

         0.0     0.4483    0.9952    0.6181       209
         1.0     0.7500    0.0116    0.0228       259

    accuracy                         0.4509       468
   macro avg     0.5991    0.5034    0.3205       468
weighted avg     0.6153    0.4509    0.2887       468

Confusion Matrix (low values = bet

In [None]:
from models.nn_models import LargeLSTM


model = LargeLSTM(len(feature_columns)).to(device)

pos_w = (y_train == 0).sum() / (y_train == 1).sum()
criterion = nn.BCEWithLogitsLoss(
    pos_weight=torch.tensor(pos_w, device=device, dtype=torch.float32)
)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

train(model, dl_train, dl_val, optimizer, scheduler, criterion, device, patience=10)

eval_model(model, dl_test, device)


Epoch 01 | Train Loss: 0.5921 | Val AUC: 0.5333
Epoch 02 | Train Loss: 0.5862 | Val AUC: 0.5131
Epoch 03 | Train Loss: 0.5786 | Val AUC: 0.5611
Epoch 04 | Train Loss: 0.5694 | Val AUC: 0.5676
Epoch 05 | Train Loss: 0.5599 | Val AUC: 0.6160
Epoch 06 | Train Loss: 0.5518 | Val AUC: 0.5645
Epoch 07 | Train Loss: 0.5297 | Val AUC: 0.6170
Epoch 08 | Train Loss: 0.5077 | Val AUC: 0.5730
Epoch 09 | Train Loss: 0.4834 | Val AUC: 0.5968
Epoch 10 | Train Loss: 0.4509 | Val AUC: 0.5877
Epoch 11 | Train Loss: 0.4129 | Val AUC: 0.5959
Epoch 12 | Train Loss: 0.3653 | Val AUC: 0.5782
Epoch 13 | Train Loss: 0.3355 | Val AUC: 0.5860
Epoch 14 | Train Loss: 0.2971 | Val AUC: 0.5452
Epoch 15 | Train Loss: 0.2622 | Val AUC: 0.5521

Classification Report on Test Set:
              precision    recall  f1-score   support

         0.0     0.5200    0.4976    0.5086       209
         1.0     0.6082    0.6293    0.6186       259

    accuracy                         0.5705       468
   macro avg     0.5641   

In [None]:
from models.nn_models import StockTransformer


model = StockTransformer(len(feature_columns), window=WINDOW).to(device)

# Loss & optimizer (with class weighting)
pos_w = (y_train == 0).sum() / (y_train == 1).sum()
criterion = nn.BCEWithLogitsLoss(
    pos_weight=torch.tensor(pos_w, device=device, dtype=torch.float32)
)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.5)


train(model, dl_train, dl_val, optimizer, scheduler, criterion, device, patience=10)

eval_model(model, dl_test, device)

Epoch 01 | Train Loss: 0.5960 | Val AUC: 0.5742
Epoch 02 | Train Loss: 0.5952 | Val AUC: 0.5517
Epoch 03 | Train Loss: 0.5922 | Val AUC: 0.5405
Epoch 04 | Train Loss: 0.5898 | Val AUC: 0.5289
Epoch 05 | Train Loss: 0.5879 | Val AUC: 0.5586
Epoch 06 | Train Loss: 0.5839 | Val AUC: 0.5559
Early stopping at epoch 6

Classification Report on Test Set:
              precision    recall  f1-score   support

         0.0     0.5257    0.4402    0.4792       209
         1.0     0.6007    0.6795    0.6377       259

    accuracy                         0.5726       468
   macro avg     0.5632    0.5599    0.5584       468
weighted avg     0.5672    0.5726    0.5669       468

Confusion Matrix (low values = better balance):
[[ 92 117]
 [ 83 176]]

Test ROC AUC:  0.6066
Test PR AUC:   0.6635
Test Accuracy: 0.5726


In [None]:
from models.nn_models import InceptionModule, InceptionTime

model = InceptionTime(
    in_dim=len(feature_columns),
    num_blocks=3,
    out_channels=32,
    kernel_sizes=[3,5,7],
    bottleneck_channels=32,
    use_residual=True,
    dropout=0.2
).to(device)


pos_w = (y_train == 0).sum() / (y_train == 1).sum()
criterion = nn.BCEWithLogitsLoss(
    pos_weight=torch.tensor(pos_w, device=device, dtype=torch.float32)
)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.5)


train(model, dl_train, dl_val, optimizer, scheduler, criterion, device, patience=10)

eval_model(model, dl_test, device)

Epoch 01 | Train Loss: 0.6048 | Val AUC: 0.5436
Epoch 02 | Train Loss: 0.5881 | Val AUC: 0.5549
Epoch 03 | Train Loss: 0.5838 | Val AUC: 0.5428
Epoch 04 | Train Loss: 0.5742 | Val AUC: 0.5495
Epoch 05 | Train Loss: 0.5639 | Val AUC: 0.5139
Epoch 06 | Train Loss: 0.5574 | Val AUC: 0.5351
Epoch 07 | Train Loss: 0.5401 | Val AUC: 0.5579
Epoch 08 | Train Loss: 0.5344 | Val AUC: 0.5609
Epoch 09 | Train Loss: 0.5138 | Val AUC: 0.5491
Epoch 10 | Train Loss: 0.5203 | Val AUC: 0.5471
Epoch 11 | Train Loss: 0.5092 | Val AUC: 0.5371
Epoch 12 | Train Loss: 0.5030 | Val AUC: 0.5414
Epoch 13 | Train Loss: 0.4758 | Val AUC: 0.5353
Early stopping at epoch 13

Classification Report on Test Set:
              precision    recall  f1-score   support

         0.0     0.5568    0.4689    0.5091       209
         1.0     0.6199    0.6988    0.6570       259

    accuracy                         0.5962       468
   macro avg     0.5883    0.5839    0.5830       468
weighted avg     0.5917    0.5962    0.59