In [1]:
# optimized_cnn_lstm_triplebarrier_pipeline.py
# Run in a Python env with torch + ta + sklearn + pandas + numpy

import os
import time
import math
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split, StratifiedKFold
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler


In [2]:
# ---------------------------
# 0) Device
# ---------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cpu


In [3]:
# ---------------------------
# 1) Feature engineering
# ---------------------------
import ta  # pip install ta
import talib  # optional but used earlier; if missing remove DEMA or use pandas ewm

def add_features(df):
    df = df.copy()
    # ensure columns lower-case/consistent
    # expect df to contain 'open','high','low','close','volume' (case-insensitive)
    df.columns = [c.lower() for c in df.columns]

    # basic indicators (your current set)
    df['rsi'] = ta.momentum.RSIIndicator(df['close'], window=14).rsi()
    df['ema12'] = df['close'].ewm(span=12, adjust=False).mean()
    df['ema26'] = df['close'].ewm(span=26, adjust=False).mean()
    df['macd'] = df['ema12'] - df['ema26']
    df['signal'] = df['macd'].ewm(span=9, adjust=False).mean()
    df['histogram'] = df['macd'] - df['signal']

    # DEMA (if talib available)
    try:
        df['dema9'] = talib.DEMA(df['close'].values, timeperiod=9)
    except Exception:
        df['dema9'] = df['close'].ewm(span=9).mean()  # fallback

    df['sma3'] = ta.trend.sma_indicator(df['close'], window=3)

    # TSI
    def compute_tsi(close, r1=25, r2=13):
        delta = close.diff()
        ema1 = delta.ewm(span=r1, adjust=False).mean()
        ema2 = ema1.ewm(span=r2, adjust=False).mean()
        abs_delta = delta.abs()
        abs_ema1 = abs_delta.ewm(span=r1, adjust=False).mean()
        abs_ema2 = abs_ema1.ewm(span=r2, adjust=False).mean()
        tsi = 100 * (ema2 / (abs_ema2.replace(0, np.nan)))
        return tsi.fillna(0)
    df['tsi'] = compute_tsi(df['close'])

    # Stochastic %K and %D
    period = 14; smooth_k = 3; smooth_d = 3
    lowest_low = df['low'].rolling(period).min()
    highest_high = df['high'].rolling(period).max()
    df['%k'] = 100 * (df['close'] - lowest_low) / (highest_high - lowest_low + 1e-8)
    df['%k'] = df['%k'].rolling(smooth_k).mean()
    df['%d'] = df['%k'].rolling(smooth_d).mean()

    # --- NEW HIGH-IMPACT features ---
    # ATR (volatility)
    df['atr'] = ta.volatility.AverageTrueRange(df['high'], df['low'], df['close'], window=14).average_true_range()
    # On-Balance Volume
    df['obv'] = ta.volume.OnBalanceVolumeIndicator(df['close'], df['volume']).on_balance_volume()
    # ADX (trend strength)
    df['adx'] = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=14).adx()
    # CCI
    df['cci'] = ta.trend.cci(df['high'], df['low'], df['close'], window=20)

    # Price-derived features
    df['return_1'] = df['close'].pct_change()
    df['roll_mean_5'] = df['close'].rolling(5).mean()
    df['roll_std_5'] = df['close'].rolling(5).std()

    # drop rows with NaNs resulting from indicators
    df = df.dropna().reset_index(drop=True)
    return df

In [11]:
import pandas as pd
import numpy as np

def label_reversals(df, window=5, atr_period=14, atr_mult=1.5):
    """
    Label reversal points using Swing High/Low + ATR filter.
    
    Parameters
    ----------
    df : pd.DataFrame
        Must contain ['High', 'Low', 'Close'] columns.
    window : int
        Number of candles on each side for swing detection.
    atr_period : int
        Period for ATR calculation.
    atr_mult : float
        Minimum price movement (in multiples of ATR) to qualify as a true reversal.
    
    Returns
    -------
    df : pd.DataFrame
        Original dataframe with a new column 'Label':
          0 = Top reversal
          1 = No reversal
          2 = Bottom reversal
    """

    df = df.copy().reset_index(drop=True)  # âœ… Ensure numeric index

    # --- Calculate ATR (Average True Range) ---
    high_low = df['high'] - df['low']
    high_close = np.abs(df['high'] - df['close'].shift())
    low_close = np.abs(df['low'] - df['close'].shift())
    true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
    df['ATR'] = true_range.rolling(atr_period).mean()

    # --- Detect potential swing highs/lows ---
    df['swing_high'] = df['high'][(df['high'] == df['high'].rolling(window*2+1, center=True).max())]
    df['swing_low']  = df['low'][(df['low'] == df['low'].rolling(window*2+1, center=True).min())]

    # --- Filter out minor reversals using ATR threshold ---
    df['label'] = 1  # default: no reversal

    for i in range(window, len(df) - window):
        atr = df.loc[i, 'ATR']
        if np.isnan(atr):
            continue

        # Check for swing high
        if not np.isnan(df.loc[i, 'swing_high']):
            # Price must have fallen more than atr_mult * ATR afterwards
            future_min = df['low'].iloc[i:i+window].min()
            if df.loc[i, 'high'] - future_min > atr_mult * atr:
                df.loc[i, 'label'] = 0  # Top reversal

        # Check for swing low
        if not np.isnan(df.loc[i, 'swing_low']):
            # Price must have risen more than atr_mult * ATR afterwards
            future_max = df['high'].iloc[i:i+window].max()
            if future_max - df.loc[i, 'low'] > atr_mult * atr:
                df.loc[i, 'label'] = 2  # Bottom reversal

    # --- Clean up temporary columns ---
    df.drop(['swing_high', 'swing_low'], axis=1, inplace=True)
    
    return df


In [5]:
# ---------------------------
# 3) Create sequences (sliding windows)
# ---------------------------
def create_sequences_from_df(df, feature_names, window_size=10):
    X_list = []
    y_list = []
    arr = df[feature_names].values
    labels = df['label'].values
    for i in range(window_size, len(df)):
        X_list.append(arr[i-window_size:i])
        y_list.append(labels[i])
    X = np.array(X_list, dtype=np.float32)
    y = np.array(y_list, dtype=np.int64)
    return X, y


In [6]:
# ---------------------------
# 4) Model: CNN -> LSTM hybrid
# ---------------------------
class CNNLSTM(nn.Module):
    def __init__(self, input_dim, cnn_channels=64, kernel_size=3, lstm_hidden=128, lstm_layers=2, output_dim=3, dropout=0.3):
        super().__init__()
        # Conv1d expects (batch, channels, seq_len) -> we treat features as channels
        self.conv = nn.Sequential(
            nn.Conv1d(in_channels=input_dim, out_channels=cnn_channels, kernel_size=kernel_size, padding=kernel_size//2),
            nn.ReLU(),
            nn.BatchNorm1d(cnn_channels),
            nn.Dropout(dropout),
            nn.Conv1d(in_channels=cnn_channels, out_channels=cnn_channels, kernel_size=kernel_size, padding=kernel_size//2),
            nn.ReLU(),
            nn.BatchNorm1d(cnn_channels),
            nn.Dropout(dropout)
        )
        self.lstm = nn.LSTM(input_size=cnn_channels, hidden_size=lstm_hidden, num_layers=lstm_layers, batch_first=True, dropout=dropout)
        self.bn = nn.BatchNorm1d(lstm_hidden)
        self.fc = nn.Linear(lstm_hidden, output_dim)

    def forward(self, x):
        # x: (batch, seq_len, features)
        x = x.permute(0, 2, 1)             # -> (batch, features, seq_len)
        x = self.conv(x)                  # -> (batch, cnn_channels, seq_len)
        x = x.permute(0, 2, 1)            # -> (batch, seq_len, cnn_channels)
        out, _ = self.lstm(x)             # -> (batch, seq_len, lstm_hidden)
        out = out[:, -1, :]               # last timestep
        out = self.bn(out)                # batchnorm
        out = self.fc(out)
        return out

In [7]:
# ---------------------------
# 5) Training helper functions
# ---------------------------
def compute_class_weights(y):
    classes, counts = np.unique(y, return_counts=True)
    total = counts.sum()
    num_classes = int(classes.max()) + 1
    weights = np.ones(num_classes, dtype=np.float32)
    for c, cnt in zip(classes, counts):
        weights[c] = total / (cnt + 1e-8)
    return torch.tensor(weights, dtype=torch.float32)

def train_one_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in dataloader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device).squeeze().long()
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * X_batch.size(0)
    return running_loss / len(dataloader.dataset)

def evaluate(model, dataloader, device):
    model.eval()
    preds = []
    trues = []
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device).squeeze().long()
            outputs = model(X_batch)
            pred = torch.argmax(outputs, dim=1).cpu().numpy()
            preds.append(pred)
            trues.append(y_batch.cpu().numpy())
    preds = np.concatenate(preds)
    trues = np.concatenate(trues)
    acc = accuracy_score(trues, preds)
    prec, rec, f1, _ = precision_recall_fscore_support(trues, preds, average='weighted', zero_division=0)
    return acc, prec, rec, f1, trues, preds


In [16]:
# ---------------------------
# 6) Full pipeline function
# ---------------------------
def run_pipeline(df_raw,
                 feature_names=None,
                 window_size=10,
                 test_size=0.2,
                 batch_size=64,
                 epochs=200,
                 cnn_channels=64,
                 lstm_hidden=128,
                 lr=1e-3,
                 weight_decay=1e-5,
                 early_stopping_patience=8,
                 random_state=42):
    # 1) features
    df = add_features(df_raw)
    df = label_reversals(df, window=5, atr_period=14, atr_mult=1.5)
    if feature_names is None:
        # choose non-redundant features recommended earlier
        feature_names = ['rsi','macd','signal','histogram','tsi','%k','%d','atr','obv','adx','cci','return_1','roll_mean_5','roll_std_5']
    # build X,y sequences
    X, y = create_sequences_from_df(df, feature_names, window_size=window_size)

    # scale features (fit on training later) -> we will split first to avoid leakage
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False)  # time order preserved

    # Flatten features for scaler, fit on training set only
    nsamples, seq_len, nfeat = X_train.shape
    scaler = StandardScaler()
    X_train_flat = X_train.reshape(-1, nfeat)
    X_train_flat = scaler.fit_transform(X_train_flat)
    X_train = X_train_flat.reshape(nsamples, seq_len, nfeat)

    # scale test
    X_test_flat = X_test.reshape(-1, nfeat)
    X_test_flat = scaler.transform(X_test_flat)
    X_test = X_test_flat.reshape(X_test.shape[0], seq_len, nfeat)

    joblib.dump(scaler, "scaler_cnn_lstm.pkl")
    print("Saved scaler -> scaler_cnn_lstm.pkl")

    # Convert to tensors
    X_train_t = torch.tensor(X_train, dtype=torch.float32)
    y_train_t = torch.tensor(y_train, dtype=torch.long).unsqueeze(1)
    X_test_t = torch.tensor(X_test, dtype=torch.float32)
    y_test_t = torch.tensor(y_test, dtype=torch.long).unsqueeze(1)

    # compute class weights and sampler for balanced batch sampling
    class_weights = compute_class_weights(y_train).to(device)
    print("Class weights:", class_weights.cpu().numpy())

    # WeightedRandomSampler to mitigate imbalance while training
    # weight for each sample = 1 / count(label)
    labels_np = y_train
    class_sample_count = np.array([len(np.where(labels_np == t)[0]) for t in np.unique(labels_np)])
    weight = 1. / class_sample_count
    samples_weight = np.array([weight[t] for t in labels_np])
    samples_weight = torch.from_numpy(samples_weight).double()
    sampler = WeightedRandomSampler(samples_weight, num_samples=len(samples_weight), replacement=True)

    train_dataset = TensorDataset(X_train_t, y_train_t)
    test_dataset = TensorDataset(X_test_t, y_test_t)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, drop_last=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # model init
    model = CNNLSTM(input_dim=nfeat, cnn_channels=cnn_channels, lstm_hidden=lstm_hidden, lstm_layers=2, output_dim=int(y.max())+1).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # scheduler & early stopping
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.7, patience=4)
    best_val = -np.inf
    best_epoch = -1
    patience = early_stopping_patience

    for epoch in range(1, epochs+1):
        start = time.time()
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_acc, val_prec, val_rec, val_f1, _, _ = evaluate(model, test_loader, device)
        scheduler.step(val_acc)  # use val acc as monitor

        elapsed = time.time() - start
        print(f"Epoch {epoch}/{epochs} | train_loss={train_loss:.5f} val_acc={val_acc:.4f} val_f1={val_f1:.4f} time={elapsed:.1f}s")

        """# early stopping on val acc
        if val_acc > best_val + 1e-5:
            best_val = val_acc
            best_epoch = epoch
            torch.save(model.state_dict(), "best_cnn_lstm.pth")
            print("  Saved best model at epoch", epoch)
            patience = early_stopping_patience  # reset patience
        else:
            patience -= 1
            if patience <= 0:
                print("Early stopping triggered.")
                break"""

    # load best model and final evaluation
    model.load_state_dict(torch.load("best_cnn_lstm.pth", map_location=device))
    final_acc, final_prec, final_rec, final_f1, trues, preds = evaluate(model, test_loader, device)
    print("Final Test -- acc: %.4f prec: %.4f rec: %.4f f1: %.4f" % (final_acc, final_prec, final_rec, final_f1))

    # Save final artifacts
    torch.save(model.state_dict(), "cnn_lstm_final.pth")
    print("Saved final model -> cnn_lstm_final.pth")

    return {
        'model': model,
        'scaler': scaler,
        'feature_names': feature_names,
        'metrics': (final_acc, final_prec, final_rec, final_f1),
        'y_true': trues,
        'y_pred': preds
    }


In [19]:
# load your OHLCV csv into df_raw
import ccxt
import pandas as pd
import numpy as np

# Initialize Binance
exchange = ccxt.binance()
symbol = 'BTC/USDT'
timeframe = '15m'  # hourly candles
limit = 10000  # number of candles to fetch

# Fetch OHLCV data
ohlcv = exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)

# Convert to DataFrame
df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df.set_index('timestamp', inplace=True)

In [20]:
# ---------------------------
# 7) Usage example
# ---------------------------
if __name__ == "__main__":
    

    greg = run_pipeline(df)
    print("Pipeline complete. Metrics:", greg['metrics'])
    

Saved scaler -> scaler_cnn_lstm.pkl
Class weights: [19.897436   1.1165468 18.47619  ]
Epoch 1/200 | train_loss=0.97363 val_acc=0.0564 val_f1=0.0089 time=0.9s
Epoch 2/200 | train_loss=0.76122 val_acc=0.0462 val_f1=0.0060 time=1.1s
Epoch 3/200 | train_loss=0.53735 val_acc=0.0513 val_f1=0.0074 time=1.0s
Epoch 4/200 | train_loss=0.51110 val_acc=0.0821 val_f1=0.0587 time=0.8s
Epoch 5/200 | train_loss=0.44447 val_acc=0.0513 val_f1=0.0075 time=0.9s
Epoch 6/200 | train_loss=0.39505 val_acc=0.0513 val_f1=0.0076 time=0.9s
Epoch 7/200 | train_loss=0.32863 val_acc=0.0513 val_f1=0.0074 time=0.9s
Epoch 8/200 | train_loss=0.33831 val_acc=0.0513 val_f1=0.0074 time=0.9s
Epoch 9/200 | train_loss=0.27995 val_acc=0.0564 val_f1=0.0086 time=0.8s
Epoch 10/200 | train_loss=0.25859 val_acc=0.0564 val_f1=0.0087 time=0.7s
Epoch 11/200 | train_loss=0.23816 val_acc=0.0718 val_f1=0.0389 time=0.7s
Epoch 12/200 | train_loss=0.25633 val_acc=0.0513 val_f1=0.0074 time=0.6s
Epoch 13/200 | train_loss=0.30350 val_acc=0.056