In [1]:
import sys
from pathlib import Path

# Add user_data directory to path for crypto_analysis package.
# The parent of the current working directory is treated as the user_data
# root, so this cell assumes the kernel was started from a direct
# subdirectory of it -- TODO confirm for other launch locations.
user_data_path = Path.cwd().parent
data_dir = user_data_path / "data" / "binance"

# Guard the insert so that re-running this cell does not keep stacking
# duplicate entries onto sys.path.
if str(user_data_path) not in sys.path:
    sys.path.insert(0, str(user_data_path))

In [7]:
from crypto_analysis import (
    DatasetBuilder,
    DataPreprocessor,
    SequenceValidator,
    SignalDataset,
    create_sequences,
    LSTMSignalPredictor,
    ModelConfig,
    WeightedSignalLoss,
    Trainer,
    TrainingConfig,
    Predictor,
    LSTMMetaheuristicOptimizer,
)


In [3]:
# 1. Build dataset
# Configure the builder once, then build the frame for a single symbol.
builder = DatasetBuilder(
    data_dir=data_dir,
    period_hours=4,
    n_workers=15,
    signal_shift=4,
)

# Build the DOGE frame with a 1.5 threshold, hyperopt disabled and verbose
# progress logging enabled.
df = builder.build("DOGE", threshold_pct=1.5, hyperopt=False, verbose=True)

Generating target signals for DOGE...
Initial index (0): entry=140, exit=140
Found optimal index (0): entry=140, exit=140
Processing 33 indicators with 15 workers...
Signal shift: 4 steps (indicators predict t+4)
  [RSI] Grid search optimization...
  [MACD] Grid search optimization...
  [STOCH] Grid search optimization...
  [STOCHRSI] Grid search optimization...
  [CCI] Grid search optimization...
  [MFI] Grid search optimization...
  [WILLR] Grid search optimization...
  [CMO] Grid search optimization...
  [ADX] Grid search optimization...
  [MOM] Grid search optimization...
  [ROC] Grid search optimization...
  [TRIX] Grid search optimization...
  [ULTOSC] Grid search optimization...
  [APO] Grid search optimization...
  [PPO] Grid search optimization...
  [TRIX] Grid search done - score: 16
  [BOP] Grid search optimization...
Completed 1/33: TRIX
  [BOP] Grid search done - score: 0
  [AROON] Grid search optimization...
Completed 2/33: BOP
  [MOM] Grid search done - score: 48
  [AROO

In [None]:
# # 2. Preprocess
# preprocessor = DataPreprocessor(target_shift=4)
# features, targets = preprocessor.fit_transform(df)

# # 3. Create sequences
# feat_seqs, tgt_seqs = create_sequences(features, targets, input_seq_length=12, output_seq_length=4)

# # 4. Validate and filter
# validator = SequenceValidator()
# feat_seqs, tgt_seqs, seq_types = validator.filter_valid_sequences(feat_seqs, tgt_seqs)

# # 5. Create dataset
# dataset = SignalDataset(feat_seqs, tgt_seqs, seq_types)

# # 6. Train
# config = TrainingConfig(
#     epochs=100,
#     auto_class_weights=True,
#     class_weight_power=1.5,     # Even stronger (was 1.0)
#     focal_loss=True,
#     focal_gamma=4.0,            # More aggressive (was 3.0)
#     label_smoothing=0.15,
#     dropout=0.5,
#     weight_decay=0.01,          # Stronger L2
#     hidden_size=64,             # Smaller model in ModelConfig
#     patience=10,
#     checkpoint_dir='checkpoints/'
# )

# # Also use smaller model
# model_config = ModelConfig(input_size=preprocessor.get_num_features(), hidden_size=64, num_layers=1)


# model = LSTMSignalPredictor(model_config)
# trainer = Trainer(model, config, preprocessor=preprocessor)
# history = trainer.train(dataset)


# # Or evaluate silently and print later
# metrics = trainer.evaluate_all()
# trainer.print_evaluation_report(metrics)

# from crypto_analysis import Predictor

# # Load your trained model
# predictor = Predictor.from_checkpoint(
#     'checkpoints/best_model.pt',
#     'checkpoints/preprocessor.pkl'
# )

# # Find optimal threshold on validation data
# threshold_results = predictor.find_optimal_threshold(
#     df=df.tail(int(df.shape[0]*0.2)),
#     thresholds=[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
#     metric='f1',  # or 'precision', 'recall'
#     verbose=True
# )

# print(f"Best threshold: {threshold_results['best_threshold']}")

# # Evaluate performance with thresholded predictions
# metrics = predictor.evaluate_with_threshold(
#     df.tail(int(df.shape[0]*0.2)),
#     entry_threshold=0.7,
#     exit_threshold=0.8
# )

# print(f"Entry Precision: {metrics['entry_precision']:.4f}")
# print(f"Entry Recall: {metrics['entry_recall']:.4f}")
# print(f"Entry F1: {metrics['entry_f1']:.4f}")

# print(f"Exit Precision: {metrics['exit_precision']:.4f}")
# print(f"Exit Recall: {metrics['exit_recall']:.4f}")
# print(f"Exit F1: {metrics['exit_f1']:.4f}")

In [8]:
# Optimize
# All optimizer settings are gathered in one place so the tunable values are
# easy to scan; they are forwarded unchanged as keyword arguments.
optimizer_settings = dict(
    # Search budget and parallelism.
    pop_size=15,
    iterations=50,
    n_workers=15,
    epochs_per_eval=100,
    checkpoint_interval=5,
    # APO-specific knobs.
    np_neighbors=2,
    pf_max=0.20,
    # Elitism.
    elitist_selection=True,
    elitist_constant=0.25,
    # Reporting.
    verbose=True,
    enable_logging=True,
)

optimizer = LSTMMetaheuristicOptimizer(df=df, **optimizer_settings)
result = optimizer.optimize()

# Summarise the outcome of the search.
print(f"Best fitness: {result.best_fitness}")
print(f"Selected features ({result.n_features_selected}): {result.selected_features}")
print(f"Best params: {result.best_params}")


LSTMMetaheuristicOptimizer (APO) initialized:
  - DataFrame mode: binary
  - Feature columns: 71
  - Hyperparameters: 11
  - Total dimension: 82
  - Population size: 15
  - Iterations: 50
  - Workers: 15
  - APO np_neighbors: 2
  - APO pf_max: 0.2
  - Elitist selection: True
  - Elitist constant: 0.25
  - Correlation vector: min=0.0000, max=1.0000, mean=0.1559

Starting APO Metaheuristic Optimization
Run ID: 72074f99

Evaluating initial population...
iter:-1 indv:0 fitness:0.0214 features:36
iter:-1 indv:4 fitness:0.2147 features:39
iter:-1 indv:5 fitness:0.0146 features:35
iter:-1 indv:10 fitness:0.1890 features:42
iter:-1 indv:3 fitness:0.0000 features:43
iter:-1 indv:6 fitness:0.0063 features:43
iter:-1 indv:8 fitness:0.0062 features:31
iter:-1 indv:2 fitness:0.0186 features:29
iter:-1 indv:9 fitness:0.3124 features:31
iter:-1 indv:14 fitness:0.1884 features:36
iter:-1 indv:12 fitness:0.0000 features:37
iter:-1 indv:13 fitness:0.1080 features:35
iter:-1 indv:11 fitness:0.0157 featur

KeyboardInterrupt: 

In [None]:
# Train from the optimization result for 100 epochs and keep the returned
# object as `trainer`. Requires `result` from a completed
# `optimizer.optimize()` run in the cell above.
trainer = optimizer.train_from_result(
    result,
    epochs=100,
)

In [None]:
# # Load your trained model
# predictor = Predictor.from_checkpoint(
#     'checkpoints/best_model.pt',
#     'checkpoints/preprocessor.pkl'
# )

# # Find optimal threshold on validation data
# threshold_results = predictor.find_optimal_threshold(
#     df=df.tail(int(df.shape[0]*0.2)),
#     thresholds=[0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
#     metric='f1',  # or 'precision', 'recall'
#     verbose=True
# )

# print(f"Best threshold: {threshold_results['best_threshold']}")



In [None]:
# # Evaluate performance with thresholded predictions
# metrics = predictor.evaluate_with_threshold(
#     df.tail(int(df.shape[0]*0.2)),
#     entry_threshold=0.3,
#     exit_threshold=0.3
# )

# print(f"Entry Precision: {metrics['entry_precision']:.4f}")
# print(f"Entry Recall: {metrics['entry_recall']:.4f}")
# print(f"Entry F1: {metrics['entry_f1']:.4f}")

# print(f"Exit Precision: {metrics['exit_precision']:.4f}")
# print(f"Exit Recall: {metrics['exit_recall']:.4f}")
# print(f"Exit F1: {metrics['exit_f1']:.4f}")

In [None]:
# predictor.predict_with_threshold(df.tail(int(df.shape[0]*0.2)), 0.9, 0.9).labels

In [None]:
from crypto_analysis.signal_population import SignalPopulator

# Test with DOGE and period_hours=4
populator = SignalPopulator(
    data_dir=data_dir,
    period_hours=4,
)
df_signals = populator.populate_signals("DOGE", threshold_pct=1.5)

# Report the frame size, then how many rows carry each signal label.
print(f"\nDataFrame shape: {df_signals.shape}")
signal_labels = df_signals['signal']
print(f"Entry signals: {signal_labels.eq('entry').sum()}")
print(f"Exit signals: {signal_labels.eq('exit').sum()}")