In [None]:
from dotenv import load_dotenv

from core.data_sources.clob import CLOBDataSource
import warnings

from core.notifiers import NotificationManager, NotificationMessage

# Install a blanket "ignore" filter so warnings do not clutter cell output.
warnings.simplefilter("ignore")

# Load variables from a .env file into the process environment before the
# data source below is constructed.
load_dotenv()

# Entry point for central limit order book (CLOB) connectors; later cells read
# the cached candles through this object.
clob = CLOBDataSource()

In [None]:
# Notebook-wide tunables; the later cells read their settings from this dict.
CONFIG = dict(
    connector_name='binance_perpetual',
    interval='1m',
    ema_lengths=[20, 200, 500],
    signal_intensity_threshold=0.7,
    min_range_pct=0.01,
    rolling_window=1000,
)

# Load the candle cache, then keep only the entries whose cache key ends with
# the configured interval and starts with the configured connector name.
clob.load_candles_cache()
candles = [
    cached_candles
    for cache_key, cached_candles in clob.candles_cache.items()
    if cache_key[-1] == CONFIG['interval'] and cache_key[0] == CONFIG['connector_name']
]

## Improved Feature Engineering System

This notebook uses the streamlined feature engineering system, which provides:
- **One feature per strategy/calculation** (not per indicator)
- Flexible value types: float, List[float], or Dict[str, float]
- MongoDB-only storage for production use
- Standardized signal model with -1 to 1 scale
- Type-safe Pydantic models

In [None]:
# Import the new feature engineering system
from core.features import FeatureStorage, Feature, Signal
from core.features.candles.ema_trend import EMATrend, EMATrendConfig

# Initialize feature storage (MongoDB only, per the notebook's description).
# `connect()` is awaited directly at cell level, so this cell relies on the
# kernel's top-level `await` support. Later cells reuse `storage` to save and
# query features and signals.
storage = FeatureStorage()
await storage.connect()

In [None]:
# Build the EMA trend calculator from the shared CONFIG values.
ema_config = EMATrendConfig(
    ema_lengths=CONFIG['ema_lengths'],
    rolling_window=CONFIG['rolling_window'],
)
ema_trend = EMATrend(feature_config=ema_config)

# Echo the effective settings, read back from the config object itself.
print(
    "✅ Initialized EMA Trend feature calculator",
    f"   EMA lengths: {ema_config.ema_lengths}",
    f"   Rolling window: {ema_config.rolling_window}",
    "\n📊 This creates ONE feature per pair containing all EMA trend data",
    sep="\n",
)

In [None]:
# Per-pair pass: build the EMA trend feature, then ask the calculator whether
# the pair also qualifies for a signal under the CONFIG thresholds.
all_features = []
all_signals = []

signals_for_display = []


def _signal_summary(signal, feature):
    """Flatten one signal and its source feature into a row for the report table.

    Direction is derived from the sign of ``signal.value`` and intensity from
    its magnitude (the notebook describes signal values as a -1 to 1 scale).
    """
    return {
        'trading_pair': signal.trading_pair,
        'category': signal.category,
        'signal_value': signal.value,
        'direction': 'LONG' if signal.value > 0 else 'SHORT',
        'intensity': abs(signal.value),
        'range_pct': feature.value['range_pct'],
        'ema_divergence': feature.value['divergence'],
    }


for candle in candles:
    try:
        # The feature is recorded before the signal is attempted, so a failure
        # in signal creation still leaves the feature in `all_features`.
        feature = ema_trend.create_feature(candle)
        all_features.append(feature)

        signal = ema_trend.create_signal(
            candle,
            min_intensity=CONFIG['signal_intensity_threshold'],
            min_range_pct=CONFIG['min_range_pct'],
        )
        if not signal:
            continue

        all_signals.append(signal)
        signals_for_display.append(_signal_summary(signal, feature))
    except Exception as exc:
        # Best effort: report the failing pair and move on to the next one.
        print(f"Error processing {candle.trading_pair}: {exc}")

print(
    f"\n✅ Processed {len(candles)} pairs",
    f"📊 Generated {len(all_features)} features (1 per pair)",
    f"🎯 Generated {len(all_signals)} signals",
    sep="\n",
)

In [None]:
# Show the anatomy of a single feature, using the first loaded pair as the sample.
if candles:
    example_candle = candles[0]

    # The Candles object is the only input the calculator is given.
    example_feature = ema_trend.create_feature(example_candle)

    print(
        f"📝 Example Feature for {example_feature.trading_pair}:",
        f"   Feature name: {example_feature.feature_name}",
        f"   Connector: {example_feature.connector_name}",
        f"   Timestamp: {example_feature.timestamp}",
        "\n   Value (dict with all EMA trend data):",
        sep="\n",
    )
    for name, entry in example_feature.value.items():
        # Floats get fixed 6-decimal formatting; anything else prints as-is.
        shown = f"{entry:.6f}" if isinstance(entry, float) else entry
        print(f"     • {name}: {shown}")

    print("\n   Info (metadata):")
    for name, entry in example_feature.info.items():
        print(f"     • {name}: {entry}")

### Visualize Feature

Features now have built-in visualization methods!

In [None]:
# Render the first loaded pair through the calculator's own plot method.
if candles:
    example_candle = candles[0]
    chart_size = dict(height=600, width=1400)

    print(f"📈 Plotting {example_candle.trading_pair} with EMA Trend feature...")
    ema_trend.plot(example_candle, **chart_size)

In [None]:
# Alternative: build the candlestick figure first and layer the feature onto it.
if candles:
    example_candle = candles[0]

    # Base candlestick figure, handed straight to the EMA trend overlay.
    fig = ema_trend.add_to_fig(
        example_candle.candles_fig(height=600, width=1400),
        example_candle,
    )

    # Further features could be layered the same way, e.g.
    # fig = another_feature.add_to_fig(fig, example_candle)

    fig.show()

In [None]:
# Save features and signals to MongoDB
print("Saving features and signals to MongoDB...")

# Save features
await storage.save_features(all_features)

# Save signals
await storage.save_signals(all_signals)

print(f"✅ Saved {len(all_features)} features")
print(f"✅ Saved {len(all_signals)} signals")

In [None]:
import pandas as pd

# Tabulate the collected signals, strongest first.
#
# The column list is passed explicitly so the frame keeps its schema even when
# no signal was generated. Without it, an empty `signals_for_display` yields a
# column-less DataFrame and `sort_values(by='intensity')` raises KeyError.
# The order matches the key order of the rows built in the generation loop.
SIGNAL_COLUMNS = [
    'trading_pair',
    'category',
    'signal_value',
    'direction',
    'intensity',
    'range_pct',
    'ema_divergence',
]

signals_df = (
    pd.DataFrame(signals_for_display, columns=SIGNAL_COLUMNS)
    .sort_values(by='intensity', ascending=False)
)

print(f"Total signals: {len(signals_df)}")
print(f"Long signals: {len(signals_df[signals_df['direction'] == 'LONG'])}")
print(f"Short signals: {len(signals_df[signals_df['direction'] == 'SHORT'])}")

# Show top signals (real newline: the original "\\n" printed a literal backslash-n)
print("\nTop 10 Signals:")
display(signals_df.head(10))

### Summary Statistics

In [None]:
# Closing report: processing counts, signal breakdown, storage totals, and a
# document-count comparison against a per-indicator layout.
banner = "=" * 60
print(banner)
print("FEATURE ENGINEERING SUMMARY")
print(banner)

print(
    "\n📊 Data Processing:",
    f"   • Pairs processed: {len(candles)}",
    f"   • Features generated: {len(all_features)} (1 per pair)",
    f"   • Signals generated: {len(all_signals)}",
    sep="\n",
)

if len(signals_df) > 0:
    long_count = int((signals_df['direction'] == 'LONG').sum())
    short_count = int((signals_df['direction'] == 'SHORT').sum())
    intensities = signals_df['intensity']
    print(
        "\n🎯 Signal Breakdown:",
        f"   • Long signals (value > 0): {long_count}",
        f"   • Short signals (value < 0): {short_count}",
        f"   • Average intensity: {intensities.mean():.3f}",
        f"   • Max intensity: {intensities.max():.3f}",
        sep="\n",
    )

print(
    "\n💾 Storage:",
    f"   • Features saved to MongoDB: {len(all_features)}",
    f"   • Signals saved to MongoDB: {len(all_signals)}",
    "   • Collection: 'features' and 'signals'",
    sep="\n",
)

print("\n✅ Efficiency Gain:")
# The old approach is assumed to have stored 5 features per pair.
OLD_FEATURES_PER_PAIR = 5
old_document_count = len(candles) * OLD_FEATURES_PER_PAIR
new_document_count = len(all_features)  # 1 feature per pair
if old_document_count > 0:
    reduction_pct = (old_document_count - new_document_count) / old_document_count * 100
else:
    # No pairs loaded: avoid dividing by zero and report no reduction.
    reduction_pct = 0
print(f"   • Old approach would create: ~{old_document_count} documents")
print(f"   • New approach creates: {new_document_count} documents")
print(f"   • Reduction: {reduction_pct:.0f}%")

print(banner)

## Query Stored Features and Signals

The query cells that follow the trading example below demonstrate retrieving features and signals from storage.

## Use Signals for Trading

Now that we have signals, let's prepare them for deployment

In [None]:
# Get top signals for trading
top_long_signals = signals_df[signals_df['direction'] == 'LONG'].head(3)
top_short_signals = signals_df[signals_df['direction'] == 'SHORT'].head(3)

print("🟢 Top 3 Long Signals for Trading:")
display(top_long_signals)

print("\n🔴 Top 3 Short Signals for Trading:")
display(top_short_signals)

# For each signal, we can get the full feature data from storage
if len(top_long_signals) > 0:
    example_pair = top_long_signals.iloc[0]['trading_pair']
    
    # Get the feature to access grid levels
    pair_features = await storage.get_features(
        feature_name="ema_trend",
        trading_pair=example_pair,
        connector_name=CONFIG['connector_name'],
        limit=1
    )
    
    if pair_features:
        feat = pair_features[0]
        range_pct = feat.value['range_pct']
        price = feat.value['price']
        
        print(f"\n💡 Example Grid Levels for {example_pair}:")
        print(f"   Current Price: ${price:.6f}")
        print(f"   Range %: {range_pct*100:.2f}%")
        
        # Calculate grid levels (same logic as before)
        start_price = price * (1 - 0.5 * range_pct)
        end_price = price * (1 + 1.5 * range_pct)
        limit_price = price * (1 - 0.7 * range_pct)
        
        print(f"   Start Price: ${start_price:.6f}")
        print(f"   End Price: ${end_price:.6f}")
        print(f"   Limit Price: ${limit_price:.6f}")

In [None]:
# Query recent high-intensity long signals (value > 0.8)
recent_long_signals = await storage.get_signals(
    category='tf',  # trend following
    min_value=0.8,  # Strong long signals
    limit=5
)

print(f"Found {len(recent_long_signals)} high-intensity long signals:")
for sig in recent_long_signals:
    print(f"  {sig.trading_pair}: value={sig.value:.3f} ({sig.signal_name})")

In [None]:
# Query features for a specific trading pair
example_pair = signals_for_display[0]['trading_pair'] if signals_for_display else "BTC-USDT"

features_for_pair = await storage.get_features(
    feature_name="ema_trend",
    trading_pair=example_pair,
    connector_name=CONFIG['connector_name'],
    limit=1
)

if features_for_pair:
    feat = features_for_pair[0]
    print(f"EMA Trend feature for {example_pair}:")
    print(f"  Timestamp: {feat.timestamp}")
    print(f"\\n  Values:")
    for key, val in feat.value.items():
        if isinstance(val, float):
            print(f"    {key}: {val:.6f}")
        else:
            print(f"    {key}: {val}")
    if feat.info:
        print(f"\\n  Info:")
        for key, val in feat.info.items():
            print(f"    {key}: {val}")
else:
    print(f"No features found for {example_pair}")