# 🤖 ML Trading Strategies - Getting Started Tutorial

This notebook provides a step-by-step guide to using the ML Trading Strategies framework.

## 📚 What You'll Learn

1. Loading market data
2. Engineering technical indicators
3. Training machine learning models
4. Backtesting your strategy
5. Analyzing performance metrics

## ⚠️ Important Note

This is for educational purposes only. Not financial advice.

In [None]:
# Import necessary libraries
import sys
import os

# Add src to path.
# The relative path is resolved against the current working directory.
SRC_DIR = os.path.abspath('../src')
# Rebuild the list so SRC_DIR sits at the front exactly once; re-running this
# cell then no longer stacks duplicate entries onto sys.path.
sys.path[:] = [SRC_DIR] + [entry for entry in sys.path if entry != SRC_DIR]

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set plot style.
# The 'seaborn-v0_8-*' style names only exist in matplotlib 3.6 and later; older
# releases ship the sheet as 'seaborn-darkgrid'. Use whichever name this
# installation provides instead of failing with OSError on a version mismatch.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
else:
    # Neither bundled sheet is available: fall back to seaborn's own darkgrid style.
    sns.set_style('darkgrid')
sns.set_palette("husl")

print("✅ Libraries imported successfully!")

## 1️⃣ Step 1: Load Market Data

We'll use synthetic data for this tutorial so that it runs without an internet connection.

In [None]:
from utils.data_loader import DataLoader

# Settings for the synthetic price series (generated locally, no internet required).
synthetic_config = {
    'n_days': 1000,
    'start_price': 100.0,
    'volatility': 0.02,
    'trend': 0.0002,
    'random_state': 42,
}

# The loader is reused by later cells (target creation, train/val/test split).
loader = DataLoader()
data = loader.generate_synthetic_data(**synthetic_config)

# Report the size and the span of the index, then preview the frame.
first_date, last_date = data.index[0], data.index[-1]
print(f"📊 Generated {len(data)} days of data")
print(f"Date range: {first_date} to {last_date}")
print("\nFirst few rows:")
data.head()

In [None]:
# Visualize the price data.
# The explicit Figure/Axes interface keeps this cell independent of pyplot's
# "current axes" state.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data.index, data['close'], label='Close Price', linewidth=1.5)
# No emoji in the title: matplotlib's default font has no glyph for it, which
# triggers a "Glyph ... missing from font" warning and renders an empty box.
ax.set_title('Synthetic Stock Price Over Time', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Price ($)', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# Show basic statistics
print("\n📊 Price Statistics:")
print(data['close'].describe())

## 2️⃣ Step 2: Feature Engineering

Add technical indicators to the data.

In [None]:
from features.technical_indicators import TechnicalIndicators

# Build the indicator helper and run it on a copy of `data`.
indicators = TechnicalIndicators()
data_with_features = indicators.add_all_features(data.copy())

# Comparing column counts before and after shows how many columns were appended.
n_columns_total = data_with_features.shape[1]
n_columns_added = n_columns_total - data.shape[1]
preview_columns = data_with_features.columns[:10].tolist()

print(f"✅ Added features! Dataset now has {n_columns_total} columns")
print(f"\nFeatures added: {n_columns_added} indicators")
print("\nSample features:")
print(preview_columns)

In [None]:
# Three stacked panels: price with moving averages, RSI, and MACD.
fig, axes = plt.subplots(3, 1, figsize=(14, 10))
price_ax, rsi_ax, macd_ax = axes
dates = data_with_features.index

# Panel 1: close price plus the 'sma_20' and 'sma_50' columns
price_ax.plot(dates, data_with_features['close'], label='Close', linewidth=1.5)
for column, legend_label in (('sma_20', 'SMA 20'), ('sma_50', 'SMA 50')):
    price_ax.plot(dates, data_with_features[column], label=legend_label, alpha=0.7)
price_ax.set_title('Price with Moving Averages', fontweight='bold')

# Panel 2: RSI with dashed reference lines at 70 and 30
rsi_ax.plot(dates, data_with_features['rsi_14'], label='RSI', color='purple', linewidth=1.5)
for level, line_color, legend_label in ((70, 'r', 'Overbought'), (30, 'g', 'Oversold')):
    rsi_ax.axhline(y=level, color=line_color, linestyle='--', alpha=0.5, label=legend_label)
rsi_ax.set_title('Relative Strength Index (RSI)', fontweight='bold')
rsi_ax.set_ylim(0, 100)

# Panel 3: MACD line, signal line and histogram bars
for column, legend_label in (('macd', 'MACD'), ('macd_signal', 'Signal')):
    macd_ax.plot(dates, data_with_features[column], label=legend_label, linewidth=1.5)
macd_ax.bar(dates, data_with_features['macd_hist'], label='Histogram', alpha=0.3)
macd_ax.set_title('MACD Indicator', fontweight='bold')

# Legend and light grid are identical on every panel.
for panel in axes:
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 3️⃣ Step 3: Create Target Variable

Define what we want to predict - future price movements.

In [None]:
# Create target variable for 5-day forward returns, attach it as the 'target'
# column and drop every row that still contains a NaN.
# NOTE(review): `data_with_features` is rebound to the trimmed frame, so
# re-running only this cell starts from already-trimmed data; re-run the
# feature-engineering cell first if a fresh start is needed.
target_labels = loader.create_target_variable(
    data_with_features,
    horizon=5,
    threshold=0.01,  # 1% threshold
    binary=False
)
data_with_features = data_with_features.assign(target=target_labels).dropna()

# Absolute counts and percentage share of each target value.
label_counts = data_with_features['target'].value_counts()
label_share = data_with_features['target'].value_counts(normalize=True) * 100

print("✅ Target variable created!")
print("\nTarget distribution:")
print(label_counts)
print("\nClass balance:")
print(label_share)

## 4️⃣ Step 4: Prepare Training Data

Split data into training, validation, and test sets.

In [None]:
# Split fractions handed to the loader (70% / 15% / 15%).
split_ratios = {'train_ratio': 0.7, 'val_ratio': 0.15, 'test_ratio': 0.15}

# The loader returns the three feature sets followed by the three target sets.
splits = loader.prepare_training_data(
    data_with_features,
    target_col='target',
    **split_ratios
)
X_train, X_val, X_test, y_train, y_val, y_test = splits

print("📊 Data Split:")
for split_name, features in (('Training', X_train), ('Validation', X_val), ('Test', X_test)):
    print(f"{split_name} set: {features.shape[0]} samples")
print(f"\nNumber of features: {X_train.shape[1]}")

## 5️⃣ Step 5: Train Machine Learning Models

Train multiple models and compare their performance.

In [None]:
from models.ml_models import TradingModel, EnsembleModel

def train_and_report(model_type, banner):
    """Print `banner`, build a TradingModel of `model_type` and fit it.

    Every model gets the same settings (n_estimators=100, random_state=42) and is
    fitted on the training split with the validation split passed alongside.
    Returns the fitted model object.
    """
    print(banner)
    candidate = TradingModel(model_type=model_type, n_estimators=100, random_state=42)
    candidate.fit(X_train, y_train, X_val, y_val)
    return candidate


# Train the three models in sequence; later cells refer to them by these names.
rf_model = train_and_report('random_forest', "🌲 Training Random Forest...")
xgb_model = train_and_report('xgboost', "🚀 Training XGBoost...")
lgb_model = train_and_report('lightgbm', "💡 Training LightGBM...")

print("\n✅ All models trained!")

In [None]:
# Evaluate models on validation set
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Models to compare, keyed by the label shown in the table and the chart.
models = {
    'Random Forest': rf_model,
    'XGBoost': xgb_model,
    'LightGBM': lgb_model
}

# One row of metrics per model. The list has a name of its own because the
# backtest cell binds `results` to a different kind of object; sharing that
# name would break the performance plots when cells are re-run out of order.
validation_scores = []
for name, model in models.items():
    preds = model.predict(X_val)
    validation_scores.append({
        'Model': name,
        'Accuracy': accuracy_score(y_val, preds),
        'Precision': precision_score(y_val, preds, average='weighted'),
        'Recall': recall_score(y_val, preds, average='weighted'),
        'F1 Score': f1_score(y_val, preds, average='weighted')
    })

results_df = pd.DataFrame(validation_scores)
print("\n📊 Validation Performance:")
print(results_df.to_string(index=False))

# Visualize results: grouped bars, one group per model.
# All styling goes through the Axes object the bars were drawn on rather than
# through pyplot's implicit "current axes".
fig, ax = plt.subplots(figsize=(10, 6))
results_df.set_index('Model')[['Accuracy', 'Precision', 'Recall', 'F1 Score']].plot(kind='bar', ax=ax)
ax.set_title('Model Performance Comparison', fontsize=16, fontweight='bold')
ax.set_ylabel('Score', fontsize=12)
ax.tick_params(axis='x', labelrotation=45)
ax.legend(loc='lower right')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 6️⃣ Step 6: Create Ensemble Model

Combine the three trained models into a single ensemble and check whether it improves on the individual models.

In [None]:
# Wrap the three trained models in one ensemble (voting='soft') and fit it on
# the same training/validation data as the individual models.
base_models = [rf_model, xgb_model, lgb_model]
ensemble = EnsembleModel(base_models, voting='soft')
ensemble.fit(X_train, y_train, X_val, y_val)

# Score the ensemble on the validation split with the same metrics as above.
ensemble_preds = ensemble.predict(X_val)
weighted_metrics = (
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1 Score', f1_score),
)

print("\n🎯 Ensemble Model Performance:")
print(f"Accuracy: {accuracy_score(y_val, ensemble_preds):.4f}")
for metric_name, scorer in weighted_metrics:
    print(f"{metric_name}: {scorer(y_val, ensemble_preds, average='weighted'):.4f}")

## 7️⃣ Step 7: Feature Importance Analysis

In [None]:
# Number of features to report; drives the query, the printed heading and the
# chart title so the three can no longer drift apart.
TOP_N_FEATURES = 15

# Get feature importance from Random Forest
feature_importance = rf_model.get_feature_importance(top_n=TOP_N_FEATURES)

print(f"\n🎯 Top {TOP_N_FEATURES} Most Important Features:")
print(feature_importance)

# Materialise the mapping's key/value views as plain lists before plotting:
# not every matplotlib version accepts dict views as bar data or tick labels.
feature_names = list(feature_importance.keys())
importance_scores = list(feature_importance.values())
positions = list(range(len(feature_names)))

# Visualize feature importance as horizontal bars
fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(positions, importance_scores)
ax.set_yticks(positions)
ax.set_yticklabels(feature_names)
ax.set_xlabel('Importance', fontsize=12)
ax.set_title(f'Top {TOP_N_FEATURES} Feature Importance (Random Forest)', fontsize=16, fontweight='bold')
# Flip the y axis so the first entry is drawn at the top.
ax.invert_yaxis()
ax.grid(True, alpha=0.3, axis='x')
fig.tight_layout()
plt.show()

## 8️⃣ Step 8: Backtesting

Test the strategy on the held-out test set, which was not used for training or validation.

In [None]:
from backtesting.backtest_engine import BacktestEngine

# Initialize backtest engine
backtest = BacktestEngine(
    initial_capital=100000,
    commission=0.001,  # 0.1%
    slippage=0.0005    # 0.05%
)

# Generate signals from ensemble predictions
predictions = ensemble.predict_proba(X_test)
signals = backtest.generate_signals_from_predictions(predictions, threshold=0.55)

# Get corresponding test data with dates.
# Slice from an explicit non-negative start: `iloc[-len(X_test):]` would return
# the WHOLE frame for an empty test set, because -0 == 0.
# NOTE(review): assumes the test split is the final block of rows of
# `data_with_features`, in the same order — confirm against prepare_training_data.
n_test_rows = len(X_test)
test_start = max(len(data_with_features) - n_test_rows, 0)
test_data = data_with_features.iloc[test_start:].copy()

# Run backtest (the next cell reads its plots from `results`)
results = backtest.run_backtest(test_data, signals, price_col='close')

print("\n" + "="*80)
print("BACKTEST RESULTS")
print("="*80)
print(f"Initial Capital:       ${results.initial_capital:,.2f}")
print(f"Final Portfolio Value: ${results.final_value:,.2f}")
print(f"Total Return:          {results.total_return:.2%}")
print(f"Annualized Return:     {results.annualized_return:.2%}")
print(f"Sharpe Ratio:          {results.sharpe_ratio:.2f}")
print(f"Max Drawdown:          {results.max_drawdown:.2%}")
print(f"Win Rate:              {results.win_rate:.2%}")
print(f"Profit Factor:         {results.profit_factor:.2f}")
print(f"Total Trades:          {results.total_trades}")
print("="*80)

In [None]:
# Portfolio value on the top panel, drawdown on the bottom panel.
portfolio_values = results.portfolio_values
initial_capital = results.initial_capital

fig, axes = plt.subplots(2, 1, figsize=(14, 10))
value_ax, drawdown_ax = axes

# Top panel: portfolio value with the starting capital as a dashed reference line
value_ax.plot(portfolio_values.index, portfolio_values, linewidth=2, color='green')
value_ax.axhline(y=initial_capital, color='blue', linestyle='--', alpha=0.5, label='Initial Capital')
value_ax.set_title('Portfolio Value Over Time', fontsize=16, fontweight='bold')
value_ax.set_ylabel('Value ($)', fontsize=12)
value_ax.legend()
value_ax.grid(True, alpha=0.3)

# Shade the area between the curve and the starting capital:
# green where the portfolio is at or above it, red where it is below.
shaded_regions = (
    (portfolio_values >= initial_capital, 'green'),
    (portfolio_values < initial_capital, 'red'),
)
for region_mask, region_color in shaded_regions:
    value_ax.fill_between(
        portfolio_values.index,
        initial_capital,
        portfolio_values,
        where=region_mask,
        alpha=0.3,
        color=region_color,
    )

# Bottom panel: drawdown series scaled by 100 for the percent axis
drawdowns = results.drawdowns
drawdown_ax.fill_between(drawdowns.index, 0, drawdowns * 100, color='red', alpha=0.5)
drawdown_ax.set_title('Drawdown Over Time', fontsize=16, fontweight='bold')
drawdown_ax.set_xlabel('Date', fontsize=12)
drawdown_ax.set_ylabel('Drawdown (%)', fontsize=12)
drawdown_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 🎉 Conclusion

You've successfully:
- ✅ Generated synthetic market data
- ✅ Created technical indicators
- ✅ Trained multiple ML models
- ✅ Created an ensemble model
- ✅ Analyzed feature importance
- ✅ Backtested your strategy
- ✅ Visualized performance metrics

## 📚 Next Steps

1. Try different technical indicators
2. Tune hyperparameters with Optuna
3. Experiment with different prediction horizons
4. Test on real market data
5. Implement custom trading strategies

## ⚠️ Remember

This is for **educational purposes only**. Always:
- Thoroughly test any strategy before using real money
- Understand the risks involved in trading
- Consult with financial professionals
- Never risk more than you can afford to lose