# Profit Optimizer Model v1.0

**Goal: Maximum profit with minimum drawdown. Beat the market.**

This is NOT a prediction model. It's a trading system that:
- Learns from actual trading outcomes (P&L)
- Optimizes for Sharpe ratio (risk-adjusted returns)
- Reinforces winning patterns, avoids losing ones
- Treats profit as the ONLY metric that matters

In [None]:
# Setup
# Install the third-party packages used by this notebook (-q keeps pip quiet).
!pip install xgboost scikit-learn pandas numpy matplotlib seaborn optuna -q

import os
# Reuse an existing checkout when one is present (and update it in place);
# otherwise clone the repository. Either branch leaves the working directory
# inside the repo, so the relative data/ and models/ paths used by later
# cells resolve against it.
if os.path.exists('gold-ml-trading'):
    %cd gold-ml-trading
    !git pull
else:
    !git clone https://github.com/altommo/gold-ml-trading.git
    %cd gold-ml-trading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
import optuna
from optuna.samplers import TPESampler
import joblib
import json
import warnings
# Silence every Python warning and restrict Optuna's logger to WARNING and above
# so the per-trial output does not flood the notebook.
# NOTE(review): the blanket 'ignore' filter also hides deprecation warnings
# from pandas / xgboost; confirm that is acceptable.
warnings.filterwarnings('ignore')
optuna.logging.set_verbosity(optuna.logging.WARNING)

print("Libraries loaded!")

In [None]:
# ============================================================
# INDICATOR FUNCTIONS
# ============================================================

def calculate_wavetrend(df, n1=10, n2=21):
    """Return a copy of ``df`` with WaveTrend columns ``wt1`` and ``wt2``.

    Args:
        df: DataFrame with ``high``, ``low`` and ``close`` columns.
        n1: EMA span used for the typical-price average and its mean
            absolute deviation.
        n2: EMA span used to smooth the channel index into ``wt1``.

    Returns:
        A new DataFrame; the input is not modified. ``wt2`` is the 4-bar
        simple moving average of ``wt1``.
    """
    out = df.copy()

    typical_price = (out['high'] + out['low'] + out['close']) / 3
    deviation = typical_price - typical_price.ewm(span=n1, adjust=False).mean()
    mean_abs_deviation = deviation.abs().ewm(span=n1, adjust=False).mean()

    # 0.015 scales the channel index; a zero deviation yields NaN/inf here.
    channel_index = deviation / (0.015 * mean_abs_deviation)

    wt1 = channel_index.ewm(span=n2, adjust=False).mean()
    out['wt1'] = wt1
    out['wt2'] = wt1.rolling(4).mean()
    return out

def calculate_wolfpack(df):
    """Return a copy of ``df`` with a ``wolfpack`` column.

    The column is the 3-span EMA of ``close`` minus the 8-span EMA of
    ``close``. The input DataFrame is left untouched.
    """
    result = df.copy()
    close = result['close']
    fast_ema = close.ewm(span=3, adjust=False).mean()
    slow_ema = close.ewm(span=8, adjust=False).mean()
    result['wolfpack'] = fast_ema - slow_ema
    return result

def calculate_rsi(df, period=14):
    """Return a copy of ``df`` with an ``rsi`` column.

    Gains and losses are averaged with a simple rolling mean over
    ``period`` bars of ``close`` differences, then combined as
    ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Args:
        df: DataFrame with a ``close`` column.
        period: Rolling window length in bars.

    Returns:
        A new DataFrame; the first ``period`` rows of ``rsi`` are NaN.
    """
    out = df.copy()
    change = out['close'].diff()

    avg_gain = change.clip(lower=0).rolling(period).mean()
    avg_loss = change.clip(upper=0).mul(-1).rolling(period).mean()

    relative_strength = avg_gain / avg_loss
    out['rsi'] = 100 - (100 / (1 + relative_strength))
    return out

def calculate_atr(df, period=14):
    """Return a copy of ``df`` with ``atr`` and ``atr_pct`` columns.

    The true range of a bar is the largest of:
      * high - low
      * |high - previous close|
      * |low - previous close|
    so gaps between bars are included. The first bar has no previous
    close and falls back to high - low.

    Args:
        df: DataFrame with ``high``, ``low`` and ``close`` columns.
        period: Rolling window (in bars) for the simple mean of true range.

    Returns:
        A new DataFrame. ``atr`` is the rolling mean of the true range and
        ``atr_pct`` is ``atr`` expressed as a percentage of ``close``.
    """
    df = df.copy()
    prev_close = df['close'].shift(1)

    # Row-wise max skips the NaN candidates on the first bar.
    true_range = pd.concat(
        [
            df['high'] - df['low'],
            (df['high'] - prev_close).abs(),
            (df['low'] - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)

    df['atr'] = true_range.rolling(period).mean()
    df['atr_pct'] = df['atr'] / df['close'] * 100
    return df

def calculate_moving_averages(df):
    """Return a copy of ``df`` with 20/50/200-bar moving-average features.

    Adds ``ma20``, ``ma50``, ``ma200`` (simple rolling means of ``close``)
    followed by ``price_vs_ma20``, ``price_vs_ma50``, ``price_vs_ma200``
    (percentage distance of ``close`` from each average).
    """
    out = df.copy()
    close = out['close']
    windows = (20, 50, 200)

    # Two passes keep the column order: all averages first, then distances.
    for window in windows:
        out[f'ma{window}'] = close.rolling(window).mean()
    for window in windows:
        average = out[f'ma{window}']
        out[f'price_vs_ma{window}'] = (close - average) / average * 100
    return out

def calculate_returns(df):
    """Return a copy of ``df`` with percentage returns of ``close``.

    Adds ``ret_1h``, ``ret_4h`` and ``ret_24h``: the percentage change of
    ``close`` over 1, 4 and 24 rows respectively.
    """
    out = df.copy()
    close = out['close']
    for column, lag in (('ret_1h', 1), ('ret_4h', 4), ('ret_24h', 24)):
        out[column] = close.pct_change(lag) * 100
    return out

def calculate_bollinger_bands(df, period=20, std_dev=2):
    """Return a copy of ``df`` with Bollinger Band columns.

    Args:
        df: DataFrame with a ``close`` column.
        period: Rolling window length for the mean and standard deviation.
        std_dev: Number of standard deviations between the mid line and
            each band.

    Returns:
        A new DataFrame with ``bb_mid``, ``bb_std``, ``bb_upper``,
        ``bb_lower``, ``bb_pct`` (position of ``close`` inside the bands,
        0 = lower band, 1 = upper band) and ``bb_width`` (band span as a
        percentage of the mid line).
    """
    out = df.copy()
    window = out['close'].rolling(period)

    out['bb_mid'] = window.mean()
    out['bb_std'] = window.std()

    band_offset = out['bb_std'] * std_dev
    out['bb_upper'] = out['bb_mid'] + band_offset
    out['bb_lower'] = out['bb_mid'] - band_offset

    band_span = out['bb_upper'] - out['bb_lower']
    out['bb_pct'] = (out['close'] - out['bb_lower']) / band_span
    out['bb_width'] = band_span / out['bb_mid'] * 100
    return out

def calculate_momentum(df):
    """Return a copy of ``df`` with momentum features.

    Adds, in order: ``roc_5`` and ``roc_10`` (percentage rate of change of
    ``close`` over 5 and 10 rows), ``macd`` (12-span EMA minus 26-span EMA),
    ``macd_signal`` (9-span EMA of ``macd``), ``macd_hist`` (their
    difference) and ``stoch_rsi`` (position of RSI within its 14-bar
    min/max range, scaled to 0-100).

    If ``df`` has no ``rsi`` column, RSI is computed via ``calculate_rsi``
    for the stochastic calculation only; it is not added to the output.
    """
    out = df.copy()
    close = out['close']

    for lag in (5, 10):
        out[f'roc_{lag}'] = (close / close.shift(lag) - 1) * 100

    fast_ema = close.ewm(span=12, adjust=False).mean()
    slow_ema = close.ewm(span=26, adjust=False).mean()
    out['macd'] = fast_ema - slow_ema
    out['macd_signal'] = out['macd'].ewm(span=9, adjust=False).mean()
    out['macd_hist'] = out['macd'] - out['macd_signal']

    if 'rsi' in out.columns:
        rsi = out['rsi']
    else:
        rsi = calculate_rsi(out)['rsi']

    rsi_window = rsi.rolling(14)
    lowest = rsi_window.min()
    highest = rsi_window.max()
    out['stoch_rsi'] = (rsi - lowest) / (highest - lowest) * 100
    return out

def calculate_time_features(df):
    """Return a copy of ``df`` with calendar features from its index.

    When the index is a ``DatetimeIndex`` this adds ``hour``,
    ``day_of_week`` and ``is_overlap`` (1 when the hour is between 13 and
    16 inclusive, else 0). For any other index type the copy is returned
    without new columns.
    """
    out = df.copy()
    if not isinstance(out.index, pd.DatetimeIndex):
        return out

    out['hour'] = out.index.hour
    out['day_of_week'] = out.index.dayofweek
    out['is_overlap'] = out['hour'].between(13, 16).astype(int)
    return out

def calculate_trend(df):
    """Return a copy of ``df`` with moving-average trend flags.

    Requires ``ma20``, ``ma50`` and ``ma200`` columns. Adds
    ``trend_20_50`` (+1 when ma20 > ma50, otherwise -1), ``trend_50_200``
    (+1 when ma50 > ma200, otherwise -1) and ``trend_score`` (their sum).
    Comparisons involving NaN are false, so such rows get -1.
    """
    out = df.copy()
    short_above_mid = out['ma20'] > out['ma50']
    mid_above_long = out['ma50'] > out['ma200']

    out['trend_20_50'] = np.where(short_above_mid, 1, -1)
    out['trend_50_200'] = np.where(mid_above_long, 1, -1)
    out['trend_score'] = out['trend_20_50'] + out['trend_50_200']
    return out

def calculate_volatility(df):
    """Return a copy of ``df`` with rolling volatility features.

    Requires a ``ret_1h`` column. Adds ``volatility_24h`` and
    ``volatility_week`` (rolling standard deviation of ``ret_1h`` over 24
    and 168 rows) and ``vol_ratio`` (the former divided by the latter).
    """
    out = df.copy()
    returns = out['ret_1h']

    short_term = returns.rolling(24).std()
    long_term = returns.rolling(168).std()

    out['volatility_24h'] = short_term
    out['volatility_week'] = long_term
    out['vol_ratio'] = short_term / long_term
    return out

def add_all_indicators(df):
    """Run every indicator function over ``df`` and return the result.

    The steps run in a fixed order because later ones read columns that
    earlier ones add: momentum reuses ``rsi`` when present, trend reads the
    moving averages, and volatility reads ``ret_1h``.
    """
    pipeline = (
        calculate_wavetrend,
        calculate_wolfpack,
        calculate_rsi,
        calculate_atr,
        calculate_moving_averages,
        calculate_returns,
        calculate_bollinger_bands,
        calculate_momentum,
        calculate_time_features,
        calculate_trend,
        calculate_volatility,
    )
    for step in pipeline:
        df = step(df)
    return df

# Cell-completion marker: shows that the indicator definitions were executed.
print("Indicators defined!")

In [None]:
# ============================================================
# TRADING ENGINE - This is what matters
# ============================================================

def simulate_trades(df, signals, tp_pct=0.5, sl_pct=0.3, max_hold=24):
    """
    Simulate non-overlapping long trades with take-profit / stop-loss exits.

    A trade is opened at the close of every bar whose signal is truthy
    (provided no trade is already open) and is closed by the first of:
      * 'sl'      - a later bar's low touches the stop-loss price
      * 'tp'      - a later bar's high touches the take-profit price
      * 'timeout' - ``max_hold`` bars pass; exit at that bar's close
    When one bar touches both levels the stop-loss wins (conservative).
    Entries are only taken where a full ``max_hold`` window of later bars
    exists, so the last ``max_hold`` bars never open a trade.

    Args:
        df: DataFrame with ``close``, ``high`` and ``low`` columns.
        signals: Boolean-like sequence aligned positionally with ``df``
            (list, NumPy array or pandas Series with any index).
        tp_pct: Take-profit distance above entry, in percent.
        sl_pct: Stop-loss distance below entry, in percent.
        max_hold: Maximum number of bars a trade may stay open.

    Returns:
        DataFrame with one row per trade and the columns ``entry_idx``,
        ``entry_price``, ``exit_price``, ``pnl_pct``, ``exit_reason`` and
        ``bars_held``. With no trades the frame is empty but still has
        these columns.
    """
    columns = ['entry_idx', 'entry_price', 'exit_price',
               'pnl_pct', 'exit_reason', 'bars_held']

    # Positional NumPy views: avoids building a Series per bar via df.iloc
    # and makes signals[i] positional even for a Series with a date index.
    signal_flags = np.asarray(signals, dtype=bool)
    close = df['close'].to_numpy(dtype=float)
    high = df['high'].to_numpy(dtype=float)
    low = df['low'].to_numpy(dtype=float)
    n_bars = len(close)

    trades = []
    i = 0

    # The loop bound guarantees i + max_hold is a valid bar position.
    while i < n_bars - max_hold:
        if not signal_flags[i]:
            i += 1
            continue

        entry_price = close[i]
        tp_price = entry_price * (1 + tp_pct / 100)
        sl_price = entry_price * (1 - sl_pct / 100)

        # Default outcome: held for the full window, exit at its last close.
        exit_price = close[i + max_hold]
        exit_reason = 'timeout'
        bars_held = 0

        for j in range(1, max_hold + 1):
            bars_held = j

            # Check SL first (conservative)
            if low[i + j] <= sl_price:
                exit_price = sl_price
                exit_reason = 'sl'
                break

            if high[i + j] >= tp_price:
                exit_price = tp_price
                exit_reason = 'tp'
                break

        trades.append({
            'entry_idx': i,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'pnl_pct': (exit_price - entry_price) / entry_price * 100,
            'exit_reason': exit_reason,
            'bars_held': bars_held,
        })

        # Resume on the bar after the exit to avoid overlapping trades.
        i += bars_held + 1

    return pd.DataFrame(trades, columns=columns)


def calculate_metrics(trades_df):
    """Calculate trading metrics from a DataFrame of trades.

    Args:
        trades_df: DataFrame with a ``pnl_pct`` column (one row per trade),
            or an empty DataFrame when no trades were taken.

    Returns:
        Dict with the keys ``sharpe``, ``total_return``, ``win_rate``,
        ``trades``, ``max_dd``, ``profit_factor`` and ``avg_return``.
        Returns are summed (not compounded), so ``total_return`` and
        ``max_dd`` are in percentage points. With no trades a penalty
        result is returned (``sharpe`` -10, ``max_dd`` 100, everything
        else 0) carrying the same keys, so callers can always read them.
    """
    if len(trades_df) == 0:
        return {
            'sharpe': -10,
            'total_return': 0,
            'win_rate': 0,
            'trades': 0,
            'max_dd': 100,
            'profit_factor': 0,
            'avg_return': 0,
        }

    returns = trades_df['pnl_pct'].to_numpy(dtype=float)

    total_return = returns.sum()
    win_rate = (returns > 0).mean() * 100

    # Sharpe ratio scaled by sqrt(252).
    # NOTE(review): the inputs are per-trade returns, not daily returns, so
    # this is only a true annualisation at roughly one trade per trading day.
    if len(returns) > 1 and returns.std() > 0:
        sharpe = returns.mean() / returns.std() * np.sqrt(252)
    else:
        sharpe = 0

    # Max drawdown on the additive equity curve. The curve starts at 0 so a
    # losing streak at the very beginning is measured from the initial equity.
    equity = np.concatenate(([0.0], np.cumsum(returns)))
    running_peak = np.maximum.accumulate(equity)
    max_dd = (running_peak - equity).max()

    # Profit factor: gross wins / gross losses, capped at 10 with no losses.
    wins = returns[returns > 0].sum()
    losses = abs(returns[returns < 0].sum())
    profit_factor = wins / losses if losses > 0 else 10

    return {
        'sharpe': sharpe,
        'total_return': total_return,
        'win_rate': win_rate,
        'trades': len(trades_df),
        'max_dd': max_dd,
        'profit_factor': profit_factor,
        'avg_return': returns.mean()
    }

# Cell-completion marker: shows that the trading-engine functions were defined.
print("Trading engine defined!")

In [None]:
# ============================================================
# LOAD AND PREPARE DATA
# ============================================================

# Hourly XAUUSD bars, indexed by their parsed 'datetime' column.
df = pd.read_csv(
    'data/XAUUSD_KAGGLE_1h.csv',
    index_col='datetime',
    parse_dates=['datetime'],
)
print(f"Loaded {len(df):,} bars")
print(f"Date range: {df.index.min()} to {df.index.max()}")

# Derive every indicator column from the raw bars.
print("\nCalculating indicators...")
df = add_all_indicators(df)

# Model inputs, grouped by indicator family (order is the model's column order).
FEATURES = [
    # oscillators
    'wt1', 'wt2', 'wolfpack', 'rsi', 'stoch_rsi',
    # distance from moving averages
    'price_vs_ma20', 'price_vs_ma50', 'price_vs_ma200',
    # Bollinger bands
    'bb_pct', 'bb_width',
    # momentum
    'roc_5', 'roc_10', 'macd_hist',
    # volatility
    'atr_pct', 'volatility_24h', 'vol_ratio',
    # trend
    'trend_score',
    # recent returns
    'ret_1h', 'ret_4h', 'ret_24h',
    # calendar
    'hour', 'day_of_week', 'is_overlap',
]

# Keep only rows where every feature is available (drops indicator warm-up rows).
df_clean = df.dropna(subset=FEATURES)
print(f"Clean data: {len(df_clean):,} bars")

In [None]:
# ============================================================
# SPLIT DATA: Train / Validation / Test
# ============================================================

# Chronological 60/20/20 split: train, validation (used by the optimizer)
# and test (final evaluation only).
n_rows = len(df_clean)
train_end, val_end = int(n_rows * 0.6), int(n_rows * 0.8)

train_df, val_df, test_df = (
    df_clean.iloc[start:stop].copy()
    for start, stop in ((None, train_end), (train_end, val_end), (val_end, None))
)

for split_label, split_df in (("Train:", train_df), ("Val:  ", val_df), ("Test: ", test_df)):
    print(f"{split_label} {split_df.index.min().date()} to {split_df.index.max().date()} ({len(split_df):,} bars)")

In [None]:
# ============================================================
# CREATE TRAINING LABELS FROM ACTUAL TRADE OUTCOMES
# ============================================================

def create_trade_labels(df, tp_pct=0.5, sl_pct=0.3, max_hold=24):
    """
    Label each bar by the outcome of a long trade entered at its close.

    Label = 1 if, within the next ``max_hold`` bars, the take-profit price
                is touched before the stop-loss price.
    Label = 0 otherwise: the stop-loss is touched first (it also wins when
                one bar touches both levels), or neither level is touched
                within the window, whatever the closing P&L.

    The last ``max_hold`` bars have no complete look-ahead window and are
    labelled 0.

    Args:
        df: DataFrame with ``close``, ``high`` and ``low`` columns.
        tp_pct: Take-profit distance above entry, in percent.
        sl_pct: Stop-loss distance below entry, in percent.
        max_hold: Number of future bars inspected per entry.

    Returns:
        Integer NumPy array with exactly ``len(df)`` labels.
    """
    close = df['close'].to_numpy(dtype=float)
    high = df['high'].to_numpy(dtype=float)
    low = df['low'].to_numpy(dtype=float)

    n_bars = len(close)
    labels = np.zeros(n_bars, dtype=int)

    # Only bars with a full look-ahead window can be evaluated.
    n_entries = n_bars - max_hold
    if n_entries <= 0:
        return labels

    entry = close[:n_entries]
    tp_price = entry * (1 + tp_pct / 100)
    sl_price = entry * (1 - sl_pct / 100)

    # Step every candidate trade forward one bar at a time; a trade stops
    # being tracked as soon as either level is touched.
    still_open = np.ones(n_entries, dtype=bool)
    for offset in range(1, max_hold + 1):
        bar_low = low[offset:offset + n_entries]
        bar_high = high[offset:offset + n_entries]

        stopped = still_open & (bar_low <= sl_price)
        # SL is checked first, so a bar touching both levels counts as a loss.
        won = still_open & ~stopped & (bar_high >= tp_price)

        labels[:n_entries][won] = 1
        still_open &= ~(stopped | won)
        if not still_open.any():
            break

    return labels

print("Creating trade-based labels...")
# Labels for the training split use a fixed 0.5% take-profit / 0.3% stop-loss.
# NOTE(review): the Optuna cell later in this notebook searches over tp_pct
# and sl_pct when simulating trades, so these labels may not match the exit
# levels being evaluated - confirm this is intended.
train_labels = create_trade_labels(train_df, tp_pct=0.5, sl_pct=0.3)
print(f"Train labels: {train_labels.mean():.1%} would be profitable trades")

In [None]:
# ============================================================
# OPTUNA OPTIMIZATION - Find best model for PROFIT
# ============================================================

print("="*60)
print("OPTIMIZING FOR MAXIMUM SHARPE RATIO")
print("="*60)
print("\nThis will take a few minutes...\n")

# Scale features: the scaler is fitted on the training split only and then
# applied to the validation split.
scaler = StandardScaler()
X_train = scaler.fit_transform(train_df[FEATURES])
X_val = scaler.transform(val_df[FEATURES])
y_train = train_labels

def objective(trial):
    """Optuna objective: score one model + trading configuration.

    Trains an XGBClassifier on the training split with the trial's
    hyperparameters, turns validation probabilities into entry signals with
    the trial's threshold, simulates trades with the trial's TP/SL and
    scores the result.

    Returns:
        -10 when fewer than 20 validation trades were taken; otherwise the
        validation Sharpe ratio plus 0.5 when the profit factor exceeds 1.5.
        The raw Sharpe and trade count are stored as trial user attributes
        ('sharpe', 'trades') so they can be reported without the bonus.

    NOTE(review): the model is always fitted on ``y_train``, whose labels
    were built with fixed TP/SL values, while ``tp_pct``/``sl_pct`` vary per
    trial in the simulation - confirm this mismatch is intended.
    """

    # Model hyperparameters
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 0.5),
        'random_state': 42,
        'eval_metric': 'logloss'
    }

    # Trading parameters
    threshold = trial.suggest_float('threshold', 0.3, 0.9)
    tp_pct = trial.suggest_float('tp_pct', 0.3, 1.0)
    sl_pct = trial.suggest_float('sl_pct', 0.2, 0.5)

    # Train model
    model = XGBClassifier(**params)
    model.fit(X_train, y_train, verbose=False)

    # Get predictions on validation
    val_proba = model.predict_proba(X_val)[:, 1]
    signals = val_proba >= threshold

    # Simulate trades
    trades = simulate_trades(val_df, signals, tp_pct=tp_pct, sl_pct=sl_pct)
    metrics = calculate_metrics(trades)

    # Keep the un-bonused figures with the trial for honest reporting.
    trial.set_user_attr('sharpe', float(metrics['sharpe']))
    trial.set_user_attr('trades', int(metrics['trades']))

    # Penalize if too few trades (not enough data)
    if metrics['trades'] < 20:
        return -10

    # Objective: Sharpe ratio, plus a fixed bonus for a strong profit factor
    score = metrics['sharpe']

    # Bonus for profit factor > 1.5
    if metrics['profit_factor'] > 1.5:
        score += 0.5

    return score

# Run optimization
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=100, show_progress_bar=True)

# best_value is the composite score (Sharpe plus any bonus, or the -10
# penalty), so the raw validation Sharpe is printed separately.
print(f"\nBest score: {study.best_value:.2f}")
print(f"Best Sharpe: {study.best_trial.user_attrs['sharpe']:.2f}")
print(f"Best params: {study.best_params}")

In [None]:
# ============================================================
# TRAIN FINAL MODEL WITH BEST PARAMS
# ============================================================

best = study.best_params

# Split the winning trial into classifier hyperparameters and trading params.
model_params = {
    name: best[name]
    for name in (
        'n_estimators',
        'max_depth',
        'learning_rate',
        'subsample',
        'colsample_bytree',
        'min_child_weight',
        'gamma',
    )
}
model_params['random_state'] = 42
model_params['eval_metric'] = 'logloss'

THRESHOLD, TP_PCT, SL_PCT = best['threshold'], best['tp_pct'], best['sl_pct']

print("Training final model...")
print(f"\nTrading params:")
print(f"  Threshold: {THRESHOLD:.2f}")
print(f"  Take Profit: {TP_PCT:.2f}%")
print(f"  Stop Loss: {SL_PCT:.2f}%")

# Refit on train + validation, with labels rebuilt for the chosen TP/SL.
full_train_df = pd.concat([train_df, val_df])
full_train_labels = create_trade_labels(
    full_train_df,
    tp_pct=TP_PCT,
    sl_pct=SL_PCT,
)

# A fresh scaler is fitted on the combined data for use at test time.
scaler_final = StandardScaler()
X_full_train = scaler_final.fit_transform(full_train_df[FEATURES])

final_model = XGBClassifier(**model_params)
final_model.fit(X_full_train, full_train_labels, verbose=False)

print("Model trained!")

In [None]:
# ============================================================
# TEST ON UNSEEN DATA
# ============================================================

print("="*60)
print("FINAL TEST - UNSEEN DATA")
print(f"Period: {test_df.index.min().date()} to {test_df.index.max().date()}")
print("="*60)

# Score the held-out split with the final scaler and model.
X_test = scaler_final.transform(test_df[FEATURES])
test_proba = final_model.predict_proba(X_test)[:, 1]
test_signals = test_proba >= THRESHOLD

# Simulate trades
test_trades = simulate_trades(test_df, test_signals, tp_pct=TP_PCT, sl_pct=SL_PCT)
test_metrics = calculate_metrics(test_trades)

# (label, metrics key, format spec, suffix) for each report row.
report_rows = [
    ('Trades', 'trades', '>15,', ''),
    ('Win Rate', 'win_rate', '>14.1f', '%'),
    ('Total Return', 'total_return', '>14.1f', '%'),
    ('Avg Return/Trade', 'avg_return', '>14.2f', '%'),
    ('Sharpe Ratio', 'sharpe', '>15.2f', ''),
    ('Profit Factor', 'profit_factor', '>15.2f', ''),
    ('Max Drawdown', 'max_dd', '>14.1f', '%'),
]

print(f"\n{'Metric':<20} {'Value':>15}")
print("-" * 35)
for label, key, spec, suffix in report_rows:
    # .get(): the no-trades metrics dict may lack some keys, so the report
    # prints 0 for them instead of raising KeyError.
    print(f"{label:<20} {test_metrics.get(key, 0):{spec}}{suffix}")

In [None]:
# ============================================================
# EQUITY CURVE
# ============================================================

if len(test_trades) == 0:
    print("No trades in test period.")
else:
    # Additive equity curve: running sum of per-trade percentage P&L.
    cumulative_returns = test_trades['pnl_pct'].cumsum()

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(cumulative_returns.values, linewidth=2)
    ax.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
    ax.set_xlabel('Trade #')
    ax.set_ylabel('Cumulative Return (%)')
    ax.set_title(f'Equity Curve - Test Period\nTotal: {test_metrics["total_return"]:.1f}%, Sharpe: {test_metrics["sharpe"]:.2f}')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

In [None]:
# ============================================================
# PERFORMANCE BY YEAR
# ============================================================

print("\n" + "="*60)
print("PERFORMANCE BY YEAR")
print("="*60)

# Attach the model probability and calendar year to a copy of the test bars.
test_df_copy = test_df.copy()
test_df_copy['pred'] = test_proba
test_df_copy['year'] = test_df_copy.index.year

# Each year is simulated on its own slice, so trades never span two years.
for year, year_rows in test_df_copy.groupby('year'):
    year_df = year_rows.copy()
    year_signals = year_df['pred'].values >= THRESHOLD

    year_trades = simulate_trades(year_df, year_signals, tp_pct=TP_PCT, sl_pct=SL_PCT)
    year_metrics = calculate_metrics(year_trades)

    print(f"{year}: {year_metrics['trades']:4d} trades, {year_metrics['win_rate']:5.1f}% win, "
          f"{year_metrics['total_return']:8.1f}% return, Sharpe {year_metrics['sharpe']:5.2f}")

In [None]:
# ============================================================
# FEATURE IMPORTANCE
# ============================================================

# Ascending sort so the most important feature ends up at the top of the bar chart.
importance_df = pd.DataFrame(
    {'feature': FEATURES, 'importance': final_model.feature_importances_}
).sort_values('importance')

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(importance_df['feature'], importance_df['importance'])
ax.set_xlabel('Importance')
ax.set_title('What the Model Learned Matters Most for Profit')
fig.tight_layout()
plt.show()

# The five largest importances are the last five rows after the ascending sort.
print("\nTop 5 Features for Profit:")
print(importance_df.tail(5).to_string(index=False))

In [None]:
# ============================================================
# SAVE MODEL
# ============================================================

import os
os.makedirs('models/profit_v1', exist_ok=True)

# Persist the fitted classifier and the scaler it was trained with.
for artifact, target in (
    (final_model, 'models/profit_v1/model.pkl'),
    (scaler_final, 'models/profit_v1/scaler.pkl'),
):
    joblib.dump(artifact, target)

# Pieces of the config, built separately for readability.
trading_params = {
    'threshold': float(THRESHOLD),
    'take_profit_pct': float(TP_PCT),
    'stop_loss_pct': float(SL_PCT),
    'max_hold_hours': 24
}

test_performance = {
    'trades': int(test_metrics['trades']),
    'win_rate': float(test_metrics['win_rate']),
    'total_return': float(test_metrics['total_return']),
    'sharpe': float(test_metrics['sharpe']),
    'profit_factor': float(test_metrics['profit_factor']),
    'max_drawdown': float(test_metrics['max_dd'])
}

# Everything needed to reproduce and interpret the saved model.
config = {
    'version': 'profit_v1',
    'goal': 'Maximum profit with minimum drawdown',
    'training_period': f"{full_train_df.index.min().date()} to {full_train_df.index.max().date()}",
    'test_period': f"{test_df.index.min().date()} to {test_df.index.max().date()}",
    'features': FEATURES,
    'trading_params': trading_params,
    'model_params': model_params,
    'test_performance': test_performance,
    'optuna_trials': 100
}

with open('models/profit_v1/config.json', 'w') as f:
    json.dump(config, f, indent=2)

print("Model saved to models/profit_v1/")
print("\nFiles:")
for saved_name in ('model.pkl', 'scaler.pkl', 'config.json'):
    print(f"  - {saved_name}")

In [None]:
# ============================================================
# DOWNLOAD MODELS
# ============================================================

# Zip and download
# Archive the saved model directory as models/profit_v1.zip.
!cd models && zip -r profit_v1.zip profit_v1/

# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime, so this cell is expected to fail elsewhere - confirm.
from google.colab import files
files.download('models/profit_v1.zip')

print("\nDownload started!")

In [None]:
# ============================================================
# SUMMARY
# ============================================================

banner = "="*60
print("\n" + banner)
print("PROFIT OPTIMIZER v1.0 - COMPLETE")
print(banner)

# Recap of the optimized trading parameters and the held-out test metrics.
summary_text = f"""
This model was optimized for ONE thing: PROFIT.

Training approach:
- Learned from actual trade outcomes (not just price prediction)
- Optuna optimized model + trading params for maximum Sharpe
- 100 trials to find best configuration

Optimized parameters:
- Threshold: {THRESHOLD:.2f}
- Take Profit: {TP_PCT:.2f}%
- Stop Loss: {SL_PCT:.2f}%

Test results ({test_df.index.min().date()} to {test_df.index.max().date()}):
- Trades: {test_metrics['trades']:,}
- Win Rate: {test_metrics['win_rate']:.1f}%
- Total Return: {test_metrics['total_return']:.1f}%
- Sharpe Ratio: {test_metrics['sharpe']:.2f}
- Profit Factor: {test_metrics['profit_factor']:.2f}
- Max Drawdown: {test_metrics['max_dd']:.1f}%

The model is saved and ready to download.
"""
print(summary_text)