# Gold Trading ML Model

Two approaches:
1. **Guided Model** - Trained on actual trades (mean-reversion style)
2. **Pure Model** - Learns from scratch (price prediction)

Run all cells to train and compare both models.

In [None]:
# Setup - NO cloning needed, all functions are inlined
import os

# Colab detection: looks for 'google.colab' in the string form of the IPython
# shell object, and falls back to False when __builtins__ has no __IPYTHON__
# attribute (i.e. get_ipython() is never called in that case).
IN_COLAB = 'google.colab' in str(get_ipython()) if hasattr(__builtins__, '__IPYTHON__') else False

if IN_COLAB:
    # Download data files directly from GitHub
    # The lines starting with "!" are IPython shell escapes, not plain Python;
    # this cell only runs inside a notebook/IPython session.
    !wget -q https://raw.githubusercontent.com/altommo/gold-ml-trading/main/data/XAUUSD_1h.csv -O XAUUSD_1h.csv
    !wget -q https://raw.githubusercontent.com/altommo/gold-ml-trading/main/data/trades_with_features.csv -O trades_with_features.csv
    !pip install xgboost -q
    print("Setup complete!")
else:
    # Nothing is downloaded locally; the loading cells read from data/ instead.
    print("Running locally")

In [None]:
# Imports and Simple Indicator Functions (v0.1 - inlined to avoid loading new indicators.py)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
from xgboost import XGBClassifier
import joblib
import warnings
warnings.filterwarnings('ignore')

# ===== SIMPLE INDICATOR FUNCTIONS (v0.1) =====
def calculate_wavetrend(df, n1=10, n2=21):
    """Return a copy of ``df`` with WaveTrend columns ``wt1`` and ``wt2``.

    Args:
        df: DataFrame with ``high``, ``low`` and ``close`` columns.
        n1: EMA span used for both the typical-price average and the
            average absolute deviation.
        n2: EMA span used to smooth the scaled deviation into ``wt1``.

    Returns:
        A new DataFrame; the input is not modified. ``wt2`` is the 4-bar
        simple moving average of ``wt1``.
    """
    def _ema(series, span):
        # Recursive (adjust=False) exponential moving average.
        return series.ewm(span=span, adjust=False).mean()

    typical_price = (df['high'] + df['low'] + df['close']) / 3
    deviation = typical_price - _ema(typical_price, n1)
    mean_abs_deviation = _ema(deviation.abs(), n1)

    # Deviation scaled by 0.015 * average absolute deviation, then smoothed.
    # The first bar is 0/0 and therefore NaN.
    wt1 = _ema(deviation / (0.015 * mean_abs_deviation), n2)
    return df.assign(wt1=wt1, wt2=wt1.rolling(4).mean())

def calculate_wolfpack(df):
    """Return a copy of ``df`` with a ``wolfpack`` column.

    ``wolfpack`` is the span-3 EMA of ``close`` minus the span-8 EMA of
    ``close`` (both with ``adjust=False``). The input is not modified.
    """
    close = df['close']
    fast_ema = close.ewm(span=3, adjust=False).mean()
    slow_ema = close.ewm(span=8, adjust=False).mean()
    return df.assign(wolfpack=fast_ema - slow_ema)

def calculate_rsi(df, period=14):
    """Return a copy of ``df`` with an ``rsi`` column.

    Uses simple rolling means (not exponential smoothing) of the up-moves and
    down-moves of ``close`` over ``period`` bars.

    Args:
        df: DataFrame with a ``close`` column.
        period: Rolling window length in bars.

    Returns:
        A new DataFrame; the input is not modified. The first ``period`` rows
        of ``rsi`` are NaN.
    """
    change = df['close'].diff()
    avg_gain = change.clip(lower=0).rolling(period).mean()
    avg_loss = (-change.clip(upper=0)).rolling(period).mean()
    relative_strength = avg_gain / avg_loss
    return df.assign(rsi=100 - (100 / (1 + relative_strength)))

def calculate_atr(df, period=14):
    """Return a copy of ``df`` with ``atr`` and ``atr_pct`` columns.

    ``atr`` here is the rolling mean of the bar range (``high - low``); it
    does not use the previous close. ``atr_pct`` expresses it as a percentage
    of ``close``.

    Args:
        df: DataFrame with ``high``, ``low`` and ``close`` columns.
        period: Rolling window length in bars.

    Returns:
        A new DataFrame; the input is not modified.
    """
    bar_range = df['high'] - df['low']
    average_range = bar_range.rolling(period).mean()
    return df.assign(
        atr=average_range,
        atr_pct=average_range / df['close'] * 100,
    )

def calculate_moving_averages(df):
    """Return a copy of ``df`` with simple moving averages of ``close``.

    Adds ``ma20``, ``ma50`` and ``ma200`` plus ``price_vs_ma20`` and
    ``price_vs_ma50``, the percentage distance of ``close`` from the
    20- and 50-bar averages. The input is not modified.
    """
    out = df.copy()
    close = out['close']

    for window in (20, 50, 200):
        out[f'ma{window}'] = close.rolling(window).mean()

    # Only the two shorter averages get a relative-distance column.
    for window in (20, 50):
        average = out[f'ma{window}']
        out[f'price_vs_ma{window}'] = (close - average) / average * 100

    return out

def calculate_returns(df):
    """Return a copy of ``df`` with trailing percentage returns of ``close``.

    Adds ``ret_1h``, ``ret_4h`` and ``ret_24h``: the percent change of
    ``close`` over the previous 1, 4 and 24 rows. The input is not modified.
    """
    out = df.copy()
    horizons = (('ret_1h', 1), ('ret_4h', 4), ('ret_24h', 24))
    for column, bars_back in horizons:
        out[column] = out['close'].pct_change(bars_back) * 100
    return out

def add_all_indicators(df):
    """Run every indicator function on ``df`` and add two derived columns.

    Applies, in order, WaveTrend, Wolfpack, RSI, ATR, moving averages and
    returns, then adds:

    * ``volatility`` - 24-row rolling standard deviation of ``ret_1h``.
    * ``trend`` - ``1`` where ``ma20 > ma50``, otherwise ``-1`` (rows where
      either average is still NaN also get ``-1`` because the comparison is
      False).

    Returns a new DataFrame; each indicator function works on a copy.
    """
    indicator_steps = (
        calculate_wavetrend,
        calculate_wolfpack,
        calculate_rsi,
        calculate_atr,
        calculate_moving_averages,
        calculate_returns,
    )
    for step in indicator_steps:
        df = step(df)

    df['volatility'] = df['ret_1h'].rolling(24).std()
    uptrend = df['ma20'] > df['ma50']
    df['trend'] = np.where(uptrend, 1, -1)
    return df

# ===== BACKTEST FUNCTION =====
def backtest_model(df, model, scaler, features, threshold=0.5, hold_hours=24):
    """Score a classifier as a fixed-horizon, long-only signal generator.

    Every bar whose predicted class-1 probability exceeds ``threshold`` is
    treated as a buy that is closed ``hold_hours`` rows later. Only signals
    whose holding period completes inside ``df`` are evaluated; signals in the
    final ``hold_hours`` rows have no exit price and are excluded instead of
    being counted as losing trades.

    Args:
        df: Price/feature DataFrame with a ``close`` column and every column
            named in ``features``. It is not modified.
        model: Fitted classifier exposing ``predict_proba``.
        scaler: Fitted transformer exposing ``transform``.
        features: List of feature column names fed to the scaler/model.
        threshold: Minimum class-1 probability required to open a trade.
        hold_hours: Holding period, measured in rows of ``df``.

    Returns:
        ``(results, trades)`` where ``results`` is a dict with keys
        ``total_trades``, ``avg_return``, ``total_return``, ``win_rate``
        (percent) and ``sharpe``, and ``trades`` is the DataFrame of evaluated
        signal rows. When nothing can be evaluated, ``results`` is all zeros
        and ``trades`` is an empty DataFrame.
    """
    no_trades = {'total_trades': 0, 'avg_return': 0, 'total_return': 0, 'win_rate': 0, 'sharpe': 0}

    df = df.copy()
    # Compute the forward return BEFORE dropping incomplete feature rows, so
    # the exit is always exactly hold_hours bars after the entry even if rows
    # in the middle of the frame get dropped.
    df['future_ret'] = (df['close'].shift(-hold_hours) / df['close'] - 1) * 100
    df = df.dropna(subset=features)
    if df.empty:
        # Nothing to score; avoid handing an empty matrix to the scaler.
        return no_trades, pd.DataFrame()

    X = scaler.transform(df[features])
    df['prob'] = model.predict_proba(X)[:, 1]
    df['signal'] = (df['prob'] > threshold).astype(int)

    # Keep only signals with a realised exit; a NaN return would otherwise
    # inflate total_trades and be scored as a loss in win_rate.
    evaluable = (df['signal'] == 1) & df['future_ret'].notna()
    trades = df[evaluable].copy()
    if trades.empty:
        return no_trades, pd.DataFrame()

    returns = trades['future_ret']
    dispersion = returns.std()  # NaN for a single trade -> sharpe falls back to 0
    results = {
        'total_trades': len(trades),
        'avg_return': returns.mean(),
        'total_return': returns.sum(),
        'win_rate': (returns > 0).mean() * 100,
        # NOTE(review): sqrt(252) treats each trade like one independent daily
        # return; overlapping signals violate that, so read this as a rough score.
        'sharpe': returns.mean() / dispersion * np.sqrt(252) if dispersion > 0 else 0
    }
    return results, trades

# Confirmation that the imports and helper definitions in this cell ran.
print("Libraries and functions loaded (v0.1 simple indicators)")

## 1. Load Data

In [None]:
# Load chart data
# On Colab the CSV sits in the working directory (downloaded by the setup
# cell); locally it is read from the data/ folder.
data_path = 'XAUUSD_1h.csv' if IN_COLAB else 'data/XAUUSD_1h.csv'
# The first CSV column becomes the index and is parsed as dates.
chart_df = pd.read_csv(data_path, index_col=0, parse_dates=True)
print(f"Chart data: {len(chart_df)} bars")
print(f"Date range: {chart_df.index.min()} to {chart_df.index.max()}")

# Add indicators
# add_all_indicators returns a new frame, so chart_df is rebound to the
# enriched copy.
chart_df = add_all_indicators(chart_df)
print("Indicators calculated")

In [None]:
# Load trades
# Same Colab/local path convention as the chart data.
trades_path = 'trades_with_features.csv' if IN_COLAB else 'data/trades_with_features.csv'
# entry_time, exit_time and chart_time are parsed as datetimes on load.
trades_df = pd.read_csv(trades_path, parse_dates=['entry_time', 'exit_time', 'chart_time'])
print(f"Trades: {len(trades_df)}")
# NOTE(review): using 'won' directly as a mask and negating it with ~ assumes
# the column is boolean with no missing values - confirm against the CSV.
print(f"Winners: {len(trades_df[trades_df['won']])}, Losers: {len(trades_df[~trades_df['won']])}")
print(f"Win Rate: {trades_df['won'].mean()*100:.1f}%")

In [None]:
# Define features
# Model inputs. Only a subset of the indicator columns is used: the raw
# moving averages, atr, volatility and trend are computed but not listed here.
# The same names are also read from trades_df, so the trades CSV must carry
# these columns too.
FEATURES = ['wt1', 'wt2', 'wolfpack', 'rsi', 'atr_pct', 
            'price_vs_ma20', 'price_vs_ma50', 'ret_1h', 'ret_4h', 'ret_24h']

# Separate buys and sells
# Split on the exact strings 'Buy' / 'Sell' in the 'direction' column; rows
# with any other value end up in neither frame.
buys = trades_df[trades_df['direction'] == 'Buy']
sells = trades_df[trades_df['direction'] == 'Sell']

print(f"Buys: {len(buys)}, Sells: {len(sells)}")

## 2. Your Trading Pattern Analysis

In [None]:
# Your entry patterns
# Mean value of each model feature at the recorded buy entries...
print("=== YOUR BUY ENTRIES ===")
print(buys[FEATURES].mean())

# ...and at the recorded sell entries, for a side-by-side read.
print("\n=== YOUR SELL ENTRIES ===")
print(sells[FEATURES].mean())

In [None]:
# Visualize your entry conditions
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# One histogram panel per indicator:
# (grid position, trades column, x position of the reference line, title).
panels = [
    ((0, 0), 'wt1', 0, 'WaveTrend at Entry'),
    ((0, 1), 'rsi', 50, 'RSI at Entry'),
    ((1, 0), 'wolfpack', 0, 'Wolfpack at Entry'),
    ((1, 1), 'price_vs_ma20', 0, 'Price vs MA20 % at Entry'),
]

for position, column, reference_x, panel_title in panels:
    ax = axes[position]
    # Buys in green and sells in red on the same axes, missing values dropped.
    ax.hist(buys[column].dropna(), bins=30, alpha=0.7, label='Your Buys', color='green')
    ax.hist(sells[column].dropna(), bins=30, alpha=0.7, label='Your Sells', color='red')
    # Dashed line marks the indicator's neutral level.
    ax.axvline(x=reference_x, color='black', linestyle='--', alpha=0.5)
    ax.set_title(panel_title)
    ax.legend()

plt.tight_layout()
plt.show()

print("\nPattern: You BUY when indicators are LOW (dips), SELL when HIGH (rips)")
print("This is MEAN REVERSION trading.")

## 3. Create Training Labels

In [None]:
# Label chart data based on your trades
def create_guided_labels(df, buy_trades, sell_trades):
    """Flag chart bars that lie within one hour of a recorded trade.

    Args:
        df: Chart DataFrame whose index is compared against trade times.
        buy_trades: DataFrame of buy trades with a ``chart_time`` column.
        sell_trades: DataFrame of sell trades with a ``chart_time`` column.

    Returns:
        A copy of ``df`` with integer columns ``label_buy`` and ``label_sell``
        set to 1 on every bar whose index is within +/- 1 hour (inclusive) of
        a trade's ``chart_time`` and 0 elsewhere. Trades with a missing
        ``chart_time`` are ignored.
    """
    labelled = df.copy()
    tolerance = pd.Timedelta(hours=1)

    def _flag_bars(trades, column):
        # Start from all-zero labels, then switch on the bars around each trade.
        labelled[column] = 0
        for entry_time in trades['chart_time']:
            if pd.isna(entry_time):
                continue
            near_entry = (labelled.index >= entry_time - tolerance) & \
                         (labelled.index <= entry_time + tolerance)
            labelled.loc[near_entry, column] = 1

    _flag_bars(buy_trades, 'label_buy')
    _flag_bars(sell_trades, 'label_sell')
    return labelled

# Label for pure model (predict future price)
LOOKAHEAD = 24  # hours
TARGET_PCT = 0.5  # % gain

# Percentage change of close LOOKAHEAD rows ahead; the final LOOKAHEAD rows
# have no future bar and are NaN.
chart_df['future_ret'] = (chart_df['close'].shift(-LOOKAHEAD) / chart_df['close'] - 1) * 100
# A NaN future_ret compares False on both lines below, so the final LOOKAHEAD
# rows receive label 0 (not NaN) for both buy and sell.
chart_df['label_pure_buy'] = (chart_df['future_ret'] > TARGET_PCT).astype(int)
chart_df['label_pure_sell'] = (chart_df['future_ret'] < -TARGET_PCT).astype(int)

# Add guided labels
# Marks bars within one hour of each recorded trade's chart_time.
chart_df = create_guided_labels(chart_df, buys, sells)

# Label counts; the pure labels also show their share of all bars.
print(f"Guided buy labels: {chart_df['label_buy'].sum()}")
print(f"Guided sell labels: {chart_df['label_sell'].sum()}")
print(f"Pure buy labels: {chart_df['label_pure_buy'].sum()} ({chart_df['label_pure_buy'].mean()*100:.1f}%)")
print(f"Pure sell labels: {chart_df['label_pure_sell'].sum()} ({chart_df['label_pure_sell'].mean()*100:.1f}%)")

## 4. Train Guided Model (Your Style)

In [None]:
# Prepare training data
# Rows with any missing feature are dropped. 'label_buy' is initialised to 0
# by create_guided_labels, so including it in the subset removes nothing extra.
df_train = chart_df.dropna(subset=FEATURES + ['label_buy'])
X = df_train[FEATURES]
y = df_train['label_buy']

print(f"Training samples: {len(X)}")
print(f"Positive samples: {y.sum()} ({y.mean()*100:.2f}%)")

# Split
# Shuffled 80/20 split, stratified so both parts keep the same share of
# positive labels.
# NOTE(review): each trade labels up to three adjacent bars, and a shuffled
# split can place neighbouring bars of the same trade in both train and test,
# which would make the test scores optimistic - consider a time-based split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale
# The scaler is fitted on the training part only and reused for the test part.
scaler_guided = StandardScaler()
X_train_scaled = scaler_guided.fit_transform(X_train)
X_test_scaled = scaler_guided.transform(X_test)

# Class weights for imbalance
# Maps each class value found in y_train to its 'balanced' weight.
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
weight_dict = dict(zip(np.unique(y_train), class_weights))
print(f"Class weights: {weight_dict}")

In [None]:
# Train Guided Buy Model
# scale_pos_weight is the ratio of the two balanced class weights (for the
# 'balanced' scheme this works out to n_negative / n_positive); it falls back
# to 1 if either class is missing from the training labels.
guided_model = XGBClassifier(
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    scale_pos_weight=weight_dict[1]/weight_dict[0] if 0 in weight_dict and 1 in weight_dict else 1,
    random_state=42,
    eval_metric='logloss'
)

guided_model.fit(X_train_scaled, y_train)

# Evaluate
# y_pred holds hard class predictions on the held-out part; y_prob holds the
# class-1 probabilities (not used again in the visible notebook).
y_pred = guided_model.predict(X_test_scaled)
y_prob = guided_model.predict_proba(X_test_scaled)[:, 1]

print("=== GUIDED MODEL RESULTS ===")
print(classification_report(y_test, y_pred))

In [None]:
# Feature importance - Guided
# Pair each feature name with the fitted model's importance score and sort
# ascending, so the most important feature is drawn last (top of the barh plot).
importance_guided = pd.DataFrame({
    'feature': FEATURES,
    'importance': guided_model.feature_importances_
}).sort_values('importance', ascending=True)

plt.figure(figsize=(10, 6))
plt.barh(importance_guided['feature'], importance_guided['importance'])
plt.title('Guided Model - What Features Matter for YOUR Entries')
plt.xlabel('Importance')
plt.tight_layout()
plt.show()

## 5. Train Pure Model (From Scratch)

In [None]:
# Pure model training data
# Drop rows with missing features AND rows without a realised forward return.
# 'label_pure_buy' is never NaN (a NaN future_ret compares False and becomes
# 0), so filtering on the label alone would keep the final LOOKAHEAD bars as
# fake negative examples.
df_pure = chart_df.dropna(subset=FEATURES + ['label_pure_buy', 'future_ret'])
X_pure = df_pure[FEATURES]
y_pure = df_pure['label_pure_buy']

print(f"Training samples: {len(X_pure)}")
print(f"Positive samples: {y_pure.sum()} ({y_pure.mean()*100:.1f}%)")

# Split
# Chronological 80/20 split (no shuffling): the label of each bar looks
# LOOKAHEAD bars into the future, so neighbouring bars share almost the same
# features and label. A shuffled split would put such near-duplicates on both
# sides and inflate the test scores.
# Assumes chart_df is sorted by time - TODO confirm for the input CSV.
X_train_p, X_test_p, y_train_p, y_test_p = train_test_split(
    X_pure, y_pure, test_size=0.2, shuffle=False
)

# Purge the last LOOKAHEAD training bars: their labels are computed from
# prices that fall inside the test period.
if len(X_train_p) > LOOKAHEAD:
    X_train_p = X_train_p.iloc[:-LOOKAHEAD]
    y_train_p = y_train_p.iloc[:-LOOKAHEAD]

# Scale
# Fit on the training part only; reuse the fitted scaler for the test part.
scaler_pure = StandardScaler()
X_train_p_scaled = scaler_pure.fit_transform(X_train_p)
X_test_p_scaled = scaler_pure.transform(X_test_p)

In [None]:
# Train Pure Model
# Same tree settings as the guided model, but without scale_pos_weight, so
# no class re-weighting is applied here.
pure_model = XGBClassifier(
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    random_state=42,
    eval_metric='logloss'
)

pure_model.fit(X_train_p_scaled, y_train_p)

# Hard class predictions on the held-out part.
y_pred_p = pure_model.predict(X_test_p_scaled)

print("=== PURE MODEL RESULTS ===")
print(classification_report(y_test_p, y_pred_p))

In [None]:
# Feature importance - Pure
# Pair each feature name with the fitted model's importance score and sort
# ascending, so the most important feature is drawn last (top of the barh plot).
importance_pure = pd.DataFrame({
    'feature': FEATURES,
    'importance': pure_model.feature_importances_
}).sort_values('importance', ascending=True)

plt.figure(figsize=(10, 6))
plt.barh(importance_pure['feature'], importance_pure['importance'])
plt.title('Pure Model - What Features Predict Price Going Up')
plt.xlabel('Importance')
plt.tight_layout()
plt.show()

## 6. Compare Feature Importance

In [None]:
# Side by side comparison
# One row, two panels: guided importances on the left, pure on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# NOTE: importance_guided and importance_pure are each sorted by their own
# importance values, so the feature order on the two y-axes can differ;
# compare bars by label, not by vertical position.
axes[0].barh(importance_guided['feature'], importance_guided['importance'], color='blue')
axes[0].set_title('GUIDED: Your Trading Style')
axes[0].set_xlabel('Importance')

axes[1].barh(importance_pure['feature'], importance_pure['importance'], color='orange')
axes[1].set_title('PURE: Price Prediction')
axes[1].set_xlabel('Importance')

plt.tight_layout()
plt.show()

print("\nDifference shows what YOU focus on vs what statistically predicts price movement")

## 7. Backtest Both Models

In [None]:
# Backtest on last 500 bars (out of sample)
# NOTE(review): these bars are only out of sample if they were excluded from
# model fitting - confirm against the train/test splits in the training cells.
test_data = chart_df.tail(500).copy()
print(f"Backtest period: {test_data.index.min()} to {test_data.index.max()}")

# Guided model backtest
# The guided model uses a lower probability cut-off (0.3) than the pure
# model (0.5); both hold each simulated trade for 24 bars.
guided_results, guided_trades = backtest_model(
    test_data, guided_model, scaler_guided, FEATURES, threshold=0.3, hold_hours=24
)

# Pure model backtest
pure_results, pure_trades = backtest_model(
    test_data, pure_model, scaler_pure, FEATURES, threshold=0.5, hold_hours=24
)

# Build a small text table: one row per metric, one column per model.
# Values are pre-formatted as strings except for the raw trade counts.
print("\n=== BACKTEST COMPARISON ===")
comparison = pd.DataFrame({
    'Metric': ['Trades', 'Win Rate %', 'Avg Return %', 'Total Return %', 'Sharpe'],
    'Guided (Your Style)': [
        guided_results['total_trades'],
        f"{guided_results['win_rate']:.1f}",
        f"{guided_results['avg_return']:.2f}",
        f"{guided_results['total_return']:.2f}",
        f"{guided_results['sharpe']:.2f}"
    ],
    'Pure (From Scratch)': [
        pure_results['total_trades'],
        f"{pure_results['win_rate']:.1f}",
        f"{pure_results['avg_return']:.2f}",
        f"{pure_results['total_return']:.2f}",
        f"{pure_results['sharpe']:.2f}"
    ]
})
print(comparison.to_string(index=False))

## 8. Current Signal

In [None]:
# Get current signal from latest bar
# Most recent bar that has every feature available (a one-row DataFrame,
# or empty if no bar qualifies).
latest = chart_df.dropna(subset=FEATURES).tail(1)

if len(latest) > 0:
    # Each model gets the bar scaled by its own fitted scaler.
    X_latest_g = scaler_guided.transform(latest[FEATURES])
    X_latest_p = scaler_pure.transform(latest[FEATURES])
    
    # Class-1 (buy) probability for the single row.
    guided_prob = guided_model.predict_proba(X_latest_g)[0, 1]
    pure_prob = pure_model.predict_proba(X_latest_p)[0, 1]
    
    print("=== CURRENT SIGNAL ===")
    print(f"Time: {latest.index[0]}")
    print(f"Price: ${latest['close'].values[0]:.2f}")
    print(f"\nIndicators:")
    print(f"  WaveTrend: {latest['wt1'].values[0]:.1f}")
    print(f"  Wolfpack: {latest['wolfpack'].values[0]:.2f}")
    print(f"  RSI: {latest['rsi'].values[0]:.1f}")
    print(f"  Price vs MA20: {latest['price_vs_ma20'].values[0]:.2f}%")
    print(f"\nBUY Probability:")
    print(f"  Guided (Your Style): {guided_prob*100:.1f}%")
    print(f"  Pure (ML Optimal): {pure_prob*100:.1f}%")
    
    # Signal interpretation
    # The cut-offs (0.3 guided, 0.5 pure) repeat the thresholds passed to
    # backtest_model in the backtest cell; keep them in sync when tuning.
    print(f"\nInterpretation:")
    if guided_prob > 0.3:
        print("  Guided: This looks like YOUR kind of buy setup")
    else:
        print("  Guided: Not matching your typical entry pattern")
    
    if pure_prob > 0.5:
        print("  Pure: ML predicts price likely to go up")
    else:
        print("  Pure: ML doesn't see strong upside")

## 9. Save Models

In [None]:
# Save models
# os is already imported in the setup cell; the repeat lets this cell run on
# its own.
import os
os.makedirs('models', exist_ok=True)

# Persist both classifiers together with their scalers: each model must be
# fed inputs transformed by the scaler it was trained with.
joblib.dump(guided_model, 'models/guided_model.pkl')
joblib.dump(pure_model, 'models/pure_model.pkl')
joblib.dump(scaler_guided, 'models/scaler_guided.pkl')
joblib.dump(scaler_pure, 'models/scaler_pure.pkl')

print("Models saved to models/ folder")

# Download as zip if on Colab
if IN_COLAB:
    import shutil
    # Zips the whole models/ folder into models_v0.1.zip in the working directory.
    shutil.make_archive('models_v0.1', 'zip', 'models')
    from google.colab import files
    files.download('models_v0.1.zip')
    print("Downloaded models_v0.1.zip")