In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import talib as ta
import yfinance as yf
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import (
    GridSearchCV,
    TimeSeriesSplit,
    cross_val_score,
    train_test_split,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

In [None]:
# Download daily price history for the Yahoo Finance symbol "^XAU".
# NOTE(review): the backtest plot further down is titled "XAUUSD"; confirm that
# "^XAU" is the instrument actually intended.
XAU = yf.Ticker("^XAU")
data = XAU.history(interval="1d", start="2019-01-01", end="2024-01-01")

# Close-to-close percentage change; the first row has no predecessor and is NaN.
data = data.assign(Return=data['Close'].pct_change())

In [None]:
# Technical indicators computed with TA-Lib from the raw price arrays.
close_price = data['Close'].to_numpy()
high_price = data['High'].to_numpy()
low_price = data['Low'].to_numpy()

# Moving averages and RSI on the close.
data['SMA_10'] = ta.SMA(close_price, timeperiod=10)
data['SMA_50'] = ta.SMA(close_price, timeperiod=50)
data['RSI'] = ta.RSI(close_price, timeperiod=14)

# MACD returns three arrays; only the first two are stored as features.
macd_line, macd_signal, _macd_unused = ta.MACD(
    close_price, fastperiod=12, slowperiod=26, signalperiod=9
)
data['MACD'] = macd_line
data['MACD_Signal'] = macd_signal

# Bollinger bands (upper, middle, lower) over 20 periods.
bb_upper, bb_middle, bb_lower = ta.BBANDS(close_price, timeperiod=20)
data['BB_upper'] = bb_upper
data['BB_middle'] = bb_middle
data['BB_lower'] = bb_lower

data['Momentum'] = ta.MOM(close_price, timeperiod=10)
data['Volatility'] = ta.ATR(high_price, low_price, close_price, timeperiod=14)

# STOCH returns a tuple; only its first element is kept.
stoch_outputs = ta.STOCH(high_price, low_price, close_price, fastk_period=14, slowk_period=3)
data['Stochastic'] = stoch_outputs[0]

# High-low range of the previous candle.
data['Previous_Candle_Size'] = (data['High'] - data['Low']).abs().shift(1)

# One-day lags of selected columns, named "<column>_Lag1".
for lagged_column in ('Return', 'RSI', 'SMA_10', 'SMA_50'):
    data[f'{lagged_column}_Lag1'] = data[lagged_column].shift(1)

# Remove every row that still contains a NaN in any column.
data.dropna(inplace=True)

In [None]:
# Define features and target
features = [
    'Return', 'SMA_10', 'SMA_50', 'RSI', 'MACD', 'MACD_Signal',
    'BB_upper', 'BB_middle', 'BB_lower', 'Momentum', 'Return_Lag1', 'RSI_Lag1',
    'SMA_10_Lag1', 'SMA_50_Lag1', 'Volatility', 'Stochastic', 'Previous_Candle_Size'
]

# Label each day with the direction of the NEXT day's return: 1 for up, 0 otherwise.
next_day_return = data['Return'].shift(-1)
data['Target'] = np.where(next_day_return > 0, 1, 0)

# The most recent row has no next-day return (NaN). `NaN > 0` is False, so it would
# silently be labelled 0; keep it out of the modelling set instead.
has_label = next_day_return.notna()
X = data.loc[has_label, features]
y = data.loc[has_label, 'Target']

# Chronological split (no shuffling): the model is trained on the earliest 80% of
# days and tested on the most recent 20%. Shuffling a time series would let the
# model train on days that come after the ones it is tested on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Fit the scaler on the training rows only so that test-set statistics do not leak
# into the features, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix (train-fitted scaler), kept in case other cells use the name.
X_scaled = scaler.transform(X)

In [None]:
# Hyperparameter grid for XGBoost: 3 * 3 * 4 * 3 * 3 = 324 combinations.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0]
}

# Expanding-window folds: each fold validates on rows that come after the rows it
# was trained on. This only prevents look-ahead if X_train is in chronological
# order - verify against the train/test split cell.
time_series_cv = TimeSeriesSplit(n_splits=3)

grid_search = GridSearchCV(
    XGBClassifier(random_state=42),
    param_grid,
    cv=time_series_cv,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

In [None]:
# Print the best hyperparameters from GridSearchCV
print("Best Hyperparameters:", grid_search.best_params_)

# Use the best model found by GridSearchCV. With GridSearchCV's default
# refit=True this estimator has already been refit on all of X_train.
# (Previously this cell read an undefined name `model`, which fails on a fresh kernel.)
best_xg_model = grid_search.best_estimator_

# Evaluate the model using cross-validation on the training data. Folds are
# time-ordered; this assumes X_train rows are in chronological order.
cv_scores = cross_val_score(
    best_xg_model, X_train, y_train, cv=TimeSeriesSplit(n_splits=5)
)
print(f"Cross-Validation Accuracy: {cv_scores.mean():.2f} (+/- {cv_scores.std():.2f})")

# Make predictions and evaluate on the held-out test set
y_pred = best_xg_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2f}")

In [None]:
# Backtesting with updated features and better portfolio simulation
capital = 10000  # Initial capital
capital_history = []

# Per-trade risk limits as fractions of current capital.
# NOTE: they are applied to the close-to-close move only; intraday High/Low are
# not consulted, so this caps the daily P&L rather than simulating a real stop.
stop_loss_pct = 0.02  # 2% stop loss
take_profit_pct = 0.05  # 5% take profit

# Map test indices to the original DataFrame
test_indices = y_test.index.to_list()

# Close of the trading day that follows each row of `data`. Looking this up by
# date (instead of taking the next *test* row) keeps every trade a true one-day
# holding period even if the test rows are not consecutive days.
close_today = data['Close']
close_next_day = data['Close'].shift(-1)

# Predict once for the whole test set; row i of X_test belongs to test_indices[i].
predictions = best_xg_model.predict(X_test)

# TODO (requested, not implemented yet):
#   - 0.1% FEE
#   - Print CSV for checking
#   - NAV 10 usd
#   - For Hedge Fund:
#       - CAGR (Return average per year)
#       - Max Drawdown
#       - Standard Deviation (Both up and down)

# Trade logic with stop-loss and take-profit to reduce risk.
# The final test row is left untraded so capital_history has
# len(test_indices) - 1 entries, which the results cell relies on when it
# prepends the initial capital and aligns with y_test.index.
for i in range(len(test_indices) - 1):
    current_idx = test_indices[i]
    price_today = close_today.loc[current_idx]
    price_next_day = close_next_day.loc[current_idx]

    if pd.isna(price_next_day):
        # No following trading day in `data`: stay flat for this step.
        capital_history.append(capital)
        continue

    # +1 = long (predicted up), -1 = short (predicted down).
    direction = 1 if predictions[i] == 1 else -1
    trade_return = direction * (price_next_day - price_today) / price_today

    # Clamp the one-day return between the stop-loss and take-profit levels.
    trade_return = max(-stop_loss_pct, min(take_profit_pct, trade_return))

    profit = trade_return * capital
    capital += profit
    capital_history.append(capital)


# Plot backtest results
plt.figure(figsize=(12, 6))
plt.plot(capital_history, label='Portfolio Value')
plt.title('Backtest of Enhanced XAUUSD Long/Short Strategy')
plt.xlabel('Trade Days')
plt.ylabel('Portfolio Value (USD)')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
from tabulate import tabulate

def calculate_metrics(returns, risk_free_rate=0.01):
    """
    Compute annualized Sharpe Ratio, Sortino Ratio, and Maximum Drawdown.

    Parameters:
        returns (pd.Series): Series of daily (per-period) returns. NaN entries
            are ignored.
        risk_free_rate (float): Annual risk-free rate. Defaults to 1% (0.01).

    Returns:
        str: A grid-formatted table (built with ``tabulate``) with one row per
            metric. Ratios are printed with two decimals, the drawdown as a
            percentage.

    Notes:
        Both ratios are annualized by multiplying by sqrt(252). If the relevant
        standard deviation is zero or undefined (e.g. fewer than two
        observations), the corresponding ratio is inf or nan.
    """
    # Drop NaNs up front (e.g. the trailing NaN left by a shifted pct_change).
    returns = pd.Series(returns, dtype=float).dropna()

    # Convert the annual risk-free rate to a per-day rate (252 trading days).
    risk_free_daily = (1 + risk_free_rate) ** (1 / 252) - 1

    # Excess returns over the daily risk-free rate
    excess_returns = returns - risk_free_daily
    annualization = np.sqrt(252)

    # Sharpe Ratio: mean excess return per unit of total volatility.
    sharpe_ratio = excess_returns.mean() / excess_returns.std() * annualization

    # Sortino Ratio: same numerator, but risk is the volatility of losing days.
    # Annualized the same way as Sharpe (multiply by sqrt(252), not divide).
    downside_returns = returns[returns < 0]
    sortino_ratio = excess_returns.mean() / downside_returns.std() * annualization

    # Maximum Drawdown: worst peak-to-trough fall of the compounded equity curve.
    cumulative_returns = (1 + returns).cumprod()
    drawdowns = cumulative_returns / cumulative_returns.cummax() - 1
    max_drawdown = drawdowns.min()

    results = [
        ["Sharpe Ratio", f"{sharpe_ratio:.2f}"],
        ["Sortino Ratio", f"{sortino_ratio:.2f}"],
        ["Maximum Drawdown", f"{max_drawdown:.2%}"],  # Format as percentage
    ]

    table = tabulate(results, headers=["Metric", "Value"], tablefmt="grid")

    return table

In [None]:
# Equity curve including the starting capital as its first point.
initial_capital = 10000
capital_history_tmp = [initial_capital, *capital_history]

# One row per test date; 'Return' holds the portfolio value at that step.
results = pd.DataFrame({
    'Date': y_test.index,
    'Return': capital_history_tmp,
})

# Step-to-step percentage change, shifted up one row so that each row carries
# the change from its own value to the next one (the last row is NaN).
results['Percentage return'] = results['Return'].pct_change().shift(-1)

daily_returns = results['Percentage return']

# Summarize the return series and show the resulting table.
metrics = calculate_metrics(daily_returns)

print(metrics)