<a href="https://colab.research.google.com/github/anushka1511/Gold-Price-Forecasting/blob/main/goldrate_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# SETUP AND LIBRARIES
import pandas as pd
import numpy as np
import yfinance as yf
from tqdm import tqdm
import warnings

# Forecasting Models
from statsmodels.tsa.api import ExponentialSmoothing
from sklearn.ensemble import GradientBoostingRegressor

# Performance Metrics
from sklearn.metrics import r2_score

warnings.filterwarnings("ignore")

# Function for SMAPE
def smape(y_true, y_pred):
    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    return np.mean(numerator / (denominator + 1e-8)) * 100

# DATA ACQUISITION
print("Fetching historical gold price data (GOLD vs USD)...")

ticker = 'GC=F'
ticker_name = 'GOLD_USD'

temp_data = yf.download(ticker, period="10y", interval="1d", progress=True)

price_col = 'Adj Close' if 'Adj Close' in temp_data.columns else 'Close'
df_gold = temp_data[[price_col]].rename(columns={price_col: ticker_name})

df_gold.ffill(inplace=True)
df_gold.dropna(inplace=True)

print(f"\nData for {ticker_name} successfully prepared.")
print(df_gold.head())


# FEATURE ENGINEERING FOR THE ENSEMBLER ALGORITHM
def create_features(df, target_col):
    """Creates time-series features from a dataframe."""
    print("\nEngineering features (lags, rolling averages) for the Ensembler model...")
    df_copy = df.copy()
    df_copy['target'] = df_copy[target_col]
    # Lag features (price from previous days)
    for i in [1, 2, 3, 5, 10, 21]:
        df_copy[f'lag_{i}'] = df_copy['target'].shift(i)
    # Rolling window features (trends and volatility)
    for window in [5, 10, 21]:
        df_copy[f'rolling_mean_{window}'] = df_copy['target'].shift(1).rolling(window).mean()
        df_copy[f'rolling_std_{window}'] = df_copy['target'].shift(1).rolling(window).std()
    df_copy.dropna(inplace=True)
    return df_copy

ml_df = create_features(df_gold, ticker_name)


# MODEL TRAINING, BACKTESTING, AND EVALUATION
print("\n--- Starting Backtesting with a Thorough Methodology ---")

results = []

# Time Series Split (80% train, 20% test) for backtesting
train_size = int(len(df_gold) * 0.8)
test_size_ml = int(len(ml_df) * 0.8)

# Holt-Winters Algorithm
print("Training and evaluating Holt-Winters Algorithm...")
train_hw, test_hw = df_gold.iloc[:train_size], df_gold.iloc[train_size:]
hw_model = ExponentialSmoothing(
    train_hw[ticker_name], trend='add', seasonal='add', seasonal_periods=12
).fit()
hw_predictions = hw_model.forecast(len(test_hw))
hw_r2 = r2_score(test_hw[ticker_name], hw_predictions)
hw_smape = smape(test_hw[ticker_name], hw_predictions)
results.append({'Model': 'Holt-Winters', 'R2': hw_r2, 'SMAPE': hw_smape})


# Ensembler Algorithm (Gradient Boosting)
print("Training and evaluating Ensembler Algorithm...")
y = ml_df['target']
X = ml_df.drop(columns=['target', ticker_name])
X_train, X_test = X.iloc[:test_size_ml], X.iloc[test_size_ml:]
y_train, y_test = y.iloc[:test_size_ml], y.iloc[test_size_ml:]

ensemble_model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.05, subsample=0.8, random_state=42)
ensemble_model.fit(X_train, y_train)
ensemble_predictions = ensemble_model.predict(X_test)
ensemble_r2 = r2_score(y_test, ensemble_predictions)
ensemble_smape = smape(y_test, ensemble_predictions)
results.append({'Model': 'Ensemble (Gradient Boosting)', 'R2': ensemble_r2, 'SMAPE': ensemble_smape})


# RESULTS
print("\n--- Final Performance Comparison (R2 and SMAPE) ---")
results_df = pd.DataFrame(results).set_index('Model')
print(results_df.round(3))

print("\n--- Conclusion from Backtesting ---")
winner = results_df['SMAPE'].idxmin()
print(f"Based on the SMAPE score, the '{winner}' has been observed to outperform the other method.")
print("This demonstrates the efficacy of advanced machine learning in competently analyzing gold market instability.")

print("\n--- Valuable Interpretability: Recognizing Gold Market Patterns ---")
feature_importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': ensemble_model.feature_importances_
}).sort_values(by='Importance', ascending=False)

print("Top 10 most important features for predicting gold price:")
print(feature_importance_df.head(10))

Fetching historical gold price data (GOLD vs USD)...


[*********************100%***********************]  1 of 1 completed



Data for GOLD_USD successfully prepared.
Price          GOLD_USD
Ticker             GC=F
Date                   
2015-06-29  1178.500000
2015-06-30  1171.500000
2015-07-01  1169.000000
2015-07-02  1163.000000
2015-07-06  1172.900024

Engineering features (lags, rolling averages) for the Ensembler model...

--- Starting Backtesting with a Thorough Methodology ---
Training and evaluating Holt-Winters Algorithm...
Training and evaluating Ensembler Algorithm...

--- Final Performance Comparison (R2 and SMAPE) ---
                                 R2   SMAPE
Model                                      
Holt-Winters                 -5.312     NaN
Ensemble (Gradient Boosting) -1.106  19.821

--- Conclusion from Backtesting ---
Based on the SMAPE score, the 'Ensemble (Gradient Boosting)' has been observed to outperform the other method.
This demonstrates the efficacy of advanced machine learning in competently analyzing gold market instability.

--- Valuable Interpretability: Recognizing Gold M