<a href="https://colab.research.google.com/github/YoussefKh200/XAUUSD-AI-Model/blob/main/price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **MY FIRST ML PROJECT**

##**Load Data**

In [19]:
!pip install numpy pandas matplotlib torch scikit-learn




In [20]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

In [21]:
!pip install transformers



In [22]:
!pip install datasets
!pip install tokenizers




In [23]:
from datasets import load_dataset

ds = load_dataset("Caibaocb/xauusd-gold-price-historical-data-2004-2025")

# Prepare the Data

In [24]:
df = ds["train"].to_pandas()
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2004.06.11 07:15,384.0,384.3,383.8,384.3,12
1,2004.06.11 07:30,383.8,384.3,383.6,383.8,12
2,2004.06.11 07:45,383.3,383.8,383.3,383.8,20
3,2004.06.11 08:00,383.8,384.1,383.6,383.6,8
4,2004.06.11 08:15,383.6,384.3,383.5,383.5,20


In [25]:
print(df.columns)

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume'], dtype='object')


In [26]:
# Sort by date (critical for time series)
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date")

# Feature engineering
df["return"] = df["Close"].pct_change()
df["ma_10"] = df["Close"].rolling(10).mean()
df["ma_50"] = df["Close"].rolling(50).mean()

df.dropna(inplace=True)

X = df[["return", "ma_10", "ma_50"]].values

# Label: next candle direction
y = (df["Close"].shift(-1) > df["Close"]).astype(int).values

X = X[:-1]
y = y[:-1]


In [27]:
print(X.shape, y.shape)
print(np.unique(y, return_counts=True))

(8886951, 3) (8886951,)
(array([0, 1]), array([4717443, 4169508]))


# Train / Test split + scaling

In [28]:
from sklearn.preprocessing import StandardScaler

split_idx = int(len(X) * 0.8)

X_train = X[:split_idx]
X_test  = X[split_idx:]

y_train = y[:split_idx]
y_test  = y[split_idx:]

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

print("Train shape:", X_train.shape, y_train.shape)
print("Test shape :", X_test.shape, y_test.shape)


Train shape: (7109560, 3) (7109560,)
Test shape : (1777391, 3) (1777391,)


# Neural Network (unchanged, but verified)

In [29]:
import torch
import torch.nn as nn

class TradingNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(3, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

model = TradingNN()


# Training loop

In [30]:
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

epochs = 200

for epoch in range(epochs):
    model.train()

    preds = model(X_train_t)
    loss = criterion(preds, y_train_t)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch {epoch:3d} | Loss: {loss.item():.4f}")


Epoch   0 | Loss: 0.6925


KeyboardInterrupt: 

# Evaluation

In [None]:
model.eval()

X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.float32)

with torch.no_grad():
    probs = model(X_test_t).view(-1)
    preds = (probs > 0.5).int()

accuracy = (preds == y_test_t).float().mean()
print("Test accuracy:", accuracy.item())


# Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, preds.numpy())
cm


# Sanity checks

In [None]:
print("y mean (train):", y_train.mean())
print("y mean (test): ", y_test.mean())

