<a href="https://colab.research.google.com/github/Uzo01/TradingBotProject/blob/main/ML_Test_WM_Strat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===================== WM ML DATA PIPELINE ‚Äî COLAB ===================== #
# PURPOSE:
# - Load ML dataset exported from MT5 EA (NOT trade log)
# - Validate structure
# - Plot ATR behaviour + price
# ====================================================================== #

import pandas as pd
import matplotlib.pyplot as plt

# ------------------ CONFIG ------------------ #
CSV_FILE = "wm_ml_dataset_v1.csv"   # MUST be ML dataset, NOT trade log
# -------------------------------------------- #

# Load CSV
df = pd.read_csv(CSV_FILE)

# Normalise column names
df.columns = df.columns.str.strip().str.lower()

# Validate required columns
required_cols = [
    "timestamp",
    "open",
    "high",
    "low",
    "close",
    "atr_m15",
    "atr_m30"
]

missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(
        f"‚ùå WRONG CSV LOADED.\n"
        f"Missing columns: {missing}\n"
        f"Loaded columns: {df.columns.tolist()}\n\n"
        f"üëâ You likely loaded the TRADE LOG instead of the ML dataset."
    )

# Convert timestamp
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
df = df.dropna(subset=["timestamp"]).sort_values("timestamp")

print("‚úÖ ML dataset loaded successfully")
print("Rows:", len(df))
print("Columns:", df.columns.tolist())

# ------------------ PLOTS ------------------ #

# Close price
plt.figure(figsize=(12,5))
plt.plot(df["timestamp"], df["close"])
plt.title("Close Price Over Time (ML Dataset)")
plt.xlabel("Time")
plt.ylabel("Price")
plt.show()

# ATR behaviour
plt.figure(figsize=(12,5))
plt.plot(df["timestamp"], df["atr_m15"], label="ATR M15")
plt.plot(df["timestamp"], df["atr_m30"], label="ATR M30")
plt.legend()
plt.title("ATR Behaviour (M15 vs M30)")
plt.xlabel("Time")
plt.ylabel("ATR")
plt.show()

# ------------------ QUICK SANITY CHECK ------------------ #
print("\nüìä ATR Statistics")
print(df[["atr_m15", "atr_m30"]].describe())


ValueError: ‚ùå WRONG CSV LOADED.
Missing columns: ['open', 'high', 'low', 'close', 'atr_m15', 'atr_m30']
Loaded columns: ['timestamp', 'symbol', 'dir', 'volume', 'closeprice', 'netpl', 'daypl', 'closedtradestoday']

üëâ You likely loaded the TRADE LOG instead of the ML dataset.

In [1]:
# ================= GOLD BACKLOG DATA (yfinance) ================= #

import yfinance as yf
import pandas as pd

# Download Gold Futures (most reliable)
gold = yf.download(
    "GC=F",          # Gold Futures
    interval="15m",
    period="60d",    # last 60 days
    auto_adjust=False,
    progress=False
)

# Reset index
gold = gold.reset_index()

# Standardise columns
gold = gold.rename(columns={
    "Datetime": "timestamp",
    "Open": "yf_open",
    "High": "yf_high",
    "Low": "yf_low",
    "Close": "yf_close",
    "Volume": "yf_volume"
})

# Convert time
gold["timestamp"] = pd.to_datetime(gold["timestamp"])

# Basic ATR (for regime only, not execution)
gold["yf_range"] = gold["yf_high"] - gold["yf_low"]
gold["yf_atr_14"] = gold["yf_range"].rolling(14).mean()

print("yfinance gold data loaded")
print(gold.head())
print("\nRows:", len(gold))

# Save for later merge
gold.to_csv("gold_yfinance_backlog_15m.csv", index=False)


yfinance gold data loaded
Price                  timestamp    Adj Close     yf_close      yf_high  \
Ticker                                   GC=F         GC=F         GC=F   
0      2025-10-07 07:30:00+00:00  3972.800049  3972.800049  3979.399902   
1      2025-10-07 07:45:00+00:00  3968.600098  3968.600098  3974.800049   
2      2025-10-07 08:00:00+00:00  3969.500000  3969.500000  3971.000000   
3      2025-10-07 08:15:00+00:00  3973.600098  3973.600098  3976.500000   
4      2025-10-07 08:30:00+00:00  3975.399902  3975.399902  3976.300049   

Price        yf_low      yf_open yf_volume  yf_range yf_atr_14  
Ticker         GC=F         GC=F      GC=F                      
0       3971.399902  3974.399902      3696  8.000000       NaN  
1       3965.800049  3972.600098      3703  9.000000       NaN  
2       3963.399902  3968.600098      4466  7.600098       NaN  
3       3968.000000  3969.300049      3509  8.500000       NaN  
4       3973.300049  3973.500000      2111  3.000000      