<a href="https://colab.research.google.com/github/collen1/Trade/blob/main/train_model_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import xgboost as xgb
import joblib
import schedule
import time
import datetime
import os
import shutil
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# === Config ===
# The paths below look like a Colab Google Drive mount ("/content/drive/MyDrive")
# -- presumably Drive must be mounted before running; confirm for your setup.

# Text file that train_model appends one class-distribution line to per run.
CLASS_LOG = "/content/drive/MyDrive/Colab Notebooks/Trade/class_balance_log.txt"
# train_model emails an alert when the share of class-1 rows is below this ratio.
IMBALANCE_ALERT_THRESHOLD = 0.05
# Training data (CSV) and the location the trained model is written to.
CSV_FILE = "/content/drive/MyDrive/Colab Notebooks/Trade/features.csv"
MODEL_FILE = "/content/drive/MyDrive/Colab Notebooks/Trade/price_predictor.pkl"
# Columns of CSV_FILE used as model inputs.
FEATURES = ["best_bid", "best_ask", "spread", "mid_price", "imbalance", "bid_volume", "ask_volume"]
# NOTE(review): LABEL is not referenced by the code visible here; train_model
# reads the "actual_label" column instead -- confirm which one is intended.
LABEL = "prediction"
# Number of CSV rows read per chunk by pandas.read_csv.
CHUNK_SIZE = 100_000

# Email configuration
# SECURITY: credentials should not live in source control. Each value can be
# supplied through an environment variable of the same name; the literals
# below are only fallbacks kept for backward compatibility. The password
# fallback has been committed in plain text, so it should be rotated and the
# literal removed once the environment variable is in place.
EMAIL_SENDER = os.environ.get("EMAIL_SENDER", "cnsd268@gmail.com")
EMAIL_PASSWORD = os.environ.get("EMAIL_PASSWORD", "crsr cepa fmbi zevs")
EMAIL_RECIPIENT = os.environ.get("EMAIL_RECIPIENT", "ronewanemalili4@gmail.com")

def send_email_alert(subject, body, timeout=30):
    """Send a plain-text notification email via smtp.gmail.com over SSL.

    Delivery is best-effort: a failure while connecting, logging in or
    sending is printed and reported through the return value rather than
    raised, so a mail problem does not abort the caller.

    Args:
        subject: Subject line of the message.
        body: Plain-text message body.
        timeout: Seconds to wait on blocking SMTP socket operations. Without
            a timeout a stalled connection would block the caller (and the
            retraining loop) indefinitely.

    Returns:
        True if the message was sent, False if any error occurred.
    """
    from email.mime.text import MIMEText
    import smtplib

    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"] = EMAIL_SENDER
    msg["To"] = EMAIL_RECIPIENT

    try:
        # Port 465 is the implicit-TLS SMTP port; the context manager closes
        # the connection even when login or sending fails.
        with smtplib.SMTP_SSL("smtp.gmail.com", 465, timeout=timeout) as server:
            server.login(EMAIL_SENDER, EMAIL_PASSWORD)
            server.send_message(msg)
        print("📧 Email alert sent.")
        return True
    except Exception as e:
        # Deliberately broad: notification failures must never crash training.
        print(f"❌ Failed to send email: {e}")
        return False

def save_model(model, model_file):
    """Write *model* to *model_file*, keeping a timestamped copy of the old file.

    Sequence:
      1. If *model_file* already exists, copy it to
         ``<model_file without extension>_<YYYYmmdd_HHMMSS>.pkl``.
      2. Dump the model with joblib to ``<model_file>.tmp``.
      3. Move the temporary file over *model_file* with ``os.replace`` so a
         partially written file never sits at the final path.
      4. Confirm the file exists and send a success email.

    On any exception the temporary file is removed, the archived copy (if one
    was made during this call) is moved back to *model_file*, and a failure
    email is sent.

    Args:
        model: Object to serialize with ``joblib.dump``.
        model_file: Destination path of the model file.

    Returns:
        True if the model was written, False otherwise. Exceptions are
        reported via print/email and not propagated.
    """
    # Bound before the try block so the except handler can always refer to
    # them, even when the failure happens during the archive step.
    temp_path = f"{model_file}.tmp"
    archive_path = None

    try:
        # Archive previous model if exists
        if os.path.exists(model_file):
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            # splitext only strips an extension from the final path component,
            # so a dot in a directory name cannot truncate the path.
            archive_path = f"{os.path.splitext(model_file)[0]}_{timestamp}.pkl"
            shutil.copy2(model_file, archive_path)
            print(f"📦 Archived previous model to {archive_path}")

        # 1. Save to temporary file
        with open(temp_path, 'wb') as f:
            joblib.dump(model, f)

        # 2. Replace the destination in a single step
        os.replace(temp_path, model_file)

        # 3. Verify the save
        if os.path.exists(model_file):
            print(f"✅ Model saved successfully to {model_file}")

            # Send success notification
            send_email_alert(
                "✅ Model Update Successful",
                f"New model saved at {datetime.datetime.now()}\n\n"
                f"Location: {model_file}\n"
                f"Size: {os.path.getsize(model_file)/1024:.2f} KB"
            )
            return True

        print("❌ Model save failed - file not found after write")
        return False

    except Exception as e:
        print(f"❌ Model save failed: {e}")

        # Clean up temp file if exists; a cleanup error must not mask the
        # original failure or prevent the restore attempt below.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError as cleanup_error:
            print(f"❌ Could not remove temporary file {temp_path}: {cleanup_error}")

        # Attempt to restore archived model if save failed
        restored = False
        try:
            if archive_path is not None and os.path.exists(archive_path):
                os.replace(archive_path, model_file)
                restored = True
                print("🔄 Restored previous model version")
        except OSError as restore_error:
            print(f"❌ Could not restore previous model: {restore_error}")

        # Only claim a restore in the email when one actually happened.
        if restored:
            restore_status = "System restored previous version."
        else:
            restore_status = "No previous version was restored."

        # Send failure notification
        send_email_alert(
            "⚠️ Model Update Failed",
            f"Model save failed at {datetime.datetime.now()}\n\n"
            f"Error: {str(e)}\n"
            f"{restore_status}"
        )
        return False

def train_model(csv_file=CSV_FILE, model_file=MODEL_FILE, chunksize=CHUNK_SIZE):
    """Train an XGBoost binary classifier from a CSV file and save it.

    The CSV is read in chunks; rows with a missing value in any FEATURES
    column or in ``actual_label`` are dropped. The surviving rows are
    concatenated, the class distribution is appended to CLASS_LOG (and an
    alert email is sent when the class-1 share is below
    IMBALANCE_ALERT_THRESHOLD), then a model is trained and handed to
    ``save_model``.

    Args:
        csv_file: Path of the CSV holding the FEATURES columns and an
            ``actual_label`` column (the code sums the labels to count
            class 1, so it presumably expects 0/1 values -- confirm).
        model_file: Destination path passed to ``save_model``.
        chunksize: Number of rows per chunk for ``pandas.read_csv``.

    Returns:
        True if a model was trained and saved; False when there is no usable
        data, when saving fails, or when any exception is raised.
    """
    print("🧠 Starting training...")
    X_chunks = []
    y_chunks = []

    try:
        # First pass: collect all data
        for chunk in pd.read_csv(csv_file, chunksize=chunksize):
            # Validate and clean data
            chunk = chunk.dropna(subset=FEATURES + ['actual_label'])
            if len(chunk) == 0:
                continue

            X_chunks.append(chunk[FEATURES])
            y_chunks.append(chunk['actual_label'])

        if not X_chunks:
            print("⚠️ No valid data to train.")
            return False

        # Combine all chunks
        X_full = pd.concat(X_chunks)
        y_full = pd.concat(y_chunks)

        # Create DMatrix once (more efficient)
        dtrain = xgb.DMatrix(X_full, label=y_full)

        # Class balance analysis
        pos = y_full.sum()
        neg = len(y_full) - pos
        total = len(y_full)
        scale_pos_weight = neg / max(pos, 1)  # prevent division by zero
        class_ratio = pos / total

        # Log class distribution
        log_entry = (
            f"[{datetime.datetime.now()}] Class 0 (Down): {neg}, "
            f"Class 1 (Up): {pos}, Total: {total}, "
            f"Class 1 Ratio: {class_ratio:.4f}, Scale_Pos_Weight: {scale_pos_weight:.2f}\n"
        )

        with open(CLASS_LOG, 'a', encoding="utf-8") as f:
            f.write(log_entry)
        print(f"📊 {log_entry.strip()}")

        # Handle severe class imbalance
        if class_ratio < IMBALANCE_ALERT_THRESHOLD:
            alert_body = (
                f"Training started: {datetime.datetime.now()}\n\n"
                f"Class Distribution:\n"
                f"- Down (0): {neg}\n"
                f"- Up (1): {pos}\n"
                f"- Ratio: {class_ratio:.4f}\n\n"
                f"Model may be biased towards price drops!"
            )
            send_email_alert("⚠️ Class Imbalance Alert", alert_body)

        # Train model
        params = {
            "objective": "binary:logistic",
            "eval_metric": "logloss",
            "tree_method": "hist",
            # Up-weights class 1 by the negative/positive count ratio.
            "scale_pos_weight": scale_pos_weight,
            "seed": 42
        }

        model = xgb.train(params, dtrain, num_boost_round=50)

        # Save model
        if save_model(model, model_file):
            print(f"✅ Model saved to {model_file}")
            return True

        # save_model already reported the failure; return an explicit False
        # rather than falling through and returning None.
        return False

    except Exception as e:
        print(f"❌ Training failed: {e}")
        return False

def main():
    """Train once immediately, then retrain on a recurring half-hour schedule.

    Prints the active configuration, runs ``train_model`` a first time,
    registers it with ``schedule`` to run every 0.5 hours, and then loops
    forever, checking for due jobs once a minute. This function does not
    return.
    """
    banner_lines = (
        "=== Trading Bot Model Trainer ===",
        "Configuration:",
        f"- Data: {CSV_FILE}",
        f"- Model: {MODEL_FILE}",
        "- Training every 0.5 hours",
        "",  # trailing empty entry yields the blank line after the banner
    )
    print("\n".join(banner_lines))

    # Kick off a first training run right away instead of waiting for the
    # scheduler's first tick.
    train_model()

    def retrain_job():
        # Resolved at call time, so the job always runs the current train_model.
        return train_model()

    # Recurring retraining job.
    schedule.every(0.5).hours.do(retrain_job)

    print("🔁 Auto-retraining scheduler started...")
    poll_interval_seconds = 60
    while True:
        schedule.run_pending()
        time.sleep(poll_interval_seconds)


if __name__ == "__main__":
    main()

=== Trading Bot Model Trainer ===
Configuration:
- Data: /content/drive/MyDrive/Colab Notebooks/Trade/features.csv
- Model: /content/drive/MyDrive/Colab Notebooks/Trade/price_predictor.pkl
- Training every 0.5 hours

🧠 Starting training...
📊 [2025-08-06 17:32:17.556080] Class 0 (Down): 533568, Class 1 (Up): 407, Total: 533975, Class 1 Ratio: 0.0008, Scale_Pos_Weight: 1310.98
📧 Email alert sent.
📦 Archived previous model to /content/drive/MyDrive/Colab Notebooks/Trade/price_predictor_20250806_173220.pkl
✅ Model saved successfully to /content/drive/MyDrive/Colab Notebooks/Trade/price_predictor.pkl
📧 Email alert sent.
✅ Model saved to /content/drive/MyDrive/Colab Notebooks/Trade/price_predictor.pkl
🔁 Auto-retraining scheduler started...
🧠 Starting training...
📊 [2025-08-06 18:02:24.485053] Class 0 (Down): 558067, Class 1 (Up): 451, Total: 558518, Class 1 Ratio: 0.0008, Scale_Pos_Weight: 1237.40
📧 Email alert sent.
📦 Archived previous model to /content/drive/MyDrive/Colab Notebooks/Trade/p