<a href="https://colab.research.google.com/github/jeshwanth-A/image-to-3d-app/blob/main/notebooks/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DefiDoza Training Notebook

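This notebook is self-contained and is used only for model training: it downloads daily market data from the CoinGecko API and trains a PyTorch LSTM, a TensorFlow/Keras LSTM, and a scikit-learn random forest to predict the next (scaled) price.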

- No local project files required
- No LLM/agent code
- Trained artifacts are saved to Google Drive (`MyDrive/defidoza/weights`)

In [19]:
# Mount Google Drive; every path defined below lives under the mount point.
import os

from google.colab import drive

drive.mount('/content/drive')

# Root folder for this project on Drive, and the folder that holds artifacts.
DRIVE_BASE = '/content/drive/MyDrive/defidoza'
WEIGHTS_DIR = os.path.join(DRIVE_BASE, 'weights')

# One file per saved artifact (fitted scaler + the three trained models).
SCALER_PATH, PYTORCH_WEIGHTS, TF_WEIGHTS, RF_WEIGHTS = (
    os.path.join(WEIGHTS_DIR, artifact)
    for artifact in ('scaler.pkl', 'pytorch_lstm.pth', 'tf_lstm.keras', 'rf_model.pkl')
)

# Create the artifact folder up front so later saves cannot fail on a missing dir.
os.makedirs(WEIGHTS_DIR, exist_ok=True)
print("Storage ready:", WEIGHTS_DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Storage ready: /content/drive/MyDrive/defidoza/weights


In [21]:
import json, os, pickle, urllib.request, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

import torch
import torch.nn as nn

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Coin ids that mirror the choices of the training form at the bottom of this
# cell; the list is not referenced elsewhere in the visible code.
TOKENS = [
    'bitcoin',
    'ethereum',
    'solana',
    'cardano',
    'uniswap',
]

# Columns that are scaled and fed to the models. 'price' is also the column
# create_sequences() uses as the prediction target.
FEATURE_COLS = [
    'price',
    'volume',
    'volatility',
    'momentum',
    'price_lag1',
]


def fetch_and_parse(token_id, days):
    """Download daily price/volume history for one coin from CoinGecko.

    Args:
        token_id: CoinGecko coin id, e.g. ``'bitcoin'``. It is URL-escaped
            before being placed in the request path.
        days: Look-back window, passed through to the API's ``days`` query
            parameter.

    Returns:
        A DataFrame with columns ``timestamp`` (datetime), ``price`` and
        ``volume``, one row per returned data point. ``None`` is returned
        when the request fails (network error, HTTP error, timeout), when
        the body is not valid JSON, or when the response holds no usable
        price/volume points. The reason for a failed request is printed.
    """
    from urllib.parse import quote

    coin = quote(str(token_id), safe='')
    url = (f'https://api.coingecko.com/api/v3/coins/{coin}/market_chart'
           f'?vs_currency=usd&days={days}&interval=daily')
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})

    try:
        with urllib.request.urlopen(req, timeout=20) as r:
            data = json.loads(r.read().decode('utf-8'))
    except (OSError, ValueError) as exc:
        # OSError covers URLError/HTTPError/timeouts; ValueError covers bad
        # JSON and undecodable bytes. Callers already treat None as "no data".
        print(f"Request for '{token_id}' failed: {exc}")
        return None

    if not isinstance(data, dict):
        return None

    prices = data.get('prices') or []
    volumes = data.get('total_volumes') or []

    # Pair prices and volumes by position and keep only the rows that have
    # both; assigning a shorter volume list to the frame would raise.
    n = min(len(prices), len(volumes))
    if n == 0:
        return None

    df = pd.DataFrame(prices[:n], columns=['timestamp', 'price'])
    df['volume'] = [v[1] for v in volumes[:n]]
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    return df


def compute_features(df, window=5):
    """Add ``volatility``, ``momentum`` and ``price_lag1`` columns to a price frame.

    Args:
        df: DataFrame with at least a ``price`` column. It is not modified.
        window: Number of consecutive prices used for the rolling volatility
            (default 5). It is reduced to the number of rows when the frame
            is shorter than that.

    Returns:
        A new DataFrame with the added columns, rows containing NaN removed
        and the index reset. Because ``price_lag1`` is NaN on the first row,
        that row is always dropped. An empty input yields an empty frame
        that still has the three feature columns.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be >= 1")

    df = df.copy()
    prices = df['price'].values

    ws = min(window, len(prices))
    if ws >= 1:
        # Population std (ddof=0) of each run of `ws` consecutive prices.
        vol = np.std(np.lib.stride_tricks.sliding_window_view(prices, ws), axis=1)
        # The first ws-1 rows have no full trailing window; they reuse the
        # value of the first complete window.
        df['volatility'] = np.pad(vol, (ws - 1, 0), mode='edge')
    else:
        # No rows at all: nothing to compute, but keep the column present.
        df['volatility'] = 0.0

    # One-step relative price change; the first row has no predecessor -> 0.
    df['momentum'] = df['price'].pct_change().fillna(0.0)
    df['price_lag1'] = df['price'].shift(1)
    return df.dropna().reset_index(drop=True)


def scale_data(df, fit=True):
    """Min-max scale the FEATURE_COLS columns of a copy of ``df``.

    With ``fit=True`` a new MinMaxScaler is fitted on ``df`` and pickled to
    SCALER_PATH. With ``fit=False`` the scaler previously pickled at
    SCALER_PATH is loaded and applied without refitting.

    Returns the scaled copy; the input frame is left untouched.
    """
    scaled = df.copy()
    features = scaled[FEATURE_COLS]

    if fit:
        scaler = MinMaxScaler()
        values = scaler.fit_transform(features)
        with open(SCALER_PATH, 'wb') as fh:
            pickle.dump(scaler, fh)
    else:
        # NOTE: pickle.load executes arbitrary code from the file; this is
        # only appropriate because SCALER_PATH is written by this notebook.
        with open(SCALER_PATH, 'rb') as fh:
            scaler = pickle.load(fh)
        values = scaler.transform(features)

    scaled[FEATURE_COLS] = values
    return scaled


def create_sequences(df, seq_len):
    """Slice the FEATURE_COLS columns into sliding windows and next-step targets.

    The window length is ``seq_len``, reduced to ``len(df) - 1`` when the
    frame is too short. Each sample is ``window`` consecutive rows; its
    target is the ``price`` value of the row right after the window.

    Returns:
        ``(X, y)`` with ``X`` shaped (samples, window, features) and ``y``
        shaped (samples,). Two empty arrays are returned when the effective
        window length is below 1.
    """
    values = df[FEATURE_COLS].values
    window = min(seq_len, len(values) - 1)
    if window < 1:
        return np.array([]), np.array([])

    price_col = FEATURE_COLS.index('price')
    starts = range(len(values) - window)
    X = np.array([values[s:s + window] for s in starts])
    y = np.array([values[s + window][price_col] for s in starts])
    return X, y


class PricePredictorPT(nn.Module):
    """LSTM stack followed by a linear layer that outputs one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden: Hidden size of every LSTM layer.
        layers: Number of stacked LSTM layers.
        dropout: Dropout passed to ``nn.LSTM``; forced to 0.0 when there is
            only one layer.
    """

    def __init__(self, input_size, hidden=64, layers=2, dropout=0.2):
        super().__init__()
        # Dropout is only handed to the LSTM when it has more than one layer.
        inter_layer_dropout = dropout if layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden,
            num_layers=layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden, 1)

    def forward(self, x):
        """Map a (batch, steps, features) tensor to a (batch, 1) prediction."""
        sequence_out, _state = self.lstm(x)
        # Only the output at the final time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)


def build_tf_model(shape):
    """Build and compile the Keras LSTM regressor.

    Args:
        shape: ``(timesteps, features)`` of one input sample (no batch axis).

    Returns:
        A compiled ``Sequential`` model: LSTM(64) -> Dropout(0.2) -> Dense(1),
        trained with the Adam optimizer on mean-squared-error loss.
    """
    m = Sequential([
        # Declare the input with an explicit Input object rather than the
        # `input_shape=` layer argument, which current Keras discourages.
        tf.keras.Input(shape=shape),
        LSTM(64),
        Dropout(0.2),
        Dense(1),
    ])
    m.compile(optimizer='adam', loss='mse')
    return m


def metrics(y_true, y_pred):
    """Return ``(MAE, RMSE)`` for the given targets and predictions."""
    abs_err = mean_absolute_error(y_true, y_pred)
    sq_err = mean_squared_error(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    return abs_err, np.sqrt(sq_err)


# Values accepted by train_once(model=...).
_MODEL_CHOICES = ('all', 'pytorch', 'tensorflow', 'randomforest')


def _report(tag, y_true, y_pred):
    """Print MAE/RMSE of one model's test predictions (in scaled units)."""
    mae, rmse = metrics(y_true, y_pred)
    print(f"[{tag}] MAE={mae:.6f} RMSE={rmse:.6f}")


def _train_pytorch(Xtr, ytr, Xte, yte, epochs):
    """Fit the PyTorch LSTM full-batch, save its state dict, report test error."""
    dev = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = PricePredictorPT(input_size=Xtr.shape[2]).to(dev)
    opt = torch.optim.Adam(net.parameters(), lr=1e-3)
    crit = nn.MSELoss()

    inputs = torch.tensor(Xtr, dtype=torch.float32).to(dev)
    # The model outputs (batch, 1), so targets get a matching trailing axis.
    targets = torch.tensor(ytr, dtype=torch.float32).unsqueeze(1).to(dev)

    # Log roughly five times over the run, whatever the epoch count.
    log_every = max(1, epochs // 5)
    for e in range(epochs):
        net.train()
        loss = crit(net(inputs), targets)
        opt.zero_grad()
        loss.backward()
        opt.step()
        if (e + 1) % log_every == 0:
            print(f"[PyTorch] epoch {e+1}/{epochs} loss={loss.item():.6f}")

    torch.save(net.state_dict(), PYTORCH_WEIGHTS)
    print("Saved:", PYTORCH_WEIGHTS)

    if len(Xte):
        net.eval()
        with torch.no_grad():
            pred = net(torch.tensor(Xte, dtype=torch.float32).to(dev)).cpu().numpy().flatten()
        _report('PyTorch', yte, pred)


def _train_tensorflow(Xtr, ytr, Xte, yte, epochs):
    """Fit the Keras LSTM, save it to TF_WEIGHTS, report test error."""
    net = build_tf_model((Xtr.shape[1], Xtr.shape[2]))
    net.fit(Xtr, ytr, epochs=epochs, batch_size=max(1, len(Xtr) // 10), verbose=0)
    net.save(TF_WEIGHTS)
    print("Saved:", TF_WEIGHTS)

    if len(Xte):
        pred = net.predict(Xte, verbose=0).flatten()
        _report('TensorFlow', yte, pred)


def _train_random_forest(Xtr, ytr, Xte, yte):
    """Fit a random forest on flattened windows, pickle it, report test error."""
    rf = RandomForestRegressor(n_estimators=200, max_depth=12, random_state=42, n_jobs=-1)
    # The forest takes 2-D input, so each window is flattened to one row.
    rf.fit(Xtr.reshape(len(Xtr), -1), ytr)
    with open(RF_WEIGHTS, 'wb') as f:
        pickle.dump(rf, f)
    print("Saved:", RF_WEIGHTS)

    if len(Xte):
        pred = rf.predict(Xte.reshape(len(Xte), -1))
        _report('RandomForest', yte, pred)


def train_once(token='bitcoin', days=60, epochs=50, seq_len=10, model='all', seed=42):
    """Fetch data for one token, train the selected model(s) and save them.

    Pipeline: download history, compute features, split 80/20 in time order,
    fit the scaler on the training part only, build sliding-window
    sequences, then train and evaluate each requested model. Reported
    MAE/RMSE are in the scaler's units, not in USD.

    Args:
        token: Coin id passed to fetch_and_parse().
        days: Amount of history to request.
        epochs: Training epochs for the PyTorch and TensorFlow models.
        seq_len: Requested window length for the sequences.
        model: One of 'all', 'pytorch', 'tensorflow', 'randomforest'.
        seed: Seed applied to NumPy, PyTorch and TensorFlow before training
            for best-effort reproducibility; pass ``None`` to skip seeding.

    Returns:
        None. Progress is printed; the scaler and model files are written
        under WEIGHTS_DIR. The function returns early (after printing the
        reason) when the model name is unknown or there is too little data.
    """
    print(f'Config: token={token} days={days} epochs={epochs} seq_len={seq_len} model={model}')
    print('-' * 60)

    # Reject typos before any network or disk work; previously an unknown
    # name ran the whole pipeline and silently trained nothing.
    if model not in _MODEL_CHOICES:
        print(f"Unknown model '{model}'. Choose one of: {', '.join(_MODEL_CHOICES)}.")
        return

    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)
        tf.random.set_seed(seed)

    df = fetch_and_parse(token, days)
    if df is None or len(df) < 15:
        print("Not enough data. Increase days.")
        return

    df = compute_features(df)

    split = int(len(df) * 0.8)
    train_df = df.iloc[:split].copy()
    test_df  = df.iloc[split:].copy()

    # Fit scaler on train only (no leakage)
    train_df = scale_data(train_df, fit=True)
    test_df  = scale_data(test_df,  fit=False)

    Xtr, ytr = create_sequences(train_df, seq_len)
    Xte, yte = create_sequences(test_df,  seq_len)

    if len(Xtr) == 0:
        print("Not enough sequences. Increase days or reduce seq_len.")
        return

    # create_sequences() shortens the window when a split has too few rows.
    # Test windows of a different length cannot be fed to models fitted on
    # the training windows, so evaluation is skipped in that case.
    if len(Xte) and Xte.shape[1] != Xtr.shape[1]:
        print(f"Test split too short for window length {Xtr.shape[1]}; skipping evaluation.")
        Xte, yte = np.array([]), np.array([])

    print(f"Train seqs: {len(Xtr)} | Test seqs: {len(Xte)}")

    # ── PyTorch ──
    if model in ('pytorch', 'all'):
        _train_pytorch(Xtr, ytr, Xte, yte, epochs)

    # ── TensorFlow ──
    if model in ('tensorflow', 'all'):
        _train_tensorflow(Xtr, ytr, Xte, yte, epochs)

    # ── RandomForest ──
    if model in ('randomforest', 'all'):
        _train_random_forest(Xtr, ytr, Xte, yte)

    print('-' * 60)
    print("Done. Files:")
    for fn in sorted(os.listdir(WEIGHTS_DIR)):
        print(" -", fn)


# =========================
# COLAB FORM "GUI"
# =========================
# The `#@title` / `#@param` annotations below are Colab form markers; keep
# those lines in their exact `name = value #@param ...` shape.
# The values set here are passed to train_once() and therefore override its
# signature defaults (for example days=90 here versus days=60 there).
#@title Train Settings (this is the GUI)
token = "bitcoin" #@param ["bitcoin","ethereum","solana","cardano","uniswap"]
days = 90 #@param {type:"integer"}
epochs = 50 #@param {type:"integer"}
seq_len = 10 #@param {type:"integer"}
model = "all" #@param ["all","pytorch","tensorflow","randomforest"]

# Run training with the selected settings. This executes every time the cell
# runs: it downloads data and overwrites the saved scaler and model files.
train_once(token, days, epochs, seq_len, model)

Config: token=bitcoin days=90 epochs=50 seq_len=10 model=all
------------------------------------------------------------
Train seqs: 62 | Test seqs: 8
[PyTorch] epoch 10/50 loss=0.067418
[PyTorch] epoch 20/50 loss=0.046720
[PyTorch] epoch 30/50 loss=0.047085
[PyTorch] epoch 40/50 loss=0.042172
[PyTorch] epoch 50/50 loss=0.042404
Saved: /content/drive/MyDrive/defidoza/weights/pytorch_lstm.pth
[PyTorch] MAE=1.530844 RMSE=1.534095
Saved: /content/drive/MyDrive/defidoza/weights/tf_lstm.keras
[TensorFlow] MAE=0.782864 RMSE=0.789609
Saved: /content/drive/MyDrive/defidoza/weights/rf_model.pkl
[RandomForest] MAE=1.563493 RMSE=1.567780
------------------------------------------------------------
Done. Files:
 - pytorch_lstm.pth
 - rf_model.pkl
 - scaler.pkl
 - tf_lstm.h5
 - tf_lstm.keras
