# DefiDoza Training Notebook

This notebook is self-contained and is intended only for model training.

- No local project files required
- No LLM/agent code
- Trained artifacts saved to Google Drive

In [None]:
# Cell 1: Install deps and mount Google Drive
!pip -q install numpy==1.26.4 pandas==2.2.0 scikit-learn==1.6.1 torch tensorflow ipywidgets

# Mount Google Drive; every artifact path below points into the mounted drive.
from google.colab import drive
drive.mount('/content/drive')

# Single folder that holds everything this notebook writes.
DRIVE_BASE = '/content/drive/MyDrive/defidoza'
WEIGHTS_DIR = f'{DRIVE_BASE}/weights'
# Pickled MinMaxScaler written (or read back) by preprocess_data.
SCALER_PATH = f'{WEIGHTS_DIR}/scaler.pkl'
# state_dict of the PyTorch LSTM, written by train_pytorch.
PYTORCH_WEIGHTS = f'{WEIGHTS_DIR}/pytorch_lstm.pth'
# Keras model file written by train_tensorflow.
TF_WEIGHTS = f'{WEIGHTS_DIR}/tf_lstm.h5'
# Pickled RandomForestRegressor written by train_randomforest.
RF_WEIGHTS = f'{WEIGHTS_DIR}/rf_model.pkl'

import os
# Create the weights folder up front so the save calls never hit a missing directory.
os.makedirs(WEIGHTS_DIR, exist_ok=True)
print('Storage ready at:', WEIGHTS_DIR)

In [None]:
# Cell 2: Training UI (ipywidgets)
import json
import pickle
import urllib.request
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor

import ipywidgets as widgets
from IPython.display import display, clear_output

# Coin ids offered in the token dropdown; the selected one is passed to
# fetch_and_parse, which places it in the CoinGecko request URL.
TOKENS = ['uniswap', 'bitcoin', 'ethereum', 'solana', 'cardano']

def fetch_and_parse(token_id='uniswap', days=30, timeout=30):
    """Download daily price/volume history for a token from CoinGecko.

    Args:
        token_id: CoinGecko coin id (e.g. ``'bitcoin'``). It is URL-quoted
            before being placed in the request path.
        days: Number of days of history to request.
        timeout: Seconds to wait on the HTTP request. Without a timeout a
            stalled connection would block the caller indefinitely.

    Returns:
        A DataFrame with ``timestamp`` (datetime), ``price`` and ``volume``
        columns, or ``None`` when the response contains no prices. Volume is
        matched to each price point by timestamp; a price point with no
        matching volume entry gets ``NaN``.

    Raises:
        urllib.error.URLError: On network or HTTP failures.
        ValueError: If the response body is not valid UTF-8 JSON.
    """
    from urllib.parse import quote, urlencode

    query = urlencode({'vs_currency': 'usd', 'days': days, 'interval': 'daily'})
    url = (
        'https://api.coingecko.com/api/v3/coins/'
        f"{quote(str(token_id), safe='')}/market_chart?{query}"
    )
    with urllib.request.urlopen(url, timeout=timeout) as r:
        data = json.loads(r.read().decode('utf-8'))

    # `or []` also covers keys that are present but null.
    prices = data.get('prices') or []
    volumes = data.get('total_volumes') or []
    if not prices:
        return None

    df = pd.DataFrame(prices, columns=['timestamp', 'price'])
    # Join by timestamp rather than by position so that a missing or extra
    # volume entry cannot shift values onto the wrong row or raise a
    # length-mismatch error.
    volume_by_ts = {row[0]: row[1] for row in volumes}
    df['volume'] = [volume_by_ts.get(ts, float('nan')) for ts, _price in prices]
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    return df

def preprocess_data(df, fit_scaler=True):
    """Add engineered features to a price frame and min-max scale them.

    Features added to a copy of ``df``:

    * ``sentiment``: uniform random values in [-1, 1) from a fixed seed
      (placeholder noise, not real sentiment data).
    * ``volatility``: standard deviation of ``price`` over a sliding window of
      up to 5 points, front-padded with the first computed value so the column
      has one entry per row.
    * ``price_lag1``: the previous row's (already scaled) price.

    Args:
        df: Frame with at least ``price`` and ``volume`` columns.
        fit_scaler: When true, fit a new ``MinMaxScaler`` and pickle it to
            ``SCALER_PATH``; otherwise load the scaler from ``SCALER_PATH``
            and only transform.

    Returns:
        ``(frame, scaler)`` where ``frame`` has rows containing NaN removed
        (the first row always goes, because it has no lagged price).
    """
    df = df.copy()
    # A private generator yields the same stream as np.random.seed(42) would,
    # without reseeding NumPy's process-wide RNG as a hidden side effect.
    rng = np.random.RandomState(42)
    df['sentiment'] = rng.uniform(-1, 1, size=len(df))
    prices = df['price'].values
    ws = min(5, len(prices))
    if ws > 0:
        vol = np.std(np.lib.stride_tricks.sliding_window_view(prices, ws), axis=1)
        # One std per window leaves ws - 1 leading rows without a value;
        # repeat the first std to fill them.
        df['volatility'] = np.pad(vol, (ws - 1, 0), mode='edge')
    else:
        # Empty input: there is no window to compute.
        df['volatility'] = 0

    cols = ['price', 'volume', 'sentiment', 'volatility']
    if fit_scaler:
        scaler = MinMaxScaler()
        df[cols] = scaler.fit_transform(df[cols])
        with open(SCALER_PATH, 'wb') as f:
            pickle.dump(scaler, f)
    else:
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # scaler files this notebook wrote itself.
        with open(SCALER_PATH, 'rb') as f:
            scaler = pickle.load(f)
        df[cols] = scaler.transform(df[cols])

    # Lag is taken after scaling so it is on the same scale as `price`.
    df['price_lag1'] = df['price'].shift(1)
    return df.dropna(), scaler

def create_sequences(data, seq_len=10):
    """Slice a feature frame into overlapping windows and next-step targets.

    Args:
        data: DataFrame of features that includes a ``price`` column.
        seq_len: Desired window length. It is clamped to ``len(data) - 1`` so
            that at least one window/target pair exists when possible.

    Returns:
        ``(X, y)`` where ``X[i]`` holds rows ``i .. i + seq_len - 1`` of
        ``data`` (shape ``(n, seq_len, n_columns)``) and ``y[i]`` is the
        ``price`` of row ``i + seq_len``. When no window of length >= 1 can be
        formed (fewer than two rows, or ``seq_len < 1``) both arrays are empty.
    """
    n_rows = len(data)
    seq_len = min(seq_len, n_rows - 1)
    if seq_len < 1:
        # Nothing to predict from: return empty arrays instead of crashing on
        # an empty frame or emitting zero-length sequences.
        return np.empty((0, 0, data.shape[1])), np.empty((0,))

    values = data.to_numpy()
    targets = data['price'].to_numpy()
    n_seq = n_rows - seq_len
    # Row i of window_idx is [i, i + 1, ..., i + seq_len - 1].
    window_idx = np.arange(n_seq)[:, None] + np.arange(seq_len)[None, :]
    return values[window_idx], targets[seq_len:].copy()

class PricePredictor(nn.Module):
    """Stacked LSTM with a linear head that emits one value per input sequence.

    Inputs are batch-first, i.e. indexed as (batch, step, feature).
    """

    def __init__(self, input_size=5, hidden_size=50, num_layers=3):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return the prediction computed from the LSTM output at the last step."""
        sequence_output, _state = self.lstm(x)
        last_step = sequence_output[:, -1, :]
        return self.fc(last_step)

def create_tf_model(shape):
    """Build and compile a Keras regressor: one 50-unit LSTM layer, then Dense(1).

    Args:
        shape: Input shape handed to the LSTM layer as ``input_shape``.

    Returns:
        The model, compiled with the Adam optimizer and MSE loss.
    """
    model = Sequential()
    model.add(LSTM(50, input_shape=shape))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    return model

def create_rf_model():
    """Return an unfitted random-forest regressor with fixed hyperparameters."""
    forest = RandomForestRegressor(
        n_estimators=50,
        max_depth=10,
        random_state=42,
    )
    return forest

def train_pytorch(X, y, epochs, log):
    """Train a PricePredictor on the whole batch and save its state dict.

    Args:
        X: Array indexed as (sample, step, feature); ``X.shape[2]`` sets the
            LSTM input size.
        y: One target value per sample.
        epochs: Number of full-batch optimisation steps.
        log: Callable that receives progress messages.
    """
    log(f'Training PyTorch ({epochs} epochs)...')
    model = PricePredictor(input_size=X.shape[2])
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss()

    inputs = torch.tensor(X, dtype=torch.float32).to(device)
    # Targets get a trailing axis so they line up with the (batch, 1) output.
    targets = torch.tensor(y, dtype=torch.float32).unsqueeze(1).to(device)

    model.train()
    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        loss = loss_fn(model(inputs), targets)
        loss.backward()
        optimizer.step()
        # Report every tenth epoch only.
        if epoch % 10 == 0:
            log(f'  Epoch {epoch}/{epochs} Loss: {loss.item():.6f}')

    torch.save(model.state_dict(), PYTORCH_WEIGHTS)
    log(f'Saved: {PYTORCH_WEIGHTS}')

def train_tensorflow(X, y, epochs, log):
    """Fit the Keras LSTM on ``X``/``y`` and save the model to ``TF_WEIGHTS``.

    Args:
        X: Array whose axes 1 and 2 give the model's input shape.
        y: One target value per sample.
        epochs: Number of training epochs.
        log: Callable that receives progress messages.
    """
    log(f'Training TensorFlow ({epochs} epochs)...')
    timesteps, n_features = X.shape[1], X.shape[2]
    model = create_tf_model((timesteps, n_features))
    # Roughly ten batches per epoch, never fewer than one sample per batch.
    batch_size = max(1, len(X) // 10)
    model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=0)
    model.save(TF_WEIGHTS)
    log(f'Saved: {TF_WEIGHTS}')

def train_randomforest(X, y, log):
    """Fit the random forest on flattened sequences and pickle it to ``RF_WEIGHTS``.

    Args:
        X: Array whose first axis indexes samples; the remaining axes are
            flattened into one feature vector per sample.
        y: One target value per sample.
        log: Callable that receives progress messages.
    """
    log('Training RandomForest...')
    n_samples = X.shape[0]
    flat_features = X.reshape(n_samples, -1)
    forest = create_rf_model()
    forest.fit(flat_features, y)
    with open(RF_WEIGHTS, 'wb') as fh:
        pickle.dump(forest, fh)
    log(f'Saved: {RF_WEIGHTS}')

# Fixed-height, scrollable output area that collects the training log.
out = widgets.Output(
    layout=dict(height='300px', overflow='auto', border='1px solid #999'),
)

# Input controls for one training run.
token_w = widgets.Dropdown(description='Token:', options=TOKENS, value='bitcoin')
days_w = widgets.IntText(description='Days:', value=30)
epochs_w = widgets.IntText(description='Epochs:', value=50)
model_w = widgets.RadioButtons(
    description='Model:',
    # (label shown in the UI, value read by run_train)
    options=[
        ('PyTorch', 'pytorch'),
        ('TensorFlow', 'tensorflow'),
        ('RandomForest', 'randomforest'),
        ('All', 'all'),
    ],
    value='all',
)
train_btn = widgets.Button(button_style='success', description='Start Training')

def log(msg) -> None:
    """Print ``msg`` inside the ``out`` widget so it shows up in the training panel."""
    with out:
        print(msg)

def run_train(_):
    """Button callback: fetch data, build sequences and train the chosen model(s).

    Reads the token, day count, epoch count and model choice from the widgets,
    clears the log panel, and reports every step (and every failure) through
    ``log``. The first 80% of the sequences (at least one) are used for
    training. The button is disabled while the run is in progress so a second
    click cannot queue an overlapping run.

    Args:
        _: The clicked button, supplied by ``on_click``; unused.
    """
    with out:
        clear_output()
    token, days, epochs, model = token_w.value, days_w.value, epochs_w.value, model_w.value
    log(f'Config: token={token}, days={days}, epochs={epochs}, model={model}')

    # IntText accepts zero and negative numbers; reject them before any work.
    if days < 1:
        log('Error: days must be at least 1')
        return
    # Epochs are irrelevant for the random forest, so only check when used.
    if model != 'randomforest' and epochs < 1:
        log('Error: epochs must be at least 1')
        return

    train_btn.disabled = True
    try:
        try:
            df = fetch_and_parse(token, days)
        except (OSError, ValueError) as exc:
            # OSError covers urllib network/HTTP errors and timeouts;
            # ValueError covers malformed JSON or payloads.
            log(f'Error: could not fetch market data: {exc}')
            return
        if df is None or len(df) < 10:
            log('Error: not enough data')
            return

        log(f'Data rows: {len(df)}')
        df_proc, _scaler = preprocess_data(df, fit_scaler=True)
        features = df_proc[['price', 'volume', 'sentiment', 'volatility', 'price_lag1']]
        X, y = create_sequences(features, min(10, len(features) // 2))
        if len(X) == 0:
            log('Error: not enough sequences')
            return

        split = max(1, int(0.8 * len(X)))
        X_train, y_train = X[:split], y[:split]
        log(f'Training sequences: {len(X_train)}')

        if model in ['pytorch', 'all']:
            train_pytorch(X_train, y_train, epochs, log)
        if model in ['tensorflow', 'all']:
            train_tensorflow(X_train, y_train, epochs, log)
        if model in ['randomforest', 'all']:
            train_randomforest(X_train, y_train, log)

        log('')
        log('Training complete. Files in weights dir:')
        for name in sorted(os.listdir(WEIGHTS_DIR)):
            log(f'  - {name}')
    finally:
        # Always give the button back, including after an error or early return.
        train_btn.disabled = False

# Run a training pass whenever the button is clicked.
train_btn.on_click(run_train)

# Render the panel: title, the four inputs, the button, then the log area.
display(
    widgets.VBox(
        children=[
            widgets.HTML('<h3>Training Panel</h3>'),
            token_w,
            days_w,
            epochs_w,
            model_w,
            train_btn,
            out,
        ]
    )
)