# 🚀 BTC Price Prediction - Using Project Models

This notebook trains and compares the LSTM, GRU, Transformer, and Ensemble models created by the codebase's `ModelFactory`.

In [None]:
# @title 1. Clone & Setup
import os

REPO = "btc-price-prediction-hybrid-lstm-sentiment-crispdm"
if os.path.exists(REPO):
    !rm -rf $REPO
!git clone https://github.com/bimoBintang/$REPO.git
os.chdir(REPO)

!pip install -q -r requirements.txt
!pip install -q yfinance ta

print(f"\n‚úÖ Working dir: {os.getcwd()}")

In [None]:
# @title 2. Data Collection & Feature Engineering
import sys
sys.path.insert(0, os.getcwd())

import yfinance as yf
import pandas as pd
import numpy as np
import ta
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Download the full daily BTC-USD history and normalise column names to
# lower case so the rest of the notebook can refer to 'close', 'high', 'low'.
print("Fetching BTC data...")
df = yf.Ticker('BTC-USD').history(period='max', interval='1d')
df = df.rename(columns=str.lower)
print(f"Data: {len(df)} days")

# Technical indicators. Columns are appended in a fixed order because the
# feature matrix built later follows the DataFrame's column order.
close = df['close']

for window in (7, 21, 50):
    df[f'sma_{window}'] = ta.trend.sma_indicator(close, window)

for window in (12, 26):
    df[f'ema_{window}'] = ta.trend.ema_indicator(close, window)

df['rsi'] = ta.momentum.rsi(close, 14)
df['macd'] = ta.trend.macd(close)
df['macd_signal'] = ta.trend.macd_signal(close)
df['bb_high'] = ta.volatility.bollinger_hband(close)
df['bb_low'] = ta.volatility.bollinger_lband(close)
df['atr'] = ta.volatility.average_true_range(df['high'], df['low'], close)

# Lagged returns and lagged closing prices over several look-back horizons.
for lag in [1, 3, 7, 14]:
    df[f'ret_{lag}d'] = close.pct_change(lag)
    df[f'close_lag{lag}'] = close.shift(lag)

# Target is the following row's close. Rows left incomplete by the indicator
# warm-up, the lags, or the final shifted target are dropped.
df['target'] = close.shift(-1)
df = df.dropna()

feature_count = len(df.columns) - 1  # every column except 'target'
print(f"Features: {feature_count}, Samples: {len(df)}")
df['close'].plot(figsize=(14, 4), title='BTC Price')
plt.show()

In [None]:
# @title 3. Prepare Data
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import RobustScaler

# Every column except the label is used as a model input.
feature_cols = [c for c in df.columns if c != 'target']
X = df[feature_cols].values.astype(np.float32)
y = df['target'].values.astype(np.float32)

# Fit the scalers on the chronologically earliest 70% of rows only, then apply
# them to the whole series. Fitting on all rows would let the medians and
# quantile ranges of the validation/test period leak into the training inputs
# and targets. The 70% share mirrors the training share of the chronological
# split performed further down in this cell.
TRAIN_FRACTION = 0.7
n_fit = int(len(X) * TRAIN_FRACTION)

scaler_X = RobustScaler().fit(X[:n_fit])
scaler_y = RobustScaler().fit(y[:n_fit].reshape(-1, 1))
X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y.reshape(-1, 1)).flatten()

# Sequences
SEQ_LEN = 30
def create_sequences(X, y, seq_len):
    """Slice ``X`` into overlapping windows and pair each with a label from ``y``.

    For every start index ``i`` in ``range(len(X) - seq_len)`` the window is
    ``X[i:i + seq_len]`` and its label is ``y[i + seq_len]``, i.e. the entry of
    ``y`` one position past the window's last row.

    Returns:
        A tuple ``(windows, labels)`` of NumPy arrays built from the collected
        slices. Both are empty when ``len(X) <= seq_len``.
    """
    starts = range(len(X) - seq_len)
    windows = [X[start:start + seq_len] for start in starts]
    labels = [y[start + seq_len] for start in starts]
    return np.array(windows), np.array(labels)

# NOTE(review): 'target' is already close.shift(-1), and create_sequences
# labels each window with y[i + seq_len], so the label is the close two rows
# after the window's last row. Confirm this horizon is intended.
X_seq, y_seq = create_sequences(X_scaled, y_scaled, SEQ_LEN)

# Chronological split: first 70% train, next 15% validation, remainder test.
n = len(X_seq)
train_end = int(n * 0.7)
val_end = int(n * 0.85)

X_train, X_val, X_test = np.split(X_seq, [train_end, val_end])
y_train, y_val, y_test = np.split(y_seq, [train_end, val_end])


def _make_loader(features, targets, shuffle=False):
    """Wrap a (features, targets) array pair in a DataLoader with batches of 64."""
    dataset = TensorDataset(torch.FloatTensor(features), torch.FloatTensor(targets))
    return DataLoader(dataset, batch_size=64, shuffle=shuffle)


# Only the training batches are shuffled; validation keeps its original order.
train_loader = _make_loader(X_train, y_train, shuffle=True)
val_loader = _make_loader(X_val, y_val)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

In [None]:
# @title 4. Create Models using ModelFactory
from src.pipeline.models.model_factory import ModelFactory

# The networks receive one input per feature column.
input_size = len(feature_cols)

# NOTE(review): models_dir is presumably where save_model() writes its
# checkpoints - confirm in ModelFactory.
factory = ModelFactory(models_dir='trained_models')

# Ask the factory for every model it provides, each with a single output.
models = factory.create_all_models(
    input_size=input_size,
    output_size=1,
)

print(f"\nModels created: {list(models.keys())}")

In [None]:
# @title 5. Training Function
import torch.nn as nn

def _batch_loss(model, criterion, X_b, y_b):
    """Move one batch to ``device``, run ``model`` on it and return the loss tensor."""
    X_b, y_b = X_b.to(device), y_b.to(device)
    pred = model(X_b)
    # Drop singleton dimensions so the prediction lines up with the 1-D target.
    if pred.dim() > 1:
        pred = pred.squeeze()
    return criterion(pred, y_b)


def train_model(model, name, epochs=100, patience=15):
    """Train ``model`` with early stopping and return it with its best weights.

    Uses the module-level ``device``, ``train_loader`` and ``val_loader``.
    Optimises a Huber loss with AdamW, clips the gradient norm to 1.0 and
    halves the learning rate when the validation loss plateaus.

    Args:
        model: A ``torch.nn.Module`` called as ``model(X_b)`` on each batch.
        name: Label used only in the progress messages.
        epochs: Maximum number of passes over ``train_loader``.
        patience: Number of consecutive epochs without a new best validation
            loss after which training stops.

    Returns:
        ``(model, train_hist, val_hist, best_val)``: the model with the weights
        of its best validation epoch loaded, the per-epoch mean training and
        validation losses, and the lowest validation loss seen. If no epoch
        improves on ``inf`` (for example NaN losses, or ``epochs == 0``), the
        weights the model had before training are restored.
    """
    import copy  # local import: `copy` is not imported by the visible notebook cells

    model = model.to(device)
    criterion = nn.HuberLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

    best_val = float('inf')
    # state_dict() returns tensors that share storage with the live
    # parameters, so a shallow .copy() would keep changing as training goes on.
    # A deep copy freezes the values. Taking one up front also guarantees
    # best_state exists even when no epoch ever improves.
    best_state = copy.deepcopy(model.state_dict())
    patience_counter = 0
    train_hist, val_hist = [], []

    print(f"\nTraining {name}...")

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_b, y_b in train_loader:
            optimizer.zero_grad()
            loss = _batch_loss(model, criterion, X_b, y_b)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_b, y_b in val_loader:
                val_loss += _batch_loss(model, criterion, X_b, y_b).item()

        # Report the mean loss per batch for the epoch.
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        train_hist.append(train_loss)
        val_hist.append(val_loss)
        scheduler.step(val_loss)

        if val_loss < best_val:
            best_val = val_loss
            patience_counter = 0
            best_state = copy.deepcopy(model.state_dict())
        else:
            patience_counter += 1

        if (epoch+1) % 10 == 0:
            print(f"  Epoch {epoch+1}: Train={train_loss:.4f}, Val={val_loss:.4f}")

        if patience_counter >= patience:
            print(f"  Early stopping at epoch {epoch+1}")
            break

    model.load_state_dict(best_state)
    print(f"  Best val_loss: {best_val:.4f}")
    return model, train_hist, val_hist, best_val

In [None]:
# @title 6. Train All Models
# Per-model training artefacts, keyed by the factory's model name.
results = {}

for name, model in models.items():
    trained_model, train_h, val_h, best_v = train_model(model, name.upper())

    # Keep the fitted model together with its loss curves and best score.
    results[name] = dict(
        model=trained_model,
        train_hist=train_h,
        val_hist=val_h,
        best_val=best_v,
    )

    # Save the model via the factory, recording its validation loss as metadata.
    checkpoint_name = f"{name}_best"
    factory.save_model(trained_model, checkpoint_name, metadata={'val_loss': best_v})

print("\n‚úÖ All models trained!")

In [None]:
# @title 7. Training History
# One panel per trained model on a 2x2 grid; zip() pairs panels with models
# and stops at whichever runs out first.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10))

for ax, name in zip(axes.flat, results):
    data = results[name]
    for key, label in (('train_hist', 'Train'), ('val_hist', 'Val')):
        ax.plot(data[key], label=label)
    ax.set_title(f"{name.upper()} (Best Val: {data['best_val']:.4f})")
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# @title 8. Evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Test inputs as one tensor on the training device; ground truth converted
# back from the scaled target space with the fitted target scaler.
X_test_t = torch.FloatTensor(X_test).to(device)
y_actual = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()

metrics_list = []

for name, data in results.items():
    model = data['model']
    model.eval()

    # Single forward pass over the whole test set, without gradient tracking.
    with torch.no_grad():
        pred_scaled = model(X_test_t).cpu().numpy()

    # Collapse to 1-D, then undo the target scaling.
    pred_scaled = pred_scaled.reshape(-1)
    pred = scaler_y.inverse_transform(pred_scaled.reshape(-1, 1)).flatten()

    relative_error = (y_actual - pred) / y_actual
    row = {
        'Model': name.upper(),
        'MAE': mean_absolute_error(y_actual, pred),
        'RMSE': np.sqrt(mean_squared_error(y_actual, pred)),
        'MAPE': np.mean(np.abs(relative_error)) * 100,
        'R2': r2_score(y_actual, pred),
    }
    metrics_list.append(row)

    # Keep the predictions for the plotting cells.
    data['predictions'] = pred

metrics_df = pd.DataFrame(metrics_list).set_index('Model')
separator = "="*60
print("\nüìä TEST SET METRICS")
print(separator)
print(metrics_df.to_string())
print(separator)

# Best model: the one with the highest R2 on the test set.
best_model = metrics_df['R2'].idxmax()
print(f"\nüèÜ Best Model: {best_model} (R¬≤={metrics_df.loc[best_model, 'R2']:.4f})")

In [None]:
# @title 9. Predictions Visualization
# Actual versus predicted test-set values, one panel per model on a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(16, 10))

for ax, name in zip(axes.flat, results):
    data = results[name]
    for series, label in ((y_actual, 'Actual'), (data['predictions'], 'Predicted')):
        ax.plot(series, label=label, alpha=0.7)
    # metrics_df is indexed by the upper-cased model name.
    r2 = metrics_df.loc[name.upper(), 'R2']
    ax.set_title(f"{name.upper()} (R¬≤={r2:.4f})")
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# @title 10. Model Comparison
# One bar chart per metric, with the best-scoring model highlighted in green.
fig, axes = plt.subplots(1, 4, figsize=(16, 4))

for ax, col in zip(axes, ['MAE', 'RMSE', 'MAPE', 'R2']):
    values = metrics_df[col]

    # R2 is better when larger; the three error metrics are better when smaller.
    best = values.max() if col == 'R2' else values.min()
    colors = ['green' if v == best else 'steelblue' for v in values]

    ax.bar(values.index, values.values, color=colors)
    ax.set_title(col)
    ax.tick_params(axis='x', rotation=45)

    # Label each bar: 4 decimals for R2, 2 for MAPE, whole dollars otherwise.
    for i, v in enumerate(values):
        if col == 'R2':
            fmt = f'{v:.4f}'
        elif col == 'MAPE':
            fmt = f'{v:.2f}'
        else:
            fmt = f'${v:,.0f}'
        ax.text(i, v, fmt, ha='center', va='bottom', fontsize=9)

plt.tight_layout()
plt.show()

print("\nüìÅ Saved models:")
print(factory.list_saved_models())