# CPB V1 vs V2 Model Comparison - Complete Single Notebook

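## Structure:
- V1 and V2 use the same training logic and the same synthetic training data
- V2 outputs: [price change %, volatility %]
- V1 outputs: [price change %]
- 20 training samples and 20 freshly generated test samples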

## Cell 1: Setup & Install Dependencies

In [None]:
!pip install -q tensorflow numpy matplotlib pandas requests

import os
import sys
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from datetime import datetime
import json
from tensorflow import keras
from tensorflow.keras import layers
import warnings
warnings.filterwarnings('ignore')

print('[OK] Dependencies installed and imported')

## Cell 2: Generate Training Data (20 samples)

In [None]:
def generate_training_data(num_samples=20, sequence_length=20, seed=None):
    """Generate synthetic price sequences with price-change and volatility targets.

    Each sample is a multiplicative random walk of ``sequence_length`` prices.
    The starting level is drawn uniformly from [85000, 90000) and every step
    applies a return drawn from N(0, 0.015).  Two targets are produced per
    sample:

    * price target: percent change between the last price of the sequence and
      the price reached after five further steps with returns from N(0, 0.01);
    * volatility target: standard deviation, in percent, of the step-to-step
      returns observed inside the sequence.

    Args:
        num_samples: Number of sequences to generate; must be >= 1.
        sequence_length: Number of prices per sequence; must be >= 2, because
            the volatility target needs at least one return.
        seed: Optional seed for a dedicated ``numpy.random.default_rng``.
            ``None`` (the default) draws from NumPy's global random state, so
            existing callers that rely on ``np.random.seed`` keep working.

    Returns:
        A tuple ``(X, y_price, y_volatility)`` of NumPy arrays with shapes
        ``(num_samples, sequence_length)``, ``(num_samples,)`` and
        ``(num_samples,)``.  ``X`` is standardised per time step using the
        mean and standard deviation computed across the samples of this call.

    Raises:
        ValueError: If ``num_samples < 1`` or ``sequence_length < 2``.
    """
    if num_samples < 1:
        raise ValueError(f'num_samples must be >= 1, got {num_samples}')
    if sequence_length < 2:
        # A single price has no returns, so np.std would yield NaN targets.
        raise ValueError(f'sequence_length must be >= 2, got {sequence_length}')

    # The np.random module offers the same uniform()/normal() calls as a
    # Generator, so either object can serve as the random source below.
    rng = np.random if seed is None else np.random.default_rng(seed)

    sequences = np.empty((num_samples, sequence_length))
    price_targets = np.empty(num_samples)
    volatility_targets = np.empty(num_samples)

    for row in range(num_samples):
        # Random walk: start level times the cumulative product of (1 + return).
        start_price = rng.uniform(85000, 90000)
        step_returns = rng.normal(0, 0.015, size=sequence_length)
        prices = start_price * np.cumprod(1 + step_returns)
        sequences[row] = prices

        # Price target: percent move over the next five simulated steps.
        future_moves = rng.normal(0, 0.01, size=5)
        future_price = prices[-1] * np.prod(1 + future_moves)
        price_targets[row] = (future_price - prices[-1]) / prices[-1] * 100

        # Volatility target: std of the returns seen in the sequence, in percent.
        observed_returns = np.diff(prices) / prices[:-1]
        volatility_targets[row] = np.std(observed_returns) * 100

    # Standardise each time step across samples; the epsilon avoids a division
    # by zero when a column has no spread (e.g. num_samples == 1).
    # NOTE: the statistics come from this batch only, so two separate calls
    # (e.g. train and test) are each scaled by their own mean/std.
    sequences = (sequences - sequences.mean(axis=0)) / (sequences.std(axis=0) + 1e-8)

    return sequences, price_targets, volatility_targets

# Build the synthetic training set and report the shape of each array.
print('[1] Generating training data...')
training_set = generate_training_data(num_samples=20, sequence_length=20)
X_train, y_price, y_volatility = training_set

print('[OK] Training data generated:')
for label, array in (('X_train', X_train), ('y_price', y_price), ('y_volatility', y_volatility)):
    print(f'     {label} shape: {array.shape}')

## Cell 3: Build V1 Model (Price Only)

In [None]:
print('[2] Building V1 Model (Price Prediction Only)...')

# Single-output network: an LSTM encoder feeding a small dense regression head.
v1_stack = [
    layers.LSTM(32, activation='relu', input_shape=(20, 1)),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1),  # predicted price change in percent
]
model_v1 = keras.Sequential(v1_stack)
model_v1.compile(loss='mse', optimizer='adam', metrics=['mae'])

print('[OK] V1 Model Summary:')
model_v1.summary()

print('\n[3] Training V1 Model...')
v1_fit_options = {'epochs': 50, 'batch_size': 4, 'verbose': 0}
history_v1 = model_v1.fit(X_train, y_price, **v1_fit_options)

# Report the value each tracked quantity reached in the last epoch.
v1_final = {key: series[-1] for key, series in history_v1.history.items()}
print('[OK] V1 Model trained')
print(f'     Final Loss: {v1_final["loss"]:.4f}')
print(f'     Final MAE: {v1_final["mae"]:.4f}')

## Cell 4: Build V2 Model (Price + Volatility)

In [None]:
print('[4] Building V2 Model (Price + Volatility Prediction)...')

# Trunk shared by both heads: LSTM encoder followed by one dense layer.
v2_input = layers.Input(shape=(20, 1))
trunk = layers.LSTM(32, activation='relu')(v2_input)
trunk = layers.Dense(16, activation='relu')(trunk)

# Head 1: price change.
price_hidden = layers.Dense(8, activation='relu')(trunk)
price_output = layers.Dense(1, name='price')(price_hidden)

# Head 2: volatility.
vol_hidden = layers.Dense(8, activation='relu')(trunk)
vol_output = layers.Dense(1, name='volatility')(vol_hidden)

model_v2 = keras.Model(inputs=v2_input, outputs=[price_output, vol_output])

# Both heads use the same loss and metric, keyed by the output layer names.
head_names = ('price', 'volatility')
model_v2.compile(
    optimizer='adam',
    loss={head: 'mse' for head in head_names},
    metrics={head: 'mae' for head in head_names},
)

print('[OK] V2 Model Summary:')
model_v2.summary()

print('\n[5] Training V2 Model...')
v2_targets = {'price': y_price, 'volatility': y_volatility}
history_v2 = model_v2.fit(X_train, v2_targets, epochs=50, batch_size=4, verbose=0)

# Report the value each tracked quantity reached in the last epoch.
v2_final = {key: series[-1] for key, series in history_v2.history.items()}
print('[OK] V2 Model trained')
print(f'     Final Loss: {v2_final["loss"]:.4f}')
print(f'     Final Price MAE: {v2_final["price_mae"]:.4f}')
print(f'     Final Volatility MAE: {v2_final["volatility_mae"]:.4f}')

## Cell 5: Generate Test Data & Make Predictions

In [None]:
print('[6] Generating test data...')

# A fresh draw from the same generator serves as the held-out test set.
test_set = generate_training_data(num_samples=20, sequence_length=20)
X_test, y_test_price, y_test_volatility = test_set

print('[7] Making predictions...')

# V1 has one output; V2 returns one array per head (price, then volatility).
# Each prediction array is flattened to one value per test sample.
v1_predictions = model_v1.predict(X_test, verbose=0).flatten()
v2_price_predictions, v2_vol_predictions = (
    head_output.flatten() for head_output in model_v2.predict(X_test, verbose=0)
)

print('[OK] Predictions completed')

## Cell 6: Calculate Errors & Metrics

In [None]:
print('\n' + '='*70)
print('CPB Model V1 vs V2 Comparison Test Results')
print('='*70)

# Absolute error per test sample; predictions and targets are both in percent.
v1_price_errors = np.abs(v1_predictions - y_test_price)
v2_price_errors = np.abs(v2_price_predictions - y_test_price)
v2_vol_errors = np.abs(v2_vol_predictions - y_test_volatility)

# Derive the sample count from the data instead of hard-coding it, so the
# report stays correct if the test-set size is changed in the previous cell.
num_test_samples = len(y_test_price)

# Statistics
v1_mean_error = np.mean(v1_price_errors)
v2_mean_price_error = np.mean(v2_price_errors)
v2_mean_vol_error = np.mean(v2_vol_errors)

# Positive values mean V2 has the lower mean absolute error.
improvement_price = v1_mean_error - v2_mean_price_error
improvement_rate = (improvement_price / v1_mean_error * 100) if v1_mean_error > 0 else 0

print(f'\n[Price Prediction]')
print(f'V1 Mean Error: {v1_mean_error:.4f}%')
print(f'V2 Mean Error: {v2_mean_price_error:.4f}%')
print(f'Improvement: {improvement_price:+.4f}% ({improvement_rate:+.1f}%)')

print(f'\n[Volatility Prediction (V2 Only)]')
print(f'V2 Mean Volatility Error: {v2_mean_vol_error:.4f}%')

print(f'\n[Detailed Comparison]')
print(f'V1 Std Dev: {np.std(v1_price_errors):.4f}%')
print(f'V2 Std Dev: {np.std(v2_price_errors):.4f}%')

print(f'V1 Min/Max Error: {np.min(v1_price_errors):.4f}% / {np.max(v1_price_errors):.4f}%')
print(f'V2 Min/Max Error: {np.min(v2_price_errors):.4f}% / {np.max(v2_price_errors):.4f}%')

# Number of test samples on which V2's price error is strictly smaller than V1's.
v2_better_count = int(np.sum(v2_price_errors < v1_price_errors))
v2_better_pct = v2_better_count / num_test_samples * 100
print(f'\nV2 Better than V1: {v2_better_count}/{num_test_samples} times ({v2_better_pct:.1f}%)')

print('\n' + '='*70)

## Cell 7: Visualize Results

In [None]:
# 2x3 dashboard comparing the two models; written to a PNG file at the end.
fig, axes = plt.subplots(2, 3, figsize=(16, 10))
fig.suptitle('CPB Model V1 vs V2 Comprehensive Analysis', fontsize=16, fontweight='bold')
(loss_ax, box_ax, v1_ax), (err_ax, v2_ax, vol_ax) = axes

# Panel 1: training loss per epoch for both models.
for tag, fit_history in (('V1', history_v1), ('V2', history_v2)):
    loss_ax.plot(fit_history.history['loss'], label=tag, linewidth=2)
loss_ax.set(xlabel='Epoch', ylabel='Loss', title='Training Loss')
loss_ax.legend()

# Panel 2: distribution of absolute price errors.
box_ax.boxplot([v1_price_errors, v2_price_errors], labels=['V1', 'V2 (Price)'])
box_ax.set(ylabel='Prediction Error (%)', title='Error Distribution')

# Panel 3: V1 predictions against the actual price change.
v1_ax.scatter(y_test_price, v1_predictions, alpha=0.6, label='V1')
v1_ax.plot([-5, 5], [-5, 5], 'r--', linewidth=2, label='Perfect')
v1_ax.set(xlabel='Actual (%)', ylabel='Predicted (%)', title='V1: Actual vs Predicted')
v1_ax.legend()

# Panel 4: per-sample absolute price error of both models.
err_ax.plot(v1_price_errors, 'o-', label='V1', linewidth=2, markersize=6)
err_ax.plot(v2_price_errors, 's-', label='V2', linewidth=2, markersize=6)
err_ax.set(xlabel='Test Sample', ylabel='Error (%)', title='Error Comparison')
err_ax.legend()

# Panel 5: V2 price predictions against the actual price change.
v2_ax.scatter(y_test_price, v2_price_predictions, alpha=0.6, label='V2 Price', color='green')
v2_ax.plot([-5, 5], [-5, 5], 'r--', linewidth=2, label='Perfect')
v2_ax.set(xlabel='Actual (%)', ylabel='Predicted (%)', title='V2: Actual vs Predicted')
v2_ax.legend()

# Panel 6: V2 volatility predictions against the actual volatility.
vol_ax.scatter(y_test_volatility, v2_vol_predictions, alpha=0.6, color='orange')
vol_ax.plot([0, 3], [0, 3], 'r--', linewidth=2, label='Perfect')
vol_ax.set(
    xlabel='Actual Volatility (%)',
    ylabel='Predicted Volatility (%)',
    title='V2: Volatility Prediction',
)
vol_ax.legend()

# Every panel gets the same light grid.
for panel in axes.flat:
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('model_comparison_v1_vs_v2.png', dpi=150, bbox_inches='tight')
print('Chart saved: model_comparison_v1_vs_v2.png')

## Cell 8: Save Results & Models

In [None]:
def _error_summary(errors):
    """Return mean/std/min/max of an error array as plain floats for JSON."""
    return {
        'mean': float(np.mean(errors)),
        'std': float(np.std(errors)),
        'min': float(np.min(errors)),
        'max': float(np.max(errors)),
    }


# Save models
model_v1.save('model_v1_price_only.h5')
model_v2.save('model_v2_price_volatility.h5')

print('[OK] Models saved:')
print('     - model_v1_price_only.h5')
print('     - model_v2_price_volatility.h5')

# Take the sample count from the data so the stored metadata and percentage
# stay correct if the test-set size changes.
num_test_samples = len(y_test_price)

# Save results as JSON (NumPy scalars/arrays are converted to built-in types
# because the json module cannot serialise them directly).
results_json = {
    'test_date': datetime.now().isoformat(),
    'num_test_samples': num_test_samples,
    'v1_price_error': _error_summary(v1_price_errors),
    'v2_price_error': _error_summary(v2_price_errors),
    'v2_volatility_error': _error_summary(v2_vol_errors),
    'improvement': {
        'absolute': float(improvement_price),
        'rate': float(improvement_rate),
        'v2_better_count': int(v2_better_count),
        'v2_better_percentage': float(v2_better_count / num_test_samples * 100),
    },
    'predictions': {
        'v1_price': v1_predictions.tolist(),
        'v2_price': v2_price_predictions.tolist(),
        'v2_volatility': v2_vol_predictions.tolist(),
        'actual_price': y_test_price.tolist(),
        'actual_volatility': y_test_volatility.tolist(),
    },
}

with open('model_comparison_results.json', 'w', encoding='utf-8') as f:
    json.dump(results_json, f, indent=2)

print('[OK] Results saved: model_comparison_results.json')

## Cell 9: Download Files (Colab)

In [None]:
# Offer the generated artifacts as browser downloads when running in Colab.
# Only the import is guarded: an ImportError raised while downloading would
# otherwise be misreported as "not running in Colab".
try:
    from google.colab import files
except ImportError:
    print('[INFO] Not running in Colab - files saved locally')
else:
    print('[INFO] Downloading files from Colab...')
    for artifact in (
        'model_comparison_v1_vs_v2.png',
        'model_comparison_results.json',
        'model_v1_price_only.h5',
        'model_v2_price_volatility.h5',
    ):
        files.download(artifact)
    print('[OK] All files downloaded')