# Improved Hybrid Model Training

This notebook implements:
1. Efficient CNN-BiLSTM with reduced parameters
2. Multi-task learning for classification and regression
3. Time series prediction with sliding window
4. Model optimization techniques (pruning and weight clipping)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from src.data_preprocessing import DataPreprocessor
from src.models import create_efficient_cnn_bilstm, apply_pruning, apply_weight_clipping, HybridTimeSeriesModel

# Set style for better visualizations.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and
# deprecated the plain "seaborn" name, which newer releases no longer accept.
# Pick whichever name this installation provides so the cell runs on both.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_palette('husl')

In [None]:
# Read the raw CSV and run it through the project preprocessing pipeline
preprocessor = DataPreprocessor(window_size=60)
df = pd.read_csv('data/air_quality.csv')

# Columns passed to prepare_data as model inputs; 'Air Quality' is passed as the target
features = ['CO2', 'TVOC', 'PM10', 'PM2.5', 'CO', 'LDR', 'O3']
prepared_data = preprocessor.prepare_data(df, features, 'Air Quality')

# Sequence inputs/targets that the training cell slices for the time-series model
X_seq, y_seq = prepared_data['X_seq'], prepared_data['y_seq']

# Quick sanity check of the array shapes produced by preprocessing
shape_report = (
    "Data shapes:",
    f"Classification - X_train: {prepared_data['X_train'].shape}",
    f"Time Series - X_seq: {X_seq.shape}",
)
print(*shape_report, sep="\n")

In [None]:
# Build the hybrid model and fit it on the first 80% of the sequence data
# NOTE(review): 60 repeats DataPreprocessor(window_size=60) from the loading cell — keep in sync
n_timesteps = 60
input_shape = (n_timesteps, len(features))
# NOTE(review): the loading cell passes 'Air Quality' as the target column, while the class
# count is taken from 'air_quality_category' — confirm this column exists in df at this point
num_classes = len(df['air_quality_category'].unique())

model = HybridTimeSeriesModel(input_shape, num_classes)
model.model.summary()

# Training portion: everything before the 80% mark of X_seq
split_idx = int(len(X_seq) * 0.8)
train_rows = slice(None, split_idx)
X_train_seq = X_seq[train_rows]
# NOTE(review): labels are taken from prepared_data['y_train'] but sliced with the sequence
# split index — verify these rows line up one-to-one with X_seq
y_train_class = prepared_data['y_train'][train_rows]
# Columns 0 and 1 of y_seq are used as the temperature and humidity targets
y_train_temp, y_train_hum = y_seq[train_rows, 0], y_seq[train_rows, 1]

# Fit the model; the returned history is used by the plotting cell
fit_options = dict(epochs=100, batch_size=32)
history = model.train(
    X_train_seq,
    y_train_class,
    y_train_temp,
    y_train_hum,
    **fit_options,
)

In [None]:
# Plot training metrics: one panel per tracked metric, training vs. validation
def _plot_history_panel(ax, metric, title, ylabel):
    """Draw the training and validation curves of one metric on ``ax``.

    Reads ``history.history[metric]`` and ``history.history['val_' + metric]``
    from the training cell's ``history`` object.
    """
    ax.plot(history.history[metric], label='Training')
    ax.plot(history.history[f'val_{metric}'], label='Validation')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()


fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# (axes, history key, panel title, y-axis label) — drawn in this order
metric_panels = [
    (ax1, 'classification_accuracy', 'Classification Accuracy', 'Accuracy'),
    (ax2, 'classification_loss', 'Classification Loss', 'Loss'),
    (ax3, 'temperature_mse', 'Temperature Prediction MSE', 'MSE'),
    (ax4, 'humidity_mse', 'Humidity Prediction MSE', 'MSE'),
]
for panel_ax, metric_key, panel_title, y_label in metric_panels:
    _plot_history_panel(panel_ax, metric_key, panel_title, y_label)

plt.tight_layout()
plt.show()

In [None]:
# Optimize the trained model: prune first, then clip weights on the pruned result
print("Applying pruning...")
pruning_config = {'sparsity': 0.5}
pruned_model = apply_pruning(model.model, pruning_config)

print("\nApplying weight clipping...")
optimized_model = apply_weight_clipping(pruned_model)

# Persist the optimized model to disk
output_path = 'models/optimized_hybrid_model.h5'
optimized_model.save(output_path)
print('\nOptimized model saved successfully!')

# Report how the parameter count changed between the trained and optimized models.
# NOTE(review): count_params() counts parameters, not non-zero weights, so this may not
# reflect sparsity-based pruning — confirm what apply_pruning returns.
original_params = model.model.count_params()
optimized_params = optimized_model.count_params()
reduction = (original_params - optimized_params) / original_params * 100

print(
    f"\nParameter reduction: {reduction:.2f}%",
    f"Original parameters: {original_params:,}",
    f"Optimized parameters: {optimized_params:,}",
    sep="\n",
)