In [1]:
# -*- coding: utf-8 -*-
import os
import warnings
from typing import Dict, List, Tuple, Optional

# warnings.filterwarnings('ignore')
os.environ['KERAS_BACKEND'] = 'tensorflow'

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import keras
import tensorflow as tf

import bayesflow as bf

from hmmlearn import hmm
from hmmlearn.hmm import CategoricalHMM

from sklearn.preprocessing import LabelEncoder

current_backend = tf.keras.backend.backend()
print(f"tf.keras is using the '{current_backend}' backend.")

2025-07-13 15:12:47.229537: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-07-13 15:12:47.229570: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-07-13 15:12:47.229577: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1752412367.229588 6598229 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1752412367.229615 6598229 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
INFO:bayesflow:Using backend 'tensorflow'


tf.keras is using the 'tensorflow' backend.


In [2]:
# HMM PARAMETERS FROM TASK DESCRIPTION

# 20 amino acids in standard order
AMINO_ACIDS = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 
               'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']

# Emission probabilities from task tables
# Alpha-helix state (state 0)
EMISSION_ALPHA = [0.12, 0.06, 0.03, 0.05, 0.01, 0.09, 0.05, 0.04, 0.02, 0.07,
                  0.12, 0.06, 0.03, 0.04, 0.02, 0.05, 0.04, 0.01, 0.03, 0.06]

# Other state (state 1) 
EMISSION_OTHER = [0.06, 0.05, 0.05, 0.06, 0.02, 0.05, 0.03, 0.09, 0.03, 0.05,
                  0.08, 0.06, 0.02, 0.04, 0.06, 0.07, 0.06, 0.01, 0.04, 0.07]

# Transition probabilities from task description
# [alpha->alpha, alpha->other]
TRANS_FROM_ALPHA = [0.90, 0.10]
# [other->alpha, other->other]  
TRANS_FROM_OTHER = [0.05, 0.95]

# Initial state probabilities (always starts in "other" state)
INITIAL_PROBS = [0.0, 1.0]  # [alpha-helix, other]

# Validation
print("PARAMETER VALIDATION:")
print(f"Amino acids: {len(AMINO_ACIDS)} types")
print(f"Alpha emission sum: {sum(EMISSION_ALPHA):.3f}")
print(f"Other emission sum: {sum(EMISSION_OTHER):.3f}")
print(f"Alpha transitions sum: {sum(TRANS_FROM_ALPHA):.3f}")
print(f"Other transitions sum: {sum(TRANS_FROM_OTHER):.3f}")
print(f"Initial probs sum: {sum(INITIAL_PROBS):.3f}")
print("\n✓ All probabilities are valid!")

PARAMETER VALIDATION:
Amino acids: 20 types
Alpha emission sum: 1.000
Other emission sum: 1.000
Alpha transitions sum: 1.000
Other transitions sum: 1.000
Initial probs sum: 1.000

✓ All probabilities are valid!


In [3]:
# FIXED HMM MODEL CREATION

def create_fixed_hmm():
    """
    Create HMM with fixed parameters from task description.
    
    States: 0=alpha-helix, 1=other
    Features: 20 amino acids (0-19 indices)
    
    Returns:
        CategoricalHMM with fixed empirical parameters
    """
    # Create model with fixed parameters (no learning)
    model = hmm.CategoricalHMM(
        n_components=2,        # 2 states: alpha-helix, other
        n_features=20,         # 20 amino acids
        params="",             # Don't update any parameters
        init_params="",        # Don't initialize any parameters
        algorithm="viterbi",   # Use Viterbi algorithm for decoding
        verbose=True
    )
    
    # Set fixed parameters from task description
    model.startprob_ = np.array(INITIAL_PROBS)
    model.transmat_ = np.array([TRANS_FROM_ALPHA, TRANS_FROM_OTHER])
    model.emissionprob_ = np.array([EMISSION_ALPHA, EMISSION_OTHER])
    
    return model

# Test HMM creation
print("TESTING HMM CREATION:\n")
hmm_model = create_fixed_hmm()

print(f"States: {hmm_model.n_components}")
print(f"Features: {hmm_model.n_features}")
print(f"Start probabilities: {hmm_model.startprob_}")
print(f"Transition matrix shape: {hmm_model.transmat_.shape}")
print(f"Emission matrix shape: {hmm_model.emissionprob_.shape}")

print("\nTransition probabilities:")
print("From alpha-helix:", hmm_model.transmat_[0])
print("From other:     ", hmm_model.transmat_[1])

print("\nEmission probabilities (first 5 amino acids):")
print("Alpha-helix:", hmm_model.emissionprob_[0][:5])
print("Other:      ", hmm_model.emissionprob_[1][:5])
print("\n✓ HMM model created successfully!")

TESTING HMM CREATION:

States: 2
Features: 20
Start probabilities: [0. 1.]
Transition matrix shape: (2, 2)
Emission matrix shape: (2, 20)

Transition probabilities:
From alpha-helix: [0.9 0.1]
From other:      [0.05 0.95]

Emission probabilities (first 5 amino acids):
Alpha-helix: [0.12 0.06 0.03 0.05 0.01]
Other:       [0.06 0.05 0.05 0.06 0.02]

✓ HMM model created successfully!


In [4]:
# HMM DATA GENERATION AND SIMULATOR FUNCTIONS

def generate_amino_acid_sequence(n_samples=50, random_state=None):
    """
    Generate amino acid sequences from the fixed HMM.
    
    Args:
        n_samples: Number of amino acids to generate
        random_state: Random state for reproducibility
        
    Returns:
        dict with 'amino_acids', 'true_states', and 'state_probs'
    """
    # Create the fixed HMM model
    model = create_fixed_hmm()
    
    # Generate sequence from HMM
    X, Z = model.sample(n_samples, random_state=random_state)
    
    # X is shape (n_samples, 1) - amino acid indices
    # Z is shape (n_samples,) - true hidden states
    amino_acids = X.flatten()  # Convert to 1D array of amino acid indices
    
    # Get state membership probabilities using Forward-Backward algorithm
    # Need to reshape X for predict_proba (expects (n_samples, 1))
    state_probs = model.predict_proba(X)  # Shape: (n_samples, n_states)
    
    return {
        'amino_acids': amino_acids,       # Shape: (n_samples,) - amino acid indices (0-19)
        'true_states': Z,                 # Shape: (n_samples,) - true hidden states (0=alpha, 1=other) 
        'state_probs': state_probs        # Shape: (n_samples, 2) - state membership probabilities
    }

# Test the data generation
print("TESTING HMM DATA GENERATION:\n")
test_data = generate_amino_acid_sequence(n_samples=20, random_state=42)

print(f"Amino acids shape: {test_data['amino_acids'].shape}")
print(f"True states shape: {test_data['true_states'].shape}")
print(f"State probabilities shape: {test_data['state_probs'].shape}")

print(f"\nFirst 10 amino acids (indices): {test_data['amino_acids'][:10]}")
print(f"First 10 true states: {test_data['true_states'][:10]}")
print(f"First 5 state probabilities:\n{test_data['state_probs'][:5]}")

# Verify state probabilities sum to 1
print(f"\nState probabilities sum check: {np.allclose(test_data['state_probs'].sum(axis=1), 1.0)}")

# Convert amino acid indices to actual amino acid letters for readability
amino_acid_letters = [AMINO_ACIDS[idx] for idx in test_data['amino_acids'][:10]]
print(f"First 10 amino acids (letters): {amino_acid_letters}")
print("\n✓ HMM data generation working correctly!")

TESTING HMM DATA GENERATION:

Amino acids shape: (20,)
True states shape: (20,)
State probabilities shape: (20, 2)

First 10 amino acids (indices): [19 11  2 16 14 19  3  2  9  5]
First 10 true states: [1 1 1 1 1 0 0 0 0 0]
First 5 state probabilities:
[[0.         1.        ]
 [0.01768884 0.98231116]
 [0.0253218  0.9746782 ]
 [0.03656372 0.96343628]
 [0.05153765 0.94846235]]

State probabilities sum check: True
First 10 amino acids (letters): ['V', 'K', 'N', 'T', 'P', 'V', 'D', 'N', 'I', 'E']

✓ HMM data generation working correctly!


In [5]:
# BAYESFLOW SIMULATOR IMPLEMENTATION

def hmm_simulator_function(batch_shape, sequence_length=50, **kwargs):
    """
    Simulator function for BayesFlow that generates HMM data.
    
    This function will be wrapped by BayesFlow's LambdaSimulator.
    
    Args:
        batch_shape: Shape of the batch to generate (from BayesFlow)
        sequence_length: Length of amino acid sequences to generate
        **kwargs: Additional keyword arguments
        
    Returns:
        dict: Dictionary with simulation outputs for BayesFlow
    """
    # Handle both int and tuple batch_shape
    if isinstance(batch_shape, int):
        batch_size = batch_shape
    else:
        batch_size = batch_shape[0] if len(batch_shape) > 0 else 1
    
    # Generate multiple sequences
    amino_acids_batch = []
    true_states_batch = []
    state_probs_batch = []
    
    for i in range(batch_size):
        # Generate one sequence with different random state for each
        data = generate_amino_acid_sequence(
            n_samples=sequence_length, 
            random_state=np.random.randint(0, 10000)
        )
        
        amino_acids_batch.append(data['amino_acids'])
        true_states_batch.append(data['true_states'])
        state_probs_batch.append(data['state_probs'])
    
    # Stack into batch format
    return {
        'amino_acids': np.array(amino_acids_batch),      # Shape: (batch_size, sequence_length)
        'true_states': np.array(true_states_batch),      # Shape: (batch_size, sequence_length)
        'state_probs': np.array(state_probs_batch),      # Shape: (batch_size, sequence_length, 2)
    }

# Create BayesFlow simulator
print("CREATING BAYESFLOW SIMULATOR:\n")
hmm_simulator = bf.simulators.LambdaSimulator(
    sample_fn=hmm_simulator_function,
    is_batched=True  # Our function handles batching internally
)

print("✓ BayesFlow LambdaSimulator created successfully!")

# Test the BayesFlow simulator
print("\nTESTING BAYESFLOW SIMULATOR:")
batch_size = 3
sequence_length = 15

# Sample from the simulator
simulation_data = hmm_simulator.sample(
    batch_shape=(batch_size,), 
    sequence_length=sequence_length
)

print(f"Simulation data keys: {list(simulation_data.keys())}")
print(f"Amino acids batch shape: {simulation_data['amino_acids'].shape}")
print(f"True states batch shape: {simulation_data['true_states'].shape}")
print(f"State probabilities batch shape: {simulation_data['state_probs'].shape}")

# Show multiple sequences
num_seq = 2
print(f"\nFirst {num_seq} sequences:")
for i in range(num_seq):
    amino_acids = simulation_data['amino_acids'][i]
    true_states = simulation_data['true_states'][i]
    state_probs = simulation_data['state_probs'][i]
    
    print(f"\nSequence {i}:")
    print(f"Amino acids: {amino_acids}")
    print(f"True states: {true_states}")
    print(f"State probabilities shape: {state_probs.shape}")
    print(f"State probabilities sum check: {np.allclose(state_probs.sum(axis=1), 1.0)}")
    print(f"Sequnce length: {len(amino_acids)}")

# Convert first sequence to amino acid letters
example_letters = [AMINO_ACIDS[idx] for idx in simulation_data['amino_acids'][0]]
print(f"Amino acid letters: {example_letters}")

print("\n✓ BayesFlow simulator working correctly!")

CREATING BAYESFLOW SIMULATOR:

✓ BayesFlow LambdaSimulator created successfully!

TESTING BAYESFLOW SIMULATOR:
Simulation data keys: ['amino_acids', 'true_states', 'state_probs']
Amino acids batch shape: (3, 15)
True states batch shape: (3, 15)
State probabilities batch shape: (3, 15, 2)

First 2 sequences:

Sequence 0:
Amino acids: [ 7 16 11 15 18 10 19 14  5 17 15 13 14 15  3]
True states: [1 1 1 1 1 1 1 1 1 1 1 1 0 0 0]
State probabilities shape: (15, 2)
State probabilities sum check: True
Sequnce length: 15

Sequence 1:
Amino acids: [10 19 16  7 11  0  4  1 10 19 19  5 14  3  7]
True states: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
State probabilities shape: (15, 2)
State probabilities sum check: True
Sequnce length: 15
Amino acid letters: ['G', 'T', 'K', 'S', 'Y', 'L', 'V', 'P', 'E', 'W', 'S', 'F', 'P', 'S', 'D']

✓ BayesFlow simulator working correctly!


In [6]:
# CUSTOM PROTEIN SUMMARY NETWORK

class ProteinSummaryNetwork(bf.networks.SummaryNetwork):
    """
    Custom summary network for protein amino acid sequences.
    
    This network is specifically designed for the protein secondary structure task:
    - Embeds amino acid indices into dense representations
    - Uses bidirectional LSTM to capture sequential dependencies
    - Applies attention mechanism to focus on important positions
    - Outputs summary statistics for the entire sequence
    """
    
    def __init__(self, 
                 vocab_size=20,              # Number of amino acids
                 embedding_dim=32,           # Amino acid embedding dimension
                 lstm_units=64,              # LSTM hidden units
                 attention_dim=32,           # Attention mechanism dimension
                 summary_dim=64,             # Output summary dimension
                 dropout_rate=0.1,           # Dropout rate
                 **kwargs):
        super().__init__(**kwargs)
        
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.lstm_units = lstm_units
        self.attention_dim = attention_dim
        self.summary_dim = summary_dim
        self.dropout_rate = dropout_rate
        
        # Amino acid embedding layer
        self.embedding = tf.keras.layers.Embedding(
            input_dim=vocab_size,
            output_dim=embedding_dim,
            mask_zero=False,  # Don't mask zero values as amino acid 'A' has index 0
            name='amino_acid_embedding'
        )
        
        # Bidirectional LSTM for sequence processing
        self.lstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(
                lstm_units,
                return_sequences=True,  # Return full sequence for attention
                dropout=dropout_rate,
                recurrent_dropout=dropout_rate,
                name='sequence_lstm'
            ),
            name='bidirectional_lstm'
        )
        
        # Attention mechanism layers
        self.attention_dense = tf.keras.layers.Dense(
            attention_dim, 
            activation='tanh',
            name='attention_dense'
        )
        self.attention_weights = tf.keras.layers.Dense(
            1, 
            activation=None,  # Don't use softmax here, apply it later
            name='attention_weights'
        )
        
        # Final summary layers
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.summary_dense1 = tf.keras.layers.Dense(
            summary_dim * 2,
            activation='silu',
            name='summary_dense1'
        )
        self.summary_dense2 = tf.keras.layers.Dense(
            summary_dim,
            activation='silu', 
            name='summary_dense2'
        )
        
    def call(self, x, training=False, **kwargs):
        """
        Forward pass of the protein summary network.
        
        Args:
            x: Input tensor of shape (batch_size, sequence_length, 1) containing amino acid indices
            training: Whether in training mode
            
        Returns:
            Summary tensor of shape (batch_size, summary_dim)
        """
        # Remove the last dimension if present: (batch_size, seq_len, 1) -> (batch_size, seq_len)
        if x.shape[-1] == 1:
            x = tf.squeeze(x, axis=-1)
            
        # Convert to integer indices for embedding
        x = tf.cast(x, tf.int32)
        
        # Embed amino acid indices: (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim)
        embedded = self.embedding(x)
        
        # Process with bidirectional LSTM: (batch_size, seq_len, embedding_dim) -> (batch_size, seq_len, 2*lstm_units)
        lstm_output = self.lstm(embedded, training=training)
        
        # Apply attention mechanism
        # Compute attention scores: (batch_size, seq_len, 2*lstm_units) -> (batch_size, seq_len, attention_dim)
        attention_scores = self.attention_dense(lstm_output)
        
        # Compute attention weights: (batch_size, seq_len, attention_dim) -> (batch_size, seq_len, 1)
        attention_logits = self.attention_weights(attention_scores)
        
        # Apply softmax along the sequence dimension to get proper attention weights
        attention_weights = tf.nn.softmax(attention_logits, axis=1)  # Softmax over sequence dimension
        
        # Apply attention: weighted sum of LSTM outputs
        # (batch_size, seq_len, 2*lstm_units) * (batch_size, seq_len, 1) -> (batch_size, 2*lstm_units)
        attended_output = tf.reduce_sum(lstm_output * attention_weights, axis=1)
        
        # Apply dropout
        attended_output = self.dropout(attended_output, training=training)
        
        # Generate final summary through dense layers
        summary = self.summary_dense1(attended_output)
        summary = self.dropout(summary, training=training)
        summary = self.summary_dense2(summary)
        
        return summary
    
    def get_config(self):
        """Return the configuration of the layer."""
        config = super().get_config()
        config.update({
            'vocab_size': self.vocab_size,
            'embedding_dim': self.embedding_dim,
            'lstm_units': self.lstm_units,
            'attention_dim': self.attention_dim,
            'summary_dim': self.summary_dim,
            'dropout_rate': self.dropout_rate,
        })
        return config
    
    @classmethod
    def from_config(cls, config):
        """Create layer from configuration."""
        return cls(**config)

print("✓ Custom ProteinSummaryNetwork class defined")

✓ Custom ProteinSummaryNetwork class defined


In [7]:
# CREATE WORKFLOW FOR BAYESFLOW

class FlattenTransform(bf.adapters.transforms.Transform):
    """Custom transform to flatten inference variables from (batch, seq_len, 2) to (batch, seq_len*2)"""
    
    def __init__(self):
        super().__init__()
    
    def forward(self, x, **kwargs):
        # Flatten the last two dimensions: (batch, seq_len, 2) -> (batch, seq_len*2)
        return x.reshape(x.shape[0], -1).astype(np.float32)
    
    def inverse(self, x, **kwargs):
        # For inverse, we would need to know the original shape
        # This is not needed for our use case but required by the interface
        raise NotImplementedError("Inverse transform not implemented for FlattenTransform")

def create_workflow():
    """
    Create BayesFlow workflow with custom protein summary network
    and properly configured inference network.
    """
    print("Creating BayesFlow workflow...\n")
    
    # 1. USE EXISTING SIMULATOR
    simulator = hmm_simulator
    print("✓ Using existing HMM simulator")
    
    # 2. CUSTOM SUMMARY NETWORK
    protein_summary_net = ProteinSummaryNetwork(
        vocab_size=20,
        embedding_dim=32,
        lstm_units=64,
        attention_dim=32,
        summary_dim=64,
        name='ProteinSummaryNetwork'
    )
    print("✓ Custom summary network created")
    
    # 3. PROPERLY CONFIGURED INFERENCE NETWORK
    inference_net = bf.networks.FlowMatching(
        subnet="mlp",
        base_distribution="normal",
    )
    print("✓ Properly configured FlowMatching created")
    print(f"  - Subnet: MLP")
    print(f"  - Base distribution: Normal")
    
    # inference_net = bf.networks.CouplingFlow(
    #     subnet='mlp',           # Use MLP subnets
    #     depth=4,               # Number of coupling layers
    #     transform='affine',    # Affine coupling transforms  
    #     permutation='random',  # Random permutations between layers
    #     use_actnorm=True,      # Use activation normalization
    #     base_distribution='normal',  # Normal base distribution
    #     name='ProteinInferenceNetwork'
    # )
    # print("✓ Properly configured CouplingFlow created")
    # print(f"  - Depth: 8 coupling layers")
    # print(f"  - Transform: affine")
    # print(f"  - Base distribution: normal")
    
    # 4. ADAPTER (same as before)
    adapter_transforms = [
        bf.adapters.transforms.Rename(from_key='amino_acids', to_key='summary_variables'),
        bf.adapters.transforms.Rename(from_key='state_probs', to_key='inference_variables'),
        bf.adapters.transforms.Drop(keys=['true_states']),
        bf.adapters.transforms.MapTransform({
            'summary_variables': bf.adapters.transforms.ConvertDType(
                from_dtype='int64', to_dtype='float32'
            ),
            'inference_variables': bf.adapters.transforms.ConvertDType(
                from_dtype='float64', to_dtype='float32'
            ),
        }),
        bf.adapters.transforms.MapTransform({
            'inference_variables': FlattenTransform(),
        }),
    ]
    
    adapter = bf.Adapter(transforms=adapter_transforms)
    print("✓ Adapter with transforms created")
    
    # 5. CREATE WORKFLOW WITH PROPER PARAMETERS
    workflow = bf.BasicWorkflow(
        simulator=simulator,
        adapter=adapter,
        inference_network=inference_net,
        summary_network=protein_summary_net,
        initial_learning_rate=0.001,  # Learning rate
        inference_variables=['inference_variables'],  # Specify which variables to infer
        summary_variables=['summary_variables']       # Specify summary variables
    )
    print("✓ BayesFlow workflow created with proper configuration")
    
    return workflow

In [8]:
# TRAINING FUNCTION FOR CUSTOM PROTEIN WORKFLOW

def train_protein_workflow(
    workflow,
    batch_size=16,
    epochs=50,
    print_every=10,
    save_path=None
):
    """
    Train the protein BayesFlow workflow with our custom summary network.
    
    Args:
        workflow: The BayesFlow workflow to train
        batch_size: Batch size for training
        epochs: Number of training epochs
        print_every: Print progress every N epochs
        save_path: Path to save the trained model (optional)
    
    Returns:
        training_history: Dictionary with training metrics
    """
    
    print(f"Starting training for {epochs} epochs with batch size {batch_size}")
    print("=" * 60)
    
    training_history = {
        'epoch': [],
        'loss': [],
        'validation_loss': []
    }
    
    try:
        # Configure the workflow for training
        config = {
            'epochs': epochs,
            'batch_size': batch_size,
            'validation_sims': 1000,  # Generate validation data
            'checkpoint_interval': max(1, epochs // 10),  # Save checkpoints
        }
        
        print("Training configuration:")
        for key, value in config.items():
            print(f"  {key}: {value}")
        print()
        
        # Start online training
        print("🚀 Starting online training...")
        training_info = workflow.fit_online(
            num_batches_per_epoch=100,
            validation_data=20,
            epochs=config['epochs'],
            batch_size=config['batch_size'],
            print_every=print_every
        )
        
        print("✅ Training completed successfully!")
        
        # Extract training history if available
        if hasattr(training_info, 'history') and training_info.history:
            history = training_info.history
            training_history['loss'] = history.get('loss', [])
            training_history['validation_loss'] = history.get('val_loss', [])
            training_history['epoch'] = list(range(1, len(training_history['loss']) + 1))
        
        # Save the model if path provided
        if save_path:
            print(f"💾 Saving model to {save_path}")
            workflow.save_model(save_path)
            
        return training_history
        
    except Exception as e:
        print(f"❌ Training failed with error: {e}")
        import traceback
        traceback.print_exc()
        return training_history

print("✓ Training function defined")

✓ Training function defined


In [None]:
configured_workflow = create_workflow()

history = train_protein_workflow(
    workflow=configured_workflow,
    batch_size=32,
    epochs=15,
    print_every=1
)

INFO:bayesflow:Fitting on dataset instance of OnlineDataset.
INFO:bayesflow:Building on a test batch.


Creating BayesFlow workflow...

✓ Using existing HMM simulator
✓ Custom summary network created
✓ Properly configured FlowMatching created
  - Subnet: MLP
  - Base distribution: Normal
✓ Adapter with transforms created
✓ BayesFlow workflow created with proper configuration
Starting training for 15 epochs with batch size 32
Training configuration:
  epochs: 15
  batch_size: 32
  validation_sims: 1000
  checkpoint_interval: 1

🚀 Starting online training...
Epoch 1/15


2025-07-13 15:12:50.977027: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m981s[0m 10s/step - loss: 4.8482 - val_loss: 1.3974
Epoch 2/15
[1m 21/100[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m12:58[0m 10s/step - loss: 1.6816

✓ Model analysis and optimization functions defined


The history saving thread hit an unexpected error (UnicodeEncodeError('utf-8', '# SIMPLIFIED PARAMETER ANALYSIS\n\ndef analyze_model_parameters_simple(workflow):\n    """\n    Simplified parameter analysis that doesn\'t require building the full model.\n    """\n    print("🔍 ANALYZING MODEL PARAMETERS")\n    print("=" * 50)\n    \n    # Get the approximator components\n    approximator = workflow.approximator\n    summary_network = approximator.summary_network\n    inference_network = approximator.inference_network\n    \n    # Build summary network only (it\'s easier to analyze)\n    print("Building summary network...")\n    dummy_summary_input = tf.zeros((1, 50, 1))  \n    _ = summary_network(dummy_summary_input)\n    summary_params = summary_network.count_params()\n    \n    # Estimate inference network parameters based on configuration\n    print("Estimating inference network parameters...")\n    \n    # CouplingFlow with 8 layers, each layer has MLP subnets [128, 128]\n    # Input

The history saving thread hit an unexpected error (UnicodeEncodeError('utf-8', '# SIMPLIFIED PARAMETER ANALYSIS\n\ndef analyze_model_parameters_simple(workflow):\n    """\n    Simplified parameter analysis that doesn\'t require building the full model.\n    """\n    print("🔍 ANALYZING MODEL PARAMETERS")\n    print("=" * 50)\n    \n    # Get the approximator components\n    approximator = workflow.approximator\n    summary_network = approximator.summary_network\n    inference_network = approximator.inference_network\n    \n    # Build summary network only (it\'s easier to analyze)\n    print("Building summary network...")\n    dummy_summary_input = tf.zeros((1, 50, 1))  \n    _ = summary_network(dummy_summary_input)\n    summary_params = summary_network.count_params()\n    \n    # Estimate inference network parameters based on configuration\n    print("Estimating inference network parameters...")\n    \n    # CouplingFlow with 8 layers, each layer has MLP subnets [128, 128]\n    # Input

UnicodeEncodeError: 'utf-8' codec can't encode character '\udcca' in position 13: surrogates not allowed

PARAMETER ANALYSIS
Summary Network: 79,233 parameters
Inference Network: ~471,840 parameters (estimated)
TOTAL: ~551,073 parameters

WHY TRAINING IS SLOW:
- Large model: ~600K+ parameters
- Complex coupling flows: 8 deep layers
- High dimensional output: 100 variables
- Online data generation: New data each batch

SOLUTIONS:
1. Reduce batch size: 32 -> 8-16
2. Fewer epochs: 15 -> 5-10
3. Fewer batches per epoch: 100 -> 25-50
4. Use lightweight model (next cells)

TIME ESTIMATES:
Current model: 3-7 minutes per epoch
15 epochs: 45-105 minutes total
Lightweight model: 30-60 seconds per epoch
