In [1]:
# -*- coding: utf-8 -*-
import os
os.environ['KERAS_BACKEND'] = 'tensorflow'

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import keras
import tensorflow as tf
import bayesflow as bf

current_backend = tf.keras.backend.backend()
print(f"tf.keras is using the '{current_backend}' backend.")

2025-07-09 01:47:29.957019: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-07-09 01:47:29.957055: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-07-09 01:47:29.957059: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1752018449.957075 5700552 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1752018449.957096 5700552 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
INFO:bayesflow:Using backend 'tensorflow'


tf.keras is using the 'tensorflow' backend.


In [2]:
# Trial code to generate a sequence using a Hidden Markov Model (HMM)
# This code uses the hmmlearn library to create a categorical HMM with fixed parameters.
# It generates a sequence of observations and computes the posterior probabilities of the states.

import numpy as np
from hmmlearn import hmm

# 1) Initialize model
model = hmm.CategoricalHMM(n_components=2, algorithm='viterbi',
                           n_iter=0, init_params='')  # No EM

# 2) Fix parameters:
model.startprob_ = np.array([1.0, 0.0])        # always begin in “other”
model.transmat_  = np.array([[0.95, 0.05],     # other → other/helix
                             [0.10, 0.90]])    # helix → other/helix
model.emissionprob_ = np.array([
        [0.12, 0.06, 0.03, 0.05, 0.01, 0.09, 0.05, 0.04, 0.02, 0.07, 0.12, 0.06, 0.03, 0.04, 0.02, 0.05, 0.04, 0.01, 0.03, 0.06],
        [0.06, 0.05, 0.05, 0.06, 0.02, 0.05, 0.03, 0.09, 0.03, 0.05, 0.08, 0.06, 0.02, 0.04, 0.06, 0.07, 0.06, 0.01, 0.04, 0.07]
    ])          # 2×20 table from Task 1

# 3) Generate a sequence + state probs:
X, Z = model.sample(n_samples=200)
post = model.predict_proba(X)

print("X shape:", X.shape, "Z shape:", Z.shape)
print("Post shape:", post.shape)
print("Generated sequence (X):", X[:10])
print("State probabilities (post):", post[:10])

X shape: (200, 1) Z shape: (200,)
Post shape: (200, 2)
Generated sequence (X): [[10]
 [ 1]
 [13]
 [14]
 [10]
 [ 1]
 [ 4]
 [ 3]
 [ 0]
 [11]]
State probabilities (post): [[1.         0.        ]
 [0.93485169 0.06514831]
 [0.8613024  0.1386976 ]
 [0.79142805 0.20857195]
 [0.80233001 0.19766999]
 [0.79194286 0.20805714]
 [0.77323791 0.22676209]
 [0.80297444 0.19702556]
 [0.84907351 0.15092649]
 [0.85705359 0.14294641]]


In [3]:
# Step 4: Implementing the HMM Simulator with hmmlearn.CategoricalHMM

import numpy as np
from hmmlearn import hmm

# 4.1 Define the fixed emission & transition matrices from Task 1
n_states = 2           # 0 = "other", 1 = "alpha‐helix"
n_amino_acids = 20     # 20 standard residues

# Transition: rows sum to 1; row[0] = other→(other, helix), row[1] = helix→(other, helix)
transmat = np.array([
    [0.95, 0.05],   # other → other / helix
    [0.10, 0.90],   # helix → other / helix
])

# Start in "other" with probability 1.0
startprob = np.array([1.0, 0.0])

# Emission probabilities: shape (n_states, n_amino_acids)
# Row 0: "other"; Row 1: "alpha‐helix"
emissionprob = np.array([
    [0.06, 0.05, 0.05, 0.06, 0.02, 0.05, 0.03, 0.09, 0.03, 0.05,
     0.08, 0.06, 0.02, 0.04, 0.06, 0.07, 0.06, 0.01, 0.04, 0.07],
    [0.12, 0.06, 0.03, 0.05, 0.01, 0.09, 0.05, 0.04, 0.02, 0.07,
     0.12, 0.06, 0.03, 0.04, 0.02, 0.05, 0.04, 0.01, 0.03, 0.06],
])

# 4.2 Build the CategoricalHMM with parameters frozen (no EM training)
model = hmm.CategoricalHMM(
    n_components=n_states,
    algorithm='viterbi', # use forward–backward ("viterbi") for predict_proba
    n_iter=0,            # skip EM
    init_params=''       # do not re-initialize any parameters
)

model.startprob_    = startprob
model.transmat_     = transmat
model.emissionprob_ = emissionprob

# 4.3 Simulator function
def simulate_batch(sequence_length, batch_size, random_state=None):
    """
    Simulate `batch_size` independent HMM sequences of length `sequence_length`.
    Returns:
      X_batch: np.ndarray, shape (batch_size, sequence_length), dtype=int
               (integer‐encoded amino‐acid observations 0..19)
      P_batch: np.ndarray, shape (batch_size, sequence_length, n_states),
               posterior state-membership probabilities
    """
    X_batch = np.zeros((batch_size, sequence_length), dtype=int)
    P_batch = np.zeros((batch_size, sequence_length, n_states))

    # Option A: call .sample for each sequence independently
    for i in range(batch_size):
        # sample returns (observations, latent_states)
        X_i, Z_i = model.sample(n_samples=sequence_length, random_state=random_state)
        # X_i is (sequence_length, 1) of ints in [0,19]; squeeze to 1D
        X_i = X_i.squeeze().astype(int)

        # compute posterior probabilities via forward–backward
        post_i = model.predict_proba(X_i.reshape(-1, 1))

        X_batch[i] = X_i
        P_batch[i] = post_i

    return X_batch, P_batch

# Example usage
if __name__ == "__main__":
    seq_len   = 200
    batch_sz  = 4
    X, P = simulate_batch(seq_len, batch_sz, random_state=42)

    print("X.shape:", X.shape)       # (4, 200)
    print("P.shape:", P.shape)       # (4, 200, 2)
    print("First sequence obs:", X[0, :10])
    print("First sequence posteriors:", P[0, :10, :])

X.shape: (4, 200)
P.shape: (4, 200, 2)
First sequence obs: [19 11  2 16 14 19  3  3 10  6]
First sequence posteriors: [[1.         0.        ]
 [0.98569035 0.01430965]
 [0.98170387 0.01829613]
 [0.97700983 0.02299017]
 [0.97265727 0.02734273]
 [0.94451754 0.05548246]
 [0.91752723 0.08247277]
 [0.88835249 0.11164751]
 [0.85465933 0.14534067]
 [0.849613   0.150387  ]]


In [4]:
import numpy as np
from bayesflow.simulators import LambdaSimulator
from bayesflow.approximators import ContinuousApproximator
from bayesflow.adapters import Adapter
from bayesflow.datasets import OnlineDataset

# 4) Define a sample function for BayesFlow with fixed sequence length
SEQUENCE_LENGTH = 200  # Define this as a constant

def bayesflow_sample_fn(batch_shape, random_state=None):
    """
    batch_shape : tuple[int,...], e.g. (batch_size,)
    random_state : optional RNG seed
    
    Returns a dict mapping variable names → numpy arrays:
      - "observations": int arrays of shape (batch_size, sequence_length)
      - "states":       int arrays of shape (batch_size, sequence_length)
    """
    # Determine batch size
    batch_size = int(np.prod(batch_shape))
    
    # Simulate with fixed sequence length
    X_batch, P_batch = simulate_batch(
        sequence_length=SEQUENCE_LENGTH,
        batch_size=batch_size,
        random_state=random_state
    )
    
    # CRITICAL FIX: Ensure observations are integers in the correct range
    # Convert to int32 and ensure values are in [0, 19] range
    X_batch = X_batch.astype(np.int32)
    P_batch = P_batch.astype(np.float32)
    
    # Debug: Check the data types and ranges
    # print(f"X_batch dtype: {X_batch.dtype}")
    # print(f"X_batch shape: {X_batch.shape}")
    # print(f"X_batch min: {X_batch.min()}, max: {X_batch.max()}")
    # print(f"P_batch dtype: {P_batch.dtype}")
    # print(f"P_batch shape: {P_batch.shape}")
    
    # Ensure observations are in valid range [0, 19]
    if X_batch.min() < 0 or X_batch.max() >= 20:
        raise ValueError(f"Observations must be in range [0, 19], got [{X_batch.min()}, {X_batch.max()}]")
    
    return {
        "observations": X_batch,   # integer codes
        "posteriors":   P_batch    # continuous [0,1] probabilities
    }

# 5) Wrap in LambdaSimulator
simulator = LambdaSimulator(
    sample_fn=bayesflow_sample_fn,
    is_batched=True        # our sample_fn handles the full batch in one call
)

In [5]:
# 1) Build the base adapter: map your raw simulator output
#    - "posteriors" → "inference_variables" 
#    - "observations" → "summary_variables"
adapter = ContinuousApproximator.build_adapter(
    inference_variables=["posteriors"],
    summary_variables=["observations"]
)

# 2) CRITICAL FIX: Convert summary_variables back to integers and squeeze extra dimension
# The base adapter converts int32 to float32 and adds dimension, we need to reverse this
from bayesflow.adapters.transforms import Transform
from bayesflow.adapters.transforms.map_transform import MapTransform
import numpy as np

class IntegerSqueezeTransform(Transform):
    """Custom transform to convert float32 back to int32 and squeeze extra dimension"""
    
    def forward(self, data, **kwargs):
        # Convert to int32 and squeeze the last dimension if it's size 1
        if data.ndim == 3 and data.shape[-1] == 1:
            data = data.squeeze(-1)  # Remove last dimension
        return data.astype(np.int32)
    
    def inverse(self, data, **kwargs):
        # Add back the dimension and convert to float32
        return data.astype(np.float32)[..., np.newaxis]
    
class ReshapeTransform(Transform):
    """Custom transform to reshape posteriors from (batch, seq, states) to (batch, seq*states)"""
    
    def forward(self, data, **kwargs):
        batch_size = data.shape[0]
        return data.reshape(batch_size, -1)  # Flatten last two dimensions
    
    def inverse(self, data, **kwargs):
        batch_size = data.shape[0]
        return data.reshape(batch_size, SEQUENCE_LENGTH, n_states)

# Create a MapTransform to apply only to summary_variables
integer_squeeze_map = MapTransform(
    transform_map={"summary_variables": IntegerSqueezeTransform()}
)

reshape_map = MapTransform(
    transform_map={"inference_variables": ReshapeTransform()}
)

# Add the transform to the adapter
adapter = adapter.append(integer_squeeze_map)
adapter = adapter.append(reshape_map)

# 3) One-hot encode the summary inputs (now named "summary_variables")
adapter = adapter.one_hot(
    keys="summary_variables",
    num_classes=20
)

# 4) Convert all floats to float32 for TensorFlow
adapter = adapter.convert_dtype(
    from_dtype="float64",
    to_dtype="float32"
)

# 1) Re-initialize the dataset
dataset = OnlineDataset(
    simulator=simulator,          # your LambdaSimulator
    batch_size=64,                # sequences per batch
    num_batches=1000,             # batches per epoch
    adapter=adapter,              # adapter with integer squeeze fix
    stage="training",             # adapter stage
)

# 2) Test the final working version
print("=== Final working version ===")
batch = dataset[0]

# 3) Inspect the final batch
print("Keys in batch:", batch.keys())  
print("summary_variables shape:   ", batch["summary_variables"].shape)    # → (64, 200, 20)
print("inference_variables shape:", batch["inference_variables"].shape)  # → (64, 200, 2)
print("summary_variables dtype:   ", batch["summary_variables"].dtype)    # → float32
print("inference_variables dtype: ", batch["inference_variables"].dtype)  # → float32

# 4) Verify the one-hot encoding worked correctly
print("\nVerifying one-hot encoding:")
print("summary_variables sum along last axis (should be all 1s):", 
      batch["summary_variables"].sum(axis=-1)[0, :10])  # First 10 timesteps of first sequence
print("inference_variables range:", 
      batch["inference_variables"].min(), "to", batch["inference_variables"].max())

print("\n✅ Success! Your BayesFlow adapter is working correctly.")

=== Final working version ===
Keys in batch: dict_keys(['inference_variables', 'summary_variables'])
summary_variables shape:    (64, 200, 20)
inference_variables shape: (64, 400)
summary_variables dtype:    float32
inference_variables dtype:  float32

Verifying one-hot encoding:
summary_variables sum along last axis (should be all 1s): [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
inference_variables range: 0.0 to 1.0

✅ Success! Your BayesFlow adapter is working correctly.


In [6]:
# Step 7: Define the Summary Network (TimeSeriesNetwork)

import tensorflow as tf
from bayesflow.networks import TimeSeriesNetwork

# 7.1 Instantiate a TimeSeriesNetwork to embed (batch, 200, 20) → (batch, 64)
summary_network = TimeSeriesNetwork(
    summary_dim=64,               # final embedding size per sequence :contentReference[oaicite:0]{index=0}
    filters=[32, 64],             # two Conv1D layers with 32 and 64 filters :contentReference[oaicite:1]{index=1}
    kernel_sizes=[3, 3],          # 3-length kernels for each conv layer :contentReference[oaicite:2]{index=2}
    strides=[1, 1],               # unit strides for convolution :contentReference[oaicite:3]{index=3}
    activation="mish",            # Mish activation in conv layers :contentReference[oaicite:4]{index=4}
    kernel_initializer="glorot_uniform",  # Xavier uniform initialization :contentReference[oaicite:5]{index=5}
    groups=None,                  # no group normalization :contentReference[oaicite:6]{index=6}
    recurrent_type="gru",         # use GRU in the recurrent module :contentReference[oaicite:7]{index=7}
    recurrent_dim=128,            # 128 hidden units in GRU :contentReference[oaicite:8]{index=8}
    bidirectional=True,           # bidirectional recurrence :contentReference[oaicite:9]{index=9}
    dropout=0.05,                 # 5% dropout in the recurrent module :contentReference[oaicite:10]{index=10}
    skip_steps=4                  # skip-connections every 4 timesteps :contentReference[oaicite:11]{index=11}
)

# 7.2 Sanity-check: pass a dummy batch through the summary network
dummy_input = tf.random.uniform((8, 200, 20), dtype=tf.float32)
dummy_summary = summary_network(dummy_input)
print("dummy_summary.shape =", dummy_summary.shape)  # expects (8, 64)

dummy_summary.shape = (8, 64)


In [7]:
# Step 8.1: Define a FlowMatching-based inference network

import tensorflow as tf                                        # standard TensorFlow import :contentReference[oaicite:3]{index=3}
from bayesflow.networks import FlowMatching, MLP               # import FlowMatching :contentReference[oaicite:4]{index=4}

# 1. Instantiate the FlowMatching inference network
inference_network = FlowMatching(
    subnet=MLP(                                                # use a custom MLP subnet :contentReference[oaicite:5]{index=5}
        widths=[128, 128, SEQUENCE_LENGTH * n_states],         # two hidden layers + output layer (200*2=400 dims) :contentReference[oaicite:6]{index=6}
        activation="relu",                                     # ReLU activation instead of default 'mish' :contentReference[oaicite:7]{index=7}
        dropout=0.1                                            # 10% dropout for regularization :contentReference[oaicite:8]{index=8}
    ),
    base_distribution="normal",                                # draw samples from a standard normal :contentReference[oaicite:9]{index=9}
    use_optimal_transport=False,                               # disable optimal transport for speed :contentReference[oaicite:10]{index=10}
    loss_fn="mse",                                             # mean squared error loss :contentReference[oaicite:11]{index=11}
    integrate_kwargs={"method": "euler", "steps": 100},        # override integration settings (default: 100 steps, Euler) :contentReference[oaicite:12]{index=12}
    optimal_transport_kwargs=None                              # leave OT settings as None :contentReference[oaicite:13]{index=13}
)

# 2. Create dummy data for testing
batch_size, summary_dim = 8, 64
positions, state_dim   = 200, 2

dummy_summary = tf.random.uniform(
    (batch_size, summary_dim), dtype=tf.float32
)
dummy_target = tf.random.uniform(
    (batch_size, positions, state_dim), dtype=tf.float32
)

# Flatten the target to shape (batch_size, positions*state_dim)
dummy_xz = tf.reshape(dummy_target, (batch_size, -1))         # → (8, 400)

# 3. Build the network with correct input shapes
inference_network.build(
    xz_shape=dummy_xz.shape,                                  # specify flattened target shape :contentReference[oaicite:14]{index=14}
    conditions_shape=dummy_summary.shape                      # specify summary shape :contentReference[oaicite:15]{index=15}
)

# 4. Compute the velocity field at random times
t = tf.random.uniform((batch_size,), dtype=tf.float32)       # sample t ∼ Uniform(0,1) :contentReference[oaicite:16]{index=16}
velocity = inference_network.velocity(
    xz=dummy_xz,
    time=t,
    conditions=dummy_summary,
    training=False
)
print("Velocity shape:", velocity.shape)                     # Expected: (8, 400)

# 5. Compute training metrics (e.g., loss)
metrics = inference_network.compute_metrics(
    x=dummy_xz,
    conditions=dummy_summary
)
print("Available metrics:", list(metrics.keys()))
print("Loss value:", metrics["loss"])                        # MSE between predicted and actual velocity

Velocity shape: (8, 400)
Available metrics: ['loss']
Loss value: tf.Tensor(4.453058, shape=(), dtype=float32)


In [8]:
# Step 9: Assemble and compile the Amortized Posterior Estimator

import tensorflow as tf
from bayesflow.approximators import ContinuousApproximator

# 9.1 Instantiate the approximator
approximator = ContinuousApproximator(
    adapter=adapter,                                     # data preprocessing pipeline :contentReference[oaicite:5]{index=5}
    inference_network=inference_network,                 # FlowMatching or MLP from Step 8 :contentReference[oaicite:6]{index=6}
    summary_network=summary_network,                     # TimeSeriesNetwork from Step 7 :contentReference[oaicite:7]{index=7}
    standardize="all"                                    # auto‐standardize all inputs :contentReference[oaicite:8]{index=8}
)

# 9.2 Configure optimizer, loss, and metrics
approximator.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3)
)

# 9.3 Train on the simulated dataset from Step 6
print("\n=== Starting training ===")
history = approximator.fit(
    dataset=dataset,
    epochs=2,
    verbose=1
)

print("✅ Training completed successfully!")

# Alternative approach if you need to track specific metrics during training:
# You can access the training history and compute metrics manually
print("\n=== Training metrics ===")
if hasattr(history, 'history'):
    for metric_name, values in history.history.items():
        print(f"{metric_name}: {values[-1]:.4f}")

# After training, use `approximator.infer()` or `approximator.predict()` for new sequences.

INFO:bayesflow:Fitting on dataset instance of OnlineDataset.
INFO:bayesflow:Building on a test batch.



=== Starting training ===
Epoch 1/2


2025-07-09 01:47:32.985580: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 195ms/step - loss: 1.4114
Epoch 2/2
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 206ms/step - loss: 1.1010
✅ Training completed successfully!

=== Training metrics ===
loss: 1.1010


In [12]:
# import numpy as np
# from hmmlearn import hmm

# # 1) Define HMM parameters (fixed)
# n_states       = 2    # 0="other", 1="alpha-helix"
# n_observations = 20   # 20 amino acids

# startprob = np.array([1.0, 0.0])  # always start in "other"
# transmat  = np.array([[0.95, 0.05],  # other→(other,helix)
#                       [0.10, 0.90]]) # helix→(other,helix)
# emissionprob = np.array([
#     # other
#     [0.06, 0.05, 0.05, 0.06, 0.02, 0.05, 0.03, 0.09, 0.03, 0.05,
#      0.08, 0.06, 0.02, 0.04, 0.06, 0.07, 0.06, 0.01, 0.04, 0.07],
#     # alpha-helix
#     [0.12, 0.06, 0.03, 0.05, 0.01, 0.09, 0.05, 0.04, 0.02, 0.07,
#      0.12, 0.06, 0.03, 0.04, 0.02, 0.05, 0.04, 0.01, 0.03, 0.06],
# ])

# # 2) Build a no-EM CategoricalHMM for decoding
# model = hmm.CategoricalHMM(
#     n_components=n_states,
#     algorithm='viterbi',   # use Viterbi for predict()
#     n_iter=0,              # skip EM
#     init_params=''         # keep our fixed parameters
# )
# model.startprob_    = startprob
# model.transmat_     = transmat
# model.emissionprob_ = emissionprob

# # 3) Custom-loop simulator
# def simulate_loop(sequence_length: int, batch_size: int, random_state=None):
#     """
#     Simulate `batch_size` sequences of length `sequence_length` by hand,
#     then decode their most likely state-sequences with Viterbi.
    
#     Returns:
#       X_batch: shape (batch_size, sequence_length), int observations 0..19
#       Z_batch: shape (batch_size, sequence_length), int Viterbi state path 0/1
#     """
#     # Initialize RNG
#     rng = np.random.default_rng(random_state)  # recommended over np.random.*
    
#     X_batch = np.zeros((batch_size, sequence_length), dtype=int)
#     Z_batch = np.zeros((batch_size, sequence_length), dtype=int)
    
#     for i in range(batch_size):
#         # a) Sample latent state path and emissions
#         states = np.empty(sequence_length, dtype=int)
#         obs    = np.empty(sequence_length, dtype=int)
        
#         state = 0  # start in "other"
#         for t in range(sequence_length):
#             # emit an amino acid given current state
#             obs[t] = rng.choice(
#                 n_observations,
#                 p=emissionprob[state]
#             )  # numpy.random.choice :contentReference[oaicite:3]{index=3}
            
#             states[t] = state
#             # transition to next state
#             state = rng.choice(n_states, p=transmat[state])
        
#         # b) Viterbi‐decode the *observations* (not the true states)
#         #    predict() uses Viterbi by default when algorithm='viterbi'
#         decoded = model.predict(obs.reshape(-1, 1))
#         X_batch[i] = obs
#         Z_batch[i] = decoded  # most likely state path :contentReference[oaicite:4]{index=4}
    
#     return X_batch, Z_batch

# # Quick sanity check
# if __name__ == "__main__":
#     Xb, Zb = simulate_loop(sequence_length=100, batch_size=2, random_state=123)
#     print("Xb.shape:", Xb.shape, "Zb.shape:", Zb.shape)
#     print("First 10 obs:", Xb[0, :10])
#     print("First 10 decoded states:", Zb[0, :10])