# Encoder Deep Dive

This notebook explores the CNN and DNN encoders in detail.

## Topics
1. Generating signature matrices
2. CNN vs DNN training (architecture comparison and convergence)
3. Reconstruction quality
4. Embedding dimension comparison (hyperparameter tuning)
5. Saving and loading models

In [None]:
import os

import networkx as nx
import numpy as np
import matplotlib.pyplot as plt

from lsme import LSME, CNNEncoder, DNNEncoder

## 1. Generate Signature Matrices

In [None]:
# Build the karate-club example graph and derive one signature matrix per node
G = nx.karate_club_graph()

lsme = LSME(
    method='stochastic',
    max_hops=2,
    n_samples=200,
    verbose=False,
    random_state=42,  # fixed seed so the stochastic method is repeatable
)
result = lsme.fit_transform(G)

# Both entries are needed by every encoder call further down
sig_matrices, layer_info = result['signature_matrices'], result['layer_info']

print(f"Generated {len(sig_matrices)} signature matrices")
# Only the first dimension of each matrix is inspected here
sizes = [matrix.shape[0] for matrix in sig_matrices.values()]
print(f"Matrix sizes: min={min(sizes)}, max={max(sizes)}, unique={len(set(sizes))}")

## 2. CNN vs DNN Training

In [None]:
# Fit the CNN encoder on the signature matrices and embed every node in one call
cnn_encoder = CNNEncoder(
    embedding_dim=32, hidden_channels=[32, 64, 128],
    num_epochs=100, learning_rate=1e-3,
    random_state=42, verbose=True,
)
cnn_embeddings = cnn_encoder.fit_transform(sig_matrices, layer_info)

In [None]:
# Fit the DNN encoder with the same embedding size, epochs, learning rate and
# seed as the CNN so the two are compared on equal settings
dnn_encoder = DNNEncoder(
    embedding_dim=32, hidden_dims=[512, 256, 128],
    num_epochs=100, learning_rate=1e-3,
    random_state=42, verbose=True,
)
dnn_embeddings = dnn_encoder.fit_transform(sig_matrices, layer_info)

## 3. Reconstruction Quality

In [None]:
# Per-node reconstruction errors for each encoder (dicts; indexed by node below)
cnn_errors = cnn_encoder.reconstruction_error(sig_matrices, layer_info)
dnn_errors = dnn_encoder.reconstruction_error(sig_matrices, layer_info)

# Average over all nodes to get one headline number per encoder
cnn_mean_error = np.mean(list(cnn_errors.values()))
dnn_mean_error = np.mean(list(dnn_errors.values()))

print(f"CNN avg error: {cnn_mean_error:.6f}")
print(f"DNN avg error: {dnn_mean_error:.6f}")

In [None]:
# Show one node's original signature matrix next to both decoded reconstructions
cnn_recon = cnn_encoder.decode(cnn_embeddings)
dnn_recon = dnn_encoder.decode(dnn_embeddings)

node = 0  # Node to visualize
original = sig_matrices[node]

# Reconstructions are cropped to the original's size before plotting
orig_size = original.shape[0]
crop = (slice(None, orig_size), slice(None, orig_size))

# (title, matrix) for each panel, left to right
panels = [
    ('Original', original),
    (f'CNN Reconstruction\nMSE: {cnn_errors[node]:.6f}', cnn_recon[node][crop]),
    (f'DNN Reconstruction\nMSE: {dnn_errors[node]:.6f}', dnn_recon[node][crop]),
]

fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for ax, (title, matrix) in zip(axes, panels):
    # Shared colour limits keep the three panels visually comparable
    image = ax.imshow(matrix, cmap='Blues', vmin=0, vmax=1)
    ax.set_title(title)
    fig.colorbar(image, ax=ax)

fig.suptitle(f'Node {node} Reconstruction Comparison')
fig.tight_layout()
plt.show()

## 4. Embedding Dimension Comparison

In [None]:
# Sweep the embedding size and record each encoder's mean reconstruction error
dims = [8, 16, 32, 64, 128]
cnn_results = []
dnn_results = []

for dim in dims:
    # CNN first, then DNN, each appended to its own results list
    for encoder_cls, results in ((CNNEncoder, cnn_results), (DNNEncoder, dnn_results)):
        enc = encoder_cls(embedding_dim=dim, num_epochs=50, verbose=False, random_state=42)
        enc.fit(sig_matrices, layer_info)
        # Error is measured on the same matrices the encoder was fitted on
        errors = enc.reconstruction_error(sig_matrices, layer_info)
        results.append(np.mean(list(errors.values())))

    print(f"dim={dim:3d}: CNN={cnn_results[-1]:.6f}, DNN={dnn_results[-1]:.6f}")

In [None]:
# Plot mean reconstruction error against embedding size for both encoders
dim_fig, dim_ax = plt.subplots(figsize=(10, 6))
dim_ax.plot(dims, cnn_results, 'o-', label='CNN', linewidth=2)
dim_ax.plot(dims, dnn_results, 's-', label='DNN', linewidth=2)
dim_ax.set(
    xlabel='Embedding Dimension',
    ylabel='Average Reconstruction MSE',
    title='Reconstruction Error vs Embedding Dimension',
)
dim_ax.legend()
dim_ax.grid(True, alpha=0.3)
plt.show()

## 5. Save and Load Models

In [None]:
# Save the trained CNN encoder, reload it, and check the round trip
cnn_encoder.save('cnn_encoder.pt')
print("Saved cnn_encoder.pt")

# Load and verify
loaded_encoder = CNNEncoder.load('cnn_encoder.pt')
loaded_embeddings = loaded_encoder.encode(sig_matrices, layer_info)

# Verify embeddings match.
# - The loop variable is not called `node`, so the notebook-level `node`
#   picked in the visualization cell is not silently overwritten.
# - assert_allclose uses the same tolerances as np.allclose's defaults
#   (rtol=1e-5, atol=1e-8, NaNs unequal) but reports the size of the
#   mismatch on failure and rejects differing shapes instead of broadcasting.
for node_id in sig_matrices:
    np.testing.assert_allclose(
        cnn_embeddings[node_id],
        loaded_embeddings[node_id],
        rtol=1e-5,
        atol=1e-8,
        equal_nan=False,
        err_msg=f"Mismatch for node {node_id}",
    )

print("Loaded encoder produces identical embeddings!")

In [None]:
# Clean up: delete the checkpoint file written by the save/load demo.
# A missing file is tolerated so this cell can be re-run, or run after a
# failed save, without raising FileNotFoundError.
try:
    os.remove('cnn_encoder.pt')
except FileNotFoundError:
    pass

## Summary

- **CNN**: Better for larger matrices, captures spatial patterns
- **DNN**: Simpler, faster training, good for smaller matrices
- Higher embedding dimensions typically reduce reconstruction error but may overfit (the error above is measured on the same matrices the encoders were trained on)
- Models can be saved and loaded for reuse