In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

In [None]:
def binary_combinations(steps):
    """Enumerate every binary pattern of length ``steps``.

    Pattern number ``event`` (0 .. 2**steps - 1) is the binary expansion of
    ``event`` written least-significant bit first: position ``i`` of the
    tuple holds bit ``i`` of ``event``.

    Args:
        steps: Number of positions in each pattern.

    Returns:
        A list of ``2**steps`` tuples of ints (each 0 or 1), ordered by
        ``event``.
    """
    return [
        # Shift-and-mask extracts bit `position` of the non-negative `event`.
        tuple((event >> position) & 1 for position in range(steps))
        for event in range(2 ** steps)
    ]

In [None]:
# Step 1: Generate the data
# Every possible 0/1 pattern of length num_steps is enumerated and then split
# once into a training set (70%) and a held-out set (30%).
num_steps = 16
split_seed = 42  # fixed so the train/test partition is identical on every run
all_patterns = np.array(binary_combinations(num_steps), dtype=np.float32)  # (65536, 16)
X_train, X_test = train_test_split(
    all_patterns, train_size=0.7, shuffle=True, random_state=split_seed
)
# The arrays are already float32; torch.tensor copies them into new tensors.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

In [None]:
# Step 2: Define the autoencoder model
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one hidden layer on each side.

    The encoder maps ``input_dim`` features through a 64-unit ReLU layer to a
    ``latent_dim``-dimensional code (no activation on the code itself). The
    decoder mirrors that shape and ends in a sigmoid, so reconstructed values
    lie between 0 and 1.

    Args:
        input_dim: Number of features in each input vector.
        latent_dim: Size of the bottleneck code.
    """

    def __init__(self, input_dim, latent_dim):
        super().__init__()
        hidden_dim = 64

        # The encoder is built before the decoder so that parameter
        # initialisation order and state_dict keys stay the same.
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(reconstructed, latent)`` tuple: the decoder output and the
            encoder output it was computed from.
        """
        code = self.encoder(x)
        return self.decoder(code), code

In [None]:
# Step 3: Train the autoencoder model
# Seed torch before the model is built so weight initialisation (and therefore
# the whole training run) is repeatable across kernel restarts.
torch.manual_seed(0)

latent_dim = 5
input_dim = num_steps
autoencoder = Autoencoder(input_dim, latent_dim)

# Inputs are 0/1 and the decoder ends in a sigmoid, so element-wise binary
# cross-entropy is used as the reconstruction loss.
criterion = nn.BCELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

# Training parameters
n_epochs = 200
batch_size = 32

# Best model tracking (selected by accuracy on X_test)
best_test_acc = -np.inf
best_weights = None

# Step 4: Training loop
# Per-epoch histories, one entry appended per epoch.
train_loss_hist = []
test_loss_hist = []
train_acc_hist = []
test_acc_hist = []

for epoch in range(n_epochs):
    autoencoder.train()
    epoch_loss = []
    epoch_acc = []

    # Batch training: contiguous slices of X_train in a fixed order.
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i + batch_size]

        optimizer.zero_grad()
        reconstructed, _ = autoencoder(X_batch)
        loss = criterion(reconstructed, X_batch)
        loss.backward()
        optimizer.step()

        # Calculate batch accuracy: fraction of individual bits reproduced
        # after thresholding the sigmoid output at 0.5.
        predicted = (reconstructed > 0.5).float()
        acc = (predicted == X_batch).float().mean().item()

        epoch_loss.append(loss.item())
        epoch_acc.append(acc)

    # Mean of per-batch means; a shorter trailing batch is weighted the same
    # as a full one.
    train_loss_hist.append(np.mean(epoch_loss))
    train_acc_hist.append(np.mean(epoch_acc))

    # Validation on the full held-out set in a single forward pass.
    autoencoder.eval()
    with torch.no_grad():
        reconstructed, _ = autoencoder(X_test)
        val_loss = criterion(reconstructed, X_test)
        test_loss_hist.append(val_loss.item())

        # Calculate test accuracy
        predicted = (reconstructed > 0.5).float()
        test_acc = (predicted == X_test).float().mean().item()
        test_acc_hist.append(test_acc)

        # Save best model
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            # state_dict() hands back references to the live parameter
            # tensors, which later optimizer steps overwrite in place. Clone
            # each tensor so the snapshot keeps this epoch's weights.
            best_weights = {
                name: tensor.detach().clone()
                for name, tensor in autoencoder.state_dict().items()
            }

    if epoch % 10 == 0 or epoch == n_epochs - 1:
        print(
            f"Epoch {epoch}/{n_epochs}: "
            f"Train Loss: {np.mean(epoch_loss):.4f}, Train Acc: {np.mean(epoch_acc) * 100:.2f}%, "
            f"Test Loss: {val_loss.item():.4f}, Test Acc: {test_acc * 100:.2f}%"
        )

# Best accuracy seen on X_test over all epochs (not the last epoch's value).
print(f"\nBest Test Accuracy: {best_test_acc * 100:.2f}%")

# Save best model
# NOTE: the file is called 'vae.pth' although the model is a plain
# autoencoder; the name is kept so existing loaders keep working.
torch.save(best_weights, "vae.pth")
print("Best model saved as 'vae.pth'.")

# Plot Loss and Accuracy
plt.figure(figsize=(10, 6))
plt.plot(train_loss_hist, label="Train Loss")
plt.plot(test_loss_hist, label="Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.title("Autoencoder Training Loss")
plt.show()
plt.figure(figsize=(10, 6))
plt.plot(train_acc_hist, label="Train Accuracy")
plt.plot(test_acc_hist, label="Test Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Autoencoder Training Accuracy")
plt.show()

In [None]:
# Step 5: Latent space visualization
# Restore the best checkpoint and encode the held-out patterns.
autoencoder.load_state_dict(best_weights)
autoencoder.eval()
with torch.no_grad():
    _, latent_space = autoencoder(X_test)
latent_space_np = latent_space.numpy()

if latent_space_np.shape[1] == 2:
    # A 2-D code can be plotted directly; the axes are the latent dimensions.
    latent_space_2d = latent_space_np
    axis_name = "Latent Dimension"
else:
    # Project the code to 2-D with t-SNE. The seed is fixed so the embedding
    # (and therefore the figure) is the same on every run.
    latent_space_2d = TSNE(n_components=2, random_state=0).fit_transform(latent_space_np)
    axis_name = "t-SNE Component"

plt.scatter(latent_space_2d[:, 0], latent_space_2d[:, 1], alpha=0.5, s=1)
plt.title('Latent Space Visualization')
# Label the axes with what is actually plotted (t-SNE components are not
# latent dimensions).
plt.xlabel(f'{axis_name} 1')
plt.ylabel(f'{axis_name} 2')
plt.show()

In [None]:
# Generate a new random pattern from latent space
# NOTE(review): this is a plain autoencoder, so nothing constrains its codes
# to follow N(0, I); decoding a standard-normal draw is a heuristic — confirm
# this is the intended sampling scheme.
with torch.no_grad():  # inference only: no autograd graph is needed
    random_latent = torch.randn(1, latent_dim).to("cpu")
    generated_pattern = autoencoder.decoder(random_latent)
generated_pattern = (generated_pattern > 0.5).int()  # Threshold for binary output

print("Generated Rhythm Pattern:", generated_pattern.numpy()[0])