In [5]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt
from IPython.display import Audio, display
from modules.dataset import *
from modules.seeds import *
from modules.architecture import *
from modules.trainer import *

# Display latent space
import torch
import numpy as np
from IPython.display import Audio, display, HTML

def display_latent_space(model, device, sr, grid_size=5, latent_range=(-1, 1)):
    """Show an HTML table of audio players sampled on a regular 2-D latent grid.

    A ``grid_size`` x ``grid_size`` lattice of 2-D latent vectors is built,
    each vector is decoded with ``model.generate``, and the resulting clips
    are rendered in the notebook as a table whose cells carry the latent
    coordinates and an embedded audio player.

    Args:
        model: Object exposing ``eval()`` and ``generate(latent)``; the value
            returned by ``generate`` must support ``.cpu().numpy()``.
            The model is switched to evaluation mode and left in it.
        device: Torch device the latent tensors are moved to before decoding.
        sr: Sample rate handed to ``IPython.display.Audio`` as ``rate``.
        grid_size: Number of sample points along each latent axis
            (default 5, the previously hard-coded value).
        latent_range: ``(low, high)`` bounds, inclusive, sampled along both
            latent axes (default ``(-1, 1)``, the previously hard-coded range).

    Raises:
        ValueError: If ``grid_size`` is smaller than 1.
    """
    if grid_size < 1:
        raise ValueError(f"grid_size must be at least 1, got {grid_size}")

    low, high = latent_range
    axis_values = np.linspace(low, high, grid_size)
    # Row-major lattice: the first coordinate is constant along a table row,
    # the second coordinate varies across the columns of that row.
    latent_grid = np.array([[x, y] for x in axis_values for y in axis_values])

    generated_sounds = []
    model.eval()  # Evaluation mode for generation
    with torch.no_grad():
        for latent_vector in latent_grid:
            # The latent is passed unbatched, as a 1-D tensor with two entries.
            latent_tensor = torch.tensor(latent_vector, dtype=torch.float32).to(device)
            generated_sound = model.generate(latent_tensor)
            # Move to CPU and flatten so each clip is a 1-D array for Audio.
            generated_sounds.append(generated_sound.cpu().numpy().flatten())

    # Assemble the table from parts and join once; clips are looked up by
    # their flat row-major index, so they need not be stacked into one array.
    html_parts = ["<table>"]
    for row in range(grid_size):
        html_parts.append("<tr>")
        for col in range(grid_size):
            index = row * grid_size + col
            first, second = latent_grid[index]
            player_html = Audio(generated_sounds[index], rate=sr)._repr_html_()
            html_parts.append(
                f"<td>Latent: ({first:.2f}, {second:.2f})<br>{player_html}</td>"
            )
        html_parts.append("</tr>")
    html_parts.append("</table>")

    display(HTML("".join(html_parts)))

In [6]:
# Checkpoint to restore
model_path = 'models/fire/best_model.pth'

# Arguments forwarded to seed_maker / load_model
# NOTE(review): presumably these must match the configuration the checkpoint
# was trained with - confirm against the training run.
frame_size = 17640
N_filter_bank = 24
param_per_env = 512
hidden_size = 1024
deepness = 2
latent_dim = 2

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sr = 44100

# Build the seed and move it onto the chosen device
seed = seed_maker(frame_size, sr, N_filter_bank).to(device)

# Restore the model from the checkpoint
model = load_model(
    model_path,
    frame_size,
    hidden_size,
    deepness,
    latent_dim,
    N_filter_bank,
    param_per_env,
    seed,
    device,
)

# Render the latent-space audio grid
display_latent_space(model, device, sr)

Model loaded from models/fire/best_model.pth


0,1,2,3,4
"Latent: (-1.00, -1.00)  Your browser does not support the audio element.","Latent: (-1.00, -0.50)  Your browser does not support the audio element.","Latent: (-1.00, 0.00)  Your browser does not support the audio element.","Latent: (-1.00, 0.50)  Your browser does not support the audio element.","Latent: (-1.00, 1.00)  Your browser does not support the audio element."
"Latent: (-0.50, -1.00)  Your browser does not support the audio element.","Latent: (-0.50, -0.50)  Your browser does not support the audio element.","Latent: (-0.50, 0.00)  Your browser does not support the audio element.","Latent: (-0.50, 0.50)  Your browser does not support the audio element.","Latent: (-0.50, 1.00)  Your browser does not support the audio element."
"Latent: (0.00, -1.00)  Your browser does not support the audio element.","Latent: (0.00, -0.50)  Your browser does not support the audio element.","Latent: (0.00, 0.00)  Your browser does not support the audio element.","Latent: (0.00, 0.50)  Your browser does not support the audio element.","Latent: (0.00, 1.00)  Your browser does not support the audio element."
"Latent: (0.50, -1.00)  Your browser does not support the audio element.","Latent: (0.50, -0.50)  Your browser does not support the audio element.","Latent: (0.50, 0.00)  Your browser does not support the audio element.","Latent: (0.50, 0.50)  Your browser does not support the audio element.","Latent: (0.50, 1.00)  Your browser does not support the audio element."
"Latent: (1.00, -1.00)  Your browser does not support the audio element.","Latent: (1.00, -0.50)  Your browser does not support the audio element.","Latent: (1.00, 0.00)  Your browser does not support the audio element.","Latent: (1.00, 0.50)  Your browser does not support the audio element.","Latent: (1.00, 1.00)  Your browser does not support the audio element."


In [7]:
# Checkpoint to restore
model_path = 'models/fire_deep/best_model.pth'

# Arguments forwarded to seed_maker / load_model
# NOTE(review): presumably these must match the configuration the checkpoint
# was trained with - confirm against the training run.
frame_size = 17640
N_filter_bank = 24
param_per_env = 512
hidden_size = 2048
deepness = 3
latent_dim = 2

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sr = 44100

# Build the seed and move it onto the chosen device
seed = seed_maker(frame_size, sr, N_filter_bank).to(device)

# Restore the model from the checkpoint
model = load_model(
    model_path,
    frame_size,
    hidden_size,
    deepness,
    latent_dim,
    N_filter_bank,
    param_per_env,
    seed,
    device,
)

# Render the latent-space audio grid
display_latent_space(model, device, sr)

Model loaded from models/fire_deep/best_model.pth


0,1,2,3,4
"Latent: (-1.00, -1.00)  Your browser does not support the audio element.","Latent: (-1.00, -0.50)  Your browser does not support the audio element.","Latent: (-1.00, 0.00)  Your browser does not support the audio element.","Latent: (-1.00, 0.50)  Your browser does not support the audio element.","Latent: (-1.00, 1.00)  Your browser does not support the audio element."
"Latent: (-0.50, -1.00)  Your browser does not support the audio element.","Latent: (-0.50, -0.50)  Your browser does not support the audio element.","Latent: (-0.50, 0.00)  Your browser does not support the audio element.","Latent: (-0.50, 0.50)  Your browser does not support the audio element.","Latent: (-0.50, 1.00)  Your browser does not support the audio element."
"Latent: (0.00, -1.00)  Your browser does not support the audio element.","Latent: (0.00, -0.50)  Your browser does not support the audio element.","Latent: (0.00, 0.00)  Your browser does not support the audio element.","Latent: (0.00, 0.50)  Your browser does not support the audio element.","Latent: (0.00, 1.00)  Your browser does not support the audio element."
"Latent: (0.50, -1.00)  Your browser does not support the audio element.","Latent: (0.50, -0.50)  Your browser does not support the audio element.","Latent: (0.50, 0.00)  Your browser does not support the audio element.","Latent: (0.50, 0.50)  Your browser does not support the audio element.","Latent: (0.50, 1.00)  Your browser does not support the audio element."
"Latent: (1.00, -1.00)  Your browser does not support the audio element.","Latent: (1.00, -0.50)  Your browser does not support the audio element.","Latent: (1.00, 0.00)  Your browser does not support the audio element.","Latent: (1.00, 0.50)  Your browser does not support the audio element.","Latent: (1.00, 1.00)  Your browser does not support the audio element."


In [8]:
# Checkpoint to restore
model_path = 'models/water/best_model.pth'

# Arguments forwarded to seed_maker / load_model
# NOTE(review): presumably these must match the configuration the checkpoint
# was trained with - confirm against the training run.
frame_size = 17640
N_filter_bank = 24
param_per_env = 256
hidden_size = 512
deepness = 2
latent_dim = 2

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sr = 44100

# Build the seed and move it onto the chosen device
seed = seed_maker(frame_size, sr, N_filter_bank).to(device)

# Restore the model from the checkpoint
model = load_model(
    model_path,
    frame_size,
    hidden_size,
    deepness,
    latent_dim,
    N_filter_bank,
    param_per_env,
    seed,
    device,
)

# Render the latent-space audio grid
display_latent_space(model, device, sr)

Model loaded from models/water/best_model.pth


0,1,2,3,4
"Latent: (-1.00, -1.00)  Your browser does not support the audio element.","Latent: (-1.00, -0.50)  Your browser does not support the audio element.","Latent: (-1.00, 0.00)  Your browser does not support the audio element.","Latent: (-1.00, 0.50)  Your browser does not support the audio element.","Latent: (-1.00, 1.00)  Your browser does not support the audio element."
"Latent: (-0.50, -1.00)  Your browser does not support the audio element.","Latent: (-0.50, -0.50)  Your browser does not support the audio element.","Latent: (-0.50, 0.00)  Your browser does not support the audio element.","Latent: (-0.50, 0.50)  Your browser does not support the audio element.","Latent: (-0.50, 1.00)  Your browser does not support the audio element."
"Latent: (0.00, -1.00)  Your browser does not support the audio element.","Latent: (0.00, -0.50)  Your browser does not support the audio element.","Latent: (0.00, 0.00)  Your browser does not support the audio element.","Latent: (0.00, 0.50)  Your browser does not support the audio element.","Latent: (0.00, 1.00)  Your browser does not support the audio element."
"Latent: (0.50, -1.00)  Your browser does not support the audio element.","Latent: (0.50, -0.50)  Your browser does not support the audio element.","Latent: (0.50, 0.00)  Your browser does not support the audio element.","Latent: (0.50, 0.50)  Your browser does not support the audio element.","Latent: (0.50, 1.00)  Your browser does not support the audio element."
"Latent: (1.00, -1.00)  Your browser does not support the audio element.","Latent: (1.00, -0.50)  Your browser does not support the audio element.","Latent: (1.00, 0.00)  Your browser does not support the audio element.","Latent: (1.00, 0.50)  Your browser does not support the audio element.","Latent: (1.00, 1.00)  Your browser does not support the audio element."


In [9]:
# Checkpoint to restore
model_path = 'models/water_deep/best_model.pth'

# Arguments forwarded to seed_maker / load_model
# NOTE(review): presumably these must match the configuration the checkpoint
# was trained with - confirm against the training run.
frame_size = 17640
N_filter_bank = 24
param_per_env = 256
hidden_size = 2048
deepness = 2
latent_dim = 2

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sr = 44100

# Build the seed and move it onto the chosen device
seed = seed_maker(frame_size, sr, N_filter_bank).to(device)

# Restore the model from the checkpoint
model = load_model(
    model_path,
    frame_size,
    hidden_size,
    deepness,
    latent_dim,
    N_filter_bank,
    param_per_env,
    seed,
    device,
)

# Render the latent-space audio grid
display_latent_space(model, device, sr)

Model loaded from models/water_deep/best_model.pth


0,1,2,3,4
"Latent: (-1.00, -1.00)  Your browser does not support the audio element.","Latent: (-1.00, -0.50)  Your browser does not support the audio element.","Latent: (-1.00, 0.00)  Your browser does not support the audio element.","Latent: (-1.00, 0.50)  Your browser does not support the audio element.","Latent: (-1.00, 1.00)  Your browser does not support the audio element."
"Latent: (-0.50, -1.00)  Your browser does not support the audio element.","Latent: (-0.50, -0.50)  Your browser does not support the audio element.","Latent: (-0.50, 0.00)  Your browser does not support the audio element.","Latent: (-0.50, 0.50)  Your browser does not support the audio element.","Latent: (-0.50, 1.00)  Your browser does not support the audio element."
"Latent: (0.00, -1.00)  Your browser does not support the audio element.","Latent: (0.00, -0.50)  Your browser does not support the audio element.","Latent: (0.00, 0.00)  Your browser does not support the audio element.","Latent: (0.00, 0.50)  Your browser does not support the audio element.","Latent: (0.00, 1.00)  Your browser does not support the audio element."
"Latent: (0.50, -1.00)  Your browser does not support the audio element.","Latent: (0.50, -0.50)  Your browser does not support the audio element.","Latent: (0.50, 0.00)  Your browser does not support the audio element.","Latent: (0.50, 0.50)  Your browser does not support the audio element.","Latent: (0.50, 1.00)  Your browser does not support the audio element."
"Latent: (1.00, -1.00)  Your browser does not support the audio element.","Latent: (1.00, -0.50)  Your browser does not support the audio element.","Latent: (1.00, 0.00)  Your browser does not support the audio element.","Latent: (1.00, 0.50)  Your browser does not support the audio element.","Latent: (1.00, 1.00)  Your browser does not support the audio element."
