In [None]:
# Import external tools:
import time 
import random
from matplotlib import pyplot as plt
import librosa
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torch.utils.data.sampler import SubsetRandomSampler
from torchsummary import summary
import torchaudio
import soundfile as sf
from scipy import signal
import getpass
import pandas as pd
import numpy as np
import sys
import os
import importlib
from scipy.io import wavfile
from IPython.display import Audio
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# This notebook is run on two different machines, so the project and data
# locations are selected from the name of the logged-in user.
current_user = getpass.getuser()
if current_user == "joanna.luberadzka":
    projectdir = "/Users/joanna.luberadzka/Documents/VAE-IR/"
    datadir = "/Users/joanna.luberadzka/Documents/Data/IR_Arni_upload_numClosed_0-5/"
elif current_user == "ubuntu":
    projectdir = "/home/ubuntu/joanna/VAE-IR/"
    datadir = "/home/ubuntu/Data/IR_Arni_upload_numClosed_0-5/"
else:
    # Fail here with an explicit message; otherwise the first use of
    # `projectdir` below would raise an unexplained NameError.
    raise RuntimeError(
        f"No project/data paths configured for user '{current_user}'. "
        "Add a branch for this machine in the path configuration cell."
    )

# Make the project's own modules (located in src/) importable.
sys.path.insert(0, projectdir + 'src/')

# Import and automatically reload my own modules:
import models; importlib.reload(models)
import train; importlib.reload(train)
import datasetprep as dsprep; importlib.reload(dsprep)
import helpers; importlib.reload(helpers)

In [None]:
# Load data.
# The CSV info file sits directly inside the project directory on every
# machine, so the path is built from `projectdir` without per-user branching
# (both former branches assigned exactly this value).
INFO_FILE = projectdir + "irstats_ARNIandBUT_datura.csv"

# Sampling rate handed to the dataset; kept as a float, as before.
SAMPLING_RATE = 8e3
# Instantiate the data set; preproc="powspec" selects the "powspec"
# preprocessing implemented in datasetprep.DatasetRirs.
dataset = dsprep.DatasetRirs(INFO_FILE, SAMPLING_RATE, preproc="powspec")


In [None]:

# Restore the trained autoencoder so it can be used for inference below.

# Build models.AutoencoderConv with a 24-dimensional latent code on the CPU.
# NOTE(review): this cell used to be headed "waveform-to-waveform variational
# autoencoder with linear layers only", which does not match the class name or
# the "powspec" preprocessing of the dataset - confirm which model is meant.
model = models.AutoencoderConv(z_len=24)
model = model.to("cpu")

# Load the saved weights; map_location="cpu" remaps the stored tensors to the
# CPU when the checkpoint is read.
model.load_state_dict(
    torch.load(
        projectdir + "models/trained_model_08-02-2023--12-37.pth",
        map_location="cpu",
    )
)

# Switch the model to evaluation mode.
model.eval()


In [None]:

# Encoding: using the trained autoencoder model, generate a
# lower-dimensional embedding for each impulse response.

embeddings_mu = []      # list for storing ir embeddings
embeddings_rt = []      # list for storing rt values
embeddings_drr = []     # list for storing drr values
embeddings_isarni = []  # list for storing bool indicating which database
embeddings_edt = []     # list for storing edt values
embeddings_cte = []     # list for storing cte values

# Take up to 100 random irs from the data set. random.sample raises
# ValueError when asked for more items than the population holds, so the
# sample size is capped at the data set length.
n_irs = min(100, len(dataset))
ir_rand_indices = random.sample(range(len(dataset)), n_irs)

# Inference only: disable autograd so no computation graph is built while
# the encoder runs.
with torch.no_grad():
    for i in ir_rand_indices:
        # get the impulse response with this index together with its labels
        ir, labels = dataset[i]
        # run the encoder; its single output is used as the embedding
        mu = model.encoder(ir)
        emb = mu.squeeze()
        # convert to numpy array and append to the list of embeddings
        embeddings_mu.append(emb.detach().cpu().numpy())
        embeddings_rt.append(labels["rt"])
        embeddings_drr.append(labels["drr"])
        embeddings_edt.append(labels["edt"])
        embeddings_cte.append(labels["cte"])
        embeddings_isarni.append(labels["isarni"])

# convert from list of arrays to one array
embeddings_mu = np.array(embeddings_mu)

In [None]:

# Visualization: to visualize the encodings, the 24-dimensional embeddings
# have to be reduced to 2 dimensions. This is done with two methods:
# PCA (linear) and TSNE (non-linear).
embeddings_pca = PCA(n_components=2).fit_transform(embeddings_mu)
embeddings_tsne = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=30).fit_transform(embeddings_mu)

# (label name, per-IR label values) pairs used to colour the scatter points.
color_codings = [
    ("rt", embeddings_rt),
    ("drr", embeddings_drr),
    ("edt", embeddings_edt),
    ("cte", embeddings_cte),
    ("isarni", embeddings_isarni),
]

# PCA projection, coloured by the first two labels (rt and drr). Each panel
# is titled so the two colourings can be told apart.
plt.figure()
for panel, (label_name, label_values) in enumerate(color_codings[:2], start=1):
    plt.subplot(1, 2, panel)
    plt.scatter(embeddings_pca[:, 0], embeddings_pca[:, 1], c=label_values)
    plt.title(f'PCA, color: {label_name}')

# TSNE projection, one panel per label on a 2x3 grid (the sixth slot stays empty).
plt.figure(figsize=(10, 6))
for panel, (label_name, label_values) in enumerate(color_codings, start=1):
    plt.subplot(2, 3, panel)
    plt.scatter(embeddings_tsne[:, 0], embeddings_tsne[:, 1], c=label_values)
    plt.title(f'color: {label_name}')





In [None]:
# Reconstruction: choose a random sample from the dataset, pass it through the
# full model and check how good the reconstruction is.

rand_ir_idx = np.random.randint(len(dataset))
# get the impulse response with the random index together with its labels
IR_orig, labels = dataset[rand_ir_idx]
# Run the whole model (not just the encoder); it returns the reconstruction
# and a second output that is stored as `mu`. Autograd is disabled because
# this is inference only.
with torch.no_grad():
    IR_recon, mu = model(IR_orig)
# Drop the size-1 dimension at index 1 (if present) before plotting.
IR_recon = IR_recon.detach().squeeze(1)

# plot the original next to the model reconstruction
helpers.plot_spectrogram(IR_orig, title="original", ylabel="freq_bin")
helpers.plot_spectrogram(IR_recon, title="reconstruction", ylabel="freq_bin")




In [None]:
# Time domain reconstruction: since the model operates on a magnitude
# spectrogram, the reconstruction has to be transformed back to the time domain.

IrData = pd.read_csv(INFO_FILE, delimiter=',')
# Recompute the spectrogram of the original file to obtain its waveform and the
# min/max values needed to undo the scaling of the reconstruction.
# The sample rate comes from SAMPLING_RATE (the value the dataset was built
# with) instead of a repeated literal, so the two cannot drift apart.
# NOTE(review): assumes row `rand_ir_idx` of the CSV is the same item as
# dataset[rand_ir_idx] - confirm against DatasetRirs.
ir_orig_plot, S, minmax = helpers.wav2powspec(
    IrData["filepath"][int(rand_ir_idx)],
    n_fft=1024,
    hop_length=128,
    win_length=256,
    sample_rate=SAMPLING_RATE,
    pad_dur=3,
)
ir_recons_plot = helpers.powspec2wave(IR_recon, orig_min=minmax["min"], orig_max=minmax["max"])

# Original and reconstructed impulse responses side by side.
plt.figure(figsize=(10, 2))
plt.subplot(1, 2, 1)
plt.plot(ir_orig_plot)
plt.title(f'Original RIR sample {rand_ir_idx}')
plt.subplot(1, 2, 2)
plt.plot(ir_recons_plot)
plt.title(f'Reconstructed RIR sample {rand_ir_idx}')
plt.show()

In [None]:
# TODO: add audio playback of the original and reconstructed impulse responses (e.g. with IPython.display.Audio)