<a href="https://colab.research.google.com/github/anupampani123/Soundeffectgenerator/blob/main/IRgenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import numpy as np
import librosa as lr
import matplotlib.pyplot as plt
from glob import glob
import soundfile as sf
import os
import sys
import csv
import librosa.display
import librosa 
import random
import tensorflow 
import cv2
import os
import argparse
import numpy as np
import soundfile as sf
from keras.models import load_model

In [None]:
def fix_specgram_shape(spec, shape):
    """Pad or crop a 2D spectrogram so it has exactly the requested shape.

    Each axis is handled independently: an axis that is shorter than
    requested is zero-padded at its end, an axis that is longer is truncated.

    Args:
        spec: 2D spectrogram [freqs, time].
        shape: 2D output spectrogram shape [freqs, time].
    Returns:
        fixed_spec: fixed 2D output spectrogram [freqs, time]. Always a new
            array of exactly `shape` with the dtype of `spec`.
    """
    spec = np.asarray(spec)

    # Size of the region that exists in both the input and the output.
    n_freqs = min(spec.shape[0], shape[0])
    n_frames = min(spec.shape[1], shape[1])

    # Start from zeros so anything outside the shared region is padding.
    out = np.zeros((shape[0], shape[1]), dtype=spec.dtype)
    out[:n_freqs, :n_frames] = spec[:n_freqs, :n_frames]

    return out

def generate_z(encoder, spec):
    """
    Encode a spectrogram into its latent vector.
    Args:
        encoder (obj): trained Keras encoder network.
        spec (ndarray): spectrogram of shape (freqs, time).
    Returns:
        z (ndarray): latent vector of shape (1, 1, 1, 3)
    """
    # the encoder's input shape dictates the (freqs, time) size to fit to
    target_shape = tuple(encoder.input_shape[axis] for axis in (1, 2))
    fitted = fix_specgram_shape(spec, target_shape)

    # add a leading batch axis and a trailing channel axis
    n_freqs, n_frames = fitted.shape[0], fitted.shape[1]
    batch = np.reshape(fitted, (1, n_freqs, n_frames, 1))

    # the encoder's prediction is the latent vector z
    return encoder.predict(batch)

def generate_specgram(decoder, z):
    """
    Decode a latent vector into a spectrogram.
    Args:
        decoder (obj): trained Keras decoder network.
        z (ndarray): latent vector of shape (1, 1, 1, 3).
    Returns:
        spec (ndarray): spectrogram of shape (freqs, time).
    """
    predicted = decoder.predict(z)

    # keep only axes 1 and 2 of the prediction as the 2D (freqs, time) shape
    n_freqs, n_frames = predicted.shape[1], predicted.shape[2]
    return np.reshape(predicted, (n_freqs, n_frames))

def audio_from_specgram(spec, rate, output):
    """
    Invert a magnitude spectrogram to audio and write it as a WAV file.
    Args:
        spec (ndarray): spectrogram of shape (freqs, time).
        rate (int): sample rate of input audio.
        output (str): path to output file, without the '.wav' extension.
    """
    # ispecgram indexes a third (channel) axis, so append one of length 1
    n_freqs, n_frames = spec.shape[0], spec.shape[1]
    single_channel = np.reshape(spec, (n_freqs, n_frames, 1))

    # magnitude-only input: ispecgram estimates the phase with Griffin-Lim
    waveform = ispecgram(
        single_channel,
        n_fft=1024,
        hop_length=256,
        mag_only=True,
        num_iters=1000,
    )

    wav_path = output + '.wav'
    sf.write(wav_path, waveform, rate)

    
def plot_from_specgram(spec, rate, output, hop_length=256):
    """
    Plot a spectrogram and save it to file.
    Args:
        spec (ndarray): spectrogram of shape (freqs, time).
        rate (int): sample rate of input audio.
        output (str): path to output file, without the '.png' extension.
        hop_length (int): STFT hop the spectrogram was computed with; the
            default matches the hop used by audio_from_specgram.
    """
    plt.figure()
    try:
        # Pass the real sample rate and hop so both axes are labelled
        # correctly. Doubling the rate with librosa's default hop of 512
        # gives the same time axis but doubles every frequency label.
        librosa.display.specshow(spec, sr=rate, hop_length=hop_length,
                                 y_axis='log', x_axis='time')
        plt.colorbar(format='%+2.0f dB')
        plt.tight_layout()
        plt.savefig(output + '.png')
    finally:
        # always release the figure, even if plotting or saving fails
        plt.close()
    

In [None]:
def inv_magphase(mag, phase_angle):
    """Combine magnitudes and phase angles (radians) into complex values.

    Args:
        mag: Magnitudes.
        phase_angle: Phase angles in radians, broadcastable against mag.
    Returns:
        Complex array mag * (cos(phase_angle) + j * sin(phase_angle)).
    """
    real_part = np.cos(phase_angle)
    imag_part = np.sin(phase_angle)
    return mag * (real_part + 1.j * imag_part)

def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
    """Iterative algorithm for phase retrieval from a magnitude spectrogram.

    The magnitudes are held fixed. On every pass the current estimate is
    inverted to audio, re-analysed, and only the phase of the re-analysis is
    kept for the next pass.

    Args:
        mag: Magnitude spectrogram.
        phase_angle: Initial condition for phase (radians, same shape as mag).
        n_fft: Size of the FFT.
        hop: Stride of FFT.
        num_iters: Griffin-Lim iterations to perform. Values below 1 are
            treated as 1, i.e. a single inversion using the initial phase.
    Returns:
        audio: 1-D array of sound samples as returned by librosa.istft.
    """
    fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
    ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)

    # Invert once up front so `audio` is always defined, even if
    # num_iters <= 0.
    audio = librosa.istft(inv_magphase(mag, phase_angle), **ifft_config)

    # Each remaining iteration re-estimates the phase from the current audio
    # and inverts again with the original magnitudes.
    for _iteration in range(num_iters - 1):
        reanalysed = librosa.stft(audio, **fft_config)
        _, phase = librosa.magphase(reanalysed)
        phase_angle = np.angle(phase)
        audio = librosa.istft(inv_magphase(mag, phase_angle), **ifft_config)

    return audio

def ispecgram(spec,
              n_fft=512,
              hop_length=None,
              mask=True,
              log_mag=True,
              re_im=False,
              dphase=True,
              mag_only=True,
              num_iters=1000):
    """Inverse Spectrogram using librosa.

    Args:
        spec: 3-D specgram array [freqs, time, channels]. The channels are
            (mag,) when mag_only, (real, imag) when re_im, otherwise
            (mag, phase or dphase). The input array is not modified.
        n_fft: Size of the FFT.
        hop_length: Stride of FFT. Defaults to n_fft/2.
        mask: Reverse the mask of the phase derivative by the magnitude
            (only applied together with log_mag).
        log_mag: Magnitudes are normalized log-amplitudes; they are mapped
            back to linear amplitude with 10 ** ((mag - 1) * 120 / 20).
        re_im: Specgram holds Real and Imag. instead of logMag and dPhase.
        dphase: Phase channel holds the derivative of phase instead of phase.
        mag_only: Specgram contains no phase; takes precedence over re_im and
            reconstructs the phase with griffin_lim from a random start.
        num_iters: Number of griffin-lim iterations for mag_only.
    Returns:
        audio: 1-D array of sound samples, scaled so the absolute peak is
            0.25 (about -12 dB). All-zero audio is returned unscaled.
    """
    if not hop_length:
        hop_length = n_fft // 2

    ifft_config = dict(win_length=n_fft, hop_length=hop_length, center=True)

    if mag_only:
        mag = spec[:, :, 0]
        phase_angle = np.pi * np.random.rand(*mag.shape)
        if log_mag:
            mag = 10 ** ((mag - 1.0) * 120.0 / 20.0)
        audio = griffin_lim(
            mag, phase_angle, n_fft, hop_length, num_iters=num_iters)
    else:
        if re_im:
            # Channels already hold the complex STFT; no magnitude or phase
            # post-processing applies to this representation.
            spec_real = spec[:, :, 0] + 1.j * spec[:, :, 1]
        else:
            mag, p = spec[:, :, 0], spec[:, :, 1]
            if mask and log_mag:
                # Out-of-place division: `p` is a view into the caller's
                # array. The tiny random term keeps the divisor away from
                # an exact zero.
                p = p / (mag + 1e-13 * np.random.randn(*mag.shape))
            if dphase:
                # Roll up phase
                phase_angle = np.cumsum(p * np.pi, axis=1)
            else:
                phase_angle = p * np.pi

            # Magnitudes
            if log_mag:
                mag = 10 ** ((mag - 1.0) * 120.0 / 20.0)
            phase = np.cos(phase_angle) + 1.j * np.sin(phase_angle)
            spec_real = mag * phase
        audio = librosa.istft(spec_real, **ifft_config)

    # Normalize by the absolute peak so a dominant negative excursion neither
    # flips the sign nor leaves the signal above the target level, and skip
    # the division for silent audio to avoid NaNs.
    peak = np.max(np.abs(audio)) if np.size(audio) else 0.0
    if peak > 0:
        audio = audio / peak
    return np.squeeze(audio * 0.25)  # scale to -12dB peak

In [None]:
# Load the trained encoder and decoder (in that order) from their HDF5 files.
# compile=False: in this file the models are only used through predict().
encoder, decoder = (
    tensorflow.keras.models.load_model(model_path, compile=False)
    for model_path in (
        "/content/drive/MyDrive/Suga/encoder_m2.h5",
        "/content/drive/MyDrive/Suga/decoder_m2.h5",
    )
)

In [None]:
# Sweep a regular 10 x 10 x 10 grid over the 3-D latent space. Each latent
# point is decoded to a spectrogram and rendered to a WAV file whose name
# encodes the running index and the three latent coordinates.
output_dir = '/content/drive/MyDrive/Suga/pre_compute_demo2'
# Create the target folder up front so writing the audio files cannot fail
# on a missing directory.
os.makedirs(output_dir, exist_ok=True)

# The same axis values are used for every latent dimension; compute them once.
latent_axis = np.linspace(-2, 2, num=10)

idx = 0
for a in latent_axis:
    for b in latent_axis:
        for c in latent_axis:
            print("{:04d} | z = [ {:+0.3f} {:+0.3f} {:+0.3f} ]".format(idx, a, b, c))
            # generate_specgram documents z as shape (1, 1, 1, 3)
            z = np.reshape(np.array([a, b, c]), (1, 1, 1, 3))
            filename = "_".join(["({:+0.3f})".format(dim) for dim in np.reshape(z, (3))])
            filename = "{:04d}_{}".format(idx, filename)
            filepath = os.path.join(output_dir, filename)
            spec = generate_specgram(decoder, z)
            audio_from_specgram(spec, 16000, filepath)
            # plot_from_specgram(np.abs(spec), 16000, filepath)
            idx += 1

0000 | z = [ -2.000 -2.000 -2.000 ]
0001 | z = [ -2.000 -2.000 -1.556 ]
0002 | z = [ -2.000 -2.000 -1.111 ]
0003 | z = [ -2.000 -2.000 -0.667 ]
0004 | z = [ -2.000 -2.000 -0.222 ]
0005 | z = [ -2.000 -2.000 +0.222 ]
0006 | z = [ -2.000 -2.000 +0.667 ]
0007 | z = [ -2.000 -2.000 +1.111 ]
0008 | z = [ -2.000 -2.000 +1.556 ]
0009 | z = [ -2.000 -2.000 +2.000 ]
0010 | z = [ -2.000 -1.556 -2.000 ]
0011 | z = [ -2.000 -1.556 -1.556 ]
0012 | z = [ -2.000 -1.556 -1.111 ]
0013 | z = [ -2.000 -1.556 -0.667 ]
0014 | z = [ -2.000 -1.556 -0.222 ]
0015 | z = [ -2.000 -1.556 +0.222 ]
0016 | z = [ -2.000 -1.556 +0.667 ]
0017 | z = [ -2.000 -1.556 +1.111 ]
0018 | z = [ -2.000 -1.556 +1.556 ]
0019 | z = [ -2.000 -1.556 +2.000 ]
0020 | z = [ -2.000 -1.111 -2.000 ]
0021 | z = [ -2.000 -1.111 -1.556 ]
0022 | z = [ -2.000 -1.111 -1.111 ]
0023 | z = [ -2.000 -1.111 -0.667 ]
0024 | z = [ -2.000 -1.111 -0.222 ]
0025 | z = [ -2.000 -1.111 +0.222 ]
0026 | z = [ -2.000 -1.111 +0.667 ]
0027 | z = [ -2.000 -1.111 +