In [1]:
import torch
import pickle
import librosa
import numpy as np
import soundfile as sf
from pathlib import Path
import IPython.display as ipd

from constants import *
from inferer import Inferer
from plotter import plot_waves
from disk_utils import load_model

In [2]:
# Select which instrument / feature statistics drive (de)normalisation below.
inst = "ney"
feature = "db"

# Read the pickled statistics table; it is indexed as
# min_max[instrument]["min" | "max"][feature].
# pickle can execute arbitrary code while loading, so only point this at a
# file produced by this project.
with open("dataset/features/min_max.pkl", "rb") as handle:
    min_max = pickle.load(handle)

# Lower / upper bound used to rescale the selected feature.
mini, maxi = (min_max[inst][bound][feature] for bound in ("min", "max"))

In [3]:
# Build paired time-domain chunks. Every "gtr" recording is cut into
# overlapping fixed-length windows, each window's dB spectrogram is passed
# through the generator, and the predicted magnitude is recombined with the
# original gtr phase and inverted back to audio. The matching window of the
# "ney" recording is written next to it under the same running index.
#
# Inference stays on the CPU: inputs come straight from torch.from_numpy and
# outputs are converted with .numpy(), so nothing is moved to another device.
#
# NOTE(review): the gtr input is normalised with `mini` / `maxi`, which the
# previous cell selects for inst = "ney" — confirm this matches training.
model = load_model("generator_sp_32_0_8_full")
model.eval()

src_dirs = {
    "gtr": ["dataset/gtr_1", "dataset/gtr_2"],
    "ney": ["dataset/ney_1", "dataset/ney_2"],
}

# sf.write does not create missing directories, so make sure they exist.
for out_dir in ("dataset/time/gtr", "dataset/time/ney"):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

# Windowing parameters are loop-invariant, so compute them once.
len_window = WINDOW_SAMPLE_LEN
len_overlap = OVERLAP
start_offset = len_window - len_overlap
if start_offset <= 0:
    # A non-positive stride would never advance and the chunk loop would spin forever.
    raise ValueError(
        "OVERLAP must be smaller than WINDOW_SAMPLE_LEN "
        f"(got OVERLAP={len_overlap}, WINDOW_SAMPLE_LEN={len_window})")


def _peak_normalize(signal):
    """Scale `signal` so its largest absolute sample is 1.

    An all-zero signal is returned unchanged instead of being divided by
    zero (which would fill it with NaN).
    """
    peak = np.max(np.abs(signal))
    if peak > 0:
        signal = signal / peak
    return signal


def _fixed_length_window(signal, start, length):
    """Return `signal[start:start + length]`, zero-padded at the end to `length` samples.

    Also works when `start` lies beyond the end of `signal`; the result is
    then all zeros.
    """
    window = signal[start:start + length]
    if len(window) < length:
        window = np.pad(window, (0, length - len(window)))
    return window


wav_idx = 0

for gtr_dir, ney_dir in zip(src_dirs["gtr"], src_dirs["ney"]):
    gtr_files = sorted(Path(gtr_dir).glob("*.wav"))
    ney_files = sorted(Path(ney_dir).glob("*.wav"))

    # Files are paired by sorted position; zip silently drops the surplus if
    # the two directories hold a different number of files.
    for gtr_file, ney_file in zip(gtr_files, ney_files):
        gtr_sig, _ = librosa.load(gtr_file, mono=True, sr=SR)
        ney_sig, _ = librosa.load(ney_file, mono=True, sr=SR)
        if gtr_sig.size == 0 or ney_sig.size == 0:
            # Nothing to chunk (and np.max would raise on an empty array).
            continue
        gtr_sig = _peak_normalize(gtr_sig)
        ney_sig = _peak_normalize(ney_sig)

        dbs = []
        phases = []
        ney_chunks = []
        # Divide the audio into overlapping chunks; the gtr length decides how
        # many chunks there are.
        start = 0
        while True:
            end = start + len_window
            # Pad each signal on its own: the ney recording may be shorter
            # than the gtr one, so its window can be short (or empty) even
            # when the gtr window is full.
            gtr_window = _fixed_length_window(gtr_sig, start, len_window)
            ney_window = _fixed_length_window(ney_sig, start, len_window)
            ney_chunks.append(ney_window)

            # stft -> phase, magnitude & db
            stft = librosa.stft(gtr_window, n_fft=N_FFT, hop_length=HOP)
            # The phase is kept so the predicted magnitude can be inverted later.
            phases.append(np.angle(stft))
            db = librosa.amplitude_to_db(np.abs(stft))
            # Normalise with the dataset statistics and add the leading axis
            # the model input is built with.
            db = (db - mini) / (maxi - mini)
            dbs.append(np.expand_dims(db, axis=0))

            if end >= len(gtr_sig):
                break
            start += start_offset

        # Get magnitude predictions, one chunk at a time.
        inputs = torch.from_numpy(np.array(dbs, dtype=np.float32))
        predictions = []
        with torch.no_grad():
            for chunk_idx in range(inputs.size(0)):
                y_hat = model(inputs[chunk_idx:chunk_idx + 1])
                y_hat = y_hat[0].numpy().squeeze(axis=0)
                # Undo the normalisation, then convert dB back to amplitude.
                y_hat = y_hat * (maxi - mini) + mini
                predictions.append(librosa.db_to_amplitude(y_hat))

        # Inverse stft: predicted magnitude combined with the original gtr phase.
        for prediction, phase, ney_chunk in zip(predictions, phases, ney_chunks):
            inverse = librosa.istft(prediction * np.exp(1j * phase),
                                    n_fft=N_FFT, hop_length=HOP)
            sf.write(
                f"dataset/time/gtr/gtr_{wav_idx}.wav", inverse, SR, format="wav")
            sf.write(
                f"dataset/time/ney/ney_{wav_idx}.wav", ney_chunk, SR, format="wav")

            wav_idx += 1