In [None]:
# Import statements
import os
import sys
import math
import torch
import numpy as np
# Use the GPU when torch reports one, otherwise stay on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Replace the kernel's command-line arguments with a single empty entry so that
# argparse-based code does not see Jupyter's own arguments.
# NOTE(review): presumably `config` parses arguments when imported - confirm.
sys.argv = [""] # Hack to make argparse work in jupyter
# If the current working directory path ends in "classification", step up one
# level. This has to happen before the project imports that follow.
if os.getcwd().endswith("classification"):
    os.chdir("..") # Fix working directory for imports
import config
from dataset import get_datasets
from matplotlib import pyplot as plt
from matplotlib import cm
from augmentations import Mixup, SyntheticNoise, BackgroundNoise, LowpassFilter, RandomEQ
# Short alias for the configuration object exposed by the project's config module.
cfg = config.cfg
# Build the training and validation datasets; the cells visible in this part of
# the notebook only read from valid_ds.
train_ds, valid_ds = get_datasets()

def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Only non-positive values are ever exponentiated, so large-magnitude
    inputs cannot overflow (the naive form overflows in exp(-x) for very
    negative x and emits a RuntimeWarning).

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x``; a scalar input yields
        a NumPy scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) is always in (0, 1], so this call can never overflow.
    decay = np.exp(-np.abs(x))
    # Both branches equal 1 / (1 + exp(-x)); each is picked where it is stable.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps 0-d arrays to scalars and leaves n-d arrays as-is.
    return result[()]

In [None]:
# Get audio
# Pick one validation item at random. The index is kept in `sample_index` so
# the exact same item can be fetched again with valid_ds.get_annotation(sample_index).
sample_index = np.random.randint(len(valid_ds))
audio, one_hot = valid_ds.get_annotation(sample_index)

# Plot the raw values of the selected item on an explicit figure/axes pair so
# re-running the cell never draws onto a previously opened figure.
wave_fig, wave_ax = plt.subplots(figsize=(10, 2))
wave_ax.plot(audio.to("cpu").numpy())
wave_ax.set_title(f"Validation sample {sample_index}")
wave_ax.set_xlabel("Sample index")
wave_ax.set_ylabel("Amplitude")
plt.show()

In [None]:
# Show all!!

# How mel values are mapped before plotting. Supported values:
#   "log", "linear", "sigmoid", "tanh"
NORMALIZATION = "log"
# Mel values are divided by this before the sigmoid / tanh mappings.
DIVISOR = 1
# Lower and upper bound the normalized values are clipped to.
MIN_CLIP, MAX_CLIP = -10, 10

# Collect (waveform, title) pairs; the unmodified clip always comes first.
to_show = [(audio, "Original")]

# Synthetic noise: one entry per noise colour, all at the same intensity
noise_intensity = 0.2
colors = ["white", "pink", "brown", "violet", "blue"]
for color in colors:
    synth = SyntheticNoise(color, noise_intensity)
    to_show.append((synth.forward(audio), f"Synthetic {color} noise"))

# Lowpass filter
cutoff, q_val = 100, 0.707
lowpass = LowpassFilter(cutoff, q_val)
to_show.append((lowpass.forward(audio), "Lowpass filter"))

# Random EQ: the same transform object is applied to the clip twice
f_range, g_range, q_range = (100, 6000), (-8, 8), (1, 9)
num_applications = 1
eq = RandomEQ(f_range, g_range, q_range, num_applications)
to_show += [(eq.forward(audio), f"Random EQ {n}") for n in (1, 2)]

# Background noise: three applications of one transform object
bgn_intensity, bgn_norm = 0.8, False
bcg_noise = BackgroundNoise(bgn_intensity, length=5, norm=bgn_norm)
to_show += [(bcg_noise.forward(audio), f"Background noise {n}") for n in (1, 2, 3)]

# Mixup: three applications, built from the validation set's own metadata
# NOTE(review): p = 0 is passed below - if p is the probability of applying
# mixup, these three entries may simply equal the original; confirm against Mixup.
mixup_alpha_range = (0.1, 0.4)
mixup = Mixup(
    df=valid_ds.samples,
    class_to_idx=valid_ds.class_to_idx,
    sample_rate=valid_ds.target_sample_rate,
    target_num_samples=valid_ds.num_samples,
    alpha_range=mixup_alpha_range,
    p=0,
)
for n in (1, 2, 3):
    mixup_audio, mixup_target = mixup.forward(audio, one_hot)
    to_show.append((mixup_audio, f"Mixup {n}"))

# Turn every (waveform, title) entry of to_show into (normalized mel, title) and
# track the global value range so all panels can share one colour scale.

# Fail fast on an unsupported mode instead of printing and carrying on with an
# unconverted value.
if NORMALIZATION not in ("log", "linear", "sigmoid", "tanh"):
    raise ValueError(
        f"Normalization {NORMALIZATION!r} not found; "
        "expected one of 'log', 'linear', 'sigmoid', 'tanh'"
    )

# Start from +/-inf so the limits are the true extremes of the data; starting
# from 0 would force 0 into the colour range even when no value is near it.
v_min = float("inf")
v_max = float("-inf")
# The loop variable is deliberately NOT called `audio`: reusing that name would
# overwrite the clip selected earlier, so re-running this cell would label an
# augmented waveform as "Original".
for i, (waveform, label) in enumerate(to_show):
    mel = valid_ds.mel_spectogram(waveform).to("cpu").numpy()
    if NORMALIZATION == "log":
        # log(0) yields -inf, which the clip below maps to MIN_CLIP; silence
        # the divide warning that would otherwise be printed.
        with np.errstate(divide="ignore"):
            mel = np.log(mel)
    elif NORMALIZATION == "sigmoid":
        mel = sigmoid(mel / DIVISOR)
    elif NORMALIZATION == "tanh":
        mel = np.tanh(mel / DIVISOR)
    # "linear" leaves the values untouched.
    mel = np.clip(mel, MIN_CLIP, MAX_CLIP)
    v_min = min(v_min, mel.min())
    v_max = max(v_max, mel.max())
    to_show[i] = (mel, label)

# Display images
# Lay the panels out three per row; the row count follows the number of
# entries, so adding or removing an augmentation cannot overflow the grid.
n_cols = 3
n_rows = max(1, math.ceil(len(to_show) / n_cols))
# 4 inches of height per row reproduces the original 12x20 figure for 5 rows.
# squeeze=False keeps `axes` two-dimensional even when there is a single row.
f, axes = plt.subplots(n_rows, n_cols, figsize=(12, 4 * n_rows), squeeze=False)
flat_axes = axes.ravel()

im = None
for ax, (mel, label) in zip(flat_axes, to_show):
    # Every panel uses the same colour limits so they are directly comparable.
    im = ax.imshow(mel, cmap="viridis", origin="lower", clim=(v_min, v_max))
    ax.set_title(label)

# Hide any grid cells left over when the entries do not fill the last row.
for ax in flat_axes[len(to_show):]:
    ax.set_visible(False)

# One shared colourbar for the whole grid, driven by the last image drawn
# (all images share the same limits and colormap).
if im is not None:
    cb = f.colorbar(im, ax=axes, location="top")
f.text(
    0.5,
    0.05,
    f"Normalization: {NORMALIZATION},   Divisor: {DIVISOR},   Min clip: {MIN_CLIP},   Max clip: {MAX_CLIP}",
    ha="center",
)
plt.show()

