In [None]:
import os
import numpy as np
from load_data import *
import matplotlib.pyplot as plt
from IPython.display import Audio, display
from librosa.display import specshow
%matplotlib inline


In [None]:
# Read the file list, process N=30 audio files, and build two feature views
# of the same signals: un-normalized spectrograms and normalized chromagrams.
audio_file_list = load_csv()
sigs, srs, labels = process_audio_files(N=30)
S = create_grams(sigs, srs, gram_type="spectrograms", normalize=False, verbose=False)
C = create_grams(sigs, srs, gram_type="chromagrams", normalize=True, verbose=False)

# Walk the clips in order and preview only the first clip seen for each label;
# `label_set` holds the labels that have not been shown yet.
label_set = set(labels)
for i, (sig, label) in enumerate(zip(sigs, labels)):
    if label not in label_set:
        continue

    print(i, sig.shape, label)
    # NOTE(review): assumes row i of the file list corresponds to sigs[i] -- confirm.
    display(Audio(os.path.join("data", "trainingset", audio_file_list[i][0])))
    label_set.discard(label)

    # Side-by-side figure: log-scaled spectrogram on the left, chromagram on the right.
    plt.figure(figsize=(14, 5))
    panels = (
        (np.log10(S[i]), 'mel'),
        (C[i], 'chroma'),
    )
    for col, (gram, y_axis) in enumerate(panels, start=1):
        plt.subplot(1, 2, col)
        specshow(gram, x_axis='time', sr=44100, hop_length=512, y_axis=y_axis)
        plt.set_cmap("coolwarm")
        plt.xlabel('Time (seconds)')

    # The chroma panel is still the current axes; keep only every 12th y tick.
    chroma_ax = plt.gca()
    chroma_ax.set_yticks(chroma_ax.get_yticks()[::12])
    plt.show()

    # Stop as soon as every label has had one example displayed.
    if not label_set:
        break

In [None]:
import torch
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader

from sputils import *
from models import *

from sklearn.preprocessing import LabelEncoder

def transform(tensor, model = "cnn"):
    """Insert the channel axis a network expects and wrap the result in a Variable.

    Parameters
    ----------
    tensor : torch.Tensor
        Input batch; the new channel axis is created at dimension 1.
    model : str
        "cnn"    -> one channel, via ``tensor.unsqueeze(1)``.
        "resnet" -> three identical channels, via stacking ``tensor`` three
                    times along dimension 1.

    Returns
    -------
    torch.autograd.Variable
        The tensor with the added channel dimension.

    Raises
    ------
    ValueError
        If ``model`` is neither "cnn" nor "resnet". Without this check the
        function would fall through and return None, which only surfaces
        later as an obscure failure inside the network call.
    """
    if model == "cnn":
        with_channels = tensor.unsqueeze(1)
    elif model == "resnet":
        with_channels = torch.stack([tensor, tensor, tensor], dim=1)
    else:
        raise ValueError(
            "unknown model type {!r}; expected 'cnn' or 'resnet'".format(model)
        )
    return Variable(with_channels)

In [None]:
# Seed torch's RNG so repeated runs of this cell behave the same.
torch.manual_seed(1111)

gtype = "spectrograms"
inputs, labels = get_grams(use_chromagrams = False, N = 100,
                           window_size = 2048, freq_bands = 224,
                           filelist = "data/testingset.csv")

# Encode the string labels as integer class ids. The encoder is fitted on the
# full configured class list, so ids do not depend on which classes happen to
# appear in this sample.
le = LabelEncoder()
le.fit(spconfig.lang_classes)
num_targets = le.classes_.shape[0]
labels_encoded = le.transform(labels)
print(le.classes_)

# Build the network and restore the saved weights.
model_path = "output/states/resnet_model_spectrograms_82.pt"
nn_builder = resnet.resnetX
nnargs = {}
net = nn_builder(num_classes=num_targets, **nnargs)
# Inference below runs on the CPU (inputs are never moved to a device and the
# outputs are converted with .numpy()), so map every storage to CPU on load.
# Without this, a checkpoint saved from a GPU cannot be loaded on a CPU-only
# machine. The callable form of map_location is used because it is accepted by
# old and new torch releases alike.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
net.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
net.eval()  # switch the module to evaluation mode before predicting

# Wrap features and encoded labels in a dataset so predictions can be made in
# fixed-size batches, in the original order (shuffle=False).
inputs = torch.from_numpy(inputs).float()
# Trim the labels to the number of input rows so TensorDataset sizes match.
labels_encoded = torch.from_numpy(labels_encoded[:inputs.size(0)])
print(labels_encoded.size())

testset = TensorDataset(inputs, labels_encoded)
testloader = DataLoader(testset, batch_size=8, shuffle=False)

# Run each batch through the network and concatenate the raw outputs; `.data`
# keeps only the tensor values.
output = torch.cat([net(transform(minibatch, "resnet")).data
                    for minibatch, _targets in testloader])

# Index of the largest output per row is the predicted class id; compute it
# once and reuse it for both the printout and the label decoding.
pred_idx = output.max(1)[1].numpy().ravel()
print(pred_idx)
yhat = le.inverse_transform(pred_idx)
y_t  = le.inverse_transform(labels_encoded.numpy().ravel())
print(yhat)

In [None]:
# NOTE(review): presumably /tmp/filelist.csv lists the files in the same order
# as the rows of `inputs` -- confirm against whatever writes it.
audio_file_list = load_csv("/tmp/filelist.csv")

# Preview the first clip encountered for each distinct *predicted* label;
# `label_set` holds the predicted labels that have not been shown yet.
label_set = set(yhat)
for i, (sig, label) in enumerate(zip(inputs.numpy(), yhat)):
    if label not in label_set:
        continue

    print(i, sig.shape, label)
    display(Audio(os.path.join("data", "testingset", audio_file_list[i][0])))
    label_set.discard(label)

    plt.figure(figsize=(14, 5))
    # Build a plotting copy (the model input itself is left untouched): offset
    # by |min|, scale by 100, and add a tiny constant so log10 never sees zero.
    sig_plot = (sig + np.abs(sig.min())) * 100 + 1e-10
    specshow(np.log10(sig_plot), x_axis='time', sr=44100, hop_length=512, y_axis='mel')
    plt.set_cmap("coolwarm")
    plt.xlabel('Time (seconds)')
    plt.show()

    # Stop once every predicted label has had one example displayed.
    if not label_set:
        break