In [1]:
import numpy as np
import librosa
import librosa.display
import os
from tqdm import tqdm
import pandas as pd
import plotly.tools as tools
import plotly.offline as py
import plotly.graph_objs as go
import matplotlib.pyplot as plt
# Initialise plotly's offline notebook mode so the py.iplot(...) calls below
# render inline. NOTE(review): connected=True should load plotly.js from the
# CDN instead of embedding it in the notebook -- confirm against plotly docs.
py.init_notebook_mode(connected=True)

In [2]:
# Category names that later cells relabel as 'unknown'.
INVALID_LABELS = ["bed", "bird", "cat", "dog", "happy", "house", "marvin", "sheila", "tree", "wow"]
# Root folder holding one sub-directory of audio clips per category.
AUDIO_ROOT = './data/audio'
# Every clip is resampled to this rate when loaded.
SAMPLE_RATE = 16000

# Keep only sub-directories (a stray file here would make the inner
# os.listdir raise) and sort them: os.listdir returns entries in arbitrary
# order, which would make the tables and plots differ between runs.
all_categories = sorted(
    entry for entry in os.listdir(path=AUDIO_ROOT)
    if os.path.isdir(os.path.join(AUDIO_ROOT, entry))
)

sample_count_per_category = []  # total number of samples per category
file_count_per_category = []    # number of audio files per category
len_of_samples = []             # number of samples of every single file

# NOTE: this decodes every audio file, so the cell is slow on a large dataset.
for category in tqdm(all_categories):
    category_dir = os.path.join(AUDIO_ROOT, category)
    audio_files_per_category = sorted(os.listdir(path=category_dir))
    file_count_per_category.append(len(audio_files_per_category))
    cum_sum_samples = 0
    for audio_file_name in audio_files_per_category:
        audio_file, _ = librosa.load(os.path.join(category_dir, audio_file_name), sr=SAMPLE_RATE)
        len_of_samples.append(len(audio_file))
        cum_sum_samples += len(audio_file)
    sample_count_per_category.append(cum_sum_samples)

# One row per category, built once after all counts are known.
all_data = pd.DataFrame({
    'category_names': all_categories,
    'files_per_category': file_count_per_category,
    'samples_per_category': sample_count_per_category,
})

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [09:39<00:00, 19.33s/it]


In [21]:
# Turn the per-category lists into NumPy arrays so the cells below can index
# them with boolean masks.
file_count_per_category = np.asarray(file_count_per_category)
all_categories = np.asarray(all_categories)

In [5]:
# Relabel every category listed in INVALID_LABELS as 'unknown'.
# np.isin compares whole names, so a category is relabelled only on an exact
# match; a substring replace would instead rewrite part of any name that
# merely contains one of the labels. It also avoids the private
# np.core.defchararray path, which is deprecated in NumPy 2.0.
all_categories_masked = np.where(np.isin(all_categories, INVALID_LABELS), 'unknown', all_categories)

In [16]:
# Total number of audio files over all categories relabelled 'unknown'.
# A plain elementwise comparison gives the boolean mask directly, without the
# deprecated np.core.defchararray path or the redundant np.where(... == True).
files_for_unknown = file_count_per_category[all_categories_masked == 'unknown'].sum()

In [24]:
# Names and file counts of the categories that were NOT relabelled 'unknown'.
# The mask is computed once and shared so both arrays stay aligned.
known_category_mask = all_categories_masked != 'unknown'
all_other_categories = all_categories[known_category_mask]
files_for_other_categories = file_count_per_category[known_category_mask]

In [36]:
# Dataset size summary. The file total is the sum of the per-category file
# counts, which are defined above; the previous `len_of_samples` is not
# created by any cell, so it raised NameError on a fresh kernel.
print('Total number of categories: {}'.format(len(all_categories)))
print('Total number of audio files: {}'.format(np.sum(file_count_per_category)))

Total number of categories: 30
Total number of audio files: 64721


In [42]:
# Bar chart of the number of audio files per category; categories relabelled
# 'unknown' are drawn in red, all others in blue.
# np.where sizes the string dtype from both colour literals, whereas
# np.full(...) fixes the width from the fill value and would silently
# truncate a longer replacement colour. It also avoids the deprecated
# np.core.defchararray path.
colors = np.where(all_categories_masked == 'unknown', 'rgb(222,45,38)', 'rgb(49,130,189)')
py.iplot(dict(data=[go.Bar(x=all_categories,
                           y=file_count_per_category,
                           marker=dict(color=colors))],
              layout=go.Layout(title='Number of audio files for each category',
                               xaxis=dict(title='Categories'),
                               # The y axis is deliberately cropped, so bar heights are not proportional to counts.
                               yaxis=dict(title='Count', range=[1700, 2400]))))

In [45]:
# File counts per category with all 'unknown' categories pooled into a single
# final bar: known categories in blue, the pooled 'unknown' bar in red.
py.iplot({
    'data': [go.Bar(
        x=[*all_other_categories, 'unknown'],
        y=[*files_for_other_categories, files_for_unknown],
        marker={'color': ['rgb(49,130,189)'] * len(all_other_categories) + ['rgb(222,45,38)']},
    )],
    'layout': go.Layout(
        title='Number of audio files for each category',
        xaxis={'title': 'Categories'},
        yaxis={'title': 'Count', 'range': [2000, 17500]},
    ),
})

In [44]:
# Bar chart of the total number of audio samples per category.
# The totals live in `sample_count_per_category` (filled by the loading
# loop); the previous `samples_per_category` only exists as a column name of
# `all_data`, never as a variable, so it raised NameError.
py.iplot(dict(data=[go.Bar(x=all_categories, y=sample_count_per_category)],
              layout=go.Layout(title='Number of audio samples for each category',
                               xaxis=dict(title='Categories'),
                               yaxis=dict(title='Number of samples', range=[25000000, 38000000]))))

In [34]:
# Pie chart splitting the audio files into those shorter than 16000 samples
# and those with exactly 16000 samples.
# NOTE(review): `sample_length_count` is not defined in any visible cell, so
# this cell fails on a fresh kernel unless it is created elsewhere. It is
# presumably an array of file counts per distinct sample length, sorted so
# that the last entry is the count for 16000 samples -- confirm and restore
# the cell that builds it.
labels = ['Audio files with less than 16000 samples', 'Audio files with 16000 samples']
# All entries but the last are summed into the first slice; the last entry is the second slice.
values = [sample_length_count[:-1].sum(), sample_length_count[-1]]
py.iplot([go.Pie(labels=labels, values=values)])

In [47]:
def mel_spec(x, sr=16000, n_fft=1024, hop_length=256, fmax=3000):
    """Return the mel spectrogram of an audio signal on a decibel scale.

    Args:
        x: Audio time series, forwarded to librosa as ``y``.
        sr: Sampling rate of ``x``.
        n_fft: FFT window length.
        hop_length: Number of samples between successive frames.
        fmax: Highest frequency covered by the mel filter bank.

    Returns:
        The mel power spectrogram converted with ``librosa.power_to_db``
        using ``ref=np.max``, i.e. decibels relative to the spectrogram's
        own maximum.
    """
    # The signal must be passed as the keyword `y`: recent librosa releases
    # make melspectrogram's arguments keyword-only, so a positional call
    # raises TypeError there. The keyword form works on older releases too.
    mel_power = librosa.feature.melspectrogram(y=x,
                                               sr=sr,
                                               n_fft=n_fft,
                                               hop_length=hop_length,
                                               fmax=fmax)
    return librosa.power_to_db(mel_power, ref=np.max)

In [56]:
# Compute one example mel spectrogram per category, in the same order as
# `all_categories`.
all_categories_mel = []
for category in all_categories:
    category_dir = os.path.join('./data/audio', category)
    # os.listdir returns names in arbitrary order; sorting makes the chosen
    # example file the same on every run and every machine.
    audio_file = sorted(os.listdir(path=category_dir))[0]
    audio, _ = librosa.load(os.path.join(category_dir, audio_file), sr=16000)
    spec = mel_spec(audio)
    all_categories_mel.append(spec)