In [101]:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from python_speech_features import mfcc, logfbank
import librosa
import boto3
from src.feature_extraction import call_s3
from IPython.display import Audio

In [142]:
# Read the training label table, then report its dimensions and how many
# distinct values the `labels` column holds before previewing the first rows.
df = pd.read_csv('data/train/labels.csv')
print(df.shape)
print(len(pd.unique(df['labels'])), 'unique labels')
df.head()

(4970, 4)
213 unique labels


Unnamed: 0,fname,labels,freesound_id,license
0,ac9e7a91.wav,Church_bell,65579,CC-BY-NC
1,65ae847e.wav,Frying_(food),65583,CC Sampling+
2,32ec2454.wav,Computer_keyboard,360502,CC0
3,af7b5bab.wav,Scissors,360503,CC0
4,7e8cd849.wav,Purr,65598,CC-BY


In [133]:
# Distinct values of the `labels` column: printed in sorted order, then counted.
uniques = pd.unique(df['labels'])
print(sorted(uniques))
print(len(uniques))

['Accelerating_and_revving_and_vroom', 'Accordion', 'Acoustic_guitar', 'Acoustic_guitar,Strum', 'Applause,Cheering', 'Applause,Cheering,Clapping', 'Applause,Clapping', 'Applause,Crowd', 'Applause,Crowd,Cheering', 'Applause,Crowd,Clapping', 'Applause,Dishes_and_pots_and_pans,Crowd,Cheering,Clapping,Child_speech_and_kid_speaking', 'Applause,Harmonica', 'Applause,Harmonica,Cheering', 'Applause,Screaming,Crowd', 'Bark', 'Bark,Chirp_and_tweet', 'Bark,Cricket', 'Bark,Cricket,Buzz', 'Bark,Walk_and_footsteps', 'Bass_drum', 'Bass_guitar', 'Bathtub_(filling_or_washing)', 'Bathtub_(filling_or_washing),Child_speech_and_kid_speaking', 'Bathtub_(filling_or_washing),Gurgling', 'Bathtub_(filling_or_washing),Sink_(filling_or_washing)', 'Bathtub_(filling_or_washing),Sink_(filling_or_washing),Water_tap_and_faucet', 'Bathtub_(filling_or_washing),Water_tap_and_faucet', 'Bicycle_bell', 'Burping_and_eructation', 'Burping_and_eructation,Chewing_and_mastication', 'Bus', 'Buzz', 'Car_passing_by', 'Car_passing_b

In [135]:
# Keep only samples that carry exactly one label.
# Multi-label rows store their labels as one comma-separated string, so a row
# is single-label exactly when its string contains no comma. This replaces the
# split -> len -> join round-trip with one vectorised check.
# `.copy()` makes the result an independent frame, so later column assignments
# do not raise SettingWithCopyWarning.
is_multi_label = df['labels'].str.contains(',', regex=False)
df = df[~is_multi_label].copy()

In [136]:
# Remove the provenance columns and report what is left.
# `columns=` is used because pandas 2.0 made every `DataFrame.drop` argument
# after `labels` keyword-only; the positional `1` raises TypeError there.
# Rebinding `df` (rather than `inplace=True`) avoids mutating a filtered slice.
df = df.drop(columns=['freesound_id', 'license'])
print(df.shape)
print(len(df['labels'].unique()))
df.head()

(4269, 2)
74


Unnamed: 0,fname,labels
0,ac9e7a91.wav,Church_bell
1,65ae847e.wav,Frying_(food)
2,32ec2454.wav,Computer_keyboard
3,af7b5bab.wav,Scissors
4,7e8cd849.wav,Purr


In [143]:
# dropping corrupted files
df.set_index('fname', inplace=True)
for f in tqdm(df.index):
    try:
        rate, signal = wavfile.read('audio/train/'+f)
    except ValueError:
        df.drop(f, 0, inplace=True) #drop corrupted files

  0%|          | 0/4970 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'audio/train/ac9e7a91.wav'

In [139]:
# Show the (rows, columns) tuple of the current label frame.
df.shape

(4269, 2)

In [147]:
# Label names treated as road-related sounds; used to filter the label frame.
road_sounds = {
    'Accelerating_and_revving_and_vroom',
    'Bicycle_bell',
    'Bus',
    'Car_passing_by',
    'Motorcycle',
    'Race_car_and_auto_racing',
    'Skateboard',
    'Traffic_noise_and_roadway_noise',
}

In [149]:
# Display the rows whose label is a member of `road_sounds`.
is_road_sound = df['labels'].isin(road_sounds)
df.loc[is_road_sound]

Unnamed: 0_level_0,labels,freesound_id,license
fname,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
75ba74db.wav,Car_passing_by,237,CC-BY-NC
40d9de68.wav,Bicycle_bell,262648,CC0
b7b2c6b6.wav,Traffic_noise_and_roadway_noise,262654,CC-BY-NC
64f50c86.wav,Accelerating_and_revving_and_vroom,131591,CC0
8c225bfb.wav,Car_passing_by,262898,CC-BY
...,...,...,...
4b82998e.wav,Car_passing_by,425850,CC-BY
d291f6ac.wav,Car_passing_by,425860,CC-BY
6b27eb13.wav,Car_passing_by,425865,CC-BY
5682234e.wav,Car_passing_by,425868,CC-BY


In [150]:
# Write only the road-sound rows to disk.
# The original wrote the whole `df`: the road-sound filter was displayed in a
# separate cell but never assigned, so 'roadsounds_labels.csv' held every
# label. Filtering here keeps the file contents in line with its name.
df[df['labels'].isin(road_sounds)].to_csv('data/train/roadsounds_labels.csv')

In [99]:
# df.to_csv('data/clean_training_labels.csv')

In [100]:
# Recompute the distinct label values of the current frame, print them in
# sorted order, then print how many there are.
uniques = df['labels'].unique()
sorted_labels = sorted(uniques)
print(sorted_labels)
print(len(uniques))

['Accelerating_and_revving_and_vroom', 'Accordion', 'Acoustic_guitar', 'Bark', 'Bass_drum', 'Bass_guitar', 'Bathtub_(filling_or_washing)', 'Bicycle_bell', 'Burping_and_eructation', 'Bus', 'Buzz', 'Car_passing_by', 'Chewing_and_mastication', 'Child_speech_and_kid_speaking', 'Chink_and_clink', 'Chirp_and_tweet', 'Church_bell', 'Clapping', 'Computer_keyboard', 'Crackle', 'Cricket', 'Crowd', 'Cupboard_open_or_close', 'Cutlery_and_silverware', 'Drawer_open_or_close', 'Drip', 'Electric_guitar', 'Fart', 'Female_singing', 'Female_speech_and_woman_speaking', 'Fill_(with_liquid)', 'Finger_snapping', 'Frying_(food)', 'Gasp', 'Glockenspiel', 'Gong', 'Gurgling', 'Harmonica', 'Hi-hat', 'Hiss', 'Keys_jangling', 'Knock', 'Male_singing', 'Male_speech_and_man_speaking', 'Marimba_and_xylophone', 'Mechanical_fan', 'Meow', 'Microwave_oven', 'Motorcycle', 'Printer', 'Purr', 'Race_car_and_auto_racing', 'Raindrop', 'Run', 'Scissors', 'Screaming', 'Shatter', 'Sigh', 'Skateboard', 'Slam', 'Sneeze', 'Squeak', 'S

In [None]:
# Positional preview of the first rows.
# NOTE(review): the original `df.iloc[]` had an empty indexer, which is a
# SyntaxError; the intended slice is unknown, so confirm the rows wanted here.
df.iloc[:5]

In [91]:
# Index the label frame by file name.
# The guard makes the cell safe to re-run: once 'fname' is the index it is no
# longer a column, and an unguarded `set_index('fname')` raises KeyError.
if df.index.name != 'fname':
    df = df.set_index('fname')

In [80]:
# Per-column non-null counts for the rows labelled with a single class.
target_label = 'Accelerating_and_revving_and_vroom'
df.loc[df['labels'] == target_label].count()

fname     32
labels    32
dtype: int64

In [84]:
# S3 access objects and the name of the bucket used by the cells below.
bucket_name = 'jarednewstudy'
client = boto3.client('s3')
connection = boto3.resource('s3')

In [85]:
# Fetch one clip from S3 through the project helper.
# The bucket comes from `bucket_name` (set in the S3 setup cell) rather than a
# second copy of the literal, so the two cannot drift apart.
# NOTE(review): the return type of call_s3 is not visible here — confirm.
data = call_s3(
    s3_client=client,
    bucket_name=bucket_name,
    fname='af7b5bab.wav',
    folder='audio_train/',
)

In [86]:
# Parse the object returned by call_s3 as a WAV file; wavfile.read yields the
# sample rate and the sample data.
# NOTE(review): presumably `data` is a path or file-like object — confirm
# against call_s3.
rate, signal = wavfile.read(data)

In [124]:
# Read one clip from the local audio folder and render an inline player for it.
f = '7e8cd849.wav'
clip_path = 'audio_train/' + f
rate, signal = wavfile.read(clip_path)
Audio(signal, rate=rate)

In [126]:
# Display the current label frame via the notebook's rich repr.
df

Unnamed: 0_level_0,labels
fname,Unnamed: 1_level_1
ac9e7a91.wav,Church_bell
32ec2454.wav,Computer_keyboard
af7b5bab.wav,Scissors
7e8cd849.wav,Purr
fdfbf113.wav,Gasp
...,...
a0582310.wav,Stream
02a5aae7.wav,Bass_drum
9a92f7d0.wav,Bass_drum
4e5c0a8a.wav,Bark
