# sources
### http://luthuli.cs.uiuc.edu/~daf/courses/cs-498-daf-ps/lecture%208%20-%20audio%20features2.pdf
- Pitch
    - Mel scale
    - Twice as many Mels correspond to a perceived pitch doubling
    - Pitch represents sounds ordered from low to high
- Timbre
    - What distinguishes sounds outside of loudness and pitch
    - E.g. musical instrument identification is guided largely by intensity fluctuations through time
- Spectrogram
    - we can "see" each individual sound and know how it sounds
    


In [1]:
import glob
import os
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import matplotlib.style as ms
ms.use("seaborn-muted")
%matplotlib inline

import IPython.display

import librosa
import librosa.display

import tensorflow as tf
from sklearn.metrics import precision_recall_fscore_support
import pickle

NUMBER_OF_CLASSES = 4


In [2]:
def load_files(filepaths):
    """Decode every audio file in ``filepaths`` and return the raw signals.

    Each path is handed to ``librosa.load`` with its default arguments. Only
    the first element of the returned pair (the signal) is kept; the sample
    rate that comes back with it is discarded.

    Returns:
        A list with one decoded signal per input path, in input order.
    """
    return [librosa.load(path)[0] for path in filepaths]


def plot_waves(sound_names,raw_sounds):
    """Draw one waveform subplot per sound, stacked vertically, and show it.

    ``sound_names`` and ``raw_sounds`` are walked in lockstep; each name
    (title-cased) becomes the title of the subplot for the matching signal.
    The name and the raw signal are also printed before plotting.

    The subplot grid is fixed at 10 rows x 1 column and the waveform is
    always drawn with ``sr=22050``.
    NOTE(review): with more than 10 sounds the subplot index would fall
    outside the 10x1 grid -- confirm callers never pass more than 10.
    """
    plt.figure(figsize=(25, 60), dpi=900)
    paired = zip(sound_names, raw_sounds)
    for position, (name, samples) in enumerate(paired, start=1):
        print(name, samples)
        plt.subplot(10, 1, position)
        librosa.display.waveplot(np.array(samples), sr=22050)
        plt.title(name.title())
    plt.suptitle("Figure 1: Waveplot", x=0.5, y=0.915, fontsize=18)
    plt.show()

In [3]:
def extract_feature(file_name):
    """Compute five time-averaged feature vectors for one audio file.

    The file is decoded with ``librosa.load`` (default arguments) and the
    following librosa features are computed, each averaged down to a single
    1-D vector (``np.mean(feature.T, axis=0)``):

    * ``mfccs``    -- MFCCs with ``n_mfcc=40``, from the raw signal.
    * ``chroma``   -- chromagram, from the magnitude STFT.
    * ``mel``      -- mel spectrogram, from the raw signal.
    * ``contrast`` -- spectral contrast, from the magnitude STFT.
    * ``tonnetz``  -- tonal centroid features, from the harmonic component
      of the signal.

    Args:
        file_name: Path of the audio file to analyse.

    Returns:
        The tuple ``(mfccs, chroma, mel, contrast, tonnetz)``.
        NOTE(review): ``parse_audio_files`` expects the five vectors to
        total 193 values, which presumably relies on librosa's default
        sizes for everything except ``n_mfcc`` -- confirm if defaults change.
    """
    X, sample_rate = librosa.load(file_name)
    # Magnitude spectrogram, shared by the chroma and contrast features.
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    # Pass the signal as ``y=`` like the other calls: a positional signal is
    # rejected by librosa releases that made these arguments keyword-only.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,
        axis=0,
    )
    return mfccs,chroma,mel,contrast,tonnetz

def parse_audio_files(parent_dir,sub_dirs,file_ext="*.wav"):
    """Extract a feature matrix and file-name labels for a set of folders.

    Every file matching ``file_ext`` inside ``parent_dir/<sub_dir>`` is run
    through ``extract_feature``; the five returned vectors are concatenated
    into one row. Files that fail to parse are reported and skipped.

    Args:
        parent_dir: Directory that contains the sub-directories.
        sub_dirs: Iterable of sub-directory names; each one is printed with
            its index before its files are processed.
        file_ext: Glob pattern selecting the files inside each sub-directory.

    Returns:
        ``(features, labels)`` where ``features`` is a float array of shape
        ``(n_files, 193)`` (193 is the combined length of the five feature
        vectors) and ``labels`` is an array holding the base file name of
        each successfully parsed file, in the same order. When nothing was
        parsed, the shapes are ``(0, 193)`` and ``(0,)``.
    """
    # Collect in plain lists and stack once at the end; growing the arrays
    # with vstack/append on every file copies all previous rows each time.
    feature_rows = []
    label_names = []
    for label, sub_dir in enumerate(sub_dirs):
        print(label, sub_dir)
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            try:
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            except Exception as e:
                # Best-effort: one unreadable file must not abort the whole
                # run, but say why it was skipped.
                print("Error encountered while parsing file: ", fn, "-", repr(e))
                continue
            feature_rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
            # basename handles the platform's separators; splitting on '\\'
            # only stripped the directory part on Windows.
            # NOTE: a class id could be taken from the name with
            # ``.split('-')[1]`` for files named like ``145654-3-0-0.wav``.
            label_names.append(os.path.basename(fn))
    # Seeding with the empty (0, 193) array keeps the float64 dtype, the
    # 193-column check and the empty-input result of the original.
    features = np.vstack([np.empty((0, 193))] + feature_rows)
    return features, np.array(label_names)

#     return np.array(features)



In [18]:
# Extract features for every file matching the default "*.wav" pattern under
# Sound-Data/renameThese/AmbientRecordings.
print("Starting Audio parsing")
parent_dir = 'Sound-Data/renameThese'
dirs = ["AmbientRecordings"]
# `features` and `labels` are notebook-level names read by the
# event-detection cell that follows.
features, labels = parse_audio_files(parent_dir, dirs)
print(features.shape)
print(labels.shape)
print("Done parsing audio")

Starting Audio parsing
0 AmbientRecordings
(1010, 193)
Done parsing audio


In [20]:
# Collect the labels of recordings whose summed feature vector is within
# +/-5% of the next recording's.
# NOTE(review): the list is called eventChanges, yet the test keeps pairs
# that are nearly EQUAL -- confirm this is the intended selection.
print(labels.shape)
print("features")
# One scalar per recording: the plain sum of its feature vector.
sums = [sum(feature_row) for feature_row in features]
max_sum = max(sums)
print("sums")
# Express every sum as a fraction of the largest one.
ratios = [total / max_sum for total in sums]
print("finding events")
eventChanges = []
# Walk neighbouring pairs (i, i+1); the last recording has no successor.
for index, (current, following) in enumerate(zip(ratios, ratios[1:])):
    if 0.95 < current / following < 1.05:
        eventChanges.append(labels[index])

(1010,)
features
sums
finding events


In [22]:
# Report how many recordings were flagged, then list their labels.
print(np.asarray(eventChanges).shape)
print(eventChanges)

(266,)
['Sound-Data/renameThese\\AmbientRecordings\\145654-3-0-0.wav', 'Sound-Data/renameThese\\AmbientRecordings\\145754-3-0-3.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150034-3-0-11.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150054-3-0-12.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150134-3-0-14.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150234-3-0-17.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150254-3-0-18.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150314-3-0-19.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150354-3-0-21.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150514-3-0-25.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150554-3-0-27.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150714-3-0-31.wav', 'Sound-Data/renameThese\\AmbientRecordings\\150914-3-0-37.wav', 'Sound-Data/renameThese\\AmbientRecordings\\151114-3-0-43.wav', 'Sound-Data/renameThese\\AmbientRecordings\\151234-3-0-47.wav', 'Sound-Data/renameThese\\AmbientRe

In [8]:
# Extract features for every file matching the default "*.wav" pattern under
# the Ambient_1_3 and Ambient_1_4 folders of Sound-Data/renameThese.
print("Starting Audio parsing")
parent_dir = 'Sound-Data/renameThese'
dirs = ["Ambient_1_3","Ambient_1_4"]
# Rebinds the notebook-level `features` and `labels` used by the
# event-detection cell.
features, labels = parse_audio_files(parent_dir, dirs)
print(features.shape)
print(labels.shape)
print("Done parsing audio")

Starting Audio parsing
0 Ambient_1_3
1 Ambient_1_4
(332, 193)
(332,)
Done parsing audio


In [5]:
# Collect the labels of recordings whose summed feature vector is within
# +/-5% of the next recording's.
# NOTE(review): the list is called eventChanges, yet the test keeps pairs
# that are nearly EQUAL -- confirm this is the intended selection.
print(labels.shape)
print("features")
# One scalar per recording: the plain sum of its feature vector.
sums = [sum(feature_row) for feature_row in features]
max_sum = max(sums)
print("sums")
# Express every sum as a fraction of the largest one.
ratios = [total / max_sum for total in sums]
print("finding events")
eventChanges = []
# Walk neighbouring pairs (i, i+1); the last recording has no successor.
for index, (current, following) in enumerate(zip(ratios, ratios[1:])):
    if 0.95 < current / following < 1.05:
        eventChanges.append(labels[index])

(255,)
features
sums
finding events


In [7]:
# Report how many recordings were flagged, then list their labels.
print(np.asarray(eventChanges).shape)
print(eventChanges)

(38,)
['095010.wav', '095838.wav', '100018.wav', '100238.wav', '100338.wav', '101104.wav', '102414.wav', '143105.wav', '143511.wav', '150245.wav', '150516.wav', '150905.wav', '152400.wav', '152605.wav', '154606.wav', '154644.wav', '160755.wav', '164644.wav', '165452.wav', '165512.wav', '171511.wav', '175420.wav', '181310.wav', '182315.wav', '182435.wav', '182455.wav', '182515.wav', '182555.wav', '182644.wav', '182720.wav', '183105.wav', '183511.wav', '185601.wav', '190524.wav', '193105.wav', '194329.wav', '194429.wav', '194631.wav']
