# SpEAR-speech-database

In this notebook, we download the required data, extract the MFCC features from the dataset, and store them as a PyTorch tensor file.

First, download the zipped data files, extract them into the raw data directory, and delete the downloaded archive.

In [47]:
import sys
import time
import urllib
import os
import zipfile

# Directory (relative to the notebook) that receives the downloaded archive and
# its extracted contents. The trailing "/" matters: paths below are built by
# plain string concatenation (e.g. DATA_DIR + FILENAME + ".zip").
DATA_DIR = "../data/raw/"
# Base name (without extension) used for the downloaded zip archive.
FILENAME = "SpEAR"


def reporthook(count, block_size, total_size):
    """Progress callback for ``urllib.request.urlretrieve``.

    Overwrites a single status line on stdout with the amount downloaded,
    the average speed and the elapsed time. The percentage is only shown
    when the total size is known.

    Args:
        count: Number of blocks transferred so far. The first call (0) only
            starts the timer and prints nothing.
        block_size: Size of one block in bytes.
        total_size: Total download size in bytes. Non-positive (typically -1)
            when the server does not report a content length.
    """
    global start_time
    if count == 0:
        start_time = time.time()
        return

    duration = time.time() - start_time
    progress_size = int(count * block_size)
    # A very fast first block can arrive with zero measurable elapsed time.
    speed = int(progress_size / (1024 * duration)) if duration > 0 else 0
    megabytes = progress_size / (1024 * 1024)

    if total_size > 0:
        # The last block is usually only partially filled, so count * block_size
        # can overshoot total_size; clamp to keep the display at <= 100%.
        percent = min(100, int(progress_size * 100 / total_size))
        status = "\r...%d%%, %d MB, %d KB/s, %d seconds passed" % (
            percent, megabytes, speed, duration)
    else:
        # Unknown total size: a percentage would be meaningless (and negative).
        status = "\r...%d MB, %d KB/s, %d seconds passed" % (
            megabytes, speed, duration)

    sys.stdout.write(status)
    sys.stdout.flush()

# `import urllib` alone does not guarantee that the `request` submodule is
# loaded; import it explicitly so this cell works on a fresh kernel.
import urllib.request

SPEAR_URL = "https://github.com/dingzeyuli/SpEAR-speech-database/archive/master.zip"

# urlretrieve does not create missing parent directories for the target file.
os.makedirs(DATA_DIR, exist_ok=True)

# f is the (local_filename, headers) tuple returned by urlretrieve.
f = urllib.request.urlretrieve(SPEAR_URL, DATA_DIR + FILENAME + ".zip", reporthook)

# The context manager closes the archive on exit; no explicit close() needed.
with zipfile.ZipFile(f[0], "r") as zip_ref:
    zip_ref.extractall(DATA_DIR)
print("... data unzipped")

# The archive is no longer needed once its contents are extracted.
os.remove(f[0])
print("zip file deleted")

...-1606451200%, 15 MB, 1400 KB/s, 11 seconds passed... data unzipped
zip file deleted


Then, extract the MFCC features from the data. We start by installing librosa and picking one audio file from the TIMIT subset.
See https://librosa.github.io/librosa/generated/librosa.feature.mfcc.html

In [79]:
!pip install librosa

from os import listdir
import librosa
from librosa.feature import mfcc

# Location of the extracted dataset relative to DATA_DIR. The leading "/" is
# kept as-is because the processed-output path is built elsewhere in this
# notebook as "../data/processed" + SpEAR_NAME.
SpEAR_NAME = "/SpEAR-speech-database-master/data/"
SpEAR_DIR = DATA_DIR + SpEAR_NAME
# Name of the dataset sub-folder to read audio from.
TIMIT = "TIMIT"

# listdir returns entries in arbitrary order, so sort them to pick the same
# file on every run, and keep only .wav files so a stray non-audio entry is
# never selected.
timit_wavs = sorted(
    name for name in listdir(SpEAR_DIR + TIMIT) if name.lower().endswith(".wav")
)
if not timit_wavs:
    raise FileNotFoundError("No .wav files found in {}{}".format(SpEAR_DIR, TIMIT))

# Full path to the selected audio file; later cells use it as-is.
audio_file = "{}{}/{}".format(SpEAR_DIR, TIMIT, timit_wavs[0])



Let's hear that audio!

In [80]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import os
import scipy
import librosa.display
from IPython.display import Audio

# Render an inline player for the selected file. Keep this as the last
# expression of the cell so the notebook displays it.
Audio(audio_file)

Add noise to the audio:

https://www.kaggle.com/huseinzol05/sound-augmentation-librosa

In [86]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.io.wavfile` is loaded.
from scipy.io import wavfile

# Seeded generator so the noisy clip is identical on every Restart & Run All.
noise_rng = np.random.RandomState(0)

sample_rate, samples = wavfile.read(audio_file)
# Convert to float64 up front so the noise is added in floating point rather
# than in the file's original sample dtype.
y_noise = samples.astype('float64')
# you can take any distribution from https://docs.scipy.org/doc/numpy-1.13.0/reference/routines.random.html
# Noise level: a random fraction (at most 15%) of the clip's largest sample value.
noise_amp = 0.15 * noise_rng.uniform() * np.amax(y_noise)
# Draw noise with the full sample shape so (n_samples, n_channels) data works
# as well as mono (n_samples,) data.
y_noise = y_noise + noise_amp * noise_rng.normal(size=y_noise.shape)
# wavfile.read yields (n_samples, n_channels) for multi-channel audio, whereas
# Audio expects (n_channels, n_samples); .T is a no-op for 1-D mono data.
Audio(y_noise.T, rate=sample_rate)

Extract the MFCC features

In [None]:
# audio_file already holds the full path to the clip; prefixing it with
# SpEAR_DIR/TIMIT again would point at a non-existent file.
y, sr = librosa.load(audio_file)

# Pass the signal by keyword (current librosa rejects positional audio
# arguments) and forward the sampling rate returned by load().
mfcc_features = mfcc(y=y, sr=sr)

Saving the Features

In [76]:
import torch

# Output directory mirrors the raw dataset layout under ../data/processed.
PROCESSED = "../data/processed{}{}/".format(SpEAR_NAME, TIMIT)

mfcc_pt = torch.from_numpy(mfcc_features)

# An already-existing directory is not an error (e.g. on a re-run); any real
# failure to create it is allowed to propagate instead of being hidden.
if os.path.isdir(PROCESSED):
    print("Directory %s already exists" % PROCESSED)
else:
    os.makedirs(PROCESSED)
    print("Successfully created the directory %s " % PROCESSED)

# audio_file is a full path, so keep only its base name and swap the extension;
# appending the whole path to PROCESSED would target a non-existent directory.
feature_name = os.path.splitext(os.path.basename(audio_file))[0] + ".pt"
torch.save(mfcc_pt, "{}{}".format(PROCESSED, feature_name))

print("features saved")

Successfully created the directory ../data/processed/SpEAR-speech-database-master/data/TIMIT/ 
