In [8]:
from __future__ import print_function, division

import os
import glob
import sys
import os
import glob
import csv

from os import walk

sys.path.append('..')

import numpy as np
import pandas as pd
from pymir import AudioFile

In [19]:
# Populate the interactive namespace from numpy and matplotlib (see the cell output)
# using the "inline" backend option.
# NOTE(review): names pulled in this way are hard to trace back to their module;
# explicit imports would make the notebook's dependencies clearer.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [9]:
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav

In [21]:
# constants
# Frame size handed to AudioFile.frames() in get_feature_df
# (presumably a length in samples — confirm against pymir).
SEGMENT_SIZE = 32768
# Number of 'mfcc<i>' columns created from each spectrum's mfcc2() output.
N_MFCC = 32
# Number of 'chroma<i>' columns created from each spectrum's chroma() output.
N_CHROMA = 12

In [22]:
def get_feature_df(audio_path, person_id, person_name, person_gender, file_name, chapter_name, speech_text):
    """Build a per-segment feature table for a single audio file.

    The file is opened with ``AudioFile.open`` and cut into frames via
    ``frames(SEGMENT_SIZE, np.hamming)``; one spectrum is taken per frame.
    Each row of the result corresponds to one frame.

    Parameters
    ----------
    audio_path
        Path handed to ``AudioFile.open``.
    person_id, person_name, person_gender, file_name, chapter_name, speech_text
        Metadata values repeated on every row under the columns ``pid``,
        ``pname``, ``pgender``, ``fname``, ``chname`` and ``text``.

    Returns
    -------
    pandas.DataFrame
        The metadata columns, the frame ``zcr``, the spectral statistics
        (``centroid``, ``crest``, ``mean``, ``flatness``, ``kurtosis``,
        ``skewness``, ``variance``, ``spread``, ``rolloff``), plus
        ``mfcc0..mfcc{N_MFCC-1}`` and ``chroma0..chroma{N_CHROMA-1}``.
    """
    segments = AudioFile.open(audio_path).frames(SEGMENT_SIZE, np.hamming)
    n_rows = len(segments)
    spectra = [segment.spectrum() for segment in segments]

    def repeated(value):
        # One copy of a metadata value per segment.
        return pd.Series([value] * n_rows)

    def per_spectrum(method_name):
        # Call the named zero-argument method on every spectrum.
        return [getattr(spectrum, method_name)() for spectrum in spectra]

    zero_crossings = pd.Series([segment.zcr() for segment in segments])

    stat_names = ('centroid', 'crest', 'mean', 'flatness', 'kurtosis',
                  'skewness', 'variance', 'spread', 'rolloff')
    spectral_stats = [(stat, pd.Series(per_spectrum(stat))) for stat in stat_names]

    # Keys are inserted in the same order as the original dict literal.
    table = {
        'pid': repeated(person_id),
        'pname': repeated(person_name),
        'pgender': repeated(person_gender),
        'fname': repeated(file_name),
        'chname': repeated(chapter_name),
        'text': repeated(speech_text),
        'zcr': zero_crossings,
    }
    table.update(spectral_stats)
    features = pd.DataFrame(data=table)

    # Vector-valued features: one column per coefficient, joined on the row index.
    vector_features = (
        ('mfcc{}', 'mfcc2', N_MFCC),
        ('chroma{}', 'chroma', N_CHROMA),
    )
    for column_template, method_name, width in vector_features:
        block = pd.DataFrame(per_spectrum(method_name),
                             columns=[column_template.format(i) for i in range(width)])
        features = features.join(block)

    return features

In [49]:
# Build the feature dataset: one get_feature_df() table per row of INFO.TXT,
# stacked into a single DataFrame and written to an HDF5 file.
BASE_DIR = os.path.join("..","data","dev-clean")

# Per-file tables are collected here and concatenated once after the loop.
# Growing a DataFrame with append() inside the loop re-copies every accumulated
# row on each iteration, and DataFrame.append was removed in pandas 2.0.
feature_frames = []

# The context manager closes INFO.TXT once all rows have been consumed.
# NOTE(review): delimiter and quotechar are both '|'. They are left untouched so
# parsing stays identical, but this dialect is unusual and may be rejected by
# newer Python versions — confirm and consider quoting=csv.QUOTE_NONE.
with open("../INFO.TXT") as info_file:
    reader = csv.DictReader(info_file, delimiter='|', quotechar='|')
    for row in reader:
        # Audio files are laid out as <BASE_DIR>/<id>/<chapter>/<file>.
        file_path = os.path.join(BASE_DIR, row['id'], row['chapter'], row['file'])
        df = get_feature_df(file_path, row['id'], row['name'], row['sex'],
                            row['file'], row['chapter'], row['content'])
        feature_frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame so an
# INFO.TXT without data rows still yields an (empty) dataset as before.
if feature_frames:
    audio_features_dataset = pd.concat(feature_frames, ignore_index=True)
else:
    audio_features_dataset = pd.DataFrame()

audio_features_dataset.to_hdf('../data_extract/features_datasetdemo.h5', key='dataset')
print("Saved features dataset in data directory.")

Saved features dataset in data directory.


# audio_features_dataset.boxplot(column='centroid', by='pgender')

In [74]:
# Usage of h5 file
# Reload a saved feature table from HDF5 into `df`.
# NOTE(review): this reads 'features_dataset.h5', whereas the extraction cell in
# this notebook writes 'features_datasetdemo.h5' — confirm which file is intended.

# NOTE(review): h5py is imported here but not referenced in the visible cells.
import h5py
df = pd.read_hdf('../data_extract/features_dataset.h5')

In [76]:
# Summary statistics of the reloaded feature table.
# NOTE(review): the saved output shows mfcc0 with a mean and min of -inf (std NaN)
# and per-column counts that differ (10851 vs 10859), so some rows appear to
# hold -inf or missing values — consider cleaning them before further analysis.
df.describe()

Unnamed: 0,centroid,crest,flatness,kurtosis,rolloff,skewness,spread,zcr,mfcc0,mfcc1,...,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,chroma8,chroma9,chroma10,chroma11
count,10851.0,10851.0,10851.0,10859.0,10859.0,10859.0,10851.0,10859.0,10859.0,10851.0,...,10851.0,10851.0,10851.0,10851.0,10851.0,10851.0,10851.0,10851.0,10851.0,10851.0
mean,1834.217904,0.002563,0.378564,83.604751,3784.586442,6.768051,1802.726801,0.114316,-inf,-1.032598,...,0.619572,0.616183,0.621235,0.64149,0.654959,0.686236,0.688353,0.701504,0.718002,0.706642
std,683.580678,0.002099,0.140922,159.712456,1448.216873,3.715923,443.908859,0.056649,,3.276575,...,0.20824,0.203634,0.20209,0.198064,0.193803,0.197312,0.193607,0.197096,0.201016,0.199972
min,549.242288,0.00013,0.06417,-3.0,0.0,0.0,643.819857,0.0,-inf,-12.635722,...,0.092588,0.081251,0.101722,0.09519,0.102626,0.110457,0.093933,0.104799,0.098659,0.114369
25%,1356.809071,0.001536,0.267266,29.517513,2669.026549,4.661108,1456.808167,0.076584,-1.694658,-3.168564,...,0.46283,0.461419,0.471981,0.494865,0.514308,0.539023,0.548537,0.556715,0.568184,0.556661
50%,1694.945686,0.002121,0.36908,48.412889,3566.676839,5.918445,1773.331919,0.114868,5.630278,-0.815866,...,0.606171,0.60314,0.608539,0.628442,0.646473,0.682909,0.687804,0.703749,0.723234,0.704209
75%,2163.504697,0.002987,0.482141,87.121971,4876.411352,7.802806,2127.997351,0.153137,9.941875,1.228012,...,0.771339,0.76214,0.764511,0.782352,0.797715,0.83706,0.837311,0.854605,0.888749,0.872907
max,6013.424498,0.055732,0.833814,4248.572366,7702.166616,59.609428,3141.165091,0.343506,20.39174,11.147025,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
