In [75]:
from __future__ import print_function, division

import os
import glob
import sys
import os
import glob
import csv

from os import walk

sys.path.append('..')

import numpy as np
import pandas as pd
from pymir import AudioFile

In [76]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [77]:
# constants
# Number of MFCC coefficients kept per segment: get_feature_df slices
# spectrum.mfcc2() to this length and names the columns mfcc0..mfcc{N_MFCC-1}.
# NOTE: a later cell reassigns N_MFCC to 14 before building the 100 ms dataset,
# and get_feature_df reads the global at call time.
N_MFCC = 32
# Number of chroma values kept per segment (columns chroma0..chroma{N_CHROMA-1}).
N_CHROMA = 12
# Multiplier applied to SEGMENT_TIME * sampleRate when sizing a segment.
FRAME_SIZE = 1

In [87]:
def get_feature_df(audio_path, person_id, person_name, person_gender, file_name, chapter_name, speech_text, SEGMENT_TIME = 0.1):
    """Build a per-segment feature table for one audio recording.

    The recording is opened with ``AudioFile.open`` and cut into
    Hamming-windowed segments. The segment length is the smallest power of
    two that is at least ``SEGMENT_TIME * sampleRate * FRAME_SIZE`` samples.
    Every segment except the last one becomes one row of the result.

    Parameters
    ----------
    audio_path
        Location of the audio file, passed straight to ``AudioFile.open``.
    person_id, person_name, person_gender, file_name, chapter_name, speech_text
        Metadata copied onto every row as the columns ``pid``, ``pname``,
        ``pgender``, ``fname``, ``chname`` and ``text``.
    SEGMENT_TIME : float, default 0.1
        Requested segment duration; it is multiplied by the file's sample
        rate (and ``FRAME_SIZE``) to obtain a length in samples.

    Returns
    -------
    pandas.DataFrame
        One row per segment with the metadata columns, ``zcr``, the spectral
        statistics (``centroid``, ``crest``, ``mean``, ``flatness``,
        ``kurtosis``, ``skewness``, ``variance``, ``spread``, ``rolloff``),
        ``mfcc0..mfcc{N_MFCC-1}`` and ``chroma0..chroma{N_CHROMA-1}``.

    Raises
    ------
    ValueError
        If the requested segment length is not a positive number of samples.
    """
    # Imported here because the notebook never imports `math` explicitly; the
    # name was only available through the `%pylab inline` namespace.
    import math

    audio_data = AudioFile.open(audio_path)

    requested_samples = SEGMENT_TIME * audio_data.sampleRate * FRAME_SIZE
    if not requested_samples > 0:
        raise ValueError(
            "SEGMENT_TIME * sampleRate * FRAME_SIZE must be positive, got {!r}"
            .format(requested_samples))

    # Smallest power of two >= requested_samples. Integer bit arithmetic is
    # exact, whereas ceil(log(x, 2)) can overshoot by one on exact powers of
    # two because of floating-point error in the logarithm.
    SEGMENT_SIZE = 1 << (int(math.ceil(requested_samples)) - 1).bit_length()

    # Drop the final frame, which comes back empty.
    fixed_frames = audio_data.frames(SEGMENT_SIZE, np.hamming)[:-1]
    N_SEGMENTS = len(fixed_frames)

    spectra = [frame.spectrum() for frame in fixed_frames]

    def repeated(value):
        # Metadata is constant for the whole file: one copy per segment.
        return pd.Series([value] * N_SEGMENTS)

    def per_spectrum(method_name):
        # Call the named zero-argument statistic on every segment spectrum.
        return pd.Series([getattr(spectrum, method_name)() for spectrum in spectra])

    features = pd.DataFrame(data={
        'pid': repeated(person_id),
        'pname': repeated(person_name),
        'pgender': repeated(person_gender),
        'fname': repeated(file_name),
        'chname': repeated(chapter_name),
        'text': repeated(speech_text),
        'zcr': pd.Series([frame.zcr() for frame in fixed_frames]),
        'centroid': per_spectrum('centroid'),
        'crest': per_spectrum('crest'),
        'mean': per_spectrum('mean'),
        'flatness': per_spectrum('flatness'),
        'kurtosis': per_spectrum('kurtosis'),
        'skewness': per_spectrum('skewness'),
        'variance': per_spectrum('variance'),
        'spread': per_spectrum('spread'),
        'rolloff': per_spectrum('rolloff'),
    })

    # N_MFCC / N_CHROMA are read from the notebook globals at call time, so a
    # later reassignment of either constant changes the number of columns.
    mfcc2 = pd.DataFrame([spectrum.mfcc2()[:N_MFCC] for spectrum in spectra],
                         columns=['mfcc{}'.format(i) for i in range(N_MFCC)])
    chroma = pd.DataFrame([spectrum.chroma()[:N_CHROMA] for spectrum in spectra],
                          columns=['chroma{}'.format(i) for i in range(N_CHROMA)])

    # All three frames share the default 0..N_SEGMENTS-1 index, so the joins
    # line rows up segment by segment.
    return features.join(mfcc2).join(chroma)

In [89]:
def create_db(filepath, SEGMENT_TIME = 0.1, test = False,
              base_dir = os.path.join("..", "data", "dev-clean"),
              info_path = "../data_extract/INFO.TXT"):
    """Extract features for every recording listed in the index and save them.

    Each row of the pipe-delimited index file names one recording through its
    ``id``, ``chapter`` and ``file`` columns. The recording is located at
    ``base_dir/<id>/<chapter>/<file>`` and passed to ``get_feature_df``
    together with the ``name``, ``sex`` and ``content`` columns. The
    per-recording tables are stacked with a fresh 0..n-1 index and written to
    an HDF5 file under the key ``'dataset'``.

    Parameters
    ----------
    filepath
        Destination passed to ``DataFrame.to_hdf``.
    SEGMENT_TIME : float, default 0.1
        Segment duration forwarded to ``get_feature_df``.
    test : bool, default False
        When true, stop after the first index row (quick smoke run).
    base_dir : str
        Root directory of the audio tree. Defaults to the previously
        hard-coded ``../data/dev-clean``.
    info_path : str
        Path of the index file. Defaults to the previously hard-coded
        ``../data_extract/INFO.TXT``.
    """
    per_file_features = []

    # `with` guarantees the index file is closed even if extraction fails.
    with open(info_path) as info_file:
        # Pipe-delimited, no quoting. The original passed quotechar='|' (the
        # same character as the delimiter); recent Python releases reject such
        # a dialect, and older ones read the leading '|' of an empty field as
        # an opening quote. QUOTE_NONE parses every other row identically.
        # NOTE(review): confirm INFO.TXT contains no deliberately quoted fields.
        reader = csv.DictReader(info_file, delimiter='|', quoting=csv.QUOTE_NONE)

        for count, row in enumerate(reader):
            file_path = os.path.join(base_dir, row['id'], row['chapter'], row['file'])
            per_file_features.append(
                get_feature_df(file_path, row['id'], row['name'], row['sex'],
                               row['file'], row['chapter'], row['content'],
                               SEGMENT_TIME))
            if count % 10000 == 0:
                print(count, file_path)
            if test:
                break

    # Concatenate once instead of DataFrame.append per file: append copied the
    # whole accumulated table on every iteration and no longer exists in
    # pandas 2.x. pd.concat refuses an empty list, so keep the original
    # "empty index -> empty table" behaviour explicitly.
    if per_file_features:
        audio_features_dataset = pd.concat(per_file_features, ignore_index=True)
    else:
        audio_features_dataset = pd.DataFrame()

    audio_features_dataset.to_hdf(filepath, key='dataset')
    print("DB Created")

In [90]:
# DB with 100 ms segments
# Overrides the N_MFCC = 32 set in the constants cell; get_feature_df reads the
# global at call time, so this run keeps 14 MFCC columns per segment.
N_MFCC = 14
# Output HDF5 file for this dataset.
path2 = '../data_extract/features_dataset_2.h5'
# Second positional argument is SEGMENT_TIME (0.1 -> the 100 ms segments above).
create_db(path2,0.1)

0 ../data/dev-clean/84/121123/84-121123-0016.wav


  return (numerator * 1.0) / denominator
  return maxSpectrum / spectralSum
  log_a = np.log(a)
  return geometricMean / arithmeticMean
  return nx.log(x)


DB Created


In [68]:
# Initial DB
# path1 = '../data_extract/features_datasetdemo.h5'
# create_db(path1, test=True)  # pass `test` by keyword: a positional True would bind to SEGMENT_TIME