In [3]:
import pdb
import errno
import os
from os import path
import sys
import tarfile
import fnmatch
import pandas as pd
import subprocess
import argparse
from mapping import phone_maps
import python_speech_features as psf
import scipy.io.wavfile as wav
import numpy as np
from pathlib import Path
# Phone-symbol mapper built from the "kaldi_60_48_39.map" mapping file.
# read_transcript() calls its map_symbol_reduced() on every phone label it
# reads from a .PHN transcript.
# NOTE(review): the file name suggests a 60 -> 48 -> 39 phone folding table;
# confirm against the `mapping` module.
timit_phone_map = phone_maps(mapping_file="kaldi_60_48_39.map")

In [4]:
def clean(word):
    """Return ``word`` lower-cased with the characters . , ; " ! ? : - removed."""
    # A single translation table deletes every unwanted punctuation mark in one pass.
    unwanted = str.maketrans('', '', '.,;"!?:-')
    return word.lower().translate(unwanted)

def compute_mfcc(wav_file, n_delta=0, samplerate=16000):
    """Compute MFCC features for an audio signal, optionally with delta features.

    Args:
        wav_file: The audio samples handed to ``psf.mfcc`` (despite the name,
            this is the signal itself, not a path).
        n_delta: 0 returns the plain MFCCs, 1 appends first-order deltas,
            2 appends first-order deltas and delta-deltas.
        samplerate: Sampling rate in Hz forwarded to ``psf.mfcc``. The default
            of 16000 is the value ``psf.mfcc`` used implicitly before this
            parameter existed.

    Returns:
        The MFCC matrix with the requested delta blocks stacked horizontally
        after it. For any other ``n_delta`` the integer ``0`` is returned
        (kept for backward compatibility with existing callers).
    """
    mfcc_feat = psf.mfcc(wav_file, samplerate=samplerate)
    if n_delta == 0:
        return mfcc_feat
    if n_delta not in (1, 2):
        return 0

    delta_feat = psf.delta(mfcc_feat, 1)
    if n_delta == 1:
        return np.hstack((mfcc_feat, delta_feat))

    # The second argument of psf.delta is the window half-width (number of
    # neighbouring frames), not the derivative order. A second-order feature
    # is therefore the delta of the delta, not psf.delta(mfcc_feat, 2).
    delta_delta_feat = psf.delta(delta_feat, 1)
    return np.hstack((mfcc_feat, delta_feat, delta_delta_feat))

def read_transcript(full_wav):
    """Read the phone transcript that belongs to a converted wav file.

    The transcript path is derived by dropping the last 8 characters of
    ``full_wav`` (the "_rif.wav" suffix appended during conversion) and adding
    ".PHN". Each non-blank line is expected to hold
    ``<start> <end> <phone>`` separated by whitespace.

    Args:
        full_wav: Path string of the converted wav file, ending in "_rif.wav".

    Returns:
        A tuple ``(trans, durations_int)`` where ``trans`` is the list of phone
        labels after ``timit_phone_map.map_symbol_reduced`` and
        ``durations_int`` is a parallel list of ``[start, end]`` integer pairs.

    Raises:
        ValueError: If a non-blank line has fewer than three fields or its
            first two fields are not integers.
    """
    trans_file = full_wav[:-8] + ".PHN"
    labels = []
    durations_int = []
    with open(trans_file, "r") as file:
        for line_no, line in enumerate(file, start=1):
            # split() with no argument tolerates tabs and repeated spaces.
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no segment.
                continue
            if len(fields) < 3:
                raise ValueError(
                    "{}:{}: expected '<start> <end> <phone>', got {!r}".format(
                        trans_file, line_no, line.rstrip("\n")))
            durations_int.append([int(fields[0]), int(fields[1])])
            labels.append(fields[-1])
    trans = [timit_phone_map.map_symbol_reduced(symbol=phoneme) for phoneme in labels]
    return trans, durations_int

def _preprocess_data(args):
    """Convert TIMIT audio, compute per-phone MFCC features and store them as HDF.

    Steps:
      1. Find every ``*.WAV`` file below ``args.timit`` and, unless
         ``args.preprocessed`` is truthy, convert it with ``sox`` into a
         sibling ``<name>_rif.wav`` file.
      2. For every converted file, read its phone transcript and compute MFCC
         features separately for each phone segment; every feature frame is
         labelled with the phone of the segment it came from.
      3. Write a DataFrame with the columns "features" and "labels" to
         ``args.features_file`` under the key "timit".

    Args:
        args: Object exposing ``timit`` (root directory), ``preprocessed``
            (skip the sox conversion when truthy), ``n_delta`` (convertible to
            int, forwarded to ``compute_mfcc``) and ``features_file`` (output
            path).

    Raises:
        subprocess.CalledProcessError: If a ``sox`` conversion fails.
    """
    target = args.timit
    print(target)
    # Honour the caller's flag; it used to be overwritten with False here,
    # which made the option dead.
    preprocessed = bool(args.preprocessed)
    print("Preprocessing data")
    print(preprocessed)
    # Assume data is downloaded from LDC - https://catalog.ldc.upenn.edu/ldc93s1
    # We convert the .WAV (NIST sphere format) into MSOFT .wav
    # creates _rif.wav as the new .wav file
    if preprocessed:
        print("Data is already preprocessed, just gonna read it")

    full_wavs = []
    # list() snapshots the directory walk before sox starts adding files to it.
    for filename in list(Path(target).rglob("*.WAV")):
        sph_file = str(filename)
        # rglob is case-insensitive on Windows, so the generated "_rif.wav"
        # files would match as well; keep only the upper-case originals.
        if not sph_file.endswith('.WAV'):
            continue
        wav_file = sph_file[:-4] + "_rif.wav"
        if not preprocessed:
            print("converting {} to {}".format(sph_file, wav_file))
            subprocess.check_call(["sox", sph_file, wav_file])
        full_wavs.append(wav_file)

    print("Preprocessing Complete")
    print("Building features")

    n_delta = int(args.n_delta)
    mfcc_features = []
    mfcc_labels = []

    for full_wav in full_wavs:
        print("Computing features for file: ", full_wav)
        trans, durations = read_transcript(full_wav=full_wav)
        _sample_rate, signal = wav.read(full_wav)

        # Pair each phone with its own segment. The previous index-based loop
        # labelled segment k with phone k-1 for every k >= 1. Iterating the
        # pairs also copes with an empty transcript.
        for phoneme, (start, end) in zip(trans, durations):
            segment_feats = compute_mfcc(signal[start:end], n_delta=n_delta)
            # Extending with the 2-D result adds one entry per frame, so no
            # repeated np.vstack over a growing array is needed.
            mfcc_features.extend(segment_feats)
            mfcc_labels.extend([phoneme] * len(segment_feats))

    #Possibly separate features phone-wise and dump them? (np.where() could be used)
    timit_df = pd.DataFrame()
    timit_df["features"] = mfcc_features
    timit_df["labels"] = mfcc_labels

    # Make sure the output directory exists before pandas opens the file.
    if isinstance(args.features_file, (str, os.PathLike)):
        Path(args.features_file).parent.mkdir(parents=True, exist_ok=True)
    timit_df.to_hdf(args.features_file, key="timit", encoding='UTF-8')

In [5]:
class Args:
    """Settings for the TIMIT TRAIN split, used in place of parsed CLI arguments."""

    # Root directory that is searched for *.WAV files.
    timit = "./TIMIT/TRAIN"
    # Set to True when the *_rif.wav files already exist.
    preprocessed = False
    # Number of delta feature blocks to append to the MFCCs.
    n_delta = 0
    # HDF file that receives the computed features.
    features_file = "./features/mfcc/timit.hdf"
    
class ArgsTest:
    """Settings for the TIMIT TEST split, used in place of parsed CLI arguments."""

    # Root directory that is searched for *.WAV files.
    timit = "./TIMIT/TEST"
    # Set to True when the *_rif.wav files already exist.
    preprocessed = False
    # Number of delta feature blocks to append to the MFCCs.
    n_delta = 0
    # HDF file that receives the computed features.
    features_file = "./features/mfcc_test/timit.hdf"
    

In [7]:
if __name__ == "__main__":
    # Command-line parsing (--timit, --n_delta, --preprocessed) is disabled in
    # this notebook; the Args / ArgsTest classes stand in for the parsed
    # namespace.
    args = Args
    args_test = ArgsTest

    # Build the feature file for each configuration, train split first.
    for run_config in (args, args_test):
        _preprocess_data(run_config)
    print("Completed")


./TIMIT/TRAIN
Preprocessing data
False
converting TIMIT\TRAIN\DR1\FCJF0\SA1.WAV to TIMIT\TRAIN\DR1\FCJF0\SA1_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SA1.WAV to TIMIT\TRAIN\DR1\FDAW0\SA1_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SA2.WAV to TIMIT\TRAIN\DR1\FDAW0\SA2_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SI1271.WAV to TIMIT\TRAIN\DR1\FDAW0\SI1271_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SI1406.WAV to TIMIT\TRAIN\DR1\FDAW0\SI1406_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SI2036.WAV to TIMIT\TRAIN\DR1\FDAW0\SI2036_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SX146.WAV to TIMIT\TRAIN\DR1\FDAW0\SX146_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SX236.WAV to TIMIT\TRAIN\DR1\FDAW0\SX236_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SX326.WAV to TIMIT\TRAIN\DR1\FDAW0\SX326_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SX416.WAV to TIMIT\TRAIN\DR1\FDAW0\SX416_rif.wav
converting TIMIT\TRAIN\DR1\FDAW0\SX56.WAV to TIMIT\TRAIN\DR1\FDAW0\SX56_rif.wav
converting TIMIT\TRAIN\DR1\FDML0\SA1.WAV to TIMIT\TRAIN\DR1\FDML0\S

converting TIMIT\TRAIN\DR1\FTBR0\SX201.WAV to TIMIT\TRAIN\DR1\FTBR0\SX201_rif.wav
converting TIMIT\TRAIN\DR1\FTBR0\SX21.WAV to TIMIT\TRAIN\DR1\FTBR0\SX21_rif.wav
converting TIMIT\TRAIN\DR1\FTBR0\SX291.WAV to TIMIT\TRAIN\DR1\FTBR0\SX291_rif.wav
converting TIMIT\TRAIN\DR1\FTBR0\SX381.WAV to TIMIT\TRAIN\DR1\FTBR0\SX381_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SA1.WAV to TIMIT\TRAIN\DR1\FVFB0\SA1_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SA2.WAV to TIMIT\TRAIN\DR1\FVFB0\SA2_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SI1032.WAV to TIMIT\TRAIN\DR1\FVFB0\SI1032_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SI1510.WAV to TIMIT\TRAIN\DR1\FVFB0\SI1510_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SI2292.WAV to TIMIT\TRAIN\DR1\FVFB0\SI2292_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SX132.WAV to TIMIT\TRAIN\DR1\FVFB0\SX132_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SX222.WAV to TIMIT\TRAIN\DR1\FVFB0\SX222_rif.wav
converting TIMIT\TRAIN\DR1\FVFB0\SX312.WAV to TIMIT\TRAIN\DR1\FVFB0\SX312_rif.wav
converting TIMIT\TRA

converting TIMIT\TRAIN\DR1\MKLW0\SA2.WAV to TIMIT\TRAIN\DR1\MKLW0\SA2_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SI1571.WAV to TIMIT\TRAIN\DR1\MKLW0\SI1571_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SI1844.WAV to TIMIT\TRAIN\DR1\MKLW0\SI1844_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SI2201.WAV to TIMIT\TRAIN\DR1\MKLW0\SI2201_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SX131.WAV to TIMIT\TRAIN\DR1\MKLW0\SX131_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SX221.WAV to TIMIT\TRAIN\DR1\MKLW0\SX221_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SX311.WAV to TIMIT\TRAIN\DR1\MKLW0\SX311_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SX401.WAV to TIMIT\TRAIN\DR1\MKLW0\SX401_rif.wav
converting TIMIT\TRAIN\DR1\MKLW0\SX41.WAV to TIMIT\TRAIN\DR1\MKLW0\SX41_rif.wav
converting TIMIT\TRAIN\DR1\MMGG0\SA1.WAV to TIMIT\TRAIN\DR1\MMGG0\SA1_rif.wav
converting TIMIT\TRAIN\DR1\MMGG0\SA2.WAV to TIMIT\TRAIN\DR1\MMGG0\SA2_rif.wav
converting TIMIT\TRAIN\DR1\MMGG0\SI1079.WAV to TIMIT\TRAIN\DR1\MMGG0\SI1079_rif.wav
converting TIMIT\TRAIN

converting TIMIT\TRAIN\DR1\MRWS0\SX22.WAV to TIMIT\TRAIN\DR1\MRWS0\SX22_rif.wav
converting TIMIT\TRAIN\DR1\MRWS0\SX292.WAV to TIMIT\TRAIN\DR1\MRWS0\SX292_rif.wav
converting TIMIT\TRAIN\DR1\MRWS0\SX382.WAV to TIMIT\TRAIN\DR1\MRWS0\SX382_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SA1.WAV to TIMIT\TRAIN\DR1\MTJS0\SA1_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SA2.WAV to TIMIT\TRAIN\DR1\MTJS0\SA2_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SI1192.WAV to TIMIT\TRAIN\DR1\MTJS0\SI1192_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SI1822.WAV to TIMIT\TRAIN\DR1\MTJS0\SI1822_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SI562.WAV to TIMIT\TRAIN\DR1\MTJS0\SI562_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SX112.WAV to TIMIT\TRAIN\DR1\MTJS0\SX112_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SX202.WAV to TIMIT\TRAIN\DR1\MTJS0\SX202_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SX22.WAV to TIMIT\TRAIN\DR1\MTJS0\SX22_rif.wav
converting TIMIT\TRAIN\DR1\MTJS0\SX292.WAV to TIMIT\TRAIN\DR1\MTJS0\SX292_rif.wav
converting TIMIT\TRAIN\D

Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SA1_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SA2_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SI1377_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SI2007_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SI747_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SX117_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SX207_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SX297_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SX333_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FMEM0\SX387_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FSAH0\SA1_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FSAH0\SA2_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FSAH0\SI1244_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FSAH0\SI1874_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\FSAH0\SI614_rif.wav
Computing feat

Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SA1_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SA2_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SI1291_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SI1381_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SI751_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SX121_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SX211_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SX301_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SX31_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MJWT0\SX391_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MKLS0\SA1_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MKLS0\SA2_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MKLS0\SI1437_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MKLS0\SI1533_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MKLS0\SI2067_rif.wav
Computing feat

Computing features for file:  TIMIT\TRAIN\DR1\MRWS0\SX292_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MRWS0\SX382_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SA1_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SA2_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SI1192_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SI1822_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SI562_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SX112_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SX202_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SX22_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SX292_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTJS0\SX382_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTPF0\SA1_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTPF0\SA2_rif.wav
Computing features for file:  TIMIT\TRAIN\DR1\MTPF0\SI1235_rif.wav
Computing featur

converting TIMIT\TEST\DR1\MREB0\SI1375.WAV to TIMIT\TEST\DR1\MREB0\SI1375_rif.wav
converting TIMIT\TEST\DR1\MREB0\SI2005.WAV to TIMIT\TEST\DR1\MREB0\SI2005_rif.wav
converting TIMIT\TEST\DR1\MREB0\SI745.WAV to TIMIT\TEST\DR1\MREB0\SI745_rif.wav
converting TIMIT\TEST\DR1\MREB0\SX115.WAV to TIMIT\TEST\DR1\MREB0\SX115_rif.wav
converting TIMIT\TEST\DR1\MREB0\SX205.WAV to TIMIT\TEST\DR1\MREB0\SX205_rif.wav
converting TIMIT\TEST\DR1\MREB0\SX25.WAV to TIMIT\TEST\DR1\MREB0\SX25_rif.wav
converting TIMIT\TEST\DR1\MREB0\SX295.WAV to TIMIT\TEST\DR1\MREB0\SX295_rif.wav
converting TIMIT\TEST\DR1\MREB0\SX385.WAV to TIMIT\TEST\DR1\MREB0\SX385_rif.wav
converting TIMIT\TEST\DR1\MRJO0\SA1.WAV to TIMIT\TEST\DR1\MRJO0\SA1_rif.wav
converting TIMIT\TEST\DR1\MRJO0\SA2.WAV to TIMIT\TEST\DR1\MRJO0\SA2_rif.wav
converting TIMIT\TEST\DR1\MRJO0\SI1364.WAV to TIMIT\TEST\DR1\MRJO0\SI1364_rif.wav
converting TIMIT\TEST\DR1\MRJO0\SI1624.WAV to TIMIT\TEST\DR1\MRJO0\SI1624_rif.wav
converting TIMIT\TEST\DR1\MRJO0\SI734.WAV 

Computing features for file:  TIMIT\TEST\DR1\MRJO0\SA2_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SI1364_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SI1624_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SI734_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SX104_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SX14_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SX194_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SX284_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MRJO0\SX374_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MSJS1\SA1_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MSJS1\SA2_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MSJS1\SI1899_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MSJS1\SI639_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MSJS1\SI869_rif.wav
Computing features for file:  TIMIT\TEST\DR1\MSJS1\SX189_rif.wav
Computing features for file: 