# Gender Classification


## Imports

In [1]:
import os
import sys
from scrape import get_remote_tgz_files, download_extract_files
from parse import paths, parse_readme
from pathlib import Path
import sqlite3
import librosa
import librosa.display
import IPython.display
import numpy

## Input variables

In [2]:
# Remote directory handed to get_remote_tgz_files() in the scrape cell.
SOURCE_URL = "http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/Audio/Main/16kHz_16bit/"
# Local folder that receives the downloaded data and is later listed for samples.
DATA_DIR = "data"
# File name of the SQLite database that stores the extracted features.
DB = "genderclass.db"

## Scrape data
Only files not already downloaded and extracted will be fetched.

In [3]:
# Ask the scraper for the remote file list under SOURCE_URL, then hand that
# list to the downloader together with the local target folder.
# NOTE(review): presumably the helper skips archives that were already
# downloaded and extracted, as the notebook text says - confirm in scrape.py.
urls = get_remote_tgz_files(SOURCE_URL)
data_folder = Path(DATA_DIR)
download_extract_files(urls, data_folder)

## Create DB to hold features

In [None]:
# (Re)create the `features` table in the SQLite file named by DB.
# WARNING: an existing `features` table is dropped first, so every previously
# stored row is lost when this cell is re-run.
conn = sqlite3.connect(DB)
try:
    db = conn.cursor()
    db.execute('''DROP TABLE IF EXISTS features''')
    # One row per audio file. `cepstrum` receives the raw MFCC bytes and
    # `n_mfcc` / `n_frames` receive the two dimensions of that matrix, which
    # a reader needs to reshape the blob (see the extraction cell).
    db.execute('''CREATE TABLE features
                     (file text, 
                      path text, 
                      female integer, 
                      age text, 
                      language text, 
                      dialect text,
                      cepstrum blob,
                      n_mfcc integer,
                      n_frames integer)''')
    conn.commit()
finally:
    # Always release the database handle, even when a statement above fails;
    # otherwise the connection (and its file lock) stays open in the kernel.
    conn.close()

## Extract features
Silence is trimmed from the beginning and end of each file.

MFCCs are extracted (with default librosa settings).

The features are saved into the database.

In [None]:
import datetime
import traceback  # needed by the error handler below; not imported elsewhere in the notebook

# Extract MFCC features for every sample folder under DATA_DIR and store one
# row per audio file in the `features` table of the SQLite database DB.
# Start and end timestamps are printed so the run time can be read off.
print(datetime.datetime.now())

# Loop-invariant insert statement: one placeholder per `features` column.
INSERT_FEATURES_SQL = '''INSERT INTO features VALUES
             (?,?,?,?,?,?,?,?,?)'''

data_folder = Path(DATA_DIR)
samples = os.listdir(data_folder)

# Names of the sample folders that were skipped because the wav folder or
# README could not be located, or because the README has no 'gender' entry.
# The sample name is recorded (rather than wav_folder, which may be None) so
# that skipped entries can actually be identified afterwards.
skipped = []

conn = sqlite3.connect(DB)
try:
    db = conn.cursor()
    for sample in samples:
        wav_folder, readme_path = paths(data_folder, sample)

        if wav_folder is None or readme_path is None:
            skipped.append(sample)
            continue

        meta = parse_readme(readme_path)
        if 'gender' not in meta:
            skipped.append(sample)
            continue

        try:
            for wav in os.listdir(wav_folder):
                # sr=None keeps the file's native sample rate instead of resampling.
                y, fs = librosa.load(wav_folder / wav, sr=None)
                # Strip leading/trailing silence; the returned interval is not needed.
                y, idx = librosa.effects.trim(y)
                # Keyword arguments are required by recent librosa releases and
                # are accepted by older ones as well.
                mfcc = librosa.feature.mfcc(y=y, sr=fs)

                # NOTE(review): the blob carries no dtype information, so a
                # reader must decode it with the dtype librosa produced here
                # (TODO confirm) and reshape it to (n_mfcc, n_frames).
                db.execute(
                    INSERT_FEATURES_SQL,
                    (
                        wav[:-4],  # file name without its 4-character extension (e.g. ".wav")
                        str(wav_folder),
                        meta['gender'] == 'female',
                        meta.get('age range', ''),
                        meta.get('language', ''),
                        meta.get('pronunciation dialect', ''),
                        mfcc.tobytes(),
                        mfcc.shape[0],
                        mfcc.shape[1],
                    ),
                )

            # Commit once per sample folder.
            conn.commit()
        except Exception as e:
            # Best effort: report the failing folder and carry on with the next
            # one. Rows already inserted for this folder stay pending and are
            # committed together with the next successful folder.
            print(wav_folder)
            print("type error: " + str(e))
            print(traceback.format_exc())
finally:
    # Release the database handle even if something unexpected escapes the loop.
    conn.close()

print(datetime.datetime.now())
print(len(skipped))

2019-03-25 21:43:11.750068


In [None]:
## Remember that you might have to transpose mfcc to train!

In [None]:
## Possibly revisit narrower frames:
#spec = librosa.feature.melspectrogram(y=y, sr=fs, S=None, n_fft=400, hop_length=160, power=2.0)
#mfcc1 = librosa.feature.mfcc(y, fs, S=numpy.log(spec))

#librosa.display.specshow(mfcc1, sr=fs, x_axis='time', hop_length=160)
#librosa.display.specshow(mfcc2, sr=fs, x_axis='time')

#IPython.display.Audio(y, rate=fs)




#db.execute('''SELECT * FROM features''')
#res = db.fetchall()
#print(res)
#mfcc2 = numpy.frombuffer(res[0][6])
#mfcc2 = numpy.reshape(mfcc2,(res[0][7], res[0][8]))
#librosa.display.specshow(mfcc2, sr=16000, x_axis='time')
#break