In [3]:
import glob
import os
import pandas as pd
import numpy as np
import shutil
import librosa
from tqdm import tqdm
from os import listdir
from scipy.io import wavfile


def extract_spectrum(file_name, file_length=5, bin_factor=4, **kwargs):
    """
    Extract a fixed-length mel spectrogram from the audio file `file_name`.

    The audio is loaded with `librosa.load` using its default settings, then
    truncated or zero-padded at the end to exactly `file_length` seconds so
    that every file yields the same number of time frames.

    Parameters
    ----------
    file_name : str
        Path of the audio file to load.
    file_length : int or float
        Target duration of the signal in seconds.
    bin_factor : int
        Factor by which the time bins are enlarged from the base hop of
        512 samples; the hop length used is ``512 * bin_factor``.
    **kwargs
        Accepted and ignored. Kept so that existing callers that pass extra
        keywords (e.g. ``mel=True``) keep working.

    Returns
    -------
    numpy.ndarray
        The transposed output of `librosa.feature.melspectrogram`, i.e. a
        2-D array indexed as (time frame, mel band).
    """
    signal, sample_rate = librosa.load(file_name)

    # Force every clip to the same number of samples.
    target_length = int(file_length * sample_rate)
    if signal.shape[0] >= target_length:
        fixed = signal[:target_length]
    else:
        # Short clip: keep the audio at the start and pad the tail with zeros.
        fixed = np.zeros(target_length)
        fixed[:signal.shape[0]] = signal

    # The audio must be passed as `y=`: recent librosa releases make the
    # arguments of melspectrogram keyword-only, and older releases accept the
    # keyword form as well.
    mel = librosa.feature.melspectrogram(
        y=fixed, sr=sample_rate, hop_length=512 * bin_factor
    )
    return mel.T


def wav_reprocess(filename):
    """Turn one wav file into a mel-spectrogram array with a channel axis.

    params:
        filename - path of a wav file (full or relative path)
    returns:
        the 2-D result of `extract_spectrum` with a trailing axis of length 1
        appended (shape (54, 128, 1) per the original note) - mel frequency only
    """
    mel_frames = extract_spectrum(filename, mel=True)
    # Append a third, singleton axis after the two spectrogram axes.
    return np.expand_dims(mel_frames, axis=2)

def process_wav_folder_with_labels(wav_folder_path, labels_file_path):
    """Build the feature array and label vector for a folder of wav files.

    params:
        wav_folder_path  - folder containing the wav files
        labels_file_path - csv file with ['gender', 'filename'] columns; each
                           'filename' is resolved relative to wav_folder_path
    returns:
        X - array stacking the `wav_reprocess` output of every listed file,
            in the row order of the csv
        y - int32 vector of labels aligned with X (0: female, 1: male)
    raises:
        ValueError - if a 'gender' value is missing or is neither 'female'
                     nor 'male' (compared case-insensitively, ignoring
                     surrounding whitespace)

    The process working directory is left untouched; file paths are built
    explicitly instead of relying on os.chdir.
    """
    labels = pd.read_csv(labels_file_path)

    # Validate and encode the labels first so that a bad csv fails fast,
    # before any audio file is decoded.
    gender_codes = {'female': 0, 'male': 1}
    normalized = labels['gender'].astype(str).str.strip().str.lower()
    encoded = normalized.map(gender_codes)
    unknown = encoded.isna()
    if unknown.any():
        bad_values = sorted(set(labels.loc[unknown, 'gender'].astype(str)))
        raise ValueError(
            "Unrecognized gender label(s) in {}: {}".format(
                labels_file_path, bad_values
            )
        )
    y = np.asarray(encoded, dtype='int32')

    # Join each file name onto the folder rather than changing directory.
    X = np.asarray([
        wav_reprocess(os.path.join(wav_folder_path, filename))
        for filename in labels['filename'].tolist()
    ])

    return X, y

In [2]:
# Locations of the project folder, the wav files and the label csv, then build
# the feature array X and the label vector y from them.
# NOTE(review): these are hard-coded absolute paths to one user's machine; the
# output recorded for this execution is a ValueError ("frames must be specified
# for non-seekable files"), so X and y shown in later cells come from another run.
main_folder = 'C:/Users/Juvin/Desktop/ITC_Final_Project/ITC_FinalProject_Gender_Voice_Recognition/yuval'
wav_folder_path = 'C:/Users/Juvin/Desktop/ITC_Final_Project/ITC_FinalProject_Gender_Voice_Recognition/yuval/waves'
labels_file_path = 'C:/Users/Juvin/Desktop/ITC_Final_Project/ITC_FinalProject_Gender_Voice_Recognition/yuval/label.csv'
X, y = process_wav_folder_with_labels(wav_folder_path, labels_file_path)



ValueError: frames must be specified for non-seekable files

In [35]:
# Display the shapes of the feature array and the label vector.
# NOTE(review): this cell is recorded as In [35] while the cell that assigns
# X and y is recorded as In [2]; re-run top to bottom to confirm the output.
X.shape, y.shape

((21, 54, 128, 1), (21,))

In [36]:
# Switch the working directory to main_folder and write X and y there under
# relative names. The following cell loads 'X.npy' / 'y.npy' by relative name
# as well, so it depends on this working directory.
os.chdir(main_folder)
np.save('X', X)
np.save('y', y)

In [12]:
# Reload the saved arrays from the current working directory and display
# their shapes as a round-trip check.
# NOTE(review): recorded as In [12], i.e. executed before the save cell above
# (In [36]); the files read here may come from an earlier save - confirm by
# re-running in order.
xx = np.load('X.npy')
yy = np.load('y.npy')
xx.shape, yy.shape

((21, 54, 128, 1), (21,))

In [4]:
# Paths of the .npy files holding the feature and label arrays.
# NOTE(review): hard-coded absolute, machine-specific paths. The names
# `features` and `labels` are later rebound from these path strings to the
# loaded arrays.
features= 'C:/Users/Juvin/Desktop/ITC_Final_Project/ITC_FinalProject_Gender_Voice_Recognition/Daniel/results/features.npy'
labels= 'C:/Users/Juvin/Desktop/ITC_Final_Project/ITC_FinalProject_Gender_Voice_Recognition/Daniel/results/labels.npy'

In [5]:
# Load the feature array, rebinding `features` from the path string to the array.
# NOTE(review): not re-runnable on its own - a second run would pass the array,
# not the path, to np.load; re-run the path cell first.
features = np.load(features)

In [8]:
# Smallest value in the first entry of the loaded feature array.
features[0].min()

4.1435097955400124e-05

In [23]:
# Load the label array, rebinding `labels` from the path string to the array.
# NOTE(review): not re-runnable on its own - a second run would pass the array,
# not the path, to np.load; re-run the path cell first.
labels = np.load(labels)

In [25]:
# Display the shape of the loaded label array.
labels.shape

(66938, 1)