In [1]:
import os
import wave
import time
import pickle
import pyaudio
import warnings
import numpy as np
from sklearn import preprocessing
from scipy.io.wavfile import read
import python_speech_features as mfcc
from sklearn.mixture import GaussianMixture

In [2]:
# Install an "ignore" filter with no category/module restriction, so every
# warning raised after this cell runs is suppressed.
warnings.filterwarnings("ignore")

In [76]:
# paths
# Root folder holding the training/, testing/ and trained_models/ directories.
# It defaults to the original author's location; set the SPEAKER_PROJECT_DIR
# environment variable to run the notebook on another machine without editing
# this cell.
_DEFAULT_PROJECT_DIR = 'C:/Users/thats/Desktop/Semester 5/Embedded System Design/project/'
project_dir = os.environ.get('SPEAKER_PROJECT_DIR') or _DEFAULT_PROJECT_DIR
# Code elsewhere in this notebook builds file names by plain string
# concatenation (e.g. path_train+name), so every path must end with a separator.
if not project_dir.endswith(('/', '\\')):
    project_dir += '/'

path_train = project_dir + 'training/'
path_test = project_dir + 'testing/'
path_train_model = project_dir + 'trained_models/'

In [4]:
def create_user_audio(name):
    """Create the training directory for a user.

    The directory is ``path_train + name`` (``path_train`` is the module-level
    training root). The outcome is reported on stdout; nothing is returned and
    no exception is propagated for filesystem errors.

    Args:
        name: Name of the user; used verbatim as the directory name.
    """
    try:
        os.mkdir(path_train+name)
        print('Directory created successfully!!')
    except FileExistsError:
        print('User already exists!!')
    except OSError as e:
        # Any other failure (missing training root, permission problem,
        # invalid characters in the name, ...) is not an "already exists"
        # condition and must not be reported as one.
        print('Could not create directory for user:', e)

In [5]:
def capture_audio(iteration,path):
    """Record `iteration` mono clips from a user-selected microphone.

    For every clip the input devices of host API 0 are listed, the user is
    prompted for a device id, roughly RECORD_SECONDS seconds of 16-bit audio
    are read from that device, and the result is written to
    ``<path>/<clip number>.wav`` (clip numbers start at 0, so existing files
    with the same names are overwritten).

    Args:
        iteration: Number of clips to record.
        path: Directory the WAV files are written to.

    Raises:
        ValueError: If the device id typed by the user is not an integer.
        OSError: Propagated from PyAudio / the wave module on device or file
            errors. The stream and the PyAudio instance are released first.
    """
    # Recording parameters are loop-invariant, so they are defined once.
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    CHUNK = 512
    RECORD_SECONDS = 10

    for itr in range(iteration):
        audio = pyaudio.PyAudio()
        try:
            print('--------------Recording device list--------------')
            info = audio.get_host_api_info_by_index(0)
            print("info:",info)
            numDevices = info.get('deviceCount')
            for i in range(numDevices):
                device = audio.get_device_info_by_host_api_device_index(0, i)
                if device.get('maxInputChannels') > 0:
                    print("Input device id:",i,"-",device.get('name'))
            print("-------------------------------------------------")

            index = int(input('Enter id of the input device: '))
            # The ids printed above are relative to host API 0, so the reply
            # is resolved the same way; the device's global index is what
            # open() expects for input_device_index.
            selected = audio.get_device_info_by_host_api_device_index(0, index)
            print('Recording via index '+selected.get('name'))

            # Queried while the PyAudio instance is still alive.
            sample_width = audio.get_sample_size(FORMAT)

            stream = audio.open(
                            format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            input_device_index=selected.get('index'),
                            frames_per_buffer=CHUNK
                        )
            recordFrames = []
            try:
                print("recording started...")
                for j in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
                    recordFrames.append(stream.read(CHUNK))
                    print('cnt:',j)
                print("Recording stopped....")
            finally:
                # Release the device even if a read fails part-way through.
                stream.stop_stream()
                stream.close()
        finally:
            audio.terminate()

        output_filename = str(itr)+'.wav'
        wave_output_file = os.path.join(path,output_filename)
        # The context manager closes the file even if a write fails.
        with wave.open(wave_output_file, 'wb') as waveFile:
            waveFile.setnchannels(CHANNELS)
            waveFile.setsampwidth(sample_width)
            waveFile.setframerate(RATE)
            waveFile.writeframes(b''.join(recordFrames))

In [6]:
def calculate_delta(array):
    """Compute delta (frame-to-frame difference) features over a +/-2 row window.

    For every row ``i`` of `array`::

        delta[i] = (a[i+1] - a[i-1] + 2 * (a[i+2] - a[i-2])) / 10

    where row indices that fall outside the array are clamped to the first /
    last row.

    Args:
        array: 2-D numpy array, one feature vector per row. Any number of
            columns is accepted (the width is no longer fixed to 20).

    Returns:
        float64 numpy array with the same shape as `array`.
    """
    rows, _cols = array.shape  # also rejects inputs that are not 2-D
    frame = np.arange(rows)

    def clamped(offset):
        # Row indices shifted by `offset`, clamped to the valid range.
        return np.clip(frame + offset, 0, rows - 1)

    deltas = (
        array[clamped(1)] - array[clamped(-1)]
        + 2 * (array[clamped(2)] - array[clamped(-2)])
    ) / 10
    return np.asarray(deltas, dtype=np.float64)

In [7]:
def extract_features(audio, rate):
    """Turn a raw audio signal into the feature matrix used for modelling.

    MFCCs are computed with 25 ms windows, a 10 ms step, 20 cepstral
    coefficients and a 1200-point FFT, then passed through
    ``preprocessing.scale``. The delta features of the scaled matrix are
    appended column-wise.

    Args:
        audio: Audio samples as returned by ``scipy.io.wavfile.read``.
        rate: Sample rate of `audio` in Hz.

    Returns:
        2-D array: scaled MFCC columns followed by their delta columns.
    """
    cepstra = mfcc.mfcc(
        audio,
        samplerate=rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    scaled = preprocessing.scale(cepstra)
    return np.hstack((scaled, calculate_delta(scaled)))

In [74]:
def train_Model(source, destination):
    """Train one Gaussian mixture model per speaker and pickle it.

    Every sub-directory of `source` is treated as one speaker. The features
    of all audio files inside it are stacked, a 3-component diagonal-covariance
    GMM is fitted, and the model is written to ``<destination>/<speaker>.gmm``.

    A model is built from exactly the files in the speaker's own directory,
    however many there are, so features never carry over from one speaker to
    the next.

    Args:
        source: Directory containing one sub-directory per speaker.
        destination: Directory the ``.gmm`` pickle files are written to.
    """
    for subdirectory in os.listdir(source):
        path = os.path.join(source, subdirectory)
        if not os.path.isdir(path):
            # Stray files next to the speaker folders are not speakers.
            continue

        vectors = []
        for file in os.listdir(path):
            sr,audio = read(os.path.join(path, file))
            vectors.append(extract_features(audio,sr))

        if not vectors:
            print('no training audio found for speaker',subdirectory)
            continue

        # Stack once instead of growing the array file by file.
        features = np.vstack(vectors)

        gmm = GaussianMixture(n_components=3, max_iter=200, covariance_type='diag')
        gmm.fit(features)

        pickleFile = str(subdirectory)+'.gmm'
        with open(os.path.join(destination, pickleFile),'wb') as model_file:
            pickle.dump(gmm,model_file)
        print('modelling completed for speaker',subdirectory,'with data point',features.shape)


In [97]:
def test_model(source):
    """Identify the speaker of every audio file in `source`.

    All ``*.gmm`` models in the module-level ``path_train_model`` directory
    are loaded. Each file in `source` is scored against every model, and the
    speaker whose model gives the highest score is printed.

    Args:
        source: Directory containing the audio files to classify.
    """
    models = []
    speakers = []

    for model in os.listdir(path_train_model):
        if not model.endswith('.gmm'):
            # Only files written by the training step are models.
            continue
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # model files that were produced by this notebook.
        with open(os.path.join(path_train_model, model),'rb') as model_file:
            models.append(pickle.load(model_file))
        speakers.append(model.split('.gmm')[0])

    if not models:
        # np.argmax would raise on an empty score array.
        print('No trained models found in',path_train_model)
        return

    for file in os.listdir(source):
        sr,audio = read(os.path.join(source, file))
        vector = extract_features(audio,sr)

        # One score per model, in the same order as `speakers`.
        likelihood = np.array([gmm.score(vector) for gmm in models])

        predicted_speaker = np.argmax(likelihood)
        print("Speaker detected as",speakers[predicted_speaker])

In [96]:
# Show the menu and read the selection. int() raises ValueError if the reply
# is not an integer; the value is consumed by the dispatch cell below.
choice = int(input("1.Create account\n2.Extract feature for users\n3.Authenticate user"))

In [103]:
# Run the action selected in the menu cell; any other value does nothing.
if choice == 1:
    # Registration: create the user's folder and record two training clips.
    total_user = os.listdir(path_train)
    name = input('Enter your name:')
    if name not in total_user:
        create_user_audio(name)
        capture_audio(2,path_train+name)
    else:
        print('user already exists')
elif choice == 2:
    # Training: build a model for every registered user.
    train_Model(path_train,path_train_model)
elif choice == 3:
    # Authentication: record one clip and score it against the stored models.
    name = input("Enter your name:")
    total_user = os.listdir(path_train)
    if name in total_user:
        capture_audio(1,path_test)
        time.sleep(5)
        test_model(path_test)
    else:
        print("User doesnot exist. Register yourself first!!!")
        

--------------Recording device list--------------
info: {'index': 0, 'structVersion': 1, 'type': 2, 'name': 'MME', 'deviceCount': 5, 'defaultInputDevice': 1, 'defaultOutputDevice': 3}
Input device id: 0 - Microsoft Sound Mapper - Input
Input device id: 1 - Microphone (Realtek(R) Audio)
-------------------------------------------------
Recording via index Microphone (Realtek(R) Audio)
recording started...
cnt: 0
cnt: 1
cnt: 2
cnt: 3
cnt: 4
cnt: 5
cnt: 6
cnt: 7
cnt: 8
cnt: 9
cnt: 10
cnt: 11
cnt: 12
cnt: 13
cnt: 14
cnt: 15
cnt: 16
cnt: 17
cnt: 18
cnt: 19
cnt: 20
cnt: 21
cnt: 22
cnt: 23
cnt: 24
cnt: 25
cnt: 26
cnt: 27
cnt: 28
cnt: 29
cnt: 30
cnt: 31
cnt: 32
cnt: 33
cnt: 34
cnt: 35
cnt: 36
cnt: 37
cnt: 38
cnt: 39
cnt: 40
cnt: 41
cnt: 42
cnt: 43
cnt: 44
cnt: 45
cnt: 46
cnt: 47
cnt: 48
cnt: 49
cnt: 50
cnt: 51
cnt: 52
cnt: 53
cnt: 54
cnt: 55
cnt: 56
cnt: 57
cnt: 58
cnt: 59
cnt: 60
cnt: 61
cnt: 62
cnt: 63
cnt: 64
cnt: 65
cnt: 66
cnt: 67
cnt: 68
cnt: 69
cnt: 70
cnt: 71
cnt: 72
cnt: 73
cnt: 74
cn