In [1]:
from tensorflow import keras

import tensorflow as tf
import os
from os.path import isfile, join
import numpy as np
import shutil
from tensorflow import keras
from pathlib import Path
from IPython.display import display, Audio
import subprocess

In [2]:
model = keras.models.load_model('models/train.h5')

def predict(path, labels):
    test = paths_and_labels_to_dataset(path, labels)
    
    test = test.shuffle(buffer_size = batch_size * 8, seed = shuffle_seed).batch(batch_size)
    test = test.prefetch(tf.data.experimental.AUTOTUNE)
    
    test = test.map(lambda x, y: (add_noise(x, noises, scale = scale), y))
    
    for audios, labels in test.take(1):
        ffts = audio_to_fft(audios)
        y_pred = model.predict(ffts)
        rnd = np.random.randint(0, 1, 1)
        audios = audios.numpy()[rnd, :]
        labels = labels.numpy()[rnd]
        y_pred = np.argmax(y_pred, axis = -1)[rnd]
        
    for index in range(1):
        print(
            "Speaker:\33{} {}\33[0m\tPredicted:\33{} {}\33[0m".format(
                "[92m", y_pred[index],
                "[92m", y_pred[index]
            )
        )
        print("Speaker Predicted : ", class_names[y_pred[index]])
        
    return class_names[y_pred[index]]



In [3]:
def paths_and_labels_to_dataset(audio_paths, labels):
    path_ds = tf.data.Dataset.from_tensor_slices(audio_paths)
    audio_ds = path_ds.map(lambda x: path_to_audio(x))
    label_ds = tf.data.Dataset.from_tensor_slices(labels)
    return tf.data.Dataset.zip((audio_ds, label_ds))

In [4]:
def path_to_audio(path):
    audio = tf.io.read_file(path)
    audio, _ = tf.audio.decode_wav(audio, 1, sample_rate)
    return audio

sample_rate = 16000
valid_split = 0.1
shuffle_seed = 43
scale = 0.5
batch_size = 128
epochs = 15

In [5]:
data_dictionary = "data"
audio_folder = "audio"
noise_folder = "noise"

audio_path = os.path.join(data_dictionary, audio_folder)
noise_path = os.path.join(data_dictionary, noise_folder)

In [6]:
noise_paths = []
for subdir in os.listdir(noise_path):
    subdir_path = Path(noise_path) / subdir
    if os.path.isdir(subdir_path):
        noise_paths +=[
            os.path.join(subdir_path, filepath)
            for filepath in os.listdir(subdir_path)
            if filepath.endswith(".wav")
        ]

In [7]:
class_names = os.listdir(audio_path)
class_names.remove('.DS_Store')
print(class_names, )

audio_paths = []
labels = []
for label, name in enumerate(class_names):
    print("Speaker: ", (name))
    dir_path = Path(audio_path) / name
    speaker_sample_paths = [
        os.path.join(dir_path, filepath)
        for filepath in os.listdir(dir_path)
        if filepath.endswith(".wav")
    ]
    audio_paths += speaker_sample_paths
    labels += [label] * len(speaker_sample_paths)

['fem_speaker', 'axb_speaker', 'vishnu', 'gka_speaker', 'slp_speaker', 'ljm_speaker', 'ahw_speaker', 'eey_speaker']
Speaker:  fem_speaker
Speaker:  axb_speaker
Speaker:  vishnu
Speaker:  gka_speaker
Speaker:  slp_speaker
Speaker:  ljm_speaker
Speaker:  ahw_speaker
Speaker:  eey_speaker


In [8]:
def load_noise_samples(path):
    sample, sampling_rate = tf.audio.decode_wav(tf.io.read_file(path), desired_channels = 1)
    print("sampling rate of original audio", sampling_rate)
    if sampling_rate == sample_rate:
        print("shape", sample.shape[0])
        slices = int(sample.shape[0] / sample_rate)
        print(slices)
        sample = tf.split(sample[: slices * sample_rate], slices)
        return sample
    else:
        print("sampling rate for ", path, " is correct")
        return None
    
noises = []
for path in noise_paths:
    sample = load_noise_samples(path)
    if sample:
        noises.extend(sample)
noises = tf.stack(noises)

sampling rate of original audio tf.Tensor(16000, shape=(), dtype=int32)
shape 1438464
89


In [9]:
def add_noise(audio, noises = None, scale = 0.5):
    if noises is not None:
        tf_rnd = tf.random.uniform(
            (tf.shape(audio)[0], ), 0, noises.shape[0], dtype = tf.int32    
        )
        noise = tf.gather(noises, tf_rnd, axis = 0)
        
        prop = tf.math.reduce_max(audio, axis = 1) / tf.math.reduce_max(noise, axis = 1)
        prop = tf.repeat(tf.expand_dims(prop, axis = 1), tf.shape(audio)[1], axis = 1)
        
        audio = audio + noise * prop * scale
        
    return audio

In [10]:
def audio_to_fft(audio):
    audio = tf.squeeze(audio, axis = -1)
    fft = tf.signal.fft(
        tf.cast(tf.complex(real = audio, imag = tf.zeros_like(audio)), tf.complex64)
    )
    fft = tf.expand_dims(fft, axis = -1)
    
    return tf.math.abs(fft[:, : (audio.shape[1] // 2), :])

In [11]:
path = ["data/audio/ahw_speaker/arctic_a0001.wav"]
labels = ["unknown"]
predict(path, labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Speaker:[92m 6[0m	Predicted:[92m 6[0m
Speaker Predicted :  ahw_speaker


2024-04-27 12:43:46.705139: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


'ahw_speaker'

The GUI for recording

In [12]:
import os
import wave
import time
import threading
import tkinter as tk
import pyaudio

import shutil

In [13]:
class VoiceRecorder():
    def __init__(self):
        self.root = tk.Tk()
        self.root.resizable(False, False)
        self.button = tk.Button(text = "rec🔴rd", font = ("Arial", 120, "bold"), command = self.click_handler)
        self.button.pack()
        self.label = tk.Label(text = "00:00:00")
        self.label.pack()
        self.recording = False
        self.root.mainloop()

    def click_handler(self):
        if self.recording:
            self.recording = False
            self.button.config(fg = "black")
        else:
            self.recording = True
            self.button.config(fg = "red")
            threading.Thread(target = self.record).start()

    def record(self):
        audio = pyaudio.PyAudio()
        stream = audio.open(format = pyaudio.paInt16, channels = 1, rate = 16000, input = True, frames_per_buffer = 1024)
        frames = []

        start = time.time()

        while self.recording:
            data = stream.read(1024)
            frames.append(data)

            passed = time.time() - start
            secs = passed%60
            mins = passed // 60
            hours = mins // 60
            self.label.config(text = f"{int(hours) : 02d}:{int(mins) : 02d}:{int(secs)  : 02d}")

        stream.stop_stream()
        stream.close()
        audio.terminate()

        exists = True
        global i
        i = 1
        while exists:
            if os.path.exists(f"data/Recordings/recording{i}.wav"):
                i += 1
            else:
                exists = False

        sound_file = wave.open(f"recording{i}.wav", "wb")
        sound_file.setnchannels(1)
        sound_file.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        sound_file.setframerate(16000)
        sound_file.writeframes(b''.join(frames))
        sound_file.close()
        
        shutil.move(f"recording{i}.wav", f"data/Recordings/recording{i}.wav")

In [20]:
VoiceRecorder()

<__main__.VoiceRecorder at 0x16c19a850>

In [21]:
print(i)

42


In [22]:
path = [f"data/Recordings/recording{i}.wav"]
labels = ["unknown"]
predict(path, labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
Speaker:[92m 2[0m	Predicted:[92m 2[0m
Speaker Predicted :  vishnu


2024-04-21 21:42:01.738109: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


'vishnu'

In [14]:
import openpyxl
from openpyxl import Workbook

In [15]:
workbook = Workbook()

# Attendance loop

In [18]:
students = ["vishnu", "ljm_speaker", "ahw_speaker"]
for student in students:
    VoiceRecorder()
    path = [f"data/Recordings/recording{i}.wav"]
    labels = ["unknown"]
    pred = predict(path, labels)
    if pred == student:
        print("Student ", student, " is present")
    else:
        print("Student ", student, " is absent")

NameError: name 'i' is not defined

# Function to take Attendance

In [20]:
students = ["vishnu", "ljm_speaker", "ahw_speaker"]

In [21]:
def attendance_call():
    global j
    exists = True
    j = 1
    while exists:
        if os.path.exists(f"data/Attendance/class_attendance{j}.xlsx"):
            j+=1
        else:
            exists = False
    workbook.save(filename = f"class_attendance{j}.xlsx")

    sheet = workbook.active
    student_name = sheet.cell(row = 1, column = 1)
    student_name.value = "Student Name"
    attendance = sheet.cell(row = 1, column = 2)
    attendance.value = "Present/Absent"
    proxy = sheet.cell(row = 1, column = 3)
    proxy.value = "Proxy"

    r = 2
    for student in students:
        name = sheet.cell(row = r, column = 1)
        att = sheet.cell(row = r, column = 2)
        prox = sheet.cell(row = r, column = 3)
        name.value = student
        
        VoiceRecorder()
        path = [f"data/Recordings/recording{i}.wav"]
        labels = ["unknown"]
        pred = predict(path, labels)
        if pred == student:
            att.value = "Present"
            prox.value = "None"
        else:
            att.value = "Absent"
            prox.value = pred
        r += 1

    workbook.save(f"class_attendance{j}.xlsx")

    shutil.move(f"class_attendance{j}.xlsx", f"data/Attendance/class_attendance{j}.xlsx")

In [22]:
attendance_call()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Speaker:[92m 2[0m	Predicted:[92m 2[0m
Speaker Predicted :  vishnu


2024-04-27 12:45:52.949119: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Speaker:[92m 2[0m	Predicted:[92m 2[0m
Speaker Predicted :  vishnu


2024-04-27 12:45:58.132642: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Speaker:[92m 2[0m	Predicted:[92m 2[0m
Speaker Predicted :  vishnu


2024-04-27 12:46:02.564457: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
