# Voice Recognition Security System
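Notebook interface for voice-based access control using ML classification: a selected WAV recording is enhanced, split into non-silent segments, and each sufficiently long segment is classified by a ResNet-18 model to decide whether access is granted.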

In [None]:
import tkinter as tk
from tkinter import filedialog
import torch
import torch.nn as nn
from torchvision.models import resnet18
from IPython.display import Audio, display
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
from df.enhance import init_df, enhance
from resample_audio_and_clear_of_noise import re_sample_audio, is_valid_wav_file
from create_spectogram import save_spectrogram
from silence_removal import process_audio_file

# Enhancement model and its state, as returned by df.enhance.init_df().
# The third returned value is not used in this notebook.
model_df, df_state, _ = init_df()

# Run the classifier on the GPU when torch can see one, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ResNet-18 whose final fully connected layer is replaced by a 20-output head,
# so the architecture matches the checkpoint loaded below.
model = resnet18()
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=20)

# Restore the trained weights directly onto the selected device and put the
# network into inference mode.
model.load_state_dict(
    torch.load('trained_model3.pth', map_location=device)
)
model = model.to(device)
model.eval()

In [None]:
def _segment_to_model_input(segment):
    """Convert one audio segment into the tensor fed to `model`.

    The STFT magnitude of the segment is converted to decibels, min-max
    scaled to [0, 1], copied into three identical channels, given a leading
    batch axis and moved to `device`.

    Args:
        segment: Audio samples of one segment (presumably a 1-D NumPy array,
            which yields a (1, 3, freq_bins, frames) tensor - confirm).

    Returns:
        A float tensor located on `device`.

    Raises:
        ValueError: If the dB spectrogram is constant. Min-max scaling would
            then divide zero by zero and hand NaN values to the classifier.
    """
    spec_db = librosa.amplitude_to_db(np.abs(librosa.stft(segment)))

    lowest = spec_db.min()
    value_range = spec_db.max() - lowest
    if not value_range > 0:
        # Also true when the range itself is NaN; either way the scaled
        # spectrogram would be meaningless.
        raise ValueError("spectrogram has no dynamic range")

    spec_norm = (spec_db - lowest) / value_range
    spec_tensor = torch.from_numpy(spec_norm).float()
    # One channel -> three identical channels, then add the batch axis.
    spec_tensor = spec_tensor.unsqueeze(0).repeat(3, 1, 1).unsqueeze(0)
    return spec_tensor.to(device)


def _classify_segment(segment, authorized_classes):
    """Classify one segment and return `(is_authorized, confidence_percent)`.

    `confidence_percent` is the softmax probability of the predicted class,
    scaled to 0-100, whether or not that class is authorized.
    """
    spec_tensor = _segment_to_model_input(segment)
    with torch.no_grad():
        output = model(spec_tensor)
        probs = torch.nn.functional.softmax(output, dim=1)
        pred_idx = torch.argmax(output, dim=1).item()
        confidence = probs[0][pred_idx].item() * 100
    return pred_idx in authorized_classes, confidence


def process_file(file_path, *, min_segment_seconds=4, top_db=60,
                 authorized_classes=frozenset(range(6))):
    """Process an audio file and print an access decision.

    The file is validated and resampled, loaded at 48 kHz, passed through the
    enhancement model and split into non-silent segments. Every segment that
    is long enough is classified; access is granted when more than half of
    the classified segments are predicted as an authorized class. All results
    are printed, and the audio is displayed when a decision was reached from
    at least one classified segment.

    Args:
        file_path: Path of the WAV file to check.
        min_segment_seconds: Segments shorter than this many seconds are
            skipped.
        top_db: Threshold passed to `librosa.effects.split`.
        authorized_classes: Classifier output indices that count as
            authorized.

    Returns:
        None. Errors are reported with `print` rather than raised.
    """
    try:
        if not is_valid_wav_file(file_path):
            print("Invalid WAV file")
            return

        # Resample audio to 48kHz (NOTE(review): presumably rewrites the file
        # at file_path in place - confirm against re_sample_audio).
        re_sample_audio(file_path)

        # Load, enhance, then split the enhanced signal on silence.
        audio, sr = librosa.load(file_path, sr=48000)
        audio_tensor = torch.from_numpy(audio).float().unsqueeze(0)
        enhanced = enhance(model_df, df_state, audio_tensor)
        enhanced_numpy = enhanced.squeeze(0).cpu().numpy()

        segments = librosa.effects.split(enhanced_numpy, top_db=top_db)
        print(f"Found {len(segments)} segments")

        authorized_count = 0
        total_confidence = 0
        processed_segments = 0  # Only segments that were actually classified

        for i, (start, end) in enumerate(segments):
            segment = enhanced_numpy[start:end]
            if len(segment) / sr < min_segment_seconds:
                continue

            try:
                is_authorized, confidence = _classify_segment(
                    segment, authorized_classes
                )
            except Exception as e:
                # Best effort: one bad segment must not abort the whole file.
                print(f"Error processing segment {i+1}: {str(e)}")
                continue

            if is_authorized:
                authorized_count += 1
            total_confidence += confidence
            processed_segments += 1

            print(f"Segment {i+1}: {'Authorized' if is_authorized else 'Unauthorized'} "
                  f"(Confidence: {confidence:.2f}%)")

        if processed_segments == 0:
            # Fail closed and say so, instead of finishing without a decision.
            print("\nAccess Decision:")
            print("❌ ACCESS DENIED")
            print(f"No segment of at least {min_segment_seconds} seconds could be "
                  f"classified ({len(segments)} segments found)")
            return

        # Strict majority of the classified segments must be authorized.
        final_authorized = authorized_count > processed_segments / 2
        avg_confidence = total_confidence / processed_segments

        print("\nAccess Decision:")
        print("✅ ACCESS GRANTED" if final_authorized else "❌ ACCESS DENIED")
        print(f"Average Confidence: {avg_confidence:.2f}%")
        print(f"Authorized segments: {authorized_count}/{processed_segments}")
        print(f"Processed {processed_segments} out of {len(segments)} total segments")

        display(Audio(file_path))

    except Exception as e:
        # Top-level boundary of the notebook UI: report, do not raise.
        print(f"Error processing file: {str(e)}")

In [None]:
from ipywidgets import Button, Output

# Button that starts the file-selection workflow.
button = Button(description='Upload and Process WAV File')

# Output area that captures everything printed or displayed by the click handler.
output = Output()

def on_button_click(b):
    """Ask for a WAV file and run `process_file` on it.

    Everything printed or displayed while handling the click is captured in
    the `output` widget, which is cleared first.

    Args:
        b: The clicked button, as passed by ipywidgets; not used.
    """
    with output:
        output.clear_output()
        # A Tk root is needed to show the native file dialog; keep it hidden.
        root = tk.Tk()
        try:
            root.withdraw()
            file_path = filedialog.askopenfilename(
                title='Select WAV File',
                filetypes=[('WAV files', '*.wav')]
            )
        finally:
            # Destroy the hidden root so repeated clicks do not accumulate
            # Tk instances.
            root.destroy()
        # An empty result means the dialog was cancelled.
        if file_path:
            process_file(file_path)

# Register the click handler, then render the button together with the
# output area the handler writes into.
button.on_click(on_button_click)
display(button, output)