# Voice Recognition Security System
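Notebook interface for voice-based access control: an uploaded WAV file is denoised, split into non-silent segments, converted to mel spectrograms, and classified by a ResNet-18 model to decide whether access is granted.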

In [None]:
import tkinter as tk
from tkinter import filedialog
import librosa
import soundfile as sf
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import resnet18
import numpy as np
from IPython.display import Audio, display
import os
from df.enhance import enhance, init_df, load_audio, save_audio
from PIL import Image
import matplotlib.pyplot as plt
import librosa.display

# Initialise the speech-enhancement model once at start-up; `model_df` and
# `df_state` are later passed to `enhance` inside process_audio. The third
# return value of init_df() is not used in this notebook.
model_df, df_state, _ = init_df()

In [None]:
from ipywidgets import Button, Output, VBox
from IPython.display import display

In [None]:
def process_audio(audio_path, min_segment_length=4):
    """Denoise an audio file and split it into non-silent segments.

    The file is loaded with librosa at 48 kHz, passed through ``enhance``
    (from ``df.enhance``) using the module-level ``model_df`` and
    ``df_state``, and then split on silence with ``top_db=60``. Only
    segments lasting at least ``min_segment_length`` seconds are kept.

    Args:
        audio_path: Path of the audio file to load.
        min_segment_length: Minimum segment duration, in seconds, for a
            non-silent interval to be returned.

    Returns:
        A tuple ``(segments, sr)``: ``segments`` is a list of NumPy arrays
        sliced from the enhanced signal and ``sr`` is the sample rate the
        file was loaded at.

    Raises:
        Exception: Any error from loading, enhancing or splitting is
            re-raised after diagnostics have been printed.
    """
    # Pre-bind so the diagnostic prints in the handler cannot themselves fail
    # with UnboundLocalError (and hide the real error) when loading fails.
    audio = None
    audio_tensor = None
    try:
        audio, sr = librosa.load(audio_path, sr=48000)

        # Add channel dimension: [samples] -> [1, samples]
        audio_tensor = torch.from_numpy(audio).float().unsqueeze(0)

        enhanced = enhance(model_df, df_state, audio_tensor)
        enhanced_numpy = enhanced.squeeze(0).cpu().numpy()

        non_silent = librosa.effects.split(enhanced_numpy, top_db=60)

        # Keep only intervals long enough to be worth classifying.
        segments = [
            enhanced_numpy[start:end]
            for start, end in non_silent
            if len(enhanced_numpy[start:end]) / sr >= min_segment_length
        ]

        print(f"Successfully processed audio with {len(segments)} segments")
        return segments, sr

    except Exception as e:
        print(f"Error in process_audio: {str(e)}")
        if audio is not None:
            print(f"Audio shape: {audio.shape}")
        if audio_tensor is not None:
            print(f"Audio tensor shape before enhance: {audio_tensor.shape}")
        raise

def create_spectrogram(audio, sr):
    """Return the mel spectrogram of ``audio`` in decibels.

    The power spectrogram is computed with librosa's default mel settings
    and converted to dB relative to its own maximum (``ref=np.max``).
    """
    mel_power = librosa.feature.melspectrogram(y=audio, sr=sr)
    return librosa.power_to_db(mel_power, ref=np.max)

def prepare_for_model(S_db):
    """Convert a dB spectrogram into a normalized model input tensor.

    Steps: min-max scale to [0, 1], resize to 224x224 via PIL, replicate the
    single channel three times, apply ImageNet mean/std normalization, and
    add a leading batch dimension.

    Args:
        S_db: 2-D NumPy array holding the spectrogram.

    Returns:
        A float tensor of shape ``[1, 3, 224, 224]``.

    Raises:
        Exception: Any error is printed and re-raised unchanged.
    """
    try:
        # Normalize to range [0, 1]. A constant spectrogram has zero range;
        # dividing by it would fill the input with NaN, so map it to zeros.
        floor = S_db.min()
        span = S_db.max() - floor
        if span == 0:
            S_db_normalized = np.zeros(S_db.shape, dtype=np.float32)
        else:
            S_db_normalized = (S_db - floor) / span

        # Resize to model requirements
        S_db_resized = np.array(Image.fromarray(S_db_normalized).resize((224, 224)))

        # Convert to tensor and add RGB channels: [224, 224] -> [3, 224, 224]
        tensor = torch.FloatTensor(S_db_resized).unsqueeze(0).repeat(3, 1, 1)

        # ImageNet normalization
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
        tensor = normalize(tensor)

        # Add batch dimension: [3, 224, 224] -> [1, 3, 224, 224]
        tensor = tensor.unsqueeze(0)

        return tensor

    except Exception as e:
        print(f"Error in prepare_for_model: {str(e)}")
        raise

In [None]:
def load_model(model_path='trained_model3.pth'):
    """Build a 20-class ResNet-18 and load trained weights from ``model_path``.

    The network is placed on CUDA when available (CPU otherwise) and
    returned in evaluation mode.
    """
    if torch.cuda.is_available():
        target = torch.device('cuda')
    else:
        target = torch.device('cpu')

    # Replace the final fully connected layer with a 20-way classifier head.
    net = resnet18()
    net.fc = nn.Linear(net.fc.in_features, 20)

    weights = torch.load(model_path, map_location=target, weights_only=True)
    net.load_state_dict(weights)

    return net.to(target).eval()

# Load the classifier once so later cells can use the module-level `model`.
# A failure is reported here rather than raised, leaving `model` unset.
try:
    model = load_model()
    active_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Model loaded successfully on {active_device}!")
except Exception as e:
    print(f"Error loading model: {e}")

In [None]:
def process_uploaded_file(file_path):
    """Classify every segment of an audio file and print an access decision.

    The file is split by ``process_audio``; each segment is turned into a
    spectrogram, fed to the module-level ``model``, and counted as authorized
    when the predicted class index is in 0-5. Access is granted only when
    more than half of all segments are authorized. Progress, per-segment
    results and the final decision are printed, and an audio player for the
    file is displayed.

    Args:
        file_path: Path of the audio file to evaluate.

    Returns:
        None. Errors are printed with a traceback rather than raised.
    """
    import traceback

    # Predicted class indices that count as an authorized speaker.
    authorized_classes = frozenset((0, 1, 2, 3, 4, 5))

    try:
        print("Loading and processing audio file...")
        segments, sr = process_audio(file_path)
        print(f"Found {len(segments)} valid segments")

        authorized_count = 0
        processed_count = 0  # segments that were classified without error
        total_confidence = 0.0
        device = next(model.parameters()).device
        print(f"Using device: {device}")

        for i, segment in enumerate(segments):
            print(f"\nProcessing segment {i+1}...")
            try:
                spec = create_spectrogram(segment, sr)
                print(f"Spectrogram shape: {spec.shape}")
                print(f"Spectrogram range: [{spec.min():.2f}, {spec.max():.2f}]")

                model_input = prepare_for_model(spec)
                print(f"Model input shape: {model_input.shape}")
                print(f"Model input range: [{model_input.min():.2f}, {model_input.max():.2f}]")

                model_input = model_input.to(device)

                with torch.no_grad():
                    output = model(model_input)
                    probs = torch.nn.functional.softmax(output, dim=1)
                    # Explicit class dimension; the batch holds one sample.
                    pred_idx = torch.argmax(output, dim=1).item()
                    is_authorized = pred_idx in authorized_classes
                    confidence = probs[0][pred_idx].item() * 100

                total_confidence += confidence
                processed_count += 1
                if is_authorized:
                    authorized_count += 1

                print(f"Segment {i+1}: {'Authorized' if is_authorized else 'Unauthorized'} "
                      f"(Confidence: {confidence:.2f}%)")

            except Exception as e:
                print(f"Error processing segment {i+1}: {str(e)}")
                traceback.print_exc()
                continue

        if segments:
            # The vote is over ALL segments, so a segment that failed to
            # process counts against authorization (fail closed).
            final_authorized = authorized_count > len(segments) / 2
            # Average only over segments that actually produced a confidence;
            # dividing by len(segments) would understate it after failures.
            avg_confidence = total_confidence / processed_count if processed_count else 0.0

            print("\nAccess Decision:")
            print("✅ ACCESS GRANTED" if final_authorized else "❌ ACCESS DENIED")
            print(f"Average Confidence: {avg_confidence:.2f}%")
            print(f"Authorized segments: {authorized_count}/{len(segments)}")
        else:
            print("\n❌ ACCESS DENIED - No valid segments found")

        display(Audio(file_path))

    except Exception as e:
        print(f"Error processing file: {str(e)}")
        traceback.print_exc()

In [None]:
def upload_and_process():
    """Ask the user for a WAV file and run the access check on it.

    A hidden Tk root window is created only to host the native file dialog
    and is destroyed as soon as the dialog closes, so repeated calls do not
    accumulate Tk windows. Nothing happens if the dialog is cancelled.
    """
    root = tk.Tk()
    try:
        root.withdraw()  # keep the empty root window off-screen
        file_path = filedialog.askopenfilename(
            title='Select WAV File',
            filetypes=[('WAV files', '*.wav')]
        )
    finally:
        # Release the Tk root even if the dialog raises.
        root.destroy()

    # askopenfilename returns an empty value when the user cancels.
    if file_path:
        process_uploaded_file(file_path)

def on_button_click(b):
    """Clear the output area and run the upload flow inside it.

    ``b`` is the clicked button supplied by ipywidgets; it is not used.
    """
    with output:
        output.clear_output()
        upload_and_process()


# Build the widgets, wire the click handler, and show them in the notebook.
output = Output()
button = Button(description='Upload and Process WAV File')
button.on_click(on_button_click)
display(button, output)