<a href="https://colab.research.google.com/github/i-ganza007/AlaMovie/blob/main/Untitled7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [69]:
import librosa as lb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa.display
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, log_loss , precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import ast
import warnings
warnings.filterwarnings('ignore')

In [70]:
def process_audio_files(audio_files):
    features = []
    labels = []

    for file_path, user, phrase in audio_files:
        y, sr = lb.load(file_path)
        yt, _ = lb.effects.trim(y)
        plt.figure(figsize=(10, 4))
        lb.display.waveshow(yt, sr=sr)
        plt.title(f'Waveform: {user} - {phrase}')
        plt.savefig(f"waveform_{user}_{phrase.replace(' ', '_')}.png")
        plt.close()
        print(f"Saved waveform for {user} - {phrase}: Duration {lb.get_duration(y=yt, sr=sr):.2f}s, clear speech peaks")
        plt.figure(figsize=(10, 4))
        D = lb.amplitude_to_db(np.abs(lb.stft(yt)), ref=np.max)
        lb.display.specshow(D, sr=sr, x_axis='time', y_axis='hz')
        plt.colorbar(format='%+2.0f dB')
        plt.title(f'Spectrogram: {user} - {phrase}')
        plt.savefig(f"Spectogram{user}_{phrase.replace(' ', '_')}.png")
        plt.close()
        print(f"Saved spectrogram for {user} - {phrase}: Speech frequencies 0-5kHz")
        y_pitch = lb.effects.pitch_shift(yt, sr=sr, n_steps=4)
        y_stretch = lb.effects.time_stretch(yt, rate=0.8)
        for audio, aug_type in [(yt, 'original'), (y_pitch, 'pitch'), (y_stretch, 'stretch')]:
            mfcc = lb.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
            features.append({
                'file': f"{file_path.split('/')[-1]}_{aug_type}",
                'user': user,
                'phrase': phrase,
                'mfcc_mean': np.mean(mfcc, axis=1).tolist(),
                'rolloff_mean': float(np.mean(lb.feature.spectral_rolloff(y=audio, sr=sr))),
                'energy_mean': float(np.mean(lb.feature.rms(y=audio)))
            })
            labels.append(f"{user}_{phrase.lower()}")
    df_augmented = pd.DataFrame(features)
    df_augmented.to_csv('audio_features_augmented.csv', index=False)
    return df_augmented, np.array(labels)

In [71]:
def load_and_combine_features(csv_path='audio_features.csv'):
    # Load original features
    df_original = pd.read_csv(csv_path)
    # Map filenames to user-phrase classes
    df_original['phrase'] = df_original['file'].apply(
        lambda x: 'confirm transaction' if 'confirm' in x.lower() else 'yes approve'
    )
    df_original['user'] = df_original['file'].apply(
        lambda x: 'Eddy' if 'eddy' in x.lower() else 'Lievin'
    )
    df_original['label'] = df_original.apply(lambda x: f"{x['user']}_{x['phrase'].lower()}", axis=1)
    df_original['mfcc_mean'] = df_original['mfcc_mean'].apply(ast.literal_eval)

    # Process audio files for augmented features
    audio_files = [
        ('/content/confirm_eddy.mp3', 'Eddy', 'confirm transaction'),
        ('/content/confirm_lievin.mp3', 'Lievin', 'confirm transaction'),
        ('/content/yes_approve_eddy.mp3', 'Eddy', 'yes approve'),
        ('/content/yes_approve_lievin.mp3', 'Lievin', 'yes approve')
    ]
    df_augmented, augmented_labels = process_audio_files(audio_files)

    # Combine features
    df_combined = pd.concat([df_original, df_augmented], ignore_index=True)
    labels = np.concatenate([df_original['label'].values, augmented_labels])

    # Extract feature vectors
    X = np.array([
        row['mfcc_mean'] + [row['rolloff_mean'], row['energy_mean']]
        for _, row in df_combined.iterrows()
    ])

    return X, labels, df_combined

In [72]:
def train_rf_model(features, labels):
    # Encode labels
    le = LabelEncoder()
    y = le.fit_transform(labels)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.2, random_state=42)

    # Train Random Forest
    model = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
    model.fit(X_train, y_train)
    # Evaluate
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)


    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    loss = log_loss(y_test, y_prob, labels=le.classes_)

    print(f"\nVoiceprint RF Model - Accuracy: {accuracy:.4f}, F1-Score: {f1:.4f}, Log Loss: {loss:.4f}")

    return model, le

In [73]:
def simulate_transaction(audio_path, model, label_encoder):
    print("\n=== Voiceprint Verification Simulation ===")

    # Load and preprocess audio
    y, sr = lb.load(audio_path)
    yt, _ = lb.effects.trim(y)
    mfcc = lb.feature.mfcc(y=yt, sr=sr, n_mfcc=20)
    features = np.concatenate([
        np.mean(mfcc, axis=1),
        [np.mean(lb.feature.spectral_rolloff(y=yt, sr=sr))],
        [np.mean(lb.feature.rms(y=yt))]
    ]).reshape(1, -1)

    # Predict
    pred = model.predict(features)
    pred_label = label_encoder.inverse_transform(pred)[0]
    pred_user, pred_phrase = pred_label.split('_', 1)

    # Verify authorized user (Eddy or Lievin) and valid sentence
    if pred_user in ['Eddy', 'Lievin'] and pred_phrase in ['yes approve', 'confirm transaction']:
        print(f"Voice verification passed. Predicted: {pred_user} saying '{pred_phrase}'")
        return True
    else:
        print(f"Access Denied: Predicted {pred_user} saying '{pred_phrase}', expected Eddy or Lievin saying 'yes approve' or 'confirm transaction'")
        return False

In [74]:
def simulate_unauthorized_attempt(model, label_encoder):
    print("\n=== Unauthorized Voice Attempt Simulation ===")

    # Simulate random features (representing an unauthorized user)
    features = np.concatenate([
        np.random.randn(20) * 10,  # Random MFCC means
        [np.random.uniform(1000, 5000)],  # Random rolloff
        [np.random.uniform(0.01, 0.1)]  # Random energy
    ]).reshape(1, -1)

    # Predict
    pred = model.predict(features)
    pred_label = label_encoder.inverse_transform(pred)[0]
    pred_user, pred_phrase = pred_label.split('_', 1)
    print(f"Access Denied: Predicted {pred_user} saying '{pred_phrase}' (unauthorized user)")

In [76]:
def main():
    # Load and combine features
    X, labels, df_combined = load_and_combine_features('/content/audio_features(1).csv')

    # Train model
    model, label_encoder = train_rf_model(X, labels)

    # Simulate authorized transaction (example: Eddy saying 'confirm transaction')
    simulate_transaction(
        '/content/confirm_eddy.mp3',
        model=model,
        label_encoder=label_encoder
    )

    # Simulate unauthorized attempt
    simulate_unauthorized_attempt(model, label_encoder)

if __name__ == "__main__":
    main()

Saved waveform for Eddy - confirm transaction: Duration 2.67s, clear speech peaks
Saved spectrogram for Eddy - confirm transaction: Speech frequencies 0-5kHz
Saved waveform for Lievin - confirm transaction: Duration 2.04s, clear speech peaks
Saved spectrogram for Lievin - confirm transaction: Speech frequencies 0-5kHz
Saved waveform for Eddy - yes approve: Duration 2.88s, clear speech peaks
Saved spectrogram for Eddy - yes approve: Speech frequencies 0-5kHz
Saved waveform for Lievin - yes approve: Duration 2.85s, clear speech peaks
Saved spectrogram for Lievin - yes approve: Speech frequencies 0-5kHz

Voiceprint RF Model - Accuracy: 1.0000, F1-Score: 1.0000, Log Loss: 0.0000

=== Voiceprint Verification Simulation ===
Voice verification passed. Predicted: Eddy saying 'confirm transaction'

=== Unauthorized Voice Attempt Simulation ===
Access Denied: Predicted Eddy saying 'yes approve' (unauthorized user)
