In [1]:
import os
import librosa
import numpy as np
import pandas as pd
import librosa.display
import matplotlib.pyplot as plt

# Define dataset paths
real_audio_path = "./archive/dev/real"
fake_audio_path = "./archive/dev/fake"

# Load real and fake audio files
real_files = [os.path.join(real_audio_path, f) for f in os.listdir(real_audio_path) if f.endswith('.wav')]
fake_files = [os.path.join(fake_audio_path, f) for f in os.listdir(fake_audio_path) if f.endswith('.wav')]

print(f"Total Real Audio Files: {len(real_files)}")
print(f"Total Fake Audio Files: {len(fake_files)}")


Total Real Audio Files: 2548
Total Fake Audio Files: 10295


In [2]:
# now embed a watermark in real audio using audioseal
import librosa
import torch
from audioseal import AudioSeal
model = AudioSeal.load_generator("audioseal_wm_16bits")

In [3]:
# ab ispe algo lga ke dekhte hai mfcc features pe
import os
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical


In [4]:
# basically ye tera binary classification problem hai ya to fake hoga ya real hoga
def extract_features(file_path, n_mfcc=13):
    try:
        y, sr = librosa.load(file_path, sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        return np.mean(mfccs, axis=1)  # Take mean of each MFCC coefficient
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None


In [5]:
import random

# Define paths for real and fake audio files
real_audio_path = "./archive/dev/real/"
fake_audio_path = "./archive/dev/fake/"

# real_files = random.sample(os.listdir(real_audio_path), 50)
# fake_files = random.sample(os.listdir(fake_audio_path), 50)


real_files = os.listdir(real_audio_path)  
fake_files = os.listdir(fake_audio_path)
# Prepare dataset list
data = []
labels = []

# Process real audios
for file in real_files:
    file_path = os.path.join(real_audio_path, file)
    features = extract_features(file_path)
    if features is not None:
        data.append(features)
        labels.append(1)  # 1 for real audio

# Process fake audios
for file in fake_files:
    file_path = os.path.join(fake_audio_path, file)
    features = extract_features(file_path)
    if features is not None:
        data.append(features)
        labels.append(0)  # 0 for spoofed audio

# Convert to NumPy arrays
X = np.array(data)
y = np.array(labels)
print("Feature Shape:", X.shape, "Labels Shape:", y.shape)


Feature Shape: (12843, 13) Labels Shape: (12843,)


In [6]:
import os
import librosa
import numpy as np
import pandas as pd
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


In [7]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Choose a classifier (Random Forest / SVM)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
# clf = SVC(kernel='linear', C=1.0)  # Use SVM instead if needed

# Train the model
clf.fit(X_train, y_train)

# Predict on test set
y_pred = clf.predict(X_test)

# Evaluate model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9793694044375243
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      2036
           1       0.98      0.92      0.95       533

    accuracy                           0.98      2569
   macro avg       0.98      0.96      0.97      2569
weighted avg       0.98      0.98      0.98      2569



In [8]:
# now let us apply svm on it
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model= SVC(kernel='linear', C=1.0)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.8871156091864538
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.94      0.93      2036
           1       0.75      0.68      0.72       533

    accuracy                           0.89      2569
   macro avg       0.83      0.81      0.82      2569
weighted avg       0.88      0.89      0.89      2569



In [9]:
import librosa
import torch
import soundfile as sf
from audioseal import AudioSeal
import numpy as np

# Load model
model = AudioSeal.load_generator("audioseal_wm_16bits")

def addWaterMark(audioPath, outputPath):
    wav, sr = librosa.load(audioPath, sr=16000)

    # Convert wav to a PyTorch tensor and add batch + channel dimensions
    wav_tensor = torch.tensor(wav, dtype=torch.float32).unsqueeze(0).unsqueeze(0)  # Shape: (1, 1, T)

    # Pass the tensor to the model
    watermark = model.get_watermark(wav_tensor, sr)
    

    # Convert watermark to NumPy
    watermark_np = watermark.squeeze().detach().cpu().numpy()  # Remove extra dimensions & move to CPU

    # Ensure both arrays have the same shape
    if watermark_np.shape != wav.shape:
        watermark_np = np.resize(watermark_np, wav.shape)  # Resize watermark if needed

    # Add the watermark to the original audio
    watermarked_audio = wav + watermark_np

    # Save the watermarked file
    sf.write(outputPath, watermarked_audio, sr)

# Example Usage
# addWaterMark("./archive/dev/real/B_0000_5_A.wav", "./watermark/watermarked.wav")


In [10]:
# now i have to extract features from both of them and look at the difference of water_marked and not watermarked audio
original_features = extract_features("./archive/dev/real/B_0000_5_A.wav")
original_features


array([-130.33653  ,  130.84656  ,   -1.77593  ,   36.66478  ,
          2.545489 ,   20.664501 ,    0.7507467,   -0.776362 ,
          3.1035469,    3.3764336,    6.7592845,    3.764939 ,
          2.588349 ], dtype=float32)

In [11]:
watermarked_features = extract_features("./watermark/watermarked.wav")
watermarked_features

Error processing ./watermark/watermarked.wav: [Errno 2] No such file or directory: './watermark/watermarked.wav'


  y, sr = librosa.load(file_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [15]:
real_files

['B_0000_5_A.wav',
 'B_0001_20_C.wav',
 'B_0002_10_A.wav',
 'B_0003_5_B.wav',
 'B_0004_10_A.wav',
 'B_0005_5_A.wav',
 'B_0006_05_A.wav',
 'B_0007_10_B.wav',
 'B_0008_20_C.wav',
 'B_0009_05_A.wav',
 'B_0010_0_D.wav',
 'B_0011_0_C.wav',
 'B_0012_0_D.wav',
 'B_0013_15_D.wav',
 'B_0014_10_B.wav',
 'B_0015_0_D.wav',
 'B_0016_20_A.wav',
 'B_0017_15_B.wav',
 'B_0018_15_B.wav',
 'B_0019_0_B.wav',
 'B_0020_20_A.wav',
 'B_0021_0_C.wav',
 'B_0022_05_A.wav',
 'B_0023_0_B.wav',
 'B_0024_15_C.wav',
 'B_0025_10_A.wav',
 'B_0026_10_D.wav',
 'B_0027_10_B.wav',
 'B_0028_20_D.wav',
 'B_0029_0_A.wav',
 'B_0030_10_A.wav',
 'B_0031_10_D.wav',
 'B_0032_15_D.wav',
 'B_0033_15_C.wav',
 'B_0034_0_B.wav',
 'B_0035_0_A.wav',
 'B_0036_10_B.wav',
 'B_0037_5_A.wav',
 'B_0038_5_A.wav',
 'B_0039_5_C.wav',
 'B_0040_05_D.wav',
 'B_0041_10_B.wav',
 'B_0042_20_B.wav',
 'B_0043_5_B.wav',
 'B_0044_0_D.wav',
 'B_0045_05_C.wav',
 'B_0046_0_B.wav',
 'B_0047_15_D.wav',
 'B_0048_10_C.wav',
 'B_0049_05_A.wav',
 'B_0050_20_A.wav',

In [17]:
# hlka hlka change hua hai to ab agr hum lgye algo to kya frk aata hai lets see
from tqdm import tqdm

# Assuming real_files is a list of audio file paths
cnt=0
for audioPath in tqdm(real_files, desc="Processing Audio Files"):
    # print(audioPath)
    output_path = f"./watermark/watermarked_{cnt}.wav"
    input_path = f"./archive/dev/real/{audioPath}"
    addWaterMark(input_path, output_path)
    cnt=cnt+1

Processing Audio Files: 100%|██████████| 2548/2548 [28:40<00:00,  1.48it/s]  


In [18]:
# ab waatermarked audio files pe model lgate hai featues extract krne ke baaad
import random

# Define paths for real and fake audio files
real_audio_path = "./watermark/"
fake_audio_path = "./archive/dev/fake/"

# real_files = random.sample(os.listdir(real_audio_path), 50)
# fake_files = random.sample(os.listdir(fake_audio_path), 50)


real_files = os.listdir(real_audio_path)  
fake_files = os.listdir(fake_audio_path)
# Prepare dataset list
data2 = []
labels2 = []

# Process real audios
for file in real_files:
    file_path = os.path.join(real_audio_path, file)
    features = extract_features(file_path)
    if features is not None:
        data2.append(features)
        labels2.append(1)  # 1 for real audio

# Process fake audios
for file in fake_files:
    file_path = os.path.join(fake_audio_path, file)
    features = extract_features(file_path)
    if features is not None:
        data2.append(features)
        labels2.append(0)  # 0 for spoofed audio

# Convert to NumPy arrays
X = np.array(data)
y = np.array(labels)
print("Feature Shape:", X.shape, "Labels Shape:", y.shape)


Feature Shape: (12843, 13) Labels Shape: (12843,)


In [19]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Choose a classifier (Random Forest / SVM)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
# clf = SVC(kernel='linear', C=1.0)  # Use SVM instead if needed

# Train the model
clf.fit(X_train, y_train)

# Predict on test set
y_pred = clf.predict(X_test)

# Evaluate model performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9793694044375243
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99      2036
           1       0.98      0.92      0.95       533

    accuracy                           0.98      2569
   macro avg       0.98      0.96      0.97      2569
weighted avg       0.98      0.98      0.98      2569



In [20]:
# lets apply svm now
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model= SVC(kernel='linear', C=1.0)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))



Accuracy: 0.8871156091864538
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.94      0.93      2036
           1       0.75      0.68      0.72       533

    accuracy                           0.89      2569
   macro avg       0.83      0.81      0.82      2569
weighted avg       0.88      0.89      0.89      2569



In [1]:
#lets try to work on some new features maybe igtcc
# define a function to extract features
def extract_features_igctcc():
    try:
        y, sr = librosa.load(file_path, sr=None)
        igctcc = librosa.feature.inverse.mfcc(y=y, sr=sr)
        return np.mean(igctcc, axis=1)  # Take mean of each MFCC coefficient
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None
 

In [None]:
# lets try to run it on 10 eval fake videos
fake_audio_path = "./archive/eval/fake/"

fake_files = os.listdir(fake_audio_path)

detector = AudioSeal.load_detector("audioseal_detector_16bits")

# Prepare dataset list
for file in fake_files:
    file_path = os.path.join(fake_audio_path, file)
    features = extract_features(file_path)
    
    

In [2]:
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import rsa, padding
from cryptography.hazmat.primitives import serialization

# Generate RSA Key Pair (One-Time Setup)
private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
public_key = private_key.public_key()

# Save keys (Optional - to reuse later)
private_pem = private_key.private_bytes(
    encoding=serialization.Encoding.PEM,
    format=serialization.PrivateFormat.PKCS8,
    encryption_algorithm=serialization.NoEncryption(),
)

public_pem = public_key.public_bytes(
    encoding=serialization.Encoding.PEM,
    format=serialization.PublicFormat.SubjectPublicKeyInfo,
)

with open("private_key.pem", "wb") as f:
    f.write(private_pem)

with open("public_key.pem", "wb") as f:
    f.write(public_pem)

# Function to generate a digital signature for an audio file
def sign_audio(audio_path):
    with open(audio_path, "rb") as f:
        audio_data = f.read()

    # Hash the audio file using SHA-256
    digest = hashes.Hash(hashes.SHA256())
    digest.update(audio_data)
    audio_hash = digest.finalize()

    # Sign the hash using the private key
    signature = private_key.sign(
        audio_hash,
        padding.PSS(
            mgf=padding.MGF1(hashes.SHA256()),
            salt_length=padding.PSS.MAX_LENGTH,
        ),
        hashes.SHA256(),
    )
    return signature

# Example: Generate a signature for an audio file
audio_file = "./archive/dev/fake/B_10004_0_C.wav"
signature = sign_audio(audio_file)

# Store signature securely (e.g., database, separate file, or attach as metadata)
signature_db = {audio_file: signature}
print("🔹 Digital Signature Generated & Stored.")


🔹 Digital Signature Generated & Stored.


In [3]:
# Function to verify the digital signature of an audio file
def verify_audio_signature(audio_path, expected_signature):
    with open(audio_path, "rb") as f:
        audio_data = f.read()

    # Hash the received audio file
    digest = hashes.Hash(hashes.SHA256())
    digest.update(audio_data)
    audio_hash = digest.finalize()

    try:
        # Verify the signature using the public key
        public_key.verify(
            expected_signature,
            audio_hash,
            padding.PSS(
                mgf=padding.MGF1(hashes.SHA256()),
                salt_length=padding.PSS.MAX_LENGTH,
            ),
            hashes.SHA256(),
        )
        print("✅ Digital Signature Verified: Audio is Authentic")
        return True
    except:
        print("❌ Digital Signature Mismatch: Audio is Tampered (Spoofed)")
        return False

# Example: Verify a new incoming audio file
new_audio_file = "./archive/dev/fake/B_10004_0_C.wav"
if new_audio_file in signature_db:
    verify_audio_signature(new_audio_file, signature_db[new_audio_file])
else:
    print("❌ No Signature Found: Audio is Unverified!")


✅ Digital Signature Verified: Audio is Authentic


In [4]:
# Function to verify the digital signature of an audio file
def verify_audio_signature(audio_path, expected_signature):
    with open(audio_path, "rb") as f:
        audio_data = f.read()

    # Hash the received audio file
    digest = hashes.Hash(hashes.SHA256())
    digest.update(audio_data)
    audio_hash = digest.finalize()

    try:
        # Verify the signature using the public key
        public_key.verify(
            expected_signature,
            audio_hash,
            padding.PSS(
                mgf=padding.MGF1(hashes.SHA256()),
                salt_length=padding.PSS.MAX_LENGTH,
            ),
            hashes.SHA256(),
        )
        print("✅ Digital Signature Verified: Audio is Authentic")
        return True
    except:
        print("❌ Digital Signature Mismatch: Audio is Tampered (Spoofed)")
        return False

# Example: Verify a new incoming audio file
new_audio_file = "./archive/dev/fake/B_10005_5_C.wav"
if new_audio_file in signature_db:
    verify_audio_signature(new_audio_file, signature_db[new_audio_file])
else:
    print("❌ No Signature Found: Audio is Unverified!")


❌ No Signature Found: Audio is Unverified!


In [None]:
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.asymmetric import rsa, padding
from cryptography.hazmat.primitives import serialization

def sign_audio(audio_path, private_key_path, signature_output):
    # Step 1: Read the audio file
    with open(audio_path, "rb") as f:
        audio_data = f.read()  

    # Step 2: Compute SHA-256 Hash
    digest = hashes.Hash(hashes.SHA256())
    digest.update(audio_data)
    audio_hash = digest.finalize()  

    # Step 3: Load Private Key
    with open(private_key_path, "rb") as f:
        private_key = serialization.load_pem_private_key(f.read(), password=None)

    # Step 4: Sign the hash with RSA Private Key
    signature = private_key.sign(
        audio_hash,
        padding.PSS(
            mgf=padding.MGF1(hashes.SHA256()),  
            salt_length=padding.PSS.MAX_LENGTH,
        ),
        hashes.SHA256(),
    )

    # Step 5: Store the digital signature
    with open(signature_output, "wb") as f:
        f.write(signature)

    print("✅ Digital Signature Generated & Saved.")

# Example usage
sign_audio("bonafide_audio.wav", "private_key.pem", "bonafide_audio.sig")
