In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from zipfile import ZipFile

import scipy.io.wavfile as wav
import os
import math
from scipy.fftpack import dct

### Download the dataset

In [None]:
# NOTE(review): the Kaggle CLI needs API credentials configured before this
# command can run; without them it aborts with KeyError: 'username'.
!kaggle datasets download mohammedabdeldayem/the-fake-or-real-dataset

Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 10, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/kaggle/cli.py", line 68, in main
    out = args.func(**command_args)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/kaggle/api/kaggle_api_extended.py", line 1734, in dataset_download_cli
    with self.build_kaggle_client() as kaggle:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/kaggle/api/kaggle_api_extended.py", line 688, in build_kaggle_client
    username=self.config_values['username'],
             ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^
KeyError: 'username'


In [None]:
# Unpack the downloaded dataset archive into the current working directory.
with ZipFile(file='the-fake-or-real-dataset.zip', mode='r') as zipObj:
    zipObj.extractall(path=None)


FileNotFoundError: [Errno 2] No such file or directory: 'the-fake-or-real-dataset.zip'

### We use the 2-second clips of the dataset (`for-2seconds`)

### Reading and collecting data from WAV files

In [None]:
def read_wav(file_path):
    """Load a WAV file from disk.

    Args:
        file_path: Path of the WAV file to read.

    Returns:
        A ``(rate, signal)`` tuple exactly as produced by
        ``scipy.io.wavfile.read``: the sample rate and the sample data.
    """
    wav_contents = wav.read(file_path)
    sample_rate, samples = wav_contents
    return sample_rate, samples

### Pre-emphasis

In [None]:
def pre_emphasis(signal, coeff=0.97):
    """Apply a first-order pre-emphasis filter.

    The output is ``y[0] = x[0]`` and ``y[t] = x[t] - coeff * x[t - 1]``
    for every later sample.

    Args:
        signal: 1-D sequence of samples (NumPy array or any array-like,
            e.g. a plain list).
        coeff: Pre-emphasis coefficient applied to the previous sample.

    Returns:
        1-D NumPy array with one output value per input sample. An empty
        input yields an empty float array instead of raising IndexError.
    """
    # Accept lists/tuples as well as arrays; a no-op for ndarray input.
    signal = np.asarray(signal)
    if signal.size == 0:
        # Nothing to filter; signal[0] below would raise on an empty input.
        return np.empty(0, dtype=float)
    return np.append(signal[0], signal[1:] - coeff * signal[:-1])

### Framing and Windowing to reduce Spectral Leakage

In [None]:
def framing(signal, frame_size, hop_size, sample_rate):
    """Slice a signal into fixed-length, overlapping frames.

    Args:
        signal: 1-D sequence of samples.
        frame_size: Frame duration in seconds.
        hop_size: Step between consecutive frame starts, in seconds.
        sample_rate: Sampling rate of ``signal`` in Hz.

    Returns:
        2-D NumPy array of shape ``(n_frames, frame_length)``. Only complete
        frames are returned (no padding) and no window function is applied.
        A signal shorter than one frame yields shape ``(0, frame_length)``.

    Raises:
        ValueError: If the frame or hop duration is shorter than one sample.
    """
    frame_length = int(sample_rate * frame_size)
    hop_length = int(sample_rate * hop_size)
    if frame_length < 1 or hop_length < 1:
        raise ValueError(
            "frame_size and hop_size must each span at least one sample"
        )

    signal = np.asarray(signal)

    # The "+ 1" keeps the last complete frame when it ends exactly on the
    # final sample (e.g. a signal that is exactly one frame long).
    starts = range(0, len(signal) - frame_length + 1, hop_length)
    frames = [signal[start:start + frame_length] for start in starts]

    if not frames:
        # Keep the result 2-D so downstream per-frame code still works.
        return np.empty((0, frame_length), dtype=signal.dtype)
    return np.array(frames)


### FFT to convert from the Time Domain to the Frequency Domain

In [None]:
def fft_frames(frames, sample_rate, nfft=512):
    """Compute the magnitude spectrum of every frame.

    Args:
        frames: Array of frames; the FFT is taken along the last axis.
        sample_rate: Not used by the computation; kept so existing callers
            keep working.
        nfft: FFT length. Frames shorter than ``nfft`` are zero-padded and
            longer frames are cropped to their first ``nfft`` samples, so
            pick ``nfft`` at least as large as the frame length to avoid
            discarding data.

    Returns:
        Array of magnitudes whose last axis has ``nfft // 2 + 1`` bins.
    """
    spectrum = np.fft.rfft(frames, n=nfft)
    return np.abs(spectrum)


### Mel filter bank to mimic human hearing (maps frequency onto the perceptual Mel scale)

In [None]:
def mel_filter_bank(sample_rate, n_filters=23, nfft=512, f_min=0, f_max=None):
    """Build a bank of triangular filters spaced evenly on the Mel scale.

    Args:
        sample_rate: Sampling rate in Hz.
        n_filters: Number of triangular filters (rows of the result).
        nfft: FFT length the filters will be applied to.
        f_min: Lowest frequency covered, in Hz.
        f_max: Highest frequency covered, in Hz; defaults to
            ``sample_rate // 2`` when None.

    Returns:
        Array of shape ``(n_filters, nfft // 2 + 1)``; each row rises
        linearly from 0 to 1 at its centre bin and falls back towards 0.
    """
    upper_hz = sample_rate // 2 if f_max is None else f_max

    # Convert the band edges from Hz to Mel.
    mel_low = 2595 * np.log10(1 + f_min / 700)
    mel_high = 2595 * np.log10(1 + upper_hz / 700)

    # n_filters triangles share n_filters + 2 edge points, equally spaced in Mel.
    mel_edges = np.linspace(mel_low, mel_high, n_filters + 2)
    hz_edges = 700 * (10**(mel_edges / 2595) - 1)

    # Map every edge frequency onto an FFT bin index.
    fft_bins = np.floor((nfft + 1) * hz_edges / sample_rate).astype(int)

    bank = np.zeros((n_filters, nfft // 2 + 1))

    # Each filter is described by three consecutive edges: left, peak, right.
    triangles = zip(fft_bins[:-2], fft_bins[1:-1], fft_bins[2:])
    for row, (left, peak, right) in enumerate(triangles):
        # Rising slope: 0 at the left edge up to (but excluding) the peak.
        for k in range(left, peak):
            bank[row, k] = (k - left) / (peak - left)

        # Falling slope: 1 at the peak down towards the right edge.
        for k in range(peak, right):
            bank[row, k] = (right - k) / (right - peak)

    return bank


### Log Scale

In [None]:
def log_mel_spectrogram(magnitude, mel_filter_bank):
    """Project frame spectra onto a Mel filter bank and take the natural log.

    Args:
        magnitude: Spectrum array of shape ``(n_frames, n_bins)``.
        mel_filter_bank: Filter matrix of shape ``(n_filters, n_bins)``.

    Returns:
        Array of shape ``(n_frames, n_filters)`` holding the log of each
        filter's output.
    """
    filter_energies = np.dot(mel_filter_bank, magnitude.T)
    # The small offset keeps log() finite when a filter output is exactly 0.
    log_energies = np.log(filter_energies + 1e-6)
    return log_energies.T


### DCT to obtain MFCCs

In [None]:
def dct_transform(mel_spectrogram, n_coeffs=13):
    """Turn log-Mel energies into cepstral coefficients.

    Args:
        mel_spectrogram: 2-D array; the DCT is taken along the last axis.
        n_coeffs: Number of leading coefficients to keep per row.

    Returns:
        Array with the first ``n_coeffs`` orthonormal DCT-II coefficients
        of every row.
    """
    cepstrum = dct(mel_spectrogram, type=2, axis=-1, norm='ortho')
    return cepstrum[:, :n_coeffs]

### Feature Extraction

In [None]:
def extract_mfcc(file_path, frame_size=0.025, hop_size=0.01, n_filters=23, n_coeffs=13):
    """Compute the MFCC matrix of a single WAV file.

    Pipeline: read -> down-mix to mono (if needed) -> pre-emphasis ->
    framing -> magnitude FFT -> log-Mel filter bank -> DCT.

    Args:
        file_path: Path of the WAV file.
        frame_size: Frame duration in seconds.
        hop_size: Step between consecutive frames in seconds.
        n_filters: Number of Mel filters.
        n_coeffs: Number of cepstral coefficients kept per frame.

    Returns:
        2-D array with one row per frame and up to ``n_coeffs`` columns.
    """
    sample_rate, signal = read_wav(file_path)

    # Multi-channel WAV data comes back as a 2-D (samples, channels) array.
    # pre_emphasis would flatten that into interleaved channels, so average
    # the channels into a single mono track first.
    if np.ndim(signal) > 1:
        signal = np.mean(signal, axis=1)

    # Apply pre-emphasis
    signal = pre_emphasis(signal)

    # Framing
    frames = framing(signal, frame_size, hop_size, sample_rate)

    # Apply FFT to frames
    magnitude = fft_frames(frames, sample_rate)

    # Create Mel filter bank
    mel_filter = mel_filter_bank(sample_rate, n_filters=n_filters)

    # Apply Mel filter bank to the magnitude
    mel_spectrogram = log_mel_spectrogram(magnitude, mel_filter)

    # Apply DCT to get MFCCs
    mfcc = dct_transform(mel_spectrogram, n_coeffs)

    return mfcc

### Extract Features from Folder

In [None]:
def extract_features_from_folder(folder_path):
    """Collect one averaged MFCC vector per WAV file, labelled by subfolder.

    Every directory directly inside ``folder_path`` is treated as a class;
    its name becomes the label of each ``.wav`` file found in it.

    Args:
        folder_path: Directory whose subdirectories hold the WAV files.

    Returns:
        ``(features, labels)`` as NumPy arrays. Each feature row is the
        mean of the file's MFCC matrix over its first axis.
    """
    features = []
    labels = []

    for class_name in os.listdir(folder_path):
        class_dir = os.path.join(folder_path, class_name)
        if not os.path.isdir(class_dir):
            continue

        for entry in os.listdir(class_dir):
            if not entry.endswith('.wav'):
                continue

            mfcc = extract_mfcc(os.path.join(class_dir, entry))
            features.append(np.mean(mfcc, axis=0))
            labels.append(class_name)

    return np.array(features), np.array(labels)


### Load Dataset

In [None]:
def load_data(dataset_path):
    """Load the MFCC matrices and labels of every split of the dataset.

    Reads ``<dataset_path>/<split>/<real|fake>/*.wav`` for the splits
    ``training``, ``testing`` and ``validation`` and pools them together.

    Args:
        dataset_path: Root directory that contains the three split folders.

    Returns:
        ``(features, labels)`` as NumPy arrays, where each feature is the
        full MFCC matrix of one file and the label is 1 for real and
        0 for fake.
    """
    label_by_subfolder = (('real', 1), ('fake', 0))
    features, labels = [], []

    for split_name in ('training', 'testing', 'validation'):
        for subfolder, label in label_by_subfolder:
            split_dir = os.path.join(dataset_path, split_name, subfolder)
            wav_names = [name for name in os.listdir(split_dir) if name.endswith('.wav')]

            for wav_name in wav_names:
                features.append(extract_mfcc(os.path.join(split_dir, wav_name)))
                labels.append(label)  # 1 for real, 0 for fake

    return np.array(features), np.array(labels)

In [None]:
# Root of the dataset; load_data expects training/, testing/ and validation/
# folders underneath it, each with real/ and fake/ subfolders.
dataset_path = '/content/for-2sec/for-2seconds'  # Replace with your actual path
X, y = load_data(dataset_path)

FileNotFoundError: [Errno 2] No such file or directory: '/content/for-2sec/for-2seconds/training/real'

### 17870 -> Samples, 198 -> Frames per Sample, 13 -> MFCCs per Frame

In [None]:
# Sanity check: the first dimension of X must match the number of labels in y.
X.shape, y.shape

((17870, 198, 13), (17870,))

### Since SVM requires a 2-D array, X has to be reshaped

In [None]:
# Flatten everything after the first axis so each sample becomes one feature row.
X_reshaped = X.reshape(X.shape[0], -1)
print(X_reshaped.shape)  # This will print (17870, 198 * 13) = (17870, 2574)

(17870, 2574)


### Training and Testing Datasets

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y, test_size=0.2, random_state=42)

### Scaling to Preprocess Data

In [None]:
# Fit the scaler on the training split only, then reuse the fitted scaler on
# the test split so no test data is used when fitting.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Basic SVM Model

In [None]:
# Quick smoke test: fit a linear SVM on only the first 100 training samples.
svm = SVC(kernel='linear')
svm.fit(X_train[:100], y_train[:100])

In [None]:
# Score the fitted model on the first 30 test samples.
y_pred = svm.predict(X_test[:30])
accuracy = accuracy_score(y_test[:30], y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Refit a fresh linear SVM, this time on the complete training split.
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)