In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
import os
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedKFold, cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
import time
import tpot
from tqdm.auto import tqdm

In [2]:
processed_data = pd.read_csv(os.path.join(os.pardir, "raw_data", "KAGGLE", "DATASET-balanced.csv"))
processed_data.head()

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.338055,0.027948,2842.948867,4322.916759,6570.586186,0.04105,-462.169586,90.311272,19.073769,24.046888,...,-6.686564,0.902086,-7.251551,-1.198342,4.747403,-4.986279,0.953935,-5.013138,-6.77906,FAKE
1,0.443766,0.037838,2336.129597,3445.777044,3764.949874,0.04773,-409.413422,120.348808,-7.161531,5.114784,...,-2.131157,-6.876417,-1.359395,0.326401,-5.420016,-2.109968,-1.757634,-9.537907,-8.494421,FAKE
2,0.302528,0.056578,2692.988386,2861.13318,4716.610271,0.080342,-318.996033,120.490273,-24.625771,23.891073,...,-5.853725,-3.724773,-6.627182,-5.117002,-6.072106,-0.994653,-1.61712,-3.922354,-7.033001,FAKE
3,0.319933,0.031504,2241.665382,3503.766175,3798.641521,0.04718,-404.636749,136.320908,2.308172,-3.907071,...,-1.898315,-2.046493,-7.176277,-3.293508,4.209121,0.121835,-5.407063,-3.654926,-3.274857,FAKE
4,0.420055,0.016158,2526.069123,3102.659519,5025.077899,0.051905,-410.497925,152.7314,-18.266771,51.993462,...,-1.95234,0.810868,6.238493,6.555839,7.535542,2.849219,2.616843,-1.793357,-5.060998,FAKE


In [3]:
processed_data.describe()

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
count,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,...,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0
mean,0.421925,0.037894,2719.201373,3050.299988,4977.617722,0.070821,-382.562312,145.056311,-24.699932,21.311292,...,-2.242488,-4.439806,-1.657739,-2.107442,-2.606924,-1.641709,-3.32034,-3.11711,-2.754034,-4.427346
std,0.069392,0.028071,1066.754739,872.258653,2170.158448,0.039292,79.593413,36.188506,27.728976,22.480432,...,7.726415,6.614913,5.121732,5.347953,4.909579,5.627377,4.597079,4.976511,4.95766,5.478806
min,0.200353,4.3e-05,756.163229,1096.903152,1063.964279,0.015504,-1055.002197,-83.816765,-132.491104,-47.77,...,-29.636639,-30.168062,-19.717999,-21.553347,-28.876045,-20.307486,-22.753056,-19.62388,-23.89047,-25.099829
25%,0.37233,0.015098,2062.875836,2569.289971,3448.144178,0.045749,-432.928848,120.522711,-35.550402,3.635681,...,-6.863236,-8.23307,-5.178427,-5.642066,-5.760252,-4.868709,-6.435052,-5.862841,-5.51425,-7.463774
50%,0.417708,0.03187,2579.963841,3055.863462,4683.958018,0.060237,-365.756241,145.970162,-19.164229,22.218458,...,-2.438328,-4.186015,-1.53067,-2.32035,-2.447045,-0.862829,-3.230387,-2.957088,-2.726495,-3.839427
75%,0.467741,0.054046,3283.857559,3581.271953,6211.301553,0.085149,-321.772781,168.321331,-6.235028,37.017731,...,2.349474,-0.266044,1.794835,1.56943,0.83769,2.043472,-0.293191,0.068411,0.495779,-0.787292
max,0.707202,0.168958,17685.00669,7836.844404,21130.54467,0.81248,-193.430145,284.727997,67.475792,86.585747,...,28.890339,22.552883,19.462509,21.356049,13.319826,19.329521,18.873386,17.924467,11.985058,11.764109


In [4]:
path_fake = os.path.join(os.pardir, "raw_data", "KAGGLE", "AUDIO", "FAKE")
files_wav_fake = []
for root, dirs, files in os.walk(path_fake):
    for _file in files:
        if _file.endswith(".wav"):
            files_wav_fake.append(_file)

len(files_wav_fake)

56

In [5]:
path_real = os.path.join(os.pardir, "raw_data", "KAGGLE", "AUDIO", "REAL")
files_wav_real = []
for root, dirs, files in os.walk(path_real):
    for _file in files:
        if _file.endswith(".wav"):
            files_wav_real.append(_file)

len(files_wav_real)

8

# Empty dataframe initialization

In [6]:
mfcc_columns = [f"mfcc{i+1}" for i in range(20)]

In [17]:
def initialize_empty_df():
    df = pd.DataFrame({
        "chroma_stft":[],
        "rms":[],
        "spectral_centroid":[],
        "spectral_bandwidth":[],
        "rolloff":[],
        "zero_crossing_rate":[]
    })
    initial_columns = df.columns
    for mfcc in mfcc_columns:
        df[mfcc] = ""
    df["LABEL"] = ""
    return df

In [8]:
empty_df = initialize_empty_df()
empty_df

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL


### Feature extraction

In [48]:
df = initialize_empty_df()

In [49]:
def preprocess_data(y,sr,label):
    df_indiv = initialize_empty_df()
    df_indiv["chroma_stft"] = pd.DataFrame(np.mean(librosa.feature.chroma_stft(y=y, sr=sr, hop_length=sr).T, axis=1))
    df_indiv["rms"] = pd.DataFrame(librosa.feature.rms(y=y, hop_length=sr).T)
    df_indiv["spectral_centroid"] = pd.DataFrame(librosa.feature.spectral_centroid(y=y, hop_length=sr).T)
    df_indiv["spectral_bandwidth"] = pd.DataFrame(librosa.feature.spectral_bandwidth(y=y, hop_length=sr).T)
    df_indiv["rolloff"] = pd.DataFrame(librosa.feature.spectral_rolloff(y=y, hop_length=sr).T)
    df_indiv["zero_crossing_rate"] = pd.DataFrame(librosa.feature.zero_crossing_rate(y=y, hop_length=sr).T)
    df_indiv[mfcc_columns] = pd.DataFrame(librosa.feature.mfcc(y=y, sr=sr, hop_length=sr, n_mfcc=20).T)
    df_indiv["LABEL"] = int(label)
    return df_indiv

### Fake audios

In [50]:
for audio in files_wav_fake:
    path = os.path.join(os.pardir, "raw_data", "KAGGLE", "AUDIO", "FAKE", audio)
    y, sr = librosa.load(path)
    df_indiv = preprocess_data(y,sr,1)
    df = pd.concat([df, df_indiv], ignore_index=True)

df.shape

  df = pd.concat([df, df_indiv], ignore_index=True)


(26250, 27)

### Real audios

In [51]:
for audio in files_wav_real:
    path = os.path.join(os.pardir, "raw_data", "KAGGLE", "AUDIO", "REAL", audio)
    y, sr = librosa.load(path)
    df_indiv = preprocess_data(y,sr,0)
    df = pd.concat([df, df_indiv], ignore_index=True)

df.shape

(30000, 27)

In [52]:
df.describe()

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
count,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,...,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0,30000.0
mean,0.397444,0.047543,2300.191408,2132.929362,4232.001877,0.129057,-305.755066,89.092216,-10.448294,10.185871,...,-5.599305,-2.690511,-4.614054,-2.74867,-5.086118,-3.091188,-6.940665,-2.793291,-5.018226,-2.680815
std,0.169093,0.053284,1342.415412,604.390601,2306.645132,0.113252,114.81366,58.436676,37.940006,28.168379,...,12.635489,9.978125,9.278063,9.488855,8.600267,9.643442,8.383015,7.314955,7.781118,7.409049
min,0.115687,3e-06,293.071993,466.22682,215.332031,0.0,-759.733643,-177.025055,-143.416107,-95.569283,...,-69.164688,-47.667747,-45.538296,-47.090431,-51.192055,-45.486912,-46.106239,-32.770676,-44.124901,-39.606369
25%,0.254719,0.008087,1374.578309,1680.526217,2422.485352,0.053711,-388.925438,55.854109,-37.101856,-9.78828,...,-13.622525,-8.978693,-10.60921,-8.401582,-10.698615,-8.360602,-12.371149,-7.440499,-9.97272,-7.323744
50%,0.369466,0.028606,1858.874994,2084.48769,3552.978516,0.090332,-290.730988,96.400734,-8.056952,9.222217,...,-4.843097,-2.458317,-4.237128,-2.144349,-4.976859,-2.29852,-6.396105,-2.623993,-4.636112,-2.508389
75%,0.523026,0.068723,2786.654636,2563.726062,6018.530273,0.157715,-212.870468,129.522976,16.997291,27.958039,...,2.667114,3.666056,1.527035,3.493073,0.576903,2.99737,-1.121217,1.91983,0.145871,2.068932
max,0.936359,0.417975,8245.241074,4442.72018,10389.770508,0.739258,-47.583233,247.707047,115.505585,119.409286,...,45.794586,43.611626,37.643642,36.056511,47.618546,41.728874,43.848362,43.355728,43.124992,53.397881


In [53]:
processed_data.describe()

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
count,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,...,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0,11778.0
mean,0.421925,0.037894,2719.201373,3050.299988,4977.617722,0.070821,-382.562312,145.056311,-24.699932,21.311292,...,-2.242488,-4.439806,-1.657739,-2.107442,-2.606924,-1.641709,-3.32034,-3.11711,-2.754034,-4.427346
std,0.069392,0.028071,1066.754739,872.258653,2170.158448,0.039292,79.593413,36.188506,27.728976,22.480432,...,7.726415,6.614913,5.121732,5.347953,4.909579,5.627377,4.597079,4.976511,4.95766,5.478806
min,0.200353,4.3e-05,756.163229,1096.903152,1063.964279,0.015504,-1055.002197,-83.816765,-132.491104,-47.77,...,-29.636639,-30.168062,-19.717999,-21.553347,-28.876045,-20.307486,-22.753056,-19.62388,-23.89047,-25.099829
25%,0.37233,0.015098,2062.875836,2569.289971,3448.144178,0.045749,-432.928848,120.522711,-35.550402,3.635681,...,-6.863236,-8.23307,-5.178427,-5.642066,-5.760252,-4.868709,-6.435052,-5.862841,-5.51425,-7.463774
50%,0.417708,0.03187,2579.963841,3055.863462,4683.958018,0.060237,-365.756241,145.970162,-19.164229,22.218458,...,-2.438328,-4.186015,-1.53067,-2.32035,-2.447045,-0.862829,-3.230387,-2.957088,-2.726495,-3.839427
75%,0.467741,0.054046,3283.857559,3581.271953,6211.301553,0.085149,-321.772781,168.321331,-6.235028,37.017731,...,2.349474,-0.266044,1.794835,1.56943,0.83769,2.043472,-0.293191,0.068411,0.495779,-0.787292
max,0.707202,0.168958,17685.00669,7836.844404,21130.54467,0.81248,-193.430145,284.727997,67.475792,86.585747,...,28.890339,22.552883,19.462509,21.356049,13.319826,19.329521,18.873386,17.924467,11.985058,11.764109


In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 27 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   chroma_stft         30000 non-null  float32
 1   rms                 30000 non-null  float32
 2   spectral_centroid   30000 non-null  float64
 3   spectral_bandwidth  30000 non-null  float64
 4   rolloff             30000 non-null  float64
 5   zero_crossing_rate  30000 non-null  float64
 6   mfcc1               30000 non-null  float32
 7   mfcc2               30000 non-null  float32
 8   mfcc3               30000 non-null  float32
 9   mfcc4               30000 non-null  float32
 10  mfcc5               30000 non-null  float32
 11  mfcc6               30000 non-null  float32
 12  mfcc7               30000 non-null  float32
 13  mfcc8               30000 non-null  float32
 14  mfcc9               30000 non-null  float32
 15  mfcc10              30000 non-null  float32
 16  mfcc

In [55]:
df["LABEL"].value_counts()

LABEL
1    26250
0     3750
Name: count, dtype: int64

### Split the data

In [122]:
df_train, df_test = train_test_split(df, test_size=0.3, random_state=42, stratify=df["LABEL"])

In [123]:
df_train["LABEL"].value_counts()

LABEL
1    18375
0     2625
Name: count, dtype: int64

In [124]:
X_test = df_test.drop(columns=["LABEL"])
y_test = df_test["LABEL"]
y_test = pd.to_numeric(y_test)
X_train = df_train_under.drop(columns=["LABEL"])
y_train = df_train_under["LABEL"]
y_train = pd.to_numeric(y_train)

### Balance the Dataset

In [125]:
count_fake, count_real = df_train["LABEL"].value_counts()
df_train_fake = df_train[df_train["LABEL"] == 1]
df_train_real = df_train[df_train["LABEL"] == 0]

In [126]:
df_train_fake_under = df_train_fake.sample(count_real)

In [127]:
df_train_under = pd.concat([df_train_fake_under, df_train_real], axis=0)

In [128]:
df_train_under["LABEL"].value_counts()

LABEL
1    2625
0    2625
Name: count, dtype: int64

In [129]:
X_train = df_train_under.drop(columns=["LABEL"])
y_train = df_train_under["LABEL"]
y_train = pd.to_numeric(y_train)

### Optimize the hyperparameters

In [142]:
model = XGBClassifier()
grid = {
    'learning_rate': [0.01, 0.1, 1, 10], 
    "max_depth": [1,3,5,7,9],
    "n_estimators": [50, 150, 200, 250, 300, 350]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)

In [143]:
results.best_params_

{'learning_rate': 0.1, 'max_depth': 9, 'n_estimators': 350}

In [144]:
results.best_score_

0.8325714285714285

In [145]:
model = XGBClassifier(learning_rate=0.1)
grid = {
    "max_depth": [9,11,13,15],
    "n_estimators": [350,400,450,500]
}
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)

In [146]:
results.best_params_

{'max_depth': 13, 'n_estimators': 450}

In [147]:
results.best_score_

0.8417142857142856

### Optimized model:

In [148]:
model = XGBClassifier(learning_rate=0.1, n_estimators=450, max_depth=13)
stratifed_cv = StratifiedKFold(n_splits=5)
results = cross_validate(model, X_train, y_train, scoring="accuracy", cv=stratifed_cv)
results

{'fit_time': array([ 85.75665975,  83.77516818,  86.20870113,  97.35364151,
        103.67071199]),
 'score_time': array([0.04212594, 0.04545784, 0.04457498, 0.04517341, 0.04035449]),
 'test_score': array([0.84285714, 0.84666667, 0.84190476, 0.84      , 0.83714286])}

In [149]:
model.fit(X_train, y_train)

In [159]:
y_pred = model.predict(X_test)

In [160]:
y_test = np.ones(y_pred.shape[0])

In [161]:
accuracy_score(y_test, y_pred)

0.8032786885245902

In [162]:
precision_score(y_test, y_pred)

1.0

In [163]:
recall_score(y_test, y_pred)

0.8032786885245902

In [164]:
f1_score(y_test, y_pred)

0.8909090909090909

### Predict the demonstration samples

In [138]:
path = os.path.join(os.pardir, "raw_data", "DEMONSTRATION", "DEMONSTRATION", "linus-original-DEMO.mp3")
y, sr = librosa.load(path)
df_test = preprocess_data(y,sr,0)
X_test = df_test.drop(columns="LABEL")
X_test.shape

(61, 26)

In [139]:
pd.DataFrame(model.predict(X=X_test)).value_counts(normalize=True)

0    0.770492
1    0.229508
Name: proportion, dtype: float64

In [140]:
path = os.path.join(os.pardir, "raw_data", "DEMONSTRATION", "DEMONSTRATION", "linus-to-musk-DEMO.mp3")
y, sr = librosa.load(path)
df_test = preprocess_data(y,sr,0)
X_test = df_test.drop(columns="LABEL")
X_test.shape

(61, 26)

In [141]:
pd.DataFrame(model.predict(X=X_test)).value_counts(normalize=True)

1    0.836066
0    0.163934
Name: proportion, dtype: float64

In [166]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Test_Elise.m4a")
y, sr = librosa.load(path)
df_test = preprocess_data(y,sr,0)
X_test = df_test.drop(columns="LABEL")
X_test.shape

  y, sr = librosa.load(path)


(24, 26)

In [167]:
pd.DataFrame(model.predict(X=X_test)).value_counts(normalize=True)

1    0.75
0    0.25
Name: proportion, dtype: float64

In [168]:
model.predict(X=X_test)

array([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       0, 1])

# Pre-processing v2

In [193]:
df = initialize_empty_df()
df_columns = df.columns
df_columns

Index(['chroma_stft', 'rms', 'spectral_centroid', 'spectral_bandwidth',
       'rolloff', 'zero_crossing_rate', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4',
       'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11',
       'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18',
       'mfcc19', 'mfcc20', 'LABEL'],
      dtype='object')

In [21]:
def preprocess_data_2(y, sr, label):
    iter_ = int(y.shape[0]/sr)
    features = []
    for i in range(iter_):
        y_seg = y[i*sr:(sr*i+sr)]
        chroma_stft = np.mean(librosa.feature.chroma_stft(y=y_seg, sr=sr))
        rms = np.mean(librosa.feature.rms(y=y_seg))
        spec_cent = np.mean(librosa.feature.spectral_centroid(y=y_seg, sr=sr))
        spec_bw = np.mean(librosa.feature.spectral_bandwidth(y=y_seg, sr=sr))
        rolloff = np.mean(librosa.feature.spectral_rolloff(y=y_seg, sr=sr))
        zcr = np.mean(librosa.feature.zero_crossing_rate(y_seg))
        mfcc = np.mean(librosa.feature.mfcc(y=y_seg, sr=sr, n_mfcc=20),axis=1)
        features.append([chroma_stft,rms,spec_cent,spec_bw,rolloff,zcr,*mfcc, label])

    df_indiv = pd.DataFrame(features, columns = df_columns)
    return df_indiv

In [197]:
df = initialize_empty_df()
audio_types = ["REAL","FAKE"]
for audio_type in audio_types:
    path_folder = os.path.join(os.pardir, "raw_data", "KAGGLE", "AUDIO", audio_type)
    for root, dirs, files in os.walk(path_folder):
        for _file in files:
            if _file.endswith(".wav"):
                audio = _file
                path = os.path.join(path_folder, audio)
                y, sr = librosa.load(path)
                if audio_type == "REAL":
                    df_indiv = preprocess_data_2(y, sr, 0)
                else:
                    df_indiv = preprocess_data_2(y, sr, 1)
                df = pd.concat([df, df_indiv], ignore_index=True)
df.shape

  df = pd.concat([df, df_indiv], ignore_index=True)


(29936, 27)

In [198]:
df

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.297479,0.028746,1946.727260,2014.082599,3437.237549,0.086692,-245.472076,104.607819,-46.405796,1.196160,...,-6.395597,-15.313617,-5.430982,-11.619410,-9.566907,-10.951209,-7.456825,-10.645374,-7.513761,0
1,0.486858,0.004846,2613.806630,2426.650312,5197.087513,0.128285,-374.178345,86.107994,-15.835505,19.442621,...,-1.176839,-8.161874,-4.760258,-10.079077,-5.885350,-10.463819,-4.462703,-10.880193,-4.522534,0
2,0.322989,0.020152,2021.784722,2083.546792,4099.872936,0.088379,-321.894012,103.345749,-26.281891,25.837278,...,-1.150528,-13.462696,-4.201140,-7.285254,-5.869706,-13.188181,-1.062960,-8.495620,-6.698467,0
3,0.308950,0.016800,2382.576266,2171.421697,4427.520197,0.128651,-326.366028,91.761772,-15.507668,31.374434,...,-0.081481,-10.845043,-4.006562,-4.791302,-12.806199,-7.461127,0.797512,-13.421026,-3.720847,0
4,0.344340,0.019600,2214.410400,2206.308508,4186.739835,0.120816,-326.179657,95.224518,-25.636980,22.470015,...,-4.114457,-2.843492,-12.660142,-7.123310,-12.714676,-13.993152,0.800288,-13.211448,-10.999192,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29931,0.316706,0.056206,2222.755533,2246.142749,3921.734619,0.107400,-267.162964,88.965797,-16.697729,9.186891,...,-14.572534,-8.675159,-1.057068,-6.318439,-0.729645,-6.671122,-8.323844,-8.292027,-7.811273,1
29932,0.377767,0.051859,2742.818542,2505.850408,5275.145375,0.141224,-290.435333,62.383610,-4.071285,18.787714,...,-11.117438,-10.938526,3.042160,-2.456734,0.501205,-8.675010,0.235262,-4.474710,-5.744944,1
29933,0.301122,0.044479,2624.528443,2823.049519,5529.628684,0.090310,-266.379364,65.211861,11.999157,31.880110,...,-15.367050,-14.893484,-3.912978,-10.857360,4.503061,-8.502412,-7.280063,-10.365235,-0.479803,1
29934,0.356438,0.040002,2203.283869,2258.234039,3741.883434,0.107722,-265.657928,98.423706,1.515131,12.571268,...,-4.816615,-8.005732,-4.429561,-1.313450,-1.917220,-12.020423,-9.319329,-7.296518,-8.915230,1


In [205]:
df.to_csv("dataset.csv")

# Split dataset

In [68]:
def split_dataset(df):
    df_train, df_test = train_test_split(df, test_size=0.3, random_state=42, stratify=df["LABEL"])
    
    X_test = df_test.drop(columns=["LABEL"])
    y_test = pd.to_numeric(df_test["LABEL"])

    X_train = df_train.drop(columns=["LABEL"])
    y_train = pd.to_numeric(df_train["LABEL"])

    return df_train, df_test, X_train, X_test, y_train, y_test

# Balance dataset

In [69]:
def balance_train_dataset(df_train):
    count_fake, count_real = df_train["LABEL"].value_counts()
    df_train_fake = df_train[df_train["LABEL"] == 1]
    df_train_real = df_train[df_train["LABEL"] == 0]
    df_train_fake_under = df_train_fake.sample(count_real)
    df_train_under = pd.concat([df_train_fake_under, df_train_real], axis=0)
    
    X_train = df_train_under.drop(columns=["LABEL"])
    y_train = pd.to_numeric(df_train_under["LABEL"])
    
    return X_train, y_train

# Fine-tune the XGBoost model

In [208]:
model = XGBClassifier()
grid = {
    'learning_rate': [0.1, 0.2, 0.3], 
    "max_depth": [4,6,8],
    "n_estimators": [300,350,400]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)

In [209]:
results.best_params_

{'learning_rate': 0.3, 'max_depth': 8, 'n_estimators': 350}

In [217]:
model = XGBClassifier()
grid = {
    'learning_rate': [0.3, 0.4, 0.5], 
    "max_depth": [8,9,10],
    "n_estimators": [340,350,360]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)

In [218]:
results.best_params_

{'learning_rate': 0.3, 'max_depth': 8, 'n_estimators': 350}

In [219]:
results.best_score_

0.9429177876448158

# Train the optimized model

In [226]:
model = XGBClassifier(learning_rate=0.3, n_estimators=350, max_depth=8)
stratifed_cv = StratifiedKFold(n_splits=5)
results = cross_validate(model, X_train, y_train, scoring="accuracy", cv=stratifed_cv)
results

{'fit_time': array([3.44593453, 3.55758309, 8.16609478, 2.26321936, 2.55106306]),
 'score_time': array([0.01264334, 0.01492858, 0.01552272, 0.01768398, 0.00974298]),
 'test_score': array([0.94274809, 0.95419847, 0.92557252, 0.9321872 , 0.94269341])}

In [227]:
model.fit(X_train, y_train)

In [229]:
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recal:", recall_score(y_test, y_pred))
print("F1 score:", f1_score(y_test, y_pred))

Accuracy: 0.9457744126489255
Precision: 0.9929115955597164
Recal: 0.9447696614914737
F1 score: 0.9682425823280079


# Test our model

In [230]:
path = os.path.join(os.pardir, "raw_data", "DEMONSTRATION", "DEMONSTRATION", "linus-original-DEMO.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

0    0.9
1    0.1
Name: proportion, dtype: float64

In [231]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Test_Elise.m4a")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

  y, sr = librosa.load(path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


1    0.869565
0    0.130435
Name: proportion, dtype: float64

In [232]:
path = os.path.join(os.pardir, "raw_data", "DEMONSTRATION", "DEMONSTRATION", "linus-to-musk-DEMO.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    0.983333
0    0.016667
Name: proportion, dtype: float64

In [248]:
path = os.path.join(os.pardir, "raw_data", "Tests", "extracted_segment_10_Taylor Swift Talks Record-Breaking Midnights Album, Music Video Cameos and Easter Eggs.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    0.6
0    0.4
Name: proportion, dtype: float64

In [249]:
df_demo

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.292053,0.038542,2010.584703,1867.239351,3359.424383,0.084539,-308.904602,86.453079,-65.826942,-2.092837,...,13.213736,-12.406124,-18.256849,9.554811,-15.841445,-1.546117,-8.398639,-7.858508,-4.631996,0
1,0.351233,0.025897,2572.626816,2081.467753,4320.098877,0.133123,-342.497772,80.942337,-35.553249,-0.984848,...,13.296432,-17.92535,-5.52683,7.933293,-8.905157,-1.367106,-5.830604,-9.14277,-4.655462,0
2,0.39415,0.020241,4099.222928,2381.942366,6598.70328,0.317649,-361.113831,24.59483,-14.183863,2.370954,...,5.830029,-9.994858,6.065957,2.196032,-4.024386,-0.805931,-0.560373,-3.415298,1.057746,0
3,0.331932,0.026483,2790.362132,2359.471001,5519.351474,0.156339,-334.989288,71.666496,-22.854023,2.558429,...,9.511411,-21.480192,-6.450656,9.292726,-14.907086,0.305639,-6.191702,-6.006624,-4.283689,0
4,0.341421,0.027872,2559.285391,2203.269797,4694.482977,0.156428,-296.268585,80.611008,-31.385475,-15.732702,...,-0.936325,-17.39513,1.154679,-5.17802,-10.236477,0.486353,-6.304649,-4.906729,-9.233509,0
5,0.439795,0.022023,4011.507394,2267.611275,6222.851008,0.320268,-327.83255,30.923494,2.058582,-1.478873,...,4.471275,-10.493587,4.003449,0.177397,-8.574155,1.411167,-1.657745,-4.089187,-6.160518,0
6,0.394731,0.023039,1910.523832,1833.062472,3295.55886,0.094327,-313.496063,117.524651,-47.536205,-14.626386,...,13.42651,-18.082443,-4.437365,1.728915,-10.571548,1.077875,-8.462462,1.267839,-7.634736,0
7,0.348295,0.021923,1719.508624,1943.289888,3158.28469,0.082364,-359.918091,123.311058,-12.552262,-13.264886,...,5.738411,-14.626791,0.517582,-0.793604,-14.825812,0.246517,-5.940605,-5.480679,-3.970117,0
8,0.378471,0.02186,3275.605849,2262.507336,5612.091064,0.210249,-334.5737,52.781544,-24.865801,3.969338,...,6.145244,-12.852735,2.355466,0.813338,-8.973125,-0.046496,-3.208203,-1.092529,-1.964465,0
9,0.346378,0.02551,2150.835695,2013.757215,3855.911532,0.125289,-322.814575,109.481255,-37.72213,-23.067293,...,1.863229,-13.919464,-3.135992,-2.157091,-8.926204,-3.260823,-1.973468,-8.9526,-4.319076,0


In [236]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Youssef.wav")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    1.0
Name: proportion, dtype: float64

In [239]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Youssef_2.wav")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    1.0
Name: proportion, dtype: float64

In [240]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Youssef_Eric Cartman.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

  return pitch_tuning(


1    1.0
Name: proportion, dtype: float64

In [241]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Youssef_2_Female.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    1.0
Name: proportion, dtype: float64

In [242]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Morgan Freeman_real.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

  return pitch_tuning(


1    0.597701
0    0.402299
Name: proportion, dtype: float64

In [246]:
X_demo = pd.read_csv(os.path.join(os.pardir, "raw_data", "Tests", "extracted_segment_10_Taylor Swift Talks Record-Breaking Midnights Album, Music Video Cameos and Easter Eggs.csv"))
X_demo

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
0,0.470935,0.059258,3589.920944,2347.309532,5931.90807,0.272317,-261.1296,74.67704,18.270967,7.238893,...,-10.181029,-0.778232,0.806979,6.729933,-0.798005,0.749729,1.258071,4.768607,-3.838733,1.802546
1,0.348168,0.078241,2023.856436,1993.776797,3844.900235,0.098311,-220.11786,111.94776,-7.605939,40.627792,...,-13.463178,2.044327,-3.555796,-0.544064,1.404545,-4.676535,-2.32046,8.78089,-2.997688,-4.718473
2,0.348726,0.069439,1200.883202,1528.429,2143.532493,0.051991,-261.31635,162.49538,-0.482804,29.772436,...,-12.04773,2.520585,1.551374,5.591324,0.434483,-0.622147,2.380089,3.535395,-1.074344,0.098497
3,0.439243,0.035014,1953.601816,1958.732599,3729.403964,0.098722,-280.75125,131.25885,-7.916832,21.938663,...,3.202434,2.764424,-1.067935,7.193146,4.816433,4.016971,0.705184,6.899977,0.677768,-2.142981
4,0.432548,0.065496,1719.14239,1772.658174,3074.109442,0.080211,-258.32434,141.1879,-17.30743,19.037336,...,-8.798536,2.278147,0.51841,10.369609,2.086153,-3.065268,4.038606,2.917625,-3.248699,0.680514
5,0.437033,0.043098,1514.928781,1609.51498,2820.115523,0.063077,-294.8097,156.0971,-22.933355,20.246971,...,1.300806,4.573141,4.311993,11.385888,7.164671,2.68354,1.340205,10.217193,2.709661,2.730037
6,0.528887,0.028137,1535.085987,1790.471841,3090.748735,0.068448,-331.03186,138.8743,-20.648512,23.201326,...,5.077175,3.915003,4.768389,8.504746,1.135159,1.988425,-0.0494,3.8828,0.209928,-3.238527
7,0.402212,0.080992,2159.247009,1989.368595,4009.5803,0.10386,-215.87135,116.37537,-20.949364,17.50782,...,-9.070487,3.252617,2.017319,4.51662,-0.075407,1.144051,-0.266043,3.684124,0.099339,-3.277136
8,0.396729,0.063704,2069.57138,1946.995748,3934.70348,0.091431,-238.59186,126.6338,-13.024871,27.830498,...,-10.981999,1.31897,-1.697661,-0.6736,2.053971,1.183913,-0.039037,6.783774,-0.141546,0.638278
9,0.398887,0.043221,1761.637458,1831.495023,3393.192361,0.067805,-309.46222,122.819244,-13.810925,28.456377,...,-4.454094,2.786302,1.433321,4.284021,1.454062,-0.258384,3.58346,2.995882,0.792018,4.803803


In [247]:
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

0    0.9
1    0.1
Name: proportion, dtype: float64

In [254]:
X_demo = pd.read_csv(os.path.join(os.pardir, "raw_data", "Tests", "extracted_segment_10_extracted_segment_Taylor Swift Talks Record-Breaking Midnights Album, Music Video Cameos and Easter Eggs.csv"))
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    0.7
0    0.3
Name: proportion, dtype: float64

In [252]:
X_demo = pd.read_csv(os.path.join(os.pardir, "raw_data", "Tests", "audio_features_biden.csv")).drop(columns=["origin_sample","LABEL"])
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

0    0.994444
1    0.005556
Name: proportion, dtype: float64

In [253]:
X_demo = pd.read_csv(os.path.join(os.pardir, "raw_data", "Tests", "audio_features_biden_AI.csv")).drop(columns=["origin_sample","LABEL"])
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    0.775
0    0.225
Name: proportion, dtype: float64

# Prepare new audios

In [257]:
from pydub import AudioSegment 

In [261]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Daniel Levitin.mp3"), 
                              format="mp3") 

start = 27*1000
end = (60+24)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Daniel_Levitin_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Daniel_Levitin_1.mp3'>

In [262]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Pamela Meyer.mp3"), 
                              format="mp3")
start = (2*60+12)*1000
end = (3*60+3)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Pamela_Meyer_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Pamela_Meyer_1.mp3'>

In [263]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Wendy LeBorgne.mp3"), 
                              format="mp3")
start = (34)*1000
end = (1*60+50)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Wendy_LeBorgne_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Wendy_LeBorgne_1.mp3'>

In [264]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Matt Abrahams.mp3"), 
                              format="mp3")
start = (38)*1000
end = (4*60+34)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Matt_Abrahams_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Matt_Abrahams_1.mp3'>

In [265]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Sarah Knight.mp3"), 
                              format="mp3")
start = (1*60+28)*1000
end = (5*60+15)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Sarah_Knight_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Sarah_Knight_1.mp3'>

In [267]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Frederik Imbo.mp3"), 
                              format="mp3")
start = (25)*1000
end = (7*60+26)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Frederik_Imbo_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Frederik_Imbo_1.mp3'>

In [268]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Tia Graham.mp3"), 
                              format="mp3")
start = (37)*1000
end = (5*60+11)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Tia_Graham_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Tia_Graham_1.mp3'>

In [None]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Tia Graham.mp3"), 
                              format="mp3")
start = (37)*1000
end = (5*60+11)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Tia_Graham_1.mp3"), 
                        format="mp3") 

In [271]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Daniel Levitin.mp3"), 
                              format="mp3") 

start = (2*60)*1000
end = (11*60+24)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Daniel_Levitin_2.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Daniel_Levitin_2.mp3'>

In [280]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Pamela Meyer.mp3"), 
                              format="mp3")
start = (4*60+12)*1000
end = (17*60)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Pamela_Meyer_2.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Pamela_Meyer_2.mp3'>

In [273]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Wendy LeBorgne.mp3"), 
                              format="mp3")
start = (2*60)*1000
end = (12*60)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Wendy_LeBorgne_2.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Wendy_LeBorgne_2.mp3'>

In [274]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Matt Abrahams.mp3"), 
                              format="mp3")
start = (5*60)*1000
end = (13*60)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Matt_Abrahams_2.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Matt_Abrahams_2.mp3'>

In [275]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Ruairi Robertson.mp3"), 
                              format="mp3")
start = (1*60+20)*1000
end = (14*60)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Ruairi_Robertson_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Ruairi_Robertson_1.mp3'>

In [276]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Rangan Chatterjee.mp3"), 
                              format="mp3")
start = (1*60+20)*1000
end = (14*60)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Rangan_Chatterjee_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Rangan_Chatterjee_1.mp3'>

In [277]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Danna Pycher.mp3"), 
                              format="mp3")
start = (35)*1000
end = (16*60)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Danna_Pycher_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Danna_Pycher_1.mp3'>

In [278]:
song = AudioSegment.from_file(os.path.join(os.pardir, "raw_data", "additional_mp3", "Elin Kjos.mp3"), 
                              format="mp3")
start = (60)*1000
end = (12*60)*1000

segment = song[start:end] 

segment.export(os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios", "Elin_Kjos_1.mp3"), 
                        format="mp3") 

<_io.BufferedRandom name='../raw_data/additional_mp3/cut_audios/Elin_Kjos_1.mp3'>

# Create a new dataset with additional data

In [281]:
new_df_1 = initialize_empty_df()
path_folder = os.path.join(os.pardir, "raw_data", "additional_mp3", "cut_audios")
for root, dirs, files in os.walk(path_folder):
    for _file in files:
        if _file.endswith(".mp3"):
            audio = _file
            path = os.path.join(path_folder, audio)
            y, sr = librosa.load(path)
            df_indiv = preprocess_data_2(y, sr, 0)
            new_df_1 = pd.concat([new_df_1, df_indiv], ignore_index=True)
            print(f"File '{audio}' ok")
new_df_1.shape

  return pitch_tuning(
  new_df_1 = pd.concat([new_df_1, df_indiv], ignore_index=True)


File 'Sarah_Knight_1.mp3' ok
File 'Pamela_Meyer_2.mp3' ok
File 'Daniel_Levitin_1.mp3' ok
File 'Pamela_Meyer_1.mp3' ok
File 'Danna_Pycher_1.mp3' ok
File 'Ruairi_Robertson_1.mp3' ok
File 'Matt_Abrahams_1.mp3' ok
File 'Rangan_Chatterjee_1.mp3' ok
File 'Daniel_Levitin_2.mp3' ok
File 'Elin_Kjos_1.mp3' ok
File 'Wendy_LeBorgne_2.mp3' ok
File 'Matt_Abrahams_2.mp3' ok
File 'Tia_Graham_1.mp3' ok
File 'Frederik_Imbo_1.mp3' ok
File 'Wendy_LeBorgne_1.mp3' ok


(6859, 27)

In [282]:
new_df_1.to_csv("additional_data_elise.csv")

In [283]:
df_youssef = pd.read_csv(os.path.join(os.pardir, "Extra_features.csv"))

In [284]:
full_df = pd.concat([df, new_df_1], ignore_index=True)
full_df = pd.concat([full_df, df_youssef], ignore_index=True)
full_df.shape

(42084, 27)

In [285]:
full_df.to_csv("full_df.csv")

# Exploration 08-12-2023

In [48]:
full_df = pd.read_csv(os.path.join(os.pardir, "full_df_2023-12-07.csv"))
full_df

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.306480,0.076295,1413.467433,1839.001565,2703.151079,0.053345,-232.30121,133.165650,-22.955957,8.888358,...,2.394415,-3.317231,-14.874663,-4.914529,-1.474122,-10.153106,-9.911881,-3.817368,-10.349003,FAKE
1,0.477024,0.006032,2639.868324,2527.274422,5466.497248,0.117154,-383.07986,90.157790,-8.644286,15.567981,...,0.886504,-8.037273,-2.922858,-8.647303,-2.993608,-6.740992,-4.170974,-7.415453,-4.745568,FAKE
2,0.423958,0.054416,1729.632773,2022.347548,3623.206121,0.079767,-287.24896,125.231820,-8.775691,22.783344,...,-2.435866,-9.924732,-2.930283,-7.618189,-2.912869,-5.278629,-3.483856,-4.521770,-0.359239,FAKE
3,0.423640,0.038472,2266.627450,2006.521778,4112.597101,0.118597,-265.62040,103.924530,-9.073397,43.439182,...,-3.888492,-8.790924,2.843195,-3.305686,-1.018029,-0.356833,0.381062,-4.544253,0.707247,FAKE
4,0.502441,0.036537,2135.115673,2027.611411,4083.723034,0.128285,-310.34613,113.192940,-28.261267,12.852004,...,-4.229133,-6.774048,5.878327,-2.639606,-0.157469,-2.179182,-3.610251,-6.534174,-3.241210,FAKE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42079,0.314406,0.046116,2698.440901,2535.992561,5354.182018,0.149414,-255.56259,87.417946,4.433178,13.824187,...,-0.741367,-4.572025,14.577326,-4.435467,-6.427833,4.626421,-0.856710,-12.452103,2.811413,REAL
42080,0.370378,0.056801,2955.994101,2328.057783,5294.231623,0.190552,-232.96104,76.125670,-1.108529,18.675050,...,2.659945,-9.495482,8.700753,-1.669142,-10.515453,0.587301,4.574445,-8.807771,4.212809,REAL
42081,0.309927,0.076453,1884.453339,2071.263641,3594.332053,0.091908,-237.58550,107.939300,-22.380022,18.590910,...,6.337625,-7.541246,11.250736,-8.606643,-7.723567,1.479188,-9.126733,-8.543749,0.512092,REAL
42082,0.385543,0.030032,2407.104292,2433.123626,4915.932395,0.120594,-269.90726,91.986984,9.712902,21.076706,...,2.552168,-6.178091,6.619003,-7.936121,-7.672117,0.772831,0.610292,-10.339297,0.709354,REAL


### Label encoding

In [49]:
full_df["LABEL"].value_counts()

LABEL
FAKE    26194
REAL    15890
Name: count, dtype: int64

In [50]:
full_df["LABEL"] = full_df["LABEL"].map({"FAKE":1, "REAL":0})

In [51]:
full_df["LABEL"].value_counts()

LABEL
1    26194
0    15890
Name: count, dtype: int64

### Load ASV DF  audios labels

In [27]:
# column for trial name
df_name_trial = 'trial'
# column for key (bonafide/spoof)
df_name_label = 'label'
# column for the (w/ non-speech and w/o non-speech)
df_name_trim = 'trim'
# column for subset (progress, eval, hidden, ...)
df_name_subset = 'subset'
# column for score
df_name_score  = 'score'
# column for compression
df_name_compr = 'compression'
# column for data source
df_name_source = 'source'
# column for vocoder type
df_name_vocoder = 'vocoder'
# column for spoofing attack
df_name_attack = 'attack'
# column for speaker ID
df_name_speaker = 'speaker'

# value of the pooled condition
df_pooled_tag = 'Pooled'

# the order of column should not be changed
df_protocol_names = [df_name_speaker, df_name_trial,
                     df_name_compr, df_name_source, df_name_attack, df_name_label,
                     df_name_trim, df_name_subset, df_name_vocoder, 
                     'task', 'team', 'gender-pair', 'language']

def load_protocol(protocol_file, names, sep=' ', index_col=None):
    pd_protocol = pd.read_csv(protocol_file, sep=' ', names=names, 
                                  index_col = index_col, skipinitialspace=True)
    return pd_protocol

In [28]:
df_protocol_file = os.path.join(os.pardir, "raw_data", "ASV", "trial_metadata.txt")
df_protocol_pd = load_protocol(df_protocol_file, names = df_protocol_names, index_col = df_name_trial)

In [29]:
df_protocol_pd = df_protocol_pd[["label"]]
df_protocol_pd["label"] = df_protocol_pd["label"].map({"spoof":1, "bonafide":0})
df_protocol_pd

Unnamed: 0_level_0,label
trial,Unnamed: 1_level_1
DF_E_2000011,1
DF_E_2000013,1
DF_E_2000024,1
DF_E_2000026,1
DF_E_2000027,1
...,...
DF_E_4999945,1
DF_E_4999962,1
DF_E_4999964,1
DF_E_4999980,1


In [30]:
df_protocol_pd["label"].value_counts()

label
1    589212
0     22617
Name: count, dtype: int64

### Extract features from ASV audios

In [42]:
%time
mfcc_columns = [f"mfcc{i+1}" for i in range(20)]
df = initialize_empty_df()
df_columns = df.columns
df_columns

new_df_2 = initialize_empty_df()
path_folder = os.path.join(os.pardir, "raw_data", "ASV", "flac")
for root, dirs, files in os.walk(path_folder):
    for _file in tqdm(files[:20000]):
        if _file.endswith(".flac"):
            audio = _file
            path = os.path.join(path_folder, audio)
            y, sr = librosa.load(path)
            file = _file.split(".")[0]
            label = df_protocol_pd.loc[file, "label"]
            df_indiv = preprocess_data_2(y, sr, int(label))
            new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
            # print(f"File '{file}', label {label} ok")
new_df_2.shape

CPU times: user 1e+03 ns, sys: 3 µs, total: 4 µs
Wall time: 9.3 µs


  0%|          | 0/20000 [00:00<?, ?it/s]

  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  return pitch_tuning(
  new_df_2 = pd.c

(25035, 27)

In [43]:
for root, dirs, files in os.walk(path_folder):
    for _file in tqdm(files[20000:40000]):
        if _file.endswith(".flac"):
            audio = _file
            path = os.path.join(path_folder, audio)
            y, sr = librosa.load(path)
            file = _file.split(".")[0]
            label = df_protocol_pd.loc[file, "label"]
            df_indiv = preprocess_data_2(y, sr, int(label))
            new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
            # print(f"File '{file}', label {label} ok")
new_df_2.shape

  0%|          | 0/20000 [00:00<?, ?it/s]

  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  return pitch_tuning(
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  return pitch_tuning(
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  new_df_2 = pd.concat([new_df_2, df_indiv], ignore_index=True)
  return pitch_tuning(
  new_df_2 = pd.concat([new_df_2, d

(50014, 27)

In [None]:
from joblib import Parallel, delayed
n_proc = 8. # mon nbre de cpu dans ma machine
r = Parallel(n_jobs=n_proc)(delayed(make_features_df)([sound]) for sound in sound_files_path).  
# make_features_df est ma fonction qui retourne un df avec le son extrait et sound_files_path c’est une liste avec la liste de path 
# pour tout les sounds dans le repertoire.

In [45]:
new_df_2.LABEL.value_counts()

LABEL
1    48049
0     1965
Name: count, dtype: int64

In [52]:
full_df

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.306480,0.076295,1413.467433,1839.001565,2703.151079,0.053345,-232.30121,133.165650,-22.955957,8.888358,...,2.394415,-3.317231,-14.874663,-4.914529,-1.474122,-10.153106,-9.911881,-3.817368,-10.349003,1
1,0.477024,0.006032,2639.868324,2527.274422,5466.497248,0.117154,-383.07986,90.157790,-8.644286,15.567981,...,0.886504,-8.037273,-2.922858,-8.647303,-2.993608,-6.740992,-4.170974,-7.415453,-4.745568,1
2,0.423958,0.054416,1729.632773,2022.347548,3623.206121,0.079767,-287.24896,125.231820,-8.775691,22.783344,...,-2.435866,-9.924732,-2.930283,-7.618189,-2.912869,-5.278629,-3.483856,-4.521770,-0.359239,1
3,0.423640,0.038472,2266.627450,2006.521778,4112.597101,0.118597,-265.62040,103.924530,-9.073397,43.439182,...,-3.888492,-8.790924,2.843195,-3.305686,-1.018029,-0.356833,0.381062,-4.544253,0.707247,1
4,0.502441,0.036537,2135.115673,2027.611411,4083.723034,0.128285,-310.34613,113.192940,-28.261267,12.852004,...,-4.229133,-6.774048,5.878327,-2.639606,-0.157469,-2.179182,-3.610251,-6.534174,-3.241210,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42079,0.314406,0.046116,2698.440901,2535.992561,5354.182018,0.149414,-255.56259,87.417946,4.433178,13.824187,...,-0.741367,-4.572025,14.577326,-4.435467,-6.427833,4.626421,-0.856710,-12.452103,2.811413,0
42080,0.370378,0.056801,2955.994101,2328.057783,5294.231623,0.190552,-232.96104,76.125670,-1.108529,18.675050,...,2.659945,-9.495482,8.700753,-1.669142,-10.515453,0.587301,4.574445,-8.807771,4.212809,0
42081,0.309927,0.076453,1884.453339,2071.263641,3594.332053,0.091908,-237.58550,107.939300,-22.380022,18.590910,...,6.337625,-7.541246,11.250736,-8.606643,-7.723567,1.479188,-9.126733,-8.543749,0.512092,0
42082,0.385543,0.030032,2407.104292,2433.123626,4915.932395,0.120594,-269.90726,91.986984,9.712902,21.076706,...,2.552168,-6.178091,6.619003,-7.936121,-7.672117,0.772831,0.610292,-10.339297,0.709354,0


In [53]:
full_df = pd.concat([full_df, new_df_2], ignore_index=True)

In [54]:
full_df.shape

(92098, 27)

In [55]:
full_df.LABEL.value_counts()

LABEL
1    74243
0    17855
Name: count, dtype: int64

In [56]:
full_df.to_csv("full_df_Elise.csv", index=False)

In [57]:
full_df.to_csv("new_df_ASV_DF_Elise.csv", index=False)

In [59]:
df_youssef = pd.read_csv(os.path.join(os.pardir, "Asvc_2019_features.csv"))
df_youssef

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.581642,0.003070,2199.394712,1951.288198,4489.428156,0.101729,-465.42130,125.339874,-69.007560,70.611090,...,-8.866219,7.612215,-12.527738,9.243306,-17.393263,6.024668,-14.883923,-1.570087,-11.686265,FAKE
1,0.325574,0.093229,2345.454133,1599.777611,3984.376665,0.152854,-236.49667,100.673485,-32.507786,49.088180,...,-10.290752,-2.104480,-12.366513,-0.074342,-20.750038,-0.219439,-13.774378,-12.227905,-15.365830,FAKE
2,0.315612,0.107650,1792.799111,1426.987016,3165.870250,0.124190,-233.42374,128.119860,-33.989710,45.122196,...,-4.690394,-8.381293,-15.633381,-2.263502,-20.402868,-5.021841,-14.528839,-10.487343,-13.769796,FAKE
3,0.539373,0.001211,1397.669671,1798.662060,3206.245006,0.058805,-601.98300,157.632460,-29.607780,64.629550,...,-18.353567,-2.086941,-21.357689,1.418289,-19.528841,2.797719,-17.868378,-0.484152,-12.906013,FAKE
4,0.309486,0.177945,1369.867761,1431.153215,2583.005593,0.078857,-275.83110,104.976555,-20.991016,46.207660,...,-12.673950,-6.094771,-14.902205,0.231349,-26.601341,-16.148132,-9.064847,-9.427711,-17.687447,FAKE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73828,0.324854,0.171201,1960.383430,1530.253407,3517.497670,0.119573,-194.51631,107.887276,-35.448288,51.141476,...,-6.103287,-7.861819,-19.831623,-4.822103,-20.312162,-5.467866,-23.543077,-9.692486,-11.387695,REAL
73829,0.562111,0.001450,2005.104524,2193.143056,4906.144576,0.074984,-578.06100,126.731720,-30.722902,62.511670,...,-9.140802,3.760223,-21.480995,7.100119,-16.126696,3.521853,-13.677375,0.179337,-8.679742,REAL
73830,0.627150,0.000838,2497.242070,2130.631249,5123.434171,0.135254,-573.36584,118.842740,-60.091100,79.257290,...,-5.516846,8.238003,-15.756979,9.991550,-19.251009,3.247071,-17.782036,1.652009,-9.311823,REAL
73831,0.396236,0.079984,2030.308931,1653.855364,3667.740700,0.127608,-306.14417,103.021150,-33.476425,19.177876,...,-23.228844,-4.171561,-6.947437,-1.841715,-10.318977,-8.007598,-15.367290,-6.294384,-6.322925,REAL


In [60]:
full_df_2 = pd.concat([full_df, df_youssef], ignore_index=True)
full_df_2

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,LABEL
0,0.306480,0.076295,1413.467433,1839.001565,2703.151079,0.053345,-232.30121,133.165650,-22.955957,8.888358,...,2.394415,-3.317231,-14.874663,-4.914529,-1.474122,-10.153106,-9.911881,-3.817368,-10.349003,1
1,0.477024,0.006032,2639.868324,2527.274422,5466.497248,0.117154,-383.07986,90.157790,-8.644286,15.567981,...,0.886504,-8.037273,-2.922858,-8.647303,-2.993608,-6.740992,-4.170974,-7.415453,-4.745568,1
2,0.423958,0.054416,1729.632773,2022.347548,3623.206121,0.079767,-287.24896,125.231820,-8.775691,22.783344,...,-2.435866,-9.924732,-2.930283,-7.618189,-2.912869,-5.278629,-3.483856,-4.521770,-0.359239,1
3,0.423640,0.038472,2266.627450,2006.521778,4112.597101,0.118597,-265.62040,103.924530,-9.073397,43.439182,...,-3.888492,-8.790924,2.843195,-3.305686,-1.018029,-0.356833,0.381062,-4.544253,0.707247,1
4,0.502441,0.036537,2135.115673,2027.611411,4083.723034,0.128285,-310.34613,113.192940,-28.261267,12.852004,...,-4.229133,-6.774048,5.878327,-2.639606,-0.157469,-2.179182,-3.610251,-6.534174,-3.241210,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165926,0.324854,0.171201,1960.383430,1530.253407,3517.497670,0.119573,-194.51631,107.887276,-35.448288,51.141476,...,-6.103287,-7.861819,-19.831623,-4.822103,-20.312162,-5.467866,-23.543077,-9.692486,-11.387695,REAL
165927,0.562111,0.001450,2005.104524,2193.143056,4906.144576,0.074984,-578.06100,126.731720,-30.722902,62.511670,...,-9.140802,3.760223,-21.480995,7.100119,-16.126696,3.521853,-13.677375,0.179337,-8.679742,REAL
165928,0.627150,0.000838,2497.242070,2130.631249,5123.434171,0.135254,-573.36584,118.842740,-60.091100,79.257290,...,-5.516846,8.238003,-15.756979,9.991550,-19.251009,3.247071,-17.782036,1.652009,-9.311823,REAL
165929,0.396236,0.079984,2030.308931,1653.855364,3667.740700,0.127608,-306.14417,103.021150,-33.476425,19.177876,...,-23.228844,-4.171561,-6.947437,-1.841715,-10.318977,-8.007598,-15.367290,-6.294384,-6.322925,REAL


In [61]:
full_df = full_df_2
full_df.LABEL.value_counts()

LABEL
1       74243
FAKE    66160
0       17855
REAL     7673
Name: count, dtype: int64

In [62]:
full_df.LABEL = full_df.LABEL.map({1:1, "FAKE":1, 0:0, "REAL":0})

In [63]:
full_df.LABEL.value_counts()

LABEL
1    140403
0     25528
Name: count, dtype: int64

In [64]:
full_df.to_csv("full_df_2023-12-08.csv", index=False)

### Split the dataset

In [70]:
df_train, df_test, X_train, X_test, y_train, y_test = split_dataset(full_df)

### Balance the dataset

In [71]:
X_train, y_train = balance_train_dataset(df_train)

In [91]:
X_train

Unnamed: 0,chroma_stft,rms,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
11653,0.477532,0.027035,1614.983626,2017.073710,2913.833896,0.075151,-341.177150,127.203735,-10.553321,10.803658,...,8.891631,0.857336,1.255024,-3.669160,-3.835343,-2.513381,-1.570176,4.108837,-1.686249,-2.756621
9465,0.422689,0.023700,2656.297426,2593.268466,5260.953036,0.110973,-304.486480,73.967450,24.907799,34.453312,...,1.033165,6.890780,0.548442,1.110733,-4.258731,-1.373678,-2.184253,4.391459,-0.355663,4.627751
111008,0.378056,0.099768,1697.062777,1356.171149,2823.296564,0.102750,-248.943220,135.837720,-23.877739,38.069880,...,-2.665796,-1.500853,8.545974,-19.880270,9.377654,-15.221759,-20.789354,-3.916131,-8.012980,-11.924155
126857,0.384755,0.107763,1551.690535,1595.090177,3019.787043,0.083363,-257.969300,132.456000,-12.429859,61.994144,...,-21.463804,-8.011512,3.717287,-17.876318,11.538120,-16.521610,-10.718972,-9.125284,-6.944127,-10.354171
98196,0.442243,0.138534,2059.866836,1776.184399,4036.007413,0.099987,-189.404650,124.411674,-31.691858,63.304440,...,-2.752812,-7.943760,2.767408,-19.627195,0.805653,-8.442031,3.677862,-7.429212,4.583130,-11.674087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37573,0.482663,0.140116,1771.387178,2374.584327,3970.429022,0.041437,-294.902070,96.797740,20.853792,21.472643,...,-2.725092,8.961302,-5.463959,-3.231910,-2.938956,1.134794,-3.145010,3.715192,-5.987789,-0.173123
52909,0.535818,0.000823,1152.323188,1578.983622,2197.854892,0.053023,-715.720276,140.822021,-18.378025,70.304657,...,-7.746841,-4.754834,7.336418,-8.095177,2.011306,-13.797516,-3.372884,-12.406567,4.668414,-5.757507
164803,0.469825,0.030890,1280.439449,1907.972312,3248.822021,0.021407,-483.072080,81.051200,26.703552,33.418896,...,0.597287,-0.476696,2.779343,-1.231575,1.036636,0.184280,2.938758,-1.757240,-1.227051,-3.456279
38203,0.433000,0.035808,1706.272933,2374.854314,3869.369784,0.037409,-396.156250,98.983960,15.294168,23.767675,...,0.860912,-1.284868,-2.689160,6.976188,2.192620,1.701388,0.414259,3.986426,-4.603878,-5.299011


In [92]:
y_train.value_counts()

LABEL
1    17869
0    17869
Name: count, dtype: int64

### Fine-tune XGBoost

In [72]:
model = XGBClassifier()
grid = {
    'learning_rate': [0.1, 0.2, 0.3], 
    "max_depth": [4,6,8],
    "n_estimators": [300,350,400]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)

In [73]:
results.best_params_

{'learning_rate': 0.2, 'max_depth': 8, 'n_estimators': 400}

In [74]:
model = XGBClassifier()
grid = {
    'learning_rate': [0.15, 0.2, 0.25], 
    "max_depth": [8,9,10],
    "n_estimators": [400,450,500]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)
results.best_params_

{'learning_rate': 0.15, 'max_depth': 10, 'n_estimators': 500}

In [75]:
model = XGBClassifier(learning_rate=0.15)
grid = {
    "max_depth": [10,12,14],
    "n_estimators": [500, 550, 600]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)
results.best_params_

{'max_depth': 10, 'n_estimators': 600}

In [76]:
model = XGBClassifier(learning_rate=0.15, max_depth=10)
grid = {
    "n_estimators": [600, 800, 1000]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)
results.best_params_

{'n_estimators': 1000}

In [77]:
model = XGBClassifier(learning_rate=0.15, max_depth=10)
grid = {
    "n_estimators": [1000, 1500, 2000]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)
results.best_params_

{'n_estimators': 2000}

In [78]:
results.best_score_

0.8905365218335648

In [79]:
model = XGBClassifier(learning_rate=0.15, max_depth=10)
grid = {
    "n_estimators": [2000,2500,3000]
}
stratifed_cv = StratifiedKFold(n_splits=5)
search = GridSearchCV(model, grid, scoring="accuracy", cv=stratifed_cv, n_jobs=-1)
results = search.fit(X_train, y_train)
results.best_params_

{'n_estimators': 2500}

In [80]:
results.best_score_

0.8908442767436633

### Train the optimized model

In [81]:
model = XGBClassifier(learning_rate=0.15, max_depth=10, n_estimators=2500)
stratifed_cv = StratifiedKFold(n_splits=5)
results = cross_validate(model, X_train, y_train, scoring="accuracy", cv=stratifed_cv)
results

{'fit_time': array([204.87771034, 267.6812706 , 515.30170584, 420.24430299,
        435.72747397]),
 'score_time': array([0.35405231, 0.35624099, 0.68009591, 0.43883586, 0.34414697]),
 'test_score': array([0.89661444, 0.89297706, 0.89017907, 0.88904435, 0.88540646])}

In [82]:
model.fit(X_train, y_train)

In [83]:
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recal:", recall_score(y_test, y_pred))
print("F1 score:", f1_score(y_test, y_pred))

Accuracy: 0.9067296102852551
Precision: 0.9759715519431039
Recal: 0.9122290543909214
F1 score: 0.943024383060706


### Test with audios

In [84]:
path = os.path.join(os.pardir, "raw_data", "DEMONSTRATION", "DEMONSTRATION", "linus-original-DEMO.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

0    1.0
Name: proportion, dtype: float64

In [85]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Test_Elise.m4a")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

  y, sr = librosa.load(path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


0    0.73913
1    0.26087
Name: proportion, dtype: float64

In [86]:
path = os.path.join(os.pardir, "raw_data", "DEMONSTRATION", "DEMONSTRATION", "linus-to-musk-DEMO.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

1    0.95
0    0.05
Name: proportion, dtype: float64

In [87]:
path = os.path.join(os.pardir, "raw_data", "Tests", "extracted_segment_10_Taylor Swift Talks Record-Breaking Midnights Album, Music Video Cameos and Easter Eggs.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

0    1.0
Name: proportion, dtype: float64

In [88]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Youssef_2_Female.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

0    1.0
Name: proportion, dtype: float64

In [89]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Morgan Freeman_real.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

  return pitch_tuning(


0    0.977011
1    0.022989
Name: proportion, dtype: float64

In [90]:
path = os.path.join(os.pardir, "raw_data", "Tests", "Morgan Freeman_fake.mp3")
y, sr = librosa.load(path)
df_demo = preprocess_data_2(y,sr,0)
X_demo = df_demo.drop(columns="LABEL")
pd.DataFrame(model.predict(X=X_demo)).value_counts(normalize=True)

0    0.603175
1    0.396825
Name: proportion, dtype: float64

# Exploration 11-12-2023