In [1]:
import pywt
import os
import pandas as pd
import librosa
import librosa.display
import glob 
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
from fitter import Fitter, get_common_distributions, get_distributions
from matplotlib.ticker import FormatStrFormatter
from pydub import AudioSegment
import pickle
import collections
import re

from sklearn.preprocessing import StandardScaler
import sklearn

from utils import plot_projections

from scipy.stats import kurtosis, skew

%matplotlib widget

In [2]:
model_dir = 'models/'
model_filename = 'svm_model_log_sigma_male_female_speaker_norm.sav'

In [3]:
# loading the saved model
model_svm = pickle.load(open(model_dir + model_filename, 'rb'))

In [4]:
def extract_speaker_num(f):
    
    substr = f.split('/')[7].split('_')
    
#     print(substr)
    
    speaker_str = re.split('(\d+)', substr[2])
    
#     print(speaker_str)
    
#     speaker_str = [*substr[2]]
    
    return int(speaker_str[1])

In [5]:
extract_speaker_num('/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker1_002.wav')

1

In [6]:
# loading combined file paths

comb_dir = '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/'
comb_files = list(glob.glob(os.path.join(comb_dir, '*wav')))
comb_files.sort(key=extract_speaker_num)

# comb_files = comb_files[5:]
print(comb_files)
print(len(comb_files))

['/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker1_002.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker1_001.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker1_003.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker1_004.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker1_005.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker2_004.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker2_002.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker2_005.wav', '/home/hashim/PHD/audio_data/AllAudioSamples/combined_male_female_speaker/original_laser_speaker2_003.wav', '/home/hashim/PHD/audio_dat

In [7]:
db1 = pywt.Wavelet('db1')

## Frame by Frame Analysis


In [8]:
def audiosegment_to_librosawav(audiosegment):    
    channel_sounds = audiosegment.split_to_mono()
    samples = [s.get_array_of_samples() for s in channel_sounds]

    fp_arr = np.array(samples).T.astype(np.float32)
    fp_arr /= np.iinfo(samples[0].typecode).max
    fp_arr = fp_arr.reshape(-1)

    return fp_arr

In [9]:
comb_true_labels = []

combf_dict = collections.defaultdict(dict)

for i, combf in enumerate(comb_files):
    
    audio_data, sr = librosa.load(combf, res_type='kaiser_fast')
    audio_data = librosa.util.normalize(audio_data)
    
    frame_len, hop_len = int(sr), int(sr/2)
    
    frames = librosa.util.frame(audio_data, frame_length=frame_len, hop_length=hop_len, axis=0)
    
    true_labels = []
    
    sigma_feat_slice = []
    skew_feat_slice = []
    kurt_feat_slice = []
    
#     if len(frames) <=4:
#         continue;
    
#     print(len(frames))
    for j, frame in enumerate(frames):
        
#         print("Frame {}: {}".format(j, frame))
        
        coeffs = pywt.wavedec(frame, db1, mode='constant', level=5)
        
        sigma_feat = []
        skew_feat = []
        kurt_feat = []

        for cf in coeffs:

            sigma_feat.append(np.log(np.var(cf)))
            skew_feat.append(skew(cf))
            kurt_feat.append(kurtosis(cf))

        sigma_feat_slice.append(sigma_feat)
        skew_feat_slice.append(skew_feat)
        kurt_feat_slice.append(kurt_feat) 
        
#         if j <= 1:
#             true_labels.append('original')
#         else:
#             true_labels.append('laser')
            
        if j <= 2:
            true_labels.append('original')
        else:
            true_labels.append('laser')
            
#         t = frame_len/(j+1)
#         print(t)
            
        print(j)

    sigma_feat_slice = np.squeeze(np.array(sigma_feat_slice))
    skew_feat_slice = np.squeeze(np.array(skew_feat_slice))
    kurt_feat_slice = np.squeeze(np.array(kurt_feat_slice))
    
#     print(sigma_feat_slice.shape)
#     print(skew_feat_slice.shape)
#     print(kurt_feat_slice.shape)
    
    print(true_labels)
    
    combf_dict[i]['feat'] = np.concatenate((sigma_feat_slice, skew_feat_slice, kurt_feat_slice), axis=1)
    combf_dict[i]['labels'] = true_labels

0
1
2
3
4
5
['original', 'original', 'original', 'laser', 'laser', 'laser']
0
1
2
3
4
5
['original', 'original', 'original', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
7
['original', 'original', 'original', 'laser', 'laser', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
7
['original', 'original', 'original', 'laser', 'laser', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
['original', 'original', 'original', 'laser', 'laser', 'laser', 'laser']
0
1
2
3
4
5
['original', 'original', 'original', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
['original', 'original', 'original', 'laser', 'laser', 'laser', 'laser']
0
1
2
3
4
5
['original', 'original', 'original', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
['original', 'original', 'original', 'laser', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
['original', 'original', 'original', 'laser', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
['original', 'original', 'original', 'laser', 'laser', 'laser', 'laser']
0
1
2
3
4
5
6
7
['original', 'original', 'original', 'laser', 'la

In [10]:
combf_dict

defaultdict(dict,
            {0: {'feat': array([[-1.01776075e+01, -1.02681465e+01, -1.19608011e+01,
                      -1.29557848e+01, -1.41942139e+01, -1.59331656e+01,
                       1.61513948e+00,  1.43463218e+00,  8.90031457e-01,
                       1.71219885e+00,  2.31194019e+00,  2.35191727e+00,
                       2.04401056e+01,  3.72041841e+01,  3.26837079e+01,
                       7.10267124e+01,  7.80927846e+01,  8.36804825e+01],
                     [-8.73505783e+00, -8.77716541e+00, -1.02725792e+01,
                      -1.17102728e+01, -1.32823772e+01, -1.51135654e+01,
                      -8.11439157e-02,  2.42549852e-01,  3.53632718e-01,
                       7.50702977e-01,  1.17608535e+00,  9.01857913e-01,
                       3.44056716e+00,  4.07901202e+00,  5.74732145e+00,
                       1.12716439e+01,  2.18034011e+01,  1.68945133e+01],
                     [-8.91828060e+00, -8.99397659e+00, -1.04451485e+01,
                    

In [11]:
combf_keys = combf_dict.keys()
combf_keys

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94])

## Prediction Using already trained model

In [12]:
import sklearn

from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import plot_confusion_matrix

In [13]:
pred_label_dict = dict()

accuracy_dict = dict()

true_labels_ls = []
pred_labels_ls = []

for key in combf_dict:
    
    X_comb = combf_dict[key]['feat']
    comb_labels = combf_dict[key]['labels']
    
#     scaler = StandardScaler().fit(X_comb)
#     X_comb = scaler.transform(X_comb)
    
    true_labels_ls.extend(comb_labels)
    
    pred_labels = model_svm.predict(X_comb)
    
    pred_labels_ls.extend(pred_labels)
    
    accuracy_dict[key] = accuracy_score(comb_labels, pred_labels)

frame_by_frame_accuracy = accuracy_score(true_labels_ls, pred_labels_ls)

In [14]:
frame_by_frame_accuracy

0.6711798839458414

In [15]:
avg_accuracy = sum(accuracy_dict.values())/len(accuracy_dict)
avg_accuracy

0.7079448621553887

In [16]:
true_labels_ls

['original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'l

In [17]:
pred_labels_ls

['original',
 'original',
 'original',
 'laser',
 'laser',
 'original',
 'laser',
 'original',
 'original',
 'original',
 'original',
 'original',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'orig

In [18]:
cm = confusion_matrix(true_labels_ls, pred_labels_ls)

In [19]:
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_svm.classes_)
disp.plot()

plt.savefig("ConfusionMatrix_frame_by_frame_with_border_frames_male_female_speaker_norm", dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [20]:
# statistics from the smaller window is unreliable 

# follow hani fareed work

# look at the corner cases in terms of features. Whose audio features are close in terms of variance, skew and kurtosis

# Without Borders

In [23]:
comb_true_labels = []

combf_dict = collections.defaultdict(dict)

for i, combf in enumerate(comb_files):
    
    audio_data, sr = librosa.load(combf, res_type='kaiser_fast')
    audio_data = librosa.util.normalize(audio_data)
    
    frame_len, hop_len = int(sr), int(sr/2)
    
    frames = librosa.util.frame(audio_data, frame_length=frame_len, hop_length=hop_len, axis=0)
    
    true_labels = []
    
    sigma_feat_slice = []
    skew_feat_slice = []
    kurt_feat_slice = []
    
    if len(frames) <=2:
        continue;
    
    for j, frame in enumerate(frames):
        
        if (j)%4 == 0:
            continue;
        
#         print("Frame {}: {}".format(j, frame))
        
        coeffs = pywt.wavedec(frame, db1, mode='constant', level=5)
        
        sigma_feat = []
        skew_feat = []
        kurt_feat = []

        for cf in coeffs:

            sigma_feat.append(np.log(np.var(cf)))
            skew_feat.append(skew(cf))
            kurt_feat.append(kurtosis(cf))

        sigma_feat_slice.append(sigma_feat)
        skew_feat_slice.append(skew_feat)
        kurt_feat_slice.append(kurt_feat) 
        
#         if j <= 10:
#             true_labels.append('original')
#         else:
#             true_labels.append('laser')
            
        if j <= 2:
            true_labels.append('original')
        else:
            true_labels.append('laser')
            
        print(j)

#     sigma_feat_slice = np.array(sigma_feat_slice)
#     skew_feat_slice = np.array(skew_feat_slice)
#     kurt_feat_slice = np.array(kurt_feat_slice)
    
    sigma_feat_slice = np.squeeze(np.array(sigma_feat_slice))
    skew_feat_slice = np.squeeze(np.array(skew_feat_slice))
    kurt_feat_slice = np.squeeze(np.array(kurt_feat_slice))
    
    print(sigma_feat_slice.shape)
#     print(skew_feat_slice.shape)
#     print(kurt_feat_slice.shape)
    
    print(true_labels)

    combf_dict[i]['feat'] = np.concatenate((sigma_feat_slice, skew_feat_slice, kurt_feat_slice), axis=1)
    combf_dict[i]['labels'] = true_labels

1
2
3
5
(4, 6)
['original', 'original', 'laser', 'laser']
1
2
3
5
(4, 6)
['original', 'original', 'laser', 'laser']
1
2
3
5
6
7
(6, 6)
['original', 'original', 'laser', 'laser', 'laser', 'laser']
1
2
3
5
6
7
(6, 6)
['original', 'original', 'laser', 'laser', 'laser', 'laser']
1
2
3
5
6
(5, 6)
['original', 'original', 'laser', 'laser', 'laser']
1
2
3
5
(4, 6)
['original', 'original', 'laser', 'laser']
1
2
3
5
6
(5, 6)
['original', 'original', 'laser', 'laser', 'laser']
1
2
3
5
(4, 6)
['original', 'original', 'laser', 'laser']
1
2
3
5
6
(5, 6)
['original', 'original', 'laser', 'laser', 'laser']
1
2
3
5
6
(5, 6)
['original', 'original', 'laser', 'laser', 'laser']
1
2
3
5
6
(5, 6)
['original', 'original', 'laser', 'laser', 'laser']
1
2
3
5
6
7
(6, 6)
['original', 'original', 'laser', 'laser', 'laser', 'laser']
1
2
3
5
6
(5, 6)
['original', 'original', 'laser', 'laser', 'laser']
1
2
3
5
6
7
(6, 6)
['original', 'original', 'laser', 'laser', 'laser', 'laser']
1
2
3
5
6
7
(6, 6)
['original', 'o

In [24]:
combf_dict

defaultdict(dict,
            {0: {'feat': array([[ -8.73505783,  -8.77716541, -10.27257919, -11.71027279,
                      -13.28237724, -15.11356544,  -0.08114392,   0.24254985,
                        0.35363272,   0.75070298,   1.17608535,   0.90185791,
                        3.44056716,   4.07901202,   5.74732145,  11.27164393,
                       21.80340109,  16.89451328],
                     [ -8.9182806 ,  -8.99397659, -10.44514847, -12.03525066,
                      -13.73815632, -15.56807423,   0.20434321,   0.21621472,
                        0.37509963,   0.37652299,   0.66707307,   0.48693657,
                        3.96475959,   4.66951025,   7.76543659,  11.44732815,
                       11.58598089,  14.55432849],
                     [-10.85045719, -11.04220581, -12.28314495, -13.66258717,
                      -14.5471983 , -15.78322697,  -0.09881411,   0.03642113,
                        0.978782  ,   1.11440504,   1.15822208,   0.32768598,
           

In [25]:
pred_label_dict = dict()

accuracy_dict = dict()

true_labels_ls = []
pred_labels_ls = []

for key in combf_dict:
    
    X_comb = combf_dict[key]['feat']
    comb_labels = combf_dict[key]['labels']
    
#     scaler = StandardScaler().fit(X_comb)
#     X_comb = scaler.transform(X_comb)
    
    true_labels_ls.extend(comb_labels)
    
    pred_labels = model_svm.predict(X_comb)
    
    pred_labels_ls.extend(pred_labels)
    
    accuracy_dict[key] = accuracy_score(comb_labels, pred_labels)

frame_by_frame_accuracy = accuracy_score(true_labels_ls, pred_labels_ls)

In [26]:
frame_by_frame_accuracy

0.6991643454038997

In [27]:
avg_accuracy = sum(accuracy_dict.values())/len(accuracy_dict)
avg_accuracy

0.7132246376811592

In [28]:
true_labels_ls

['original',
 'original',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 '

In [29]:
pred_labels_ls

['original',
 'original',
 'laser',
 'original',
 'original',
 'original',
 'original',
 'original',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'laser',
 'laser',
 'original',
 'original',
 'original',
 'laser',
 'original',
 'laser',
 'original',
 'original',
 'laser',
 'original',
 'origin

In [30]:
cm = confusion_matrix(true_labels_ls, pred_labels_ls)

In [31]:
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_svm.classes_)
disp.plot()

plt.savefig("ConfusionMatrix_frame_by_frame_without_border_frames_male_female_speaker_norm", dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Plotting the features of each audio

In [None]:
%matplotlib inline

fig, axes = plt.subplots(nrows=len(combf_dict.keys()), ncols=1, figsize=(15, 120))

for key, ax in zip(combf_dict, axes.flatten()):
    
    feat = combf_dict[key]['feat']
    comb_labels = combf_dict[key]['labels']
    
    
    laser_ids = [i for i,x in enumerate(comb_labels) if x == 'laser']
    
    orig_ids = [i for i,x in enumerate(comb_labels) if x == 'original']
    
    laser_feat = feat[laser_ids]
    orig_feat = feat[orig_ids]

    ax.plot(laser_feat.T, 'bo', label='Laser')
    ax.plot(orig_feat.T, 'r+', label = 'Original')

    N = int(len(laser_feat.T)/3)

    x_c = 0
    for i in range(N):
        ri = float(i)/float(N)
        gi = 1.-ri
        bi = 0.

        ax.axvspan(x_c, x_c + 3, color=(ri,gi,bi), alpha=0.5)
        ax.text(x_c, 10, 'coeff{}'.format(i), fontsize=10)

        x_c += 3

#     for idx, f in enumerate(feat):
        
#         lb = 'original'
#         if idx >= 3:
#             lb = 'laser'

#         ax.scatter(np.arange(0, len(f)), f, label = lb)

    

    
#     ax.scatter(orig_feat, orig_ids, marker='o', label="Original")
#     ax.scatter(laser_feat, laser_ids, marker='x', label="Laser")
    
    handles, labels = ax.get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper center', prop={'size': 15})

In [None]:
# Spectrogram approach can be used to detect the laser injection attack.

# we can apply image based approaches on the spectrogram to detect this attack. 

# However, if your regular aduio has noise in the lower frequency bands, then the image analysis wont work. 



In [None]:
# put this framework in the paper. 

# Tt > 2tf > ta ()

In [None]:
comb_labels = combf_dict[2]['labels']
feat = combf_dict[2]['feat']
orig_ids = [i for i,x in enumerate(comb_labels) if x == 'original']
laser_ids = [i for i,x in enumerate(comb_labels) if x == 'laser']
print(laser_ids)
laser_feat = feat[laser_ids]
orig_feat = feat[orig_ids]
laser_feat

In [None]:
orig_feat

In [None]:
%matplotlib inline

plt.plot(laser_feat.T, 'bo', label='laser')
plt.plot(orig_feat.T, 'r+', label = 'original')

N = int(len(laser_feat.T)/3)

x_c = 0
for i in range(N):
    ri = float(i)/float(N)
    gi = 1.-ri
    bi = 0.

    plt.axvspan(x_c, x_c + 3, color=(ri,gi,bi), alpha=0.5)
    
    x_c += 3
    
# plt.axvspan(0, 3, facecolor='b', alpha=0.5)
# plt.axvspan(3, 6, facecolor='r', alpha=0.5)
# plt.axvspan(6, 9, facecolor='g', alpha=0.5)
# plt.axvspan(9, 12, facecolor='y', alpha=0.5)
# plt.axvspan(9, 12, facecolor='y', alpha=0.5)
# plt.axvspan(12, 15, facecolor='k', alpha=0.5)
# plt.axvspan(15, 18, facecolor='r', alpha=0.5)
    
plt.show()

In [None]:
len(laser_feat.T)

In [None]:
np.arange(0, len(laser_feat.T))

In [None]:
laser_feat.T