# Data Analysis

Austin Coursey and Cameron Baird

Analyzing data generated from speaker identification verification.

In [2]:
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import os

In [4]:
os.listdir('./data/')

['mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_approx.csv',
 'mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_exact.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.01_approx.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.01_exact.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.05_approx.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.05_exact.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.1_approx.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.1_exact.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.25_approx.csv']

In [39]:
def calculate_metrics(dataset, name):
    print(f'Metrics for {name}')
    
    robust_percentage = np.mean(dataset['robust'] == 1)
    print(f'Percentage robust: {robust_percentage}')
    
    percent_correct = np.mean(dataset[' target'] == dataset[' y_pred'])
    print(f'Percentage correct: {percent_correct}')
    
    setup_time = dataset[' time_setup'].to_numpy()
    star_time = dataset[' time_star'].to_numpy()
    verify_time = dataset[' time_verify'].to_numpy()
    
    print(f'Average time to setup: {np.mean(setup_time+star_time):.4f} + time to verify: {np.mean(verify_time+star_time):.4f} = {np.mean(setup_time+verify_time+star_time):.4f}')
    print()

#### Metrics across all data

In [40]:
data_dir = './data/'
for path in os.listdir(data_dir):
    full_path = data_dir + path
    calculate_metrics(pd.read_csv(full_path), path)

Metrics for mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_approx.csv
Percentage robust: 0.594
Percentage correct: 0.967
Average time to setup: 0.0715 + time to verify: 0.1203 = 0.1245

Metrics for mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_exact.csv
Percentage robust: 0.663
Percentage correct: 0.967
Average time to setup: 0.0677 + time to verify: 0.7444 = 0.7486

Metrics for mlp_dense16_mu_compress_rate_2kHz_eps_0.01_approx.csv
Percentage robust: 0.967
Percentage correct: 0.967
Average time to setup: 0.0720 + time to verify: 0.0882 = 0.0926

Metrics for mlp_dense16_mu_compress_rate_2kHz_eps_0.01_exact.csv
Percentage robust: 0.967
Percentage correct: 0.967
Average time to setup: 0.0757 + time to verify: 0.0940 = 0.0985

Metrics for mlp_dense16_mu_compress_rate_2kHz_eps_0.05_approx.csv
Percentage robust: 0.966
Percentage correct: 0.967
Average time to setup: 0.0729 + time to verify: 0.0928 = 0.0972

Metrics for mlp_dense16_mu_compress_rate_2kHz_eps_0.05_exact.csv
Per

In [17]:
eps25_data = pd.read_csv('./data/mlp_dense16_mu_compress_rate_2kHz_eps_0.25_approx.csv')
eps25_data.head()

Unnamed: 0,robust,target,y_pred,time_setup,time_star,time_verify,line_number,file_path,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10
0,1,1,1,0.006989,0.086177,0.018026,1,C:\Users\camer\Documents\data\LibriSpeech\tra...,70.129128,-69.808289,64.851021,16.713888,-27.279943,29.382544,-3.693422,-2.574194,-31.242453,44.57283
1,1,1,1,0.00417,0.062445,0.020232,2,C:\Users\camer\Documents\data\LibriSpeech\tra...,112.095795,-111.525642,107.164124,24.732317,-68.256996,64.627121,-8.127777,11.603613,-57.639313,63.890179
2,1,1,1,0.004294,0.06242,0.020932,3,C:\Users\camer\Documents\data\LibriSpeech\tra...,100.729118,-107.588486,91.740532,18.462566,-48.270275,52.478962,7.754155,4.013506,-54.675766,50.364292
3,1,1,1,0.005041,0.068666,0.019911,4,C:\Users\camer\Documents\data\LibriSpeech\tra...,140.188797,-145.470551,132.650131,26.527159,-91.247337,88.122353,0.663629,19.833471,-79.628799,69.191956
4,1,1,1,0.005302,0.068312,0.025691,5,C:\Users\camer\Documents\data\LibriSpeech\tra...,111.612823,-126.881332,98.772652,15.481234,-53.036541,61.761936,23.127144,6.16456,-67.726311,44.515308


In [29]:
calculate_metrics(eps25_data)

Percentage robust: 0.624
Average time to setup: 0.0718 + time to verify: 0.1174 = 0.1216


### $\epsilon=0.25$ MLP Approximate Adversarial Robustness Training

In [30]:
eps25_adv_data = pd.read_csv('./data/mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_approx.csv')
calculate_metrics(eps25_adv_data)

Percentage robust: 0.594
Average time to setup: 0.0715 + time to verify: 0.1203 = 0.1245
