# Data Analysis

Austin Coursey and Cameron Baird

Analysis of the result data generated by verifying speaker identification models: per-sample robustness, prediction correctness, and verification timing.

In [122]:
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import os

In [123]:
# os.listdir returns entries in arbitrary order; sort so the displayed
# listing is the same on every machine and every re-run.
sorted(os.listdir('./data/'))

['adversarial_config1_mlp_dense16_mu_compress_rate_2kHz_eps_0.25_exact.csv',
 'adversarial_config1_mlp_dense16_mu_compress_rate_2kHz_eps_0.5_exact.csv',
 'cnn_1d_rate_2kHz_mu_compress_eps_0.1_approx.csv',
 'cnn_1d_rate_2kHz_mu_compress_eps_0.25_approx.csv',
 'mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_approx.csv',
 'mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_exact.csv',
 'mlp_adversarial_dense16_mu_compress_rate_2kHz_higher_reg_weight_eps_0.25_exact.csv',
 'mlp_adversarial_dense16_mu_compress_rate_2kHz_moreparams_eps_0.25_approx.csv',
 'mlp_adversarial_dense16_mu_compress_rate_2kHz_moreparams_eps_0.25_exact.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.01_approx.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.01_exact.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.05_approx.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.05_exact.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.1_approx.csv',
 'mlp_dense16_mu_compress_rate_2kHz_eps_0.1_exact.csv',
 'mlp_dense1

In [124]:
def calculate_metrics(dataset, name):
    """Print summary metrics for one results table.

    Parameters
    ----------
    dataset : table indexable by column name (the notebook passes the
        DataFrame returned by ``pd.read_csv``). The columns read are
        ``'robust'``, ``' target'``, ``' y_pred'``, ``' time_setup'``,
        ``' time_star'`` and ``' time_verify'`` -- note the leading space
        in every name except ``'robust'``.
    name : label printed in the header line (the notebook passes the file name).

    Returns
    -------
    None. Everything is written to stdout.

    Notes
    -----
    * The two "Percentage" lines print the mean of a boolean mask, i.e. a
      fraction in [0, 1], not a value scaled to 100.
    * The "time to setup" figure is the mean of ``time_setup + time_star``.
    """
    print(f'Metrics for {name}')

    # Share of rows whose 'robust' flag equals 1.
    is_robust = dataset['robust'] == 1
    print(f'Percentage robust: {np.mean(is_robust)}')

    # Share of rows where the prediction matches the target label.
    is_correct = dataset[' target'] == dataset[' y_pred']
    print(f'Percentage correct: {np.mean(is_correct)}')

    # Pull the three timing columns out as plain numpy arrays.
    t_setup, t_star, t_verify = (
        dataset[column].to_numpy()
        for column in (' time_setup', ' time_star', ' time_verify')
    )

    mean_prepare = np.mean(t_setup + t_star)
    mean_verify = np.mean(t_verify)
    # Same summation order as the per-row total: setup + verify + star.
    mean_total = np.mean(t_setup + t_verify + t_star)

    print(f'Average time to setup: {mean_prepare:.4f} + time to verify: {mean_verify:.4f} = {mean_total:.4f}')
    print()

#### Metrics across all data

In [125]:
data_dir = './data/'
# os.listdir gives entries in arbitrary order and returns everything in the
# directory, so sort for a reproducible report and skip anything that is not
# a CSV (e.g. checkpoint folders or OS metadata files) before reading it.
for path in sorted(os.listdir(data_dir)):
    if not path.endswith('.csv'):
        continue
    # os.path.join works whether or not data_dir ends with a separator.
    full_path = os.path.join(data_dir, path)
    calculate_metrics(pd.read_csv(full_path), path)

Metrics for adversarial_config1_mlp_dense16_mu_compress_rate_2kHz_eps_0.25_exact.csv
Percentage robust: 0.809
Percentage correct: 0.944
Average time to setup: 0.0705 + time to verify: 0.2534 = 0.3238

Metrics for adversarial_config1_mlp_dense16_mu_compress_rate_2kHz_eps_0.5_exact.csv
Percentage robust: 0.657
Percentage correct: 0.944
Average time to setup: 0.0744 + time to verify: 2.2693 = 2.3437

Metrics for cnn_1d_rate_2kHz_mu_compress_eps_0.1_approx.csv
Percentage robust: 0.969
Percentage correct: 0.974
Average time to setup: 0.0865 + time to verify: 8.6988 = 8.7853

Metrics for cnn_1d_rate_2kHz_mu_compress_eps_0.25_approx.csv
Percentage robust: 0.963
Percentage correct: 0.974
Average time to setup: 0.0872 + time to verify: 21.3700 = 21.4571

Metrics for mlp_adversarial_dense16_mu_compress_rate_2kHz_eps_0.25_approx.csv
Percentage robust: 0.594
Percentage correct: 0.967
Average time to setup: 0.0715 + time to verify: 0.0530 = 0.1245

Metrics for mlp_adversarial_dense16_mu_compress_ra