## Notebook Template to Quickly Test Things Out

In [1]:
# General imports
# import torch
import numpy as np
import os, sys
import json
from tqdm import tqdm
import pandas as pd

In [3]:
# Local imports
# Put the `src` directory at the front of the module search path so that the
# `utils` import below resolves to the project's own module. The path is
# relative, so it is resolved against the kernel's current working directory
# -- presumably the notebook is meant to be run from the repository root
# (TODO confirm).
sys.path.insert(0, 'src')
from utils import read_json, read_lists
# from utils.model_utils import prepare_device
# # from parse_config import ConfigParser
# from data_loader import data_loaders
# import model.model as module_arch

In [8]:
# Constants and paths used by the rest of the notebook
config_path = 'configs/'
timestamp = '0112_121958'

# The results table lives in the trial directory named after `timestamp`
trial_dir = os.path.join('saved', 'edit', 'trials', 'CINIC10_ImageNet-VGG_16', timestamp)
csv_path = os.path.join(trial_dir, 'results_table.csv')


In [13]:
# Read the results table into a DataFrame and report where it came from
# and how many rows it holds
df = pd.read_csv(csv_path)
n_total = df.shape[0]
print("CSV loaded from {}".format(csv_path))
print("{} rows".format(n_total))

CSV loaded from saved/edit/trials/CINIC10_ImageNet-VGG_16/0112_121958/results_table.csv
37 rows


In [10]:
# Show the table with numeric values rounded to 3 decimal places
# (the result is only displayed; it is not assigned back to `df`)
df.round(decimals=3)

Unnamed: 0.1,Unnamed: 0,ID,Pre Accuracy,Post Accuracy,Pre Mean Precision,Post Mean Precision,Pre Mean Recall,Post Mean Recall,Pre Mean F1,Post Mean F1,...,Num of val's Neighbors Became Target (F),Num of val's Neighbors Became Target (L),Pre key-val (F),Post key-val (F),Pre key-val (L),Post key-val (L),Pre keyN-val (F),Post keyN-val (F),Pre keyN-val (L),Post keyN-val (L)
0,0,dog-train-n02114712_211/felzenszwalb_gaussian_0,0.687,0.684,0.692,0.696,0.687,0.684,0.684,0.683,...,17,20,2.034,1.59,3.038,1.967,2.677,2.381,3.896,3.085
1,1,dog-train-n02114712_211/felzenszwalb_masked_0,0.687,0.686,0.692,0.695,0.687,0.686,0.684,0.685,...,24,20,2.062,1.641,2.974,2.133,2.66,2.373,3.773,3.122
2,2,dog-train-n02114712_211/felzenszwalb_masked_1,0.687,0.672,0.692,0.695,0.687,0.672,0.684,0.674,...,0,0,3.369,2.404,5.008,3.219,3.837,3.084,5.599,4.046
3,3,dog-train-n02114712_211/quickshift_masked_0,0.687,0.687,0.692,0.694,0.687,0.687,0.684,0.685,...,21,24,1.278,1.151,1.712,1.535,1.879,1.816,2.569,2.423
4,4,dog-train-n02114712_211/slic_masked_0,0.687,0.686,0.692,0.695,0.687,0.686,0.684,0.685,...,19,18,1.778,1.377,2.49,1.813,2.401,2.142,3.341,2.877
5,5,dog-train-n02114712_211/watershed_masked_1,0.687,0.687,0.692,0.696,0.687,0.687,0.684,0.687,...,13,14,2.365,1.901,3.346,2.478,2.859,2.528,3.988,3.256
6,6,dog-train-n02110341_7544/slic_masked_2,0.687,0.668,0.692,0.691,0.687,0.668,0.684,0.666,...,34,35,4.878,2.553,5.922,2.862,4.958,2.677,6.01,3.047
7,7,dog-train-n02089232_8735/quickshift_masked_3,0.687,0.688,0.692,0.691,0.687,0.688,0.684,0.684,...,29,28,0.533,0.513,0.931,0.879,0.965,0.99,1.549,1.618
8,8,dog-train-n02089232_8735/slic_gaussian_2,0.687,0.682,0.692,0.692,0.687,0.682,0.684,0.679,...,20,11,1.504,1.436,2.2,1.917,1.726,1.747,2.359,2.284
9,9,dog-train-n02089232_8735/slic_gaussian_4,0.687,0.688,0.692,0.693,0.687,0.688,0.684,0.685,...,31,34,0.326,0.317,0.576,0.545,0.705,0.688,0.966,0.885


## Hypothesis 1a: If a Gaussian-noise segment produces a successful change, will the corresponding masked segment do so as well?

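Result: not necessarily. Only 3 of the 10 Gaussian modifications in the results table have their corresponding masked segment in the table as well.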

In [26]:
# Split the table by modification type, which is encoded in each row's ID
is_masked = df['ID'].str.contains('masked')
is_gaussian = df['ID'].str.contains('gaussian')

masked_rows = df[is_masked]
gaussian_rows = df[is_gaussian]
n_masked = len(masked_rows)
n_gaussian = len(gaussian_rows)
print("{} masked modifications\n{} Gaussian modifications".format(n_masked, n_gaussian))

# For every Gaussian row, build the ID its masked counterpart would have
# (same ID with 'gaussian' swapped for 'masked') ...
gaussian_IDs = gaussian_rows['ID']
corresponding_masked_IDs = gaussian_IDs.replace('gaussian', 'masked', regex=True)

# ... and keep the counterpart IDs that are actually present in the table
candidate_IDs = set(corresponding_masked_IDs)
all_IDs = set(df['ID'])
segments_with_both_gaussian_and_masked = list(candidate_IDs.intersection(all_IDs))
n_both = len(segments_with_both_gaussian_and_masked)
print("{}/{} gaussian modifications have corresponding masked segment as success:".format(n_both, n_gaussian))

27 masked modifications
10 Gaussian modifications
3/10 gaussian modifications have corresponding masked segment as success:


## Hypothesis 1b: Masked modifications will produce greater changes than Gaussian-noise modifications

In [49]:
# Compare post-edit accuracy, precision, recall, and F1 between masked and
# Gaussian modifications, reporting each metric as "mean (std)".
#
# numeric_only=True is needed because both frames still contain the string
# column 'ID': pandas >= 2.0 raises a TypeError when asked to reduce it, and
# older versions only dropped it with a FutureWarning.
mean_masked = masked_rows.mean(numeric_only=True)
mean_gaussian = gaussian_rows.mean(numeric_only=True)
std_masked = masked_rows.std(numeric_only=True)
std_gaussian = gaussian_rows.std(numeric_only=True)
print(mean_masked)

# Each inner list is printed as one group, separated from the next by a blank line
metrics = [['Post Accuracy', 'Post Mean Precision', 'Post Mean Recall', 'Post Mean F1'],
           ['Post Target Precision', 'Post Target Recall', 'Post Target F1'],
           ['Post Orig Pred Precision', 'Post Orig Pred Recall', 'Post Orig Pred F1']]

# Header and data rows share one layout so the columns line up regardless of
# how many characters the formatted numbers take
table_row = "{:<30} {:<20} {:<20}"
mean_std = "{:.3f} ({:.3f})"
print(table_row.format("Metric", "Masked", "Gaussian"))
for row in metrics:
    for metric in row:
        print(table_row.format(
            metric,
            mean_std.format(mean_masked[metric], std_masked[metric]),
            mean_std.format(mean_gaussian[metric], std_gaussian[metric])))
    print("")

Unnamed: 0                                  19.185185
Pre Accuracy                                 0.687471
Post Accuracy                                0.674029
Pre Mean Precision                           0.692474
Post Mean Precision                          0.677194
Pre Mean Recall                              0.687471
Post Mean Recall                             0.674029
Pre Mean F1                                  0.683737
Post Mean F1                                 0.667776
Pre Target Precision                         0.654326
Post Target Precision                        0.652891
Pre Target Recall                            0.661466
Post Target Recall                           0.448862
Pre Target F1                                0.656662
Post Target F1                               0.523343
Pre Orig Pred Precision                      0.706188
Post Orig Pred Precision                     0.565925
Pre Orig Pred Recall                         0.427143
Post Orig Pred Recall       