## Provide lists of key/value image paths for edits and run edit trials for all 10 classes

In [11]:
# General imports
import torch
import numpy as np
import os, sys
import json
from tqdm import tqdm
from datetime import datetime
import pandas as pd

In [12]:
# Local imports
sys.path.insert(0, 'src')
from utils import read_json, read_lists, informal_log, list_to_dict, write_lists, write_json, ensure_files
from utils.model_utils import prepare_device
from parse_config import ConfigParser
# from data_loader import data_loaders
import datasets.datasets as module_data
import model.model as module_arch
from utils.knn_utils import load_and_analyze_knn
from utils.results_to_csv import store_csv
from edit_knn import main as edit

In [13]:
# Notebook-wide settings: behaviour switches first, then input file locations
analyze_in_edit = True  # forwarded to edit() as do_analyze_knn
sort_type = 'softmax'   # suffix of the key_images_*/value_images_* path-list files

class_list_path = 'metadata/cinic-10/class_names.txt'
config_path = 'configs/copies/cinic10_imagenet_segmentation_edit_trials.json'

In [14]:
# Number of selected images; used in the '<class>_<n_select>' directory names
n_select = 100

# Read the experiment configuration from disk
config_dict = read_json(config_path)

# Read the class names and build the dict derived from them
class_list = read_lists(class_list_path)
class_idx_dict = list_to_dict(class_list)

# K is taken from the editor section of the config
K = config_dict['editor']['K']

# Device selection is driven by the configured number of GPUs
device, device_ids = prepare_device(config_dict['n_gpu'])

In [15]:
# Keyword arguments shared by both loaders/datasets (shallow copies of the config entries)
data_loader_args = dict(config_dict["data_loader"]["args"])
dataset_args = dict(config_dict["dataset_args"])

# Validation data: this dataset is asked to return image paths as well
val_image_paths = read_lists(config_dict['dataset_paths']['valid_images'])
val_labels = read_lists(config_dict['dataset_paths']['valid_labels'])
val_dataset = module_data.CINIC10Dataset(
    data_dir="",
    image_paths=val_image_paths,
    labels=val_labels,
    return_paths=True,
    **dataset_args
)
val_paths_data_loader = torch.utils.data.DataLoader(val_dataset, **data_loader_args)

# Data used for the covariance matrix: same dataset class, without return_paths
covariance_image_paths = read_lists(config_dict['covariance_dataset']['images'])
covariance_labels = read_lists(config_dict['covariance_dataset']['labels'])
covariance_dataset = module_data.CINIC10Dataset(
    data_dir="",
    image_paths=covariance_image_paths,
    labels=covariance_labels,
    **dataset_args
)
covariance_data_loader = torch.utils.data.DataLoader(covariance_dataset, **data_loader_args)

In [16]:
# Two timestamps: a fixed one that selects the existing key/value path lists,
# and a fresh one (month-day_hour-minute-second) that names this run's outputs
paths_timestamp = '0126_161209'
timestamp = '{:%m%d_%H%M%S}'.format(datetime.now())
# Alternative kept from the original: pin the run timestamp to a fixed value
# timestamp = '0120_155829'

In [1]:
# Run every key/value edit trial for each target class in class_list.
# For each class: set up the log files, load and validate the key/value image
# path lists, then call edit() once per (key, value) pair.
for target_class_idx, target_class_name in enumerate(class_list):
    # NOTE(review): the first class (index 0) is skipped even though the notebook
    # title says all 10 classes are run -- presumably it was already processed; confirm.
    if target_class_idx == 0:
        continue

    # Create save directories and logging paths
    class_run_name = '{}_{}'.format(target_class_name, n_select)
    save_root = config_dict['trainer']['save_dir']
    class_save_dir = os.path.join(save_root, config_dict['name'], class_run_name, timestamp)
    save_trials_path = os.path.join(class_save_dir, 'trial_paths.txt')
    progress_report_path = os.path.join(class_save_dir, 'progress_report.txt')

    # Remove a stale progress report BEFORE anything is logged to it. Previously the
    # "Current target class" entry was logged first and then deleted together with
    # the stale file (assuming informal_log writes to the given path -- TODO confirm).
    trials_file_exists = os.path.exists(save_trials_path)
    if not trials_file_exists and os.path.exists(progress_report_path):
        os.remove(progress_report_path)

    informal_log("Current target class: {}".format(target_class_name), progress_report_path)

    if trials_file_exists:
        print("Path {} already exists. Overwriting.".format(save_trials_path))
    else:
        print("Printing progress reports to {}".format(progress_report_path))
        informal_log("Saving path to directories for each trial to {}".format(save_trials_path), progress_report_path)

    # Obtain paths for keys and values
    paths_dir = os.path.join('paths', 'edits', 'semantics', class_run_name, paths_timestamp)
    key_image_paths_path = os.path.join(paths_dir, 'key_images_{}.txt'.format(sort_type))
    key_image_paths = read_lists(key_image_paths_path)

    value_image_paths_path = os.path.join(paths_dir, 'value_images_{}.txt'.format(sort_type))
    value_image_paths = read_lists(value_image_paths_path)
    n_trials = len(value_image_paths)

    # Keys and values are paired with zip() below, which would silently truncate
    # on a length mismatch; raise explicitly (an assert is stripped under -O).
    if len(key_image_paths) != n_trials:
        raise ValueError("Number of key image paths ({}) does not match number of value image paths ({})".format(
            len(key_image_paths), n_trials))

    # Fail before any trial starts if ensure_files reports listed images as missing
    non_existent_key_paths = ensure_files(key_image_paths)
    non_existent_value_paths = ensure_files(value_image_paths)

    if len(non_existent_key_paths) > 0:
        raise ValueError("Following paths are non existent: {}".format(non_existent_key_paths))

    if len(non_existent_value_paths) > 0:
        raise ValueError("Following paths are non existent: {}".format(non_existent_value_paths))

    informal_log("Key image paths stored at {}".format(key_image_paths_path), progress_report_path)
    informal_log("Value image paths stored at {}".format(value_image_paths_path), progress_report_path)

    # Run edit for each key and value pair
    for idx, (key_path, value_path) in enumerate(zip(key_image_paths, value_image_paths)):
        # Key image id is "<parent dir>-<grandparent dir>-<file name up to the first dot>"
        key_dir = os.path.dirname(key_path)
        split = os.path.basename(os.path.dirname(key_dir))
        class_name = os.path.basename(key_dir)
        file_name = os.path.basename(key_path).split(".")[0]
        key_image_id = "{}-{}-{}".format(class_name, split, file_name)
        # Print Progress
        informal_log("({}) Starting Trial {}/{}...".format(datetime.now().strftime(r'%m%d_%H%M%S'), idx + 1, n_trials), progress_report_path)

        # Create run id
        value_image_id = os.path.splitext(os.path.basename(value_path))[0]
        run_id = os.path.join(class_run_name, timestamp, 'results', key_image_id, value_image_id)
        informal_log("Current run_id: {}".format(run_id), progress_report_path)

        # Re-read the config for every trial and point it at this key/value pair.
        # A separate name is used so the notebook-level config_dict is not replaced.
        trial_config_dict = read_json(config_path)
        trial_config_dict['editor'].update({
            'key_paths_file': key_path,
            'value_paths_file': value_path
        })

        # Create config object
        config = ConfigParser(trial_config_dict, run_id=run_id)

        # Log the current trial path
        informal_log(os.path.dirname(config.save_dir), save_trials_path)

        informal_log("Calling edit()...", progress_report_path)

        edit(
            config=config,
            val_paths_data_loader=val_paths_data_loader,
            covariance_data_loader=covariance_data_loader,
            do_analyze_knn=analyze_in_edit)

        # Print progress
        informal_log("Finished trial {}/{}. Results saved to {}".format(idx + 1, n_trials, os.path.dirname(config.save_dir)),
                    progress_report_path)



NameError: name 'class_list' is not defined

In [None]:
# Define variables
# target_class_name = 'airplane'
# n_select = 100
# paths_dir = os.path.join('paths', 'edits', 'semantics', '{}_{}'.format(target_class_name, n_select), paths_timestamp)


In [None]:
# key_image_paths_path = os.path.join(paths_dir, 'key_images_{}.txt'.format(sort_type))
# key_image_paths = read_lists(key_image_paths_path)

# value_image_paths_path = os.path.join(paths_dir, 'value_images_{}.txt'.format(sort_type))
# value_image_paths = read_lists(value_image_paths_path)
# n_trials = len(value_image_paths)
# assert len(key_image_paths) == n_trials

# print("{} edit image pairs".format(n_trials))
# print("First key image path: {}".format(key_image_paths[0]))
# print("First value image path: {}".format(value_image_paths[0]))

### Create log and save paths

In [None]:
# # create log path to store the paths to each trial
# save_root = config_dict['trainer']['save_dir']
# save_trials_path = os.path.join(save_root, config_dict['name'], timestamp, 'trial_paths.txt')
# progress_report_path = os.path.join(save_root, config_dict['name'], timestamp, 'progress_report.txt')
# if os.path.exists(save_trials_path):
#     # os.remove(save_trials_path)
#     print("Path {} already exists. Aborting.".format(save_trials_path))
# else:
#     # progress_report_path = os.path.join(save_root, config_dict['name'], timestamp, 'progress_report.txt')
#     if os.path.exists(progress_report_path):
#         os.remove(progress_report_path)
#     print("Saving path to directories for each trial to {}".format(save_trials_path))
#     print("Printing progress reports to {}".format(progress_report_path))

### Ensure all paths for keys and values exist

In [None]:
# non_existent_key_paths = []
# non_existent_value_paths = []
# for key_path, value_path in zip(key_image_paths, value_image_paths):
#     if not os.path.exists(key_path):
#         non_existent_key_paths.append(key_path)
#     if not os.path.exists(value_path):
#         non_existent_value_paths.append(value_path)

# if len(non_existent_key_paths) > 0:
#     raise ValueError("Following paths are non existent: {}".format(non_existent_key_paths))
    
# if len(non_existent_value_paths) > 0:
#     raise ValueError("Following paths are non existent: {}".format(non_existent_value_paths))
    

## Log which files the key and value image paths were read from

In [None]:
# informal_log("Key image paths stored at {}".format(key_image_paths_path), progress_report_path)
# informal_log("Value image paths stored at {}".format(value_image_paths_path), progress_report_path)

## Run edit for each modified image

In [None]:
# for idx, (key_path, value_path) in enumerate(zip(key_image_paths, value_image_paths)):
#     split = os.path.basename(os.path.dirname(os.path.dirname(key_path)))
#     class_name = os.path.basename(os.path.dirname(key_path))
#     file_name = os.path.basename(key_path).split(".")[0]
#     key_image_id = "{}-{}-{}".format(class_name, split, file_name)
#     # Print Progress
#     informal_log("({}) Starting Trial {}/{}...".format(datetime.now().strftime(r'%m%d_%H%M%S'), idx + 1, n_trials), progress_report_path)
    
#     # Create run id 
#     value_image_id = os.path.splitext(os.path.basename(value_path))[0]
#     run_id = os.path.join('{}_{}'.format(target_class_name, n_select), timestamp, 'results', key_image_id, value_image_id)
#     informal_log("Current run_id: {}".format(run_id), progress_report_path)
    
#     # Read config file as json and make updates to key and value paths
#     config_dict = read_json(config_path)
#     config_dict['editor'].update({
#         'key_paths_file': key_path,
#         'value_paths_file': value_path
#     })
    
#     # Create config object
#     config = ConfigParser(config_dict, run_id=run_id)
    
#     # Log the current trial path
#     informal_log(os.path.dirname(config.save_dir), save_trials_path)
    
#     informal_log("Calling edit()...", progress_report_path)
    
#     edit(
#         config=config,
#         val_paths_data_loader=val_paths_data_loader,
#         covariance_data_loader=covariance_data_loader,
#         do_analyze_knn=analyze_in_edit)
    
#     # Print progress
#     informal_log("Finished trial {}/{}. Results saved to {}".format(idx + 1, n_trials, os.path.dirname(config.save_dir)),
#                 progress_report_path)


## FIN

In [None]:
# Inputs for analysing an already finished run
knn_analysis_filename = 'knn_analysis_results.pth'

# File holding the trial directories of that run, and the directories read from it
save_trials_path = 'saved/edit/trials/CINIC10_ImageNet-VGG_16/0125_114341/trial_paths.txt'
trial_dirs = read_lists(save_trials_path)

In [None]:
# Report whether the trial-paths file is available before running the analysis.
try:
    trials_file_found = os.path.exists(save_trials_path)
except (NameError, TypeError):
    # NameError: save_trials_path was never assigned in this session.
    # TypeError: it is set to something that is not a path (e.g. None).
    # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
    print("Need to define save_trials_path.")
else:
    if trials_file_found:
        print("Obtaining trial paths from {}".format(save_trials_path))
    else:
        print("Path {} does not exist".format(save_trials_path))



## Analyze KNN

In [None]:
## Process KNN results for each trial directory listed in trial_dirs
n_trials = len(trial_dirs)
n_log = n_trials // 10 + 1  # log every 10% (currently unused: every trial is logged below)
progress_report_path = os.path.join(os.path.dirname(save_trials_path), 'progress_report_analysis.txt')

# File names depend only on K, so format them once outside the loop
pre_edit_knn_filename = 'pre_edit_{}-nn.pth'.format(K)
post_edit_knn_filename = 'post_edit_{}-nn.pth'.format(K)

informal_log("Starting KNN analysis...", progress_report_path)
# Wrap the list itself (not the enumerate object) so tqdm can read its length
# and display a total / ETA.
for trial_idx, trial_dir in enumerate(tqdm(trial_dirs)):
    # if trial_idx % n_log == 0:
    informal_log("Processing {}/{} trials. Currently processing {}".format(
        trial_idx+1, n_trials, os.path.basename(trial_dir)), progress_report_path)

    results_save_dir = os.path.join(trial_dir, 'models')
    # NOTE(review): target_class_idx is not set in this cell; it is whatever the
    # per-class loop earlier in the notebook left behind -- confirm it matches
    # the run being analysed.
    load_and_analyze_knn(
        restore_dir=results_save_dir,
        pre_edit_knn_path=os.path.join(results_save_dir, pre_edit_knn_filename),
        post_edit_knn_path=os.path.join(results_save_dir, post_edit_knn_filename),
        knn_analysis_filename=knn_analysis_filename,
        target_class_idx=target_class_idx,
        class_list=class_list,
        progress_report_path=progress_report_path,
        save_images=False,
        save_plots=True)
    


## Convert to CSV for all trials

In [None]:
# Write the results table for all trials next to the trial-paths file
results_dir = os.path.dirname(save_trials_path)
csv_save_path = os.path.join(results_dir, 'results_table.csv')
store_csv(trial_dirs=trial_dirs, class_list=class_list, save_path=csv_save_path)
