## Build HTML file to visualize results

In [1]:
# General imports
import torch
import numpy as np
import os, sys
import json
from tqdm import tqdm
import pandas as pd
from airium import Airium
import re

In [2]:
# Local imports
sys.path.insert(0, 'src')
from utils import read_json, read_lists, ensure_dir
from utils.df_utils import load_and_preprocess_csv, get_sorted_idxs
from utils.html_utils import save_visualizations_separately, build_html
from utils.visualizations import bar_graph
from parse_config import ConfigParser
from data_loader import data_loaders
import model.model as module_arch

In [3]:
# Variables
# These values select which saved run is visualized; later cells build every
# input and output path from them.

# Timestamp directory of the edit-trial results (last component of results_dir)
results_timestamp = '0127_103716'
# Class name and selection count; joined as '<target_class>_<n_select>' in directory names
target_class = 'airplane'
n_select = 100
# Timestamp directory of the path lists (last component of paths_dir)
paths_timestamp = '0126_161209'
# Appears in the value-image list file name and in the copied graph file names
data_type = 'softmax'
# sort_columns = ['Post Target Recall']
# Results-table column used to order the edits on the HTML page
sort_key = 'Post Mean Accuracy'
# Lower-case, underscore-separated form of sort_key, used as a directory name
sort_key_ = sort_key.lower().replace(" ", "_")

In [14]:
# ID Regex
# Matches '/<a>-<b>-<c>/<anything>/' where <a>, <b> and <c> consist of
# lowercase letters, digits and underscores. Written as a raw string so that
# '\-' reaches the regex engine verbatim instead of being an invalid Python
# string escape; the pattern itself is unchanged.
id_regex = r'/+[a-z0-9_]*\-[a-z0-9_]*\-[a-z0-9_]*/.*/'
def get_image_id(path):
    '''
    Extract the image ID embedded in a path

    Arg(s):
        path : str
            path containing a '/<a>-<b>-<c>/<...>/' section

    Returns:
        image_id : str
            the matched section without its first and last character
            (the enclosing slashes)

    Raises:
        ValueError
            if path contains no section matching id_regex
    '''
    match = re.search(id_regex, path)
    if match is None:
        # Fail with the offending path instead of an opaque AttributeError on None
        raise ValueError("Could not extract an image ID from path '{}'".format(path))
    return match.group()[1:-1]

# --- Inputs ---------------------------------------------------------------
# Class names of the dataset
class_list_path = os.path.join('metadata', 'cinic-10', 'class_names.txt')

# Results of the edit trials: the metrics table and the list of trial directories
results_dir = os.path.join(
    'saved', 'edit', 'trials', 'CINIC10_ImageNet-VGG_16',
    '{}_{}'.format(target_class, n_select),
    results_timestamp)
trial_paths_path = os.path.join(results_dir, 'trial_paths.txt')
csv_path = os.path.join(results_dir, 'results_table.csv')

# List of value image paths used for the edits
paths_dir = os.path.join(
    'paths', 'edits', 'semantics',
    '{}_{}'.format(target_class, n_select),
    paths_timestamp)
value_image_paths_path = os.path.join(paths_dir, 'value_images_{}.txt'.format(data_type))

# --- Outputs --------------------------------------------------------------
# The HTML pages go in html_save_dir; copied images go under its assets folder,
# split into class-level ('summary') and per-edit ('individual') assets.
html_save_dir = os.path.join('html', '{}_{}'.format(target_class, n_select))
html_assets_dir = os.path.join(html_save_dir, 'assets')
html_summary_assets_dir = os.path.join(html_assets_dir, 'summary')
html_individual_assets_dir = os.path.join(html_assets_dir, 'individual')

# Only the two leaf directories are passed to ensure_dir
# (presumably it also creates missing parents -- TODO confirm)
ensure_dir(html_summary_assets_dir)
ensure_dir(html_individual_assets_dir)

In [15]:
# Load class list
# (presumably one class name per entry -- TODO confirm against read_lists)
class_list = read_lists(class_list_path)
# Load CSV and paths
# The results table is loaded with duplicates dropped on the 'ID' column;
# what other preprocessing load_and_preprocess_csv applies is not visible here.
df = load_and_preprocess_csv(
    csv_path,
    drop_duplicates=['ID'])

# Both lists are expected to line up row-for-row with df; the sanity checks
# in the next cell verify this.
value_image_paths = read_lists(value_image_paths_path)
trial_paths = read_lists(trial_paths_path)

## Sanity Checks

In [16]:
# Sanity check same number of rows
# The data frame and both path lists are indexed together later on, so they
# must have the same length. Each message names the list that is off.
n_rows = len(df)
assert len(value_image_paths) == n_rows, \
    "{} rows in value image paths; {} rows in data frame".format(len(value_image_paths), n_rows)
assert len(trial_paths) == n_rows, \
    "{} rows in trial paths; {} rows in data frame".format(len(trial_paths), n_rows)

# Sanity check that each row corresponds to one another:
# every '/'-separated part of a row's ID must occur in both of that row's paths
for row_idx, (image_id, value_image_path, trial_path) in enumerate(
        zip(df['ID'], value_image_paths, trial_paths)):
    for id_part in image_id.split('/'):
        assert id_part in value_image_path, \
            "Row {}: ID part '{}' not found in value image path '{}'".format(
                row_idx, id_part, value_image_path)
        assert id_part in trial_path, \
            "Row {}: ID part '{}' not found in trial path '{}'".format(
                row_idx, id_part, trial_path)

# Check the sort key is a column of the data frame
assert sort_key in df.columns, \
    "Sort key '{}' is not a column of the results table".format(sort_key)

### Sort paths

In [17]:
# Get sorted idxs based on the sort key (increasing=False is passed)
sorted_df, sorted_idxs = get_sorted_idxs(
    df=df,
    columns=[sort_key],
    increasing=False)

# Reorder image paths and trial paths with the same indices so that they stay
# aligned with the rows of sorted_df
sorted_value_image_paths = [value_image_paths[idx] for idx in sorted_idxs]
sorted_trial_paths = [trial_paths[idx] for idx in sorted_idxs]
# Reuse the shared helper rather than repeating the regex extraction inline
sorted_IDs = [get_image_id(path) for path in sorted_value_image_paths]

# Sanity check: every extracted ID must appear in the trial path at the same position
for id_, trial_path in zip(sorted_IDs, sorted_trial_paths):
    assert id_ in trial_path, \
        "ID '{}' does not match trial path '{}' after sorting".format(id_, trial_path)


## Class Summary Visualizations

### Get graphics from neighbor analysis


In [34]:
# Neighbor-analysis graphs are stored next to the trial path list
parent_dir = os.path.join(
    os.path.dirname(trial_paths_path), 'graphs', 'neighbor_analysis')

# Only the summary graph belonging to the current sort key is used here
file_names = ['auc_neighbors_summary.png']
input_dir = os.path.join(parent_dir, sort_key_)

# Copy the graph into the summary assets directory; overwrite=True is passed
# so an existing copy is replaced (presumably -- confirm in save_visualizations_separately)
(html_summary_save_dirs,
 html_summary_save_paths,
 html_summary_save_ids) = save_visualizations_separately(
    input_dirs=[input_dir],
    file_names=file_names,
    output_dir=html_summary_assets_dir,
    overwrite=True)

print(html_summary_save_paths)

100%|████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 162.31it/s]

[['html/airplane_100/assets/summary/auc_neighbors_summary.png']]





## Save metrics as string

In [35]:
# Build the text shown under each edit on the HTML page: one list of lines per
# row of sorted_df, starting with the row's ID and followed by a
# "pre -> post" line for every (group, metric) combination.
sorted_df = sorted_df.round(3)
metrics = ['Accuracy', 'Recall', 'Precision', 'F1']
groups = ['Mean', 'Target', 'Orig Pred']
# Column name template: '<Pre|Post> <group> <metric>'
key = '{} {} {}'
metric_strings = []
for idx in range(n_rows):
    id_line = 'ID: {}'.format(sorted_df['ID'].iloc[idx])
    # Groups vary slowest, metrics fastest
    value_lines = [
        "\t {} {:<15} {} -> {}".format(
            group, metric + ":",
            sorted_df[key.format('Pre', group, metric)].iloc[idx],
            sorted_df[key.format('Post', group, metric)].iloc[idx])
        for group in groups
        for metric in metrics]
    metric_strings.append([id_line] + value_lines)

assert len(metric_strings) == n_rows


### Get paths to all things we want to visualize: 

1) cumulative masking graphic
2) cumulative masking graph
3) class distribution pre/post edit

### Get graphics from segmentation process

In [36]:
# Graphics produced by the segmentation process, looked up in the directory
# of each value image (in sorted order).
# NOTE(review): the third name hard-codes 'softmax' while the other two follow
# data_type -- confirm this is intended when data_type is not 'softmax'.
file_names = [
    '{}_cumulative_modifying.png'.format(data_type),
    'target_{}_v_n_images.png'.format(data_type),
    'softmax_delta_v_n_images.png']
input_dirs = list(map(os.path.dirname, sorted_value_image_paths))

# overwrite=False is passed, so existing copies are presumably kept
(html_asset_save_dirs,
 html_asset_save_paths,
 html_asset_save_ids) = save_visualizations_separately(
    input_dirs=input_dirs,
    file_names=file_names,
    output_dir=html_individual_assets_dir,
    overwrite=False)



100%|████████████████████████████████████████████████████████████████████| 158/158 [00:00<00:00, 519.26it/s]


### Create Bar Graphs

In [37]:
# Create class distribution bar graphs per row
# One graph per edit, comparing the class distribution columns before and
# after the edit. The graph is saved into that edit's HTML asset directory.
columns = ['Pre Class Dist', 'Post Class Dist']
bar_graph_save_paths = []
# zip() has no length, so pass the total explicitly to get a real progress bar
for idx, (trial_dir, html_asset_save_dir) in enumerate(tqdm(
        zip(sorted_trial_paths, html_asset_save_dirs),
        total=min(len(sorted_trial_paths), len(html_asset_save_dirs)))):
    # The ID is the last two components of the asset directory
    image_id = os.path.join(os.path.basename(os.path.dirname(html_asset_save_dir)),
                            os.path.basename(html_asset_save_dir))
    assert image_id in trial_dir, \
        "Asset directory ID '{}' does not match trial directory '{}'".format(image_id, trial_dir)

    # Record the path even when the graph already exists: the HTML page needs it
    bar_graph_save_path = os.path.join(html_asset_save_dir, 'class_distribution_bar_graph.png')
    bar_graph_save_paths.append(bar_graph_save_path)
    if os.path.isfile(bar_graph_save_path):
        # Existing graphs are reused; delete the file to force regeneration
        continue

    # Stack the pre/post distributions of this row along a new leading axis
    row = sorted_df.iloc[idx]
    data = np.stack([row[column] for column in columns], axis=0)

    bar_graph(
        data=data,
        labels=class_list,
        groups=columns,
        title='Class Distribution for {}'.format(image_id),
        xlabel_rotation=30,
        ylabel='Counts',
        save_path=bar_graph_save_path,
        show_plot=False)

158it [00:00, 1250.83it/s]


### Get nearest neighbor visualizations, and graphs for neighbor overlap

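Run `neighbor_analysis.ipynb` first: the cell below copies the neighbor visualizations and overlap graphs that it is expected to have saved under each trial's `models/knn_visualizations` directory.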

In [38]:
# Neighbor overlap graphs, followed by the feature-space nearest-neighbor
# visualizations for key and value images (pre- and post-edit).
# The logit-space variants ('*_nn_visual_logits.png') are intentionally left out.
file_names = [
    'pre_post_neighbor_overlap_absolute.png',
    'pre_post_neighbor_overlap_relative.png',
    'logit_feature_neighbor_overlap_absolute.png',
    'logit_feature_neighbor_overlap_relative.png',
    'pre-edit_key_nn_visual_features.png',
    'post-edit_key_nn_visual_features.png',
    'pre-edit_val_nn_visual_features.png',
    'post-edit_val_nn_visual_features.png',
]

# Each trial keeps these files under models/knn_visualizations
input_dirs = [
    os.path.join(trial_dir, 'models', 'knn_visualizations')
    for trial_dir in sorted_trial_paths
]

# Copy over neighbor visualizations to html asset directories
(neighbor_asset_save_dirs,
 neighbor_asset_save_paths,
 neighbor_asset_save_ids) = save_visualizations_separately(
    input_dirs=input_dirs,
    file_names=file_names,
    output_dir=html_individual_assets_dir,
    overwrite=False)

print(neighbor_asset_save_paths[0])

100%|████████████████████████████████████████████████████████████████████| 158/158 [00:00<00:00, 214.35it/s]

['html/airplane_100/assets/individual/airplane-train-n02704645_17657/felzenszwalb_gaussian_softmax/models/knn_visualizations/pre_post_neighbor_overlap_absolute.png', 'html/airplane_100/assets/individual/airplane-train-n02704645_17657/felzenszwalb_gaussian_softmax/models/knn_visualizations/pre_post_neighbor_overlap_relative.png', 'html/airplane_100/assets/individual/airplane-train-n02704645_17657/felzenszwalb_gaussian_softmax/models/knn_visualizations/logit_feature_neighbor_overlap_absolute.png', 'html/airplane_100/assets/individual/airplane-train-n02704645_17657/felzenszwalb_gaussian_softmax/models/knn_visualizations/logit_feature_neighbor_overlap_relative.png', 'html/airplane_100/assets/individual/airplane-train-n02704645_17657/felzenszwalb_gaussian_softmax/models/knn_visualizations/pre-edit_key_nn_visual_features.png', 'html/airplane_100/assets/individual/airplane-train-n02704645_17657/felzenszwalb_gaussian_softmax/models/knn_visualizations/post-edit_key_nn_visual_features.png', 'htm




### Combine the paths 

In [39]:
# list[list[str]]: the outer list holds the summary group(s) first, then one
# entry per edit pair; each inner list holds the asset paths of that entry.
asset_paths = []

# Summary of class statistics path
# NOTE: build_html treats only the first group as the summary, so
# html_summary_save_paths is expected to contain a single group here.
asset_paths.extend(html_summary_save_paths)

# Edit specific paths: for each edit, concatenate the segmentation graphics,
# the bar graph and the neighbor visualizations, in that order
for path_sources in zip(
            html_asset_save_paths,
            bar_graph_save_paths,
            neighbor_asset_save_paths):
    cur_paths = []
    for source in path_sources:
        if isinstance(source, str):
            # A single path
            cur_paths.append(source)
        elif isinstance(source, (list, tuple)):
            # A group of paths
            cur_paths.extend(source)
        # Any other type is skipped, as before

    asset_paths.append(cur_paths)


## Create HTML file

In [40]:
def build_html(file_paths,
               asset_ids,
               html_save_path,
               texts=None,
               id_regex=r'/+[a-z0-9_]*\-[a-z0-9_]*\-[a-z0-9_]*/.*/'):
    '''
    Given paths to assets to embed, build HTML page

    Arg(s):
        file_paths : list[list[str]]
            paths to each asset, one inner list per edit. If the first path of
            the first inner list contains 'summary', that group is rendered as
            the class summary instead of as an edit. The list is not modified.
        asset_ids : list[str]
            ID shown in the heading of each edit, in the same order as the
            edit groups (the summary group has no ID)
        html_save_path : str
            where the html file will be saved to; image sources are written
            relative to its directory
        texts : list[list[str]] or None
            optional lines of text per edit, each rendered as its own paragraph
        id_regex : str
            Regular expression to extract ID (currently unused; kept so that
            existing callers passing it keep working)

    Returns:
        html_string : str
            html as a string

    Raises:
        ValueError
            if asset_ids or texts has fewer entries than there are edit groups
    '''
    html_dir = os.path.dirname(html_save_path)

    def relative_src(asset_path):
        # Image sources are URLs, so always use forward slashes
        return os.path.relpath(asset_path, html_dir).replace(os.sep, '/')

    # Work on a shallow copy so the caller's list keeps its summary group and
    # calling this function again with the same list yields the same page
    edit_groups = list(file_paths)
    summary_paths = []
    if edit_groups and edit_groups[0] and 'summary' in edit_groups[0][0]:
        summary_paths = edit_groups.pop(0)

    # Count only the edits, so headings read "i/N" with N the number of edits
    n_data = len(edit_groups)
    if len(asset_ids) < n_data:
        raise ValueError("{} asset IDs given for {} edit groups".format(len(asset_ids), n_data))
    if texts is not None and len(texts) < n_data:
        raise ValueError("{} text groups given for {} edit groups".format(len(texts), n_data))

    # Create Airium object
    air = Airium()

    air('<!DOCTYPE html>')
    # The page content is English (the previous "pl" tag declared Polish)
    with air.html(lang="en"):
        # Set HTML header
        with air.head():
            air.meta(charset="utf-8")
            air.title(_t="Cumulative Image Visualization")

        # Set HTML body
        with air.body():
            # Summary, when present, is always the first element
            if summary_paths:
                with air.h3():
                    air("Summary of Class Edit")

                for path in summary_paths:
                    air.img(src=relative_src(path))
                    air.p("\n\n")

            # Iterate through each edit's files and display them
            for group_idx, group_paths in enumerate(edit_groups):
                asset_id = asset_ids[group_idx]

                with air.h3():
                    air("{}/{}. {}".format(group_idx+1, n_data, asset_id))
                if texts is not None:
                    for text in texts[group_idx]:
                        with air.p():
                            air(text)
                for asset_path in group_paths:
                    # Embed asset as image
                    air.img(src=relative_src(asset_path), height=350)
                    air.p("\n\n")
    # Turn Airium object to html string
    html_string = str(air)
    return html_string

In [41]:
# The file name encodes the sort criterion: sort_<sort key>_visualization.html
html_file_name = "".join([
    "sort_",
    sort_key.lower().replace(' ', '_'),
    '_',
    'visualization.html'])
html_save_path = os.path.join(html_save_dir, html_file_name)

# Render the page: one section per edit, headed by its ID and metric lines
html_string = build_html(
    asset_paths,
    asset_ids=sorted_IDs,
    texts=metric_strings,
    html_save_path=html_save_path)

# Written as UTF-8 bytes to match the charset declared in the page header
with open(html_save_path, 'wb') as f:
    f.write(html_string.encode('utf-8'))
print("Saved HTML file to {}".format(html_save_path))

Saved HTML file to html/airplane_100/sort_post_mean_accuracy_visualization.html


## Create HTML page for class summary

In [48]:
def build_summary_html(asset_paths,
                       html_save_path,
                       headers=None):
    '''
    Build an HTML page that shows groups of images, each under an optional heading

    Arg(s):
        asset_paths : list[list[str]]
            paths to the images, one inner list per group
        html_save_path : str
            where the html file will be saved to; image sources are written
            relative to its directory
        headers : list[str] or None
            heading for each group; must have one entry per group when given.
            If None, the groups are rendered without headings.

    Returns:
        html_string : str
            html as a string
    '''
    path_groups = list(asset_paths)
    if headers is not None:
        assert len(path_groups) == len(headers), \
            "{} asset groups but {} headers".format(len(path_groups), len(headers))
    else:
        # headers=None is the documented default: previously zip(None, ...)
        # raised a TypeError, now the headings are simply omitted
        headers = [None] * len(path_groups)

    html_dir = os.path.dirname(html_save_path)
    air = Airium()

    air('<!DOCTYPE html>')
    # The page content is English (the previous "pl" tag declared Polish)
    with air.html(lang="en"):
        # Set HTML header
        with air.head():
            air.meta(charset="utf-8")
            air.title(_t="Summary Page")

        # Set HTML body
        with air.body():
            for header, paths in zip(headers, path_groups):
                if header is not None:
                    with air.h3():
                        air(header)
                for path in paths:
                    # Image sources are URLs, so always use forward slashes
                    relative_asset_path = os.path.relpath(path, html_dir).replace(os.sep, '/')
                    air.img(src=relative_asset_path)
    html_string = str(air)
    return html_string
        

In [46]:
# One summary graph per post-edit metric column: every metric for every group
metrics = ['Accuracy', 'Recall', 'Precision', 'F1']
groups = ['Mean', 'Target', 'Orig Pred']
metric_key_template = "Post {} {}"

# Neighbor-analysis graphs are stored next to the trial path list,
# in one sub-directory per metric key
parent_dir = os.path.join(
    os.path.dirname(trial_paths_path), 'graphs', 'neighbor_analysis')
file_names = ['auc_neighbors_summary.png']

# Titles are the column names; metrics vary slowest, groups fastest
titles = [
    metric_key_template.format(group, metric)
    for metric in metrics
    for group in groups]
# Directory names are the lower-case, underscore-separated titles
input_dirs = [
    os.path.join(parent_dir, title.lower().replace(" ", "_"))
    for title in titles]

(summary_page_save_dirs,
 summary_page_save_paths,
 summary_page_save_ids) = save_visualizations_separately(
    input_dirs=input_dirs,
    file_names=file_names,
    output_dir=html_summary_assets_dir,
    overwrite=False)

100%|█████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 2433.13it/s]


In [53]:
# The class summary page is saved next to the per-edit visualization page
html_file_name = 'class_summary.html'
html_save_path = os.path.join(html_save_dir, html_file_name)

print(summary_page_save_paths)

# One section per metric graph, headed by the metric's column name
html_string = build_summary_html(
    asset_paths=summary_page_save_paths,
    html_save_path=html_save_path,
    headers=titles)

# Written as UTF-8 bytes to match the charset declared in the page header
with open(html_save_path, 'wb') as f:
    f.write(html_string.encode('utf-8'))
print("Saved HTML file to {}".format(html_save_path))

[['html/airplane_100/assets/summary/post_mean_accuracy/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_target_accuracy/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_orig_pred_accuracy/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_mean_recall/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_target_recall/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_orig_pred_recall/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_mean_precision/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_target_precision/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_orig_pred_precision/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_mean_f1/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_target_f1/auc_neighbors_summary.png'], ['html/airplane_100/assets/summary/post_orig_pred_f1/auc_neighbors_summary.png