# Clean and process the dataframe from survey results

This notebook performs the following steps:

* Merge all human survey dataframes into one
* Calculate soft labels and predictions for humans
* Add human entropy and top-2 confusion (t2c)
* Add model and explainer soft labels to the dataframe, plus the model-explainer KL divergence
* Add alignment of all three agent pairs to the dataframe
* Add top and bottom confidence values for all three agents


In [12]:
from collections import OrderedDict
# import matplotlib.pyplot as plt
import os, sys
import pandas as pd
import numpy as np
import json
from tqdm import tqdm
import torch
from scipy import stats

sys.path.insert(0, 'src')
from utils.utils import read_lists, load_image, ensure_dir
from utils.df_utils import convert_string_columns


In [7]:
# --- Survey results ---
# Directory that is scanned for the per-session survey CSV files
RESULTS_DIR = os.path.join('saved', 'ADE20K', 'survey_results', 'ADE20K_soft_labels')
# Measurement columns vs. the task metadata columns every kept row must have
MEASUREMENT_COLUMN_NAMES = ['selectedAttrs', 'attrUncs']
TASK_METADATA_COLUMN_NAMES = ['filename', 'task', 'concept_group']
# Output path template (one level above RESULTS_DIR); '{}' is filled with the number of samples
CSV_SAVE_PATH = os.path.join(os.path.dirname(RESULTS_DIR), 'processed_results_{}_samples.csv')

# --- Model and explainer ---
MODEL_DIRNAME = 'saved/PlacesCategoryClassification/0510_102912/ADE20K_predictions/saga'
# The explainer directory lives underneath the model directory
EXPLAINER_DIRNAME = MODEL_DIRNAME + '/KD_baseline_explainer/hparam_search/0523_164052/best'

# Files listing congruent / incongruent image paths; '{}' is filled with the congruency name
CONGRUENCY_PATH_TEMPLATE = os.path.join(EXPLAINER_DIRNAME, '{}_paths.txt')
CONGRUENT_PATHS_PATH = CONGRUENCY_PATH_TEMPLATE.format('congruent')
INCONGRUENT_PATHS_PATH = CONGRUENCY_PATH_TEMPLATE.format('incongruent')

# --- Dataset ---
# Path to where images in ADE20K are stored. (Prefix to path in congruent/incongruent paths files)
ADE20K_PARENT_DIR = os.path.join('data', 'broden1_224', 'images')
# Text file of scene category names
SCENE_CATEGORIES_PATH = os.path.join('data', 'ade20k', 'scene_categories.txt')


In [8]:
# Build one lookup table that works in both directions:
# index -> category name and category name -> index
scene_categories = read_lists(SCENE_CATEGORIES_PATH)
n_categories = len(scene_categories)

scene_categories_dict = {}
for category_idx, category_name in enumerate(scene_categories):
    # Register both directions for this category
    scene_categories_dict.update({
        category_idx: category_name,
        category_name: category_idx})

### Merge all human survey dataframes into 1

In [5]:
# Collect every survey CSV in RESULTS_DIR; sorting makes the merge order deterministic
csv_paths = sorted(
    os.path.join(RESULTS_DIR, filename)
    for filename in os.listdir(RESULTS_DIR)
    if filename.endswith('.csv'))
if not csv_paths:
    raise ValueError("No survey CSV files found in {}".format(RESULTS_DIR))

df_list = []
for csv_path in csv_paths:
    print("Processing {}".format(os.path.basename(csv_path)))
    raw_df = pd.read_csv(csv_path)
    # Separate dataframe into columns with measurements and columns with metadata
    measurement_df = raw_df[MEASUREMENT_COLUMN_NAMES]
    metadata_df = raw_df.drop(columns=MEASUREMENT_COLUMN_NAMES)

    # Drop rows that are missing any measurement
    measurement_df = measurement_df.dropna()
    # Drop rows without data in task metadata columns
    metadata_df = metadata_df.dropna(subset=TASK_METADATA_COLUMN_NAMES)

    # Remove metadata columns that still contain missing values
    metadata_df = metadata_df.dropna(axis=1)

    # Disabled: congruency labelling of each sample
    # congruents = []
    # for filename in metadata_df['filename']:
    #     path = os.path.join(ADE20K_PARENT_DIR, filename)
    #     if path in congruent_paths:
    #         congruents.append(1)
    #     elif path in incongruent_paths:
    #         congruents.append(0)
    #     else:
    #         raise ValueError("Path {} not found in congruent or incongruent paths... :0".format(path))
    # metadata_df['congruent'] = congruents

    # Assert that the two DFs have the same number of rows
    assert len(metadata_df) == len(measurement_df), "Uneven length data frames. Metadata length: {} Measurement length: {}".format(
        len(metadata_df), len(measurement_df))

    # The surviving rows of the two frames carry different index labels, so
    # reset both to 0..k-1 and pair them up by position
    metadata_df = metadata_df.reset_index(drop=True)
    measurement_df = measurement_df.reset_index(drop=True)

    # Join the data frames column-wise
    session_df = pd.concat([metadata_df, measurement_df], axis=1)
    assert len(session_df) == len(metadata_df)

    # Add dataframe to list of dataframes
    df_list.append(session_df)

# Concatenate rows of all dataframes together.
# ignore_index=True gives the merged frame a unique 0..n-1 index; without it every
# file contributes its own 0..k-1 labels and label-based lookups (e.g. df[col][0])
# return several rows instead of one.
df = pd.concat(df_list, ignore_index=True)
n_samples = len(df)
print("Total of {} samples".format(n_samples))
# n_congruent = len(df[df['congruent'] == 1])
# n_incongruent = len(df[df['congruent'] == 0])
# print("Total of {} congruent samples and {} incongruent samples".format(n_congruent, n_incongruent))


Processing 000_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_17h26.47.486.csv
Processing 001_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_17h35.07.423.csv
Processing 002_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_17h40.48.508.csv
Processing 003_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_17h48.15.198.csv
Processing 004_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_17h55.01.103.csv
Processing 005_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_18h00.12.78.csv
Processing 006_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_18h05.33.934.csv
Processing 007_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-03_18h11.11.320.csv
Processing 008_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-04_09h23.31.131.csv
Processing 009_uncertainty-annotation_PARTICIPANT_SESSION_2023-05-04_09h28.52.510.csv
Total of 300 samples


### Calculate soft labels and predictions for humans


In [9]:
print("Calculating human soft labels")
human_probabilities = []  # soft labels normalized to sum to 1
human_outputs = []        # raw certainty scores (y / 100), not normalized
human_predictions = []    # index of the highest-scoring category
for row_idx, raw_annotation in enumerate(tqdm(df['attrUncs'])):
    soft_label = np.zeros(n_categories)
    # Each annotation is a JSON list of items holding a category ('label') and a certainty amount ('y')
    for item in json.loads(raw_annotation):
        category_idx = scene_categories_dict[item['label']]
        soft_label[category_idx] = item['y'] / 100.0
    human_outputs.append(soft_label)

    label_sum = np.sum(soft_label)
    # Fail with a clear message instead of dividing by zero and tripping the assertion below
    if label_sum == 0:
        raise ValueError(
            "Annotation in row {} has zero total certainty and cannot be normalized: {}".format(
                row_idx, raw_annotation))

    # Normalize to sum to one (creates a new array, so the raw entry in human_outputs is untouched)
    soft_label = soft_label / label_sum
    # Assert the soft label sums to 1
    assert np.abs(np.sum(soft_label) - 1.0) < 1e-5, \
        "Soft label in row {} does not sum to 1".format(row_idx)

    human_probabilities.append(soft_label)
    human_predictions.append(np.argmax(soft_label))

df['human_probabilities'] = human_probabilities
df['human_outputs'] = human_outputs
df['human_predictions'] = human_predictions
print("Calculated human outputs, probabilities, and predictions")



Calculating human soft labels


100%|██████████████████████████████████████████████████████████████████████████████| 300/300 [00:00<00:00, 28880.42it/s]

Calculated human outputs, probabilities, and predictions





### Add human entropy and t2c

In [10]:
def get_top_2_confusion(soft_labels):
    '''
    Given soft label distribution, calculate top 2 confusion:
    1 - (difference between the top 2 labels)

    Arg(s):
        soft_labels : N x C np.array (or array-like)
            soft label array for N samples and C class predictions (C >= 2).
            Classes must be on the last axis; a single C-dim distribution or
            extra leading dimensions are also accepted.

    Returns:
        confusion : N-dim np.array
            confusion for each sample (same shape as the input minus the last axis).
            Higher means the top 2 classes are closer together.
    '''
    # Sort soft labels ascending along the class (last) axis
    sorted_soft_labels = np.sort(soft_labels, axis=-1)
    # Calculate difference of p(x) for top 2 classes. Index the same (last) axis
    # that was sorted so that inputs which are not exactly 2-D are handled consistently
    top_2_difference = sorted_soft_labels[..., -1] - sorted_soft_labels[..., -2]
    # Confusion = 1 - difference (higher is worse)
    top_2_confusion = 1 - top_2_difference

    return top_2_confusion


In [16]:
# Per-sample uncertainty measures for the human annotations
human_uncertainty = {
    # Entropy of the normalized human soft labels
    'human_entropy': stats.entropy(human_probabilities, axis=1),
    # Top 2 confusion of the raw (unnormalized) human outputs
    'human_t2c': get_top_2_confusion(human_outputs),
}
entropy = human_uncertainty['human_entropy']
human_t2c = human_uncertainty['human_t2c']

for column_name, column_values in human_uncertainty.items():
    df[column_name] = column_values

print("Calculated entropy, t2c for human labels")

Calculated entropy, t2c for human labels


### Add model and explainer soft labels to df, add KL divergence

In [15]:
# Locations of the saved validation-set results and of the image label file
explainer_outputs_path = os.path.join(EXPLAINER_DIRNAME, 'val_outputs_predictions.pth')
model_outputs_path = os.path.join(MODEL_DIRNAME, 'val_outputs_predictions.pth')
image_labels_path = 'data/ade20k/full_ade20k_imagelabels.pth'

# NOTE(review): torch.load unpickles these files; only load files from a trusted source

# Obtain the explainer's softmax probabilities
explainer_out = torch.load(explainer_outputs_path)
explainer_probabilities = explainer_out['probabilities']

# Obtain the model's softmax probabilities
model_out = torch.load(model_outputs_path)
model_probabilities = model_out['probabilities']

# Map each validation image name (the part of its path after 'images/') to its
# position in the validation set
image_labels = torch.load(image_labels_path)
val_images = [path.split('images/')[-1] for path in image_labels['val']]
val_name_idx_dict = {image_name: idx for idx, image_name in enumerate(val_images)}

agent_results = {'explainer': explainer_out, 'model': model_out}
for name, outputs in agent_results.items():
    # For each row in the data frame, look up this agent's result for the row's image
    for output_type in ('outputs', 'probabilities', 'predictions'):
        cur_outputs = outputs[output_type]
        df['{}_{}'.format(name, output_type)] = [
            cur_outputs[val_name_idx_dict[image_name]]
            for image_name in df['filename']]

    # Entropy (from probabilities) and top 2 confusion (from outputs), for both agents
    probability_matrix = np.stack(df['{}_probabilities'.format(name)].to_numpy(), axis=0)
    output_matrix = np.stack(df['{}_outputs'.format(name)].to_numpy(), axis=0)
    cur_entropy = stats.entropy(probability_matrix, axis=1)
    cur_t2c = get_top_2_confusion(output_matrix)
    # Bounds used to min-max scale t2c to [0, 1] over the rows of the data frame
    t2c_lower, t2c_upper = np.amin(cur_t2c), np.amax(cur_t2c)

    df['{}_entropy'.format(name)] = cur_entropy
    df['{}_t2c'.format(name)] = cur_t2c
    df['{}_scaled_t2c'.format(name)] = (cur_t2c - t2c_lower) / (t2c_upper - t2c_lower)

print("Added model and explainer's outputs, probabilities, and predictions to dataframe")
print("Added model and explainer entropy, t2c to dataframe")



Added model and explainer's outputs, probabilities, and predictions to dataframe
Added model and explainer entropy, t2c to dataframe


In [17]:
# KL divergence between model-explainer.
# stats.entropy(pk, qk) computes sum(pk * log(pk / qk)), so with pk = explainer and
# qk = model this is KL(explainer || model), evaluated separately for every row.
explainer_probability_matrix = np.stack(df['explainer_probabilities'], axis=0)
model_probability_matrix = np.stack(df['model_probabilities'], axis=0)
kl_model_explainer = stats.entropy(
    pk=explainer_probability_matrix,
    qk=model_probability_matrix,
    axis=1)
df['kl_model_explainer'] = kl_model_explainer
print("Added KL divergence between model and explainer probabilities")

Added KL divergence between model and explainer probabilities


### Add alignment and top and bottom confidence

In [18]:
# Pairs of agents whose predictions are compared row by row
agent_pairs = [
    ('human', 'explainer'),
    ('human', 'model'),
    ('model', 'explainer')]
for first_agent, second_agent in agent_pairs:
    first_column = '{}_predictions'.format(first_agent)
    second_column = '{}_predictions'.format(second_agent)
    alignment_column = '{}_{}_alignment'.format(first_agent, second_agent)
    # 1 where both agents made the same prediction, 0 otherwise
    predictions_match = df[first_column] == df[second_column]
    df[alignment_column] = np.where(predictions_match, 1, 0)
print("Added all three pairs of alignment to the dataframe")

def add_confidence(df,
                   agent,
                   top=True):
    '''
    Add the highest (or lowest) probability of an agent's soft label to the dataframe

    Arg(s):
        df : pd.DataFrame
            dataframe containing a '<agent>_probabilities' column. Each entry is an
            array of probabilities, or its string form (converted with
            convert_string_columns). The new column is assigned on this dataframe.
        agent : str
            prefix of the probabilities column (e.g. 'human', 'model', 'explainer')
        top : bool
            if True, add '<agent>_top_confidence' (maximum probability per row)
            if False, add '<agent>_bottom_confidence' (minimum probability per row)

    Returns:
        df : pd.DataFrame
            dataframe with the confidence column added
    '''
    column_name = '{}_probabilities'.format(agent)
    assert column_name in df.columns, \
        "Column '{}' not found in dataframe".format(column_name)

    reduce_fn, confidence_name = (np.amax, 'top') if top else (np.amin, 'bottom')
    confidence_column = '{}_{}_confidence'.format(agent, confidence_name)

    # Nothing to reduce for an empty dataframe; just add an empty column
    if len(df) == 0:
        df[confidence_column] = np.empty(0)
        return df

    # Convert str -> numpy if necessary. Look at the first row by position (.iloc):
    # a label lookup ([0]) raises KeyError when the index has no label 0 and returns
    # a whole Series when the index contains duplicate labels.
    if isinstance(df[column_name].iloc[0], str):
        df = convert_string_columns(df, [column_name])

    # Calculate confidence scores (max or min probability of each row) and add to DF
    probabilities = np.stack(df[column_name].to_numpy(), axis=0)
    df[confidence_column] = reduce_fn(probabilities, axis=1)
    return df

agents = ['human', 'model', 'explainer']

# For every agent, add the top confidence first and then the bottom confidence
for agent in agents:
    for use_top in (True, False):
        df = add_confidence(df, agent, top=use_top)

print("Added top and bottom confidences for all agents to df")

Added all three pairs of alignment to the dataframe
Added top and bottom confidences for all agents to df


In [22]:
# The number of samples is part of the output file name
n_samples = len(df)
csv_save_path = CSV_SAVE_PATH.format(n_samples)
# Only write when no file exists at the target path, so existing results are never overwritten
if not os.path.exists(csv_save_path):
    df.to_csv(csv_save_path)
    print("Saved csv to {}".format(csv_save_path))
else:
    print("File already exists at {}. Rename/remove it in order to save".format(csv_save_path))

Saved csv to saved/ADE20K/survey_results/processed_results_300_samples.csv
