This notebook computes the Jensen-Shannon divergence (JSD) once the prediction vectors have been stored.

It also includes the YTI dataset, but because of the background masking applied for evaluation, JSD is not very meaningful in this specific setting.

The notebook is useful for visualizing the final table, with the best JSD per activity highlighted.

In [1]:
import numpy as np
from scipy.spatial.distance import jensenshannon
from typing import List
import os
import pickle
import pandas as pd
from IPython.display import display

In [2]:
def count_segments(vector):
    '''
    Return the labels and lengths of the constant-valued segments of a
    label sequence.
    Args:
    - vector: 1-D sequence of labels (NumPy array, list or tuple).
    Returns:
    - list of (label, length) tuples, one per run of consecutive identical
    labels, in order of appearance. An empty input gives an empty list.
    '''
    # Accept plain Python sequences as well: the array indexing used below
    # does not work on lists.
    vector = np.asarray(vector)
    if len(vector) == 0:
        return []

    # Positions where a label differs from its predecessor, i.e. where a new
    # segment starts. Comparing neighbours directly (rather than differencing)
    # also works for non-numeric labels.
    change_indices = np.where(vector[1:] != vector[:-1])[0] + 1
    # Include the start and end of the vector
    segment_indices = np.concatenate(([0], change_indices, [len(vector)]))
    # Distance between consecutive boundaries = segment length
    segment_lengths = np.diff(segment_indices)
    # Label of each segment, read at its first position
    segment_values = vector[segment_indices[:-1]]

    return list(zip(segment_values, segment_lengths))

def compute_js_divergence(hist1, hist2):
    '''
    Compare two histograms with SciPy's Jensen-Shannon measure (base 2).

    NOTE: `scipy.spatial.distance.jensenshannon` returns the Jensen-Shannon
    *distance*, i.e. the square root of the divergence, and normalizes both
    inputs so that they sum to 1.
    Args:
    - hist1: array-like, first histogram.
    - hist2: array-like, second histogram (same length as hist1).
    Returns:
    - float, the value returned by `jensenshannon`.
    '''
    return jensenshannon(hist1, hist2, base=2)

def compute_bins(max_segment, min_segment, bin_width):
    '''
    Build the histogram bin edges used for segment lengths.
    Args:
    - max_segment: upper edge of the last bin.
    - min_segment: lower edge of the first bin.
    - bin_width: spacing between consecutive lower edges.
    Returns:
    - list of edges: min_segment, min_segment + bin_width, ... followed by
    max_segment (the last bin may therefore be narrower than bin_width).
    '''
    # The upper threshold is currently the maximum itself. For visualization
    # only, it can be lowered (e.g. to max_segment / 3) so that segments
    # beyond the threshold are treated as a single bin.
    upper = max_segment
    n_edges = int(np.ceil((upper - min_segment) / bin_width))

    edges = []
    for k in range(n_edges):
        edges.append(min_segment + k * bin_width)
    # Close the final bin at the true maximum
    edges.append(max_segment)

    return edges

def custom_binning(segments, min_segment, max_segment, bin_width, return_quant=False):
    '''
    Compute histogram counts for the segment lengths.
    Args:
    - segments: array-like, segment lengths.
    - min_segment: lower edge of the first bin.
    - max_segment: upper edge of the last bin.
    - bin_width: width of the bins.
    - return_quant: bool, if True also return the values that fall in
    each bin.
    Returns:
    - bins: list, bin edges from `compute_bins`.
    - counts: np.array, number of segments per bin.
    - quantized_bins: list of lists with the segment lengths of each bin
    (only if return_quant is True).
    '''
    # Boolean masking below requires an array, so accept lists as well
    segments = np.asarray(segments)
    bins = compute_bins(max_segment, min_segment, bin_width)
    # Compute histogram counts
    counts, _ = np.histogram(segments, bins=bins)

    if not return_quant:
        return bins, counts

    # Collect the values of each bin. Every bin is half-open [lower, upper)
    # except the last one, which np.histogram treats as closed
    # [lower, upper]; mirror that so the lists agree with `counts`.
    last = len(counts) - 1
    quantized_bins = []
    for i, (lower, upper) in enumerate(zip(bins[:-1], bins[1:])):
        if i == last:
            in_bin = (segments >= lower) & (segments <= upper)
        else:
            in_bin = (segments >= lower) & (segments < upper)
        quantized_bins.append(list(segments[in_bin]))

    return bins, counts, quantized_bins

def compute_hists(vector, bin_width, max_length=None, return_segment_length=False):
    '''
    Histogram the segment lengths of a label sequence.
    Args:
    - vector: label sequence, split into segments by `count_segments`.
    - bin_width: width of the histogram bins.
    - max_length: upper edge of the last bin. If None, the length of
    `vector` is used.
    - return_segment_length: bool, if True the segment lengths are
    returned as well.
    Returns:
    - counts: np.array, number of segments per bin.
    - segment_lengths: np.array, length of every segment (only if
    return_segment_length is True).
    '''
    segment_lengths = np.array([length for _, length in count_segments(vector)])
    upper = len(vector) if max_length is None else max_length
    # Bins always start at 0; the edges themselves are not needed here
    _, counts = custom_binning(segment_lengths, 0, upper, bin_width=bin_width)
    counts = np.array(counts)
    return (counts, segment_lengths) if return_segment_length else counts

In [3]:
# Example of computing the JS divergence for CTE and TOT
vectors_path = 'YOUR_PATH'

def compute_distributions(method, dataset, bin_width, max_length=None, return_base_hist=False, activities:List=None):
    '''
    Compute, per activity, the mean JS score between the segment-length
    histograms of the ground truth and of the predictions of `method`.
    Args:
    - method: str, method to compute the JS divergence.
    - dataset: str, dataset to compute the JS divergence.
    - bin_width: int, width of the bins for the histograms.
    - max_length: dict, maximum length of the videos for each
    activity. If None (or if an activity is missing), the maximum length
    is computed from the predictions. The dict is not modified.
    - return_base_hist: bool, if True, the histograms of the
    gt distribution is returned.
    - activities: list, activities to compute the JS divergence. If
    None, the activities are set according to the dataset.
    Returns:
    - res: dict with 'JS' (list, mean score of each activity, in the order
    of `activities`) and 'all_preds' (all predictions stacked together).
    - base_hist: dict, base histograms for each activity and video (only
    if return_base_hist is True).
    Raises:
    - ValueError: if `activities` is None and `dataset` is not one of
    'BF', 'IKEA', 'YTI'.
    '''
    default_activities = {
        'BF': ['coffee', 'cereals', 'tea', 'milk', 'juice',
               'sandwich', 'scrambledegg', 'friedegg', 'salat', 'pancake'],
        'IKEA': ['Kallax_Shelf_Drawer', 'Lack_Coffee_Table', 'Lack_Side_Table', 'Lack_TV_Bench'],
        'YTI': ['changing_tire', 'cpr', 'jump_car', 'repot', 'coffee'],
    }
    if activities is None:
        if dataset not in default_activities:
            raise ValueError(
                f'Unknown dataset {dataset!r}: pass `activities` explicitly '
                f'or use one of {sorted(default_activities)}'
            )
        activities = default_activities[dataset]

    # Work on a private copy: entries are added/overwritten below and the
    # caller's dict must not be altered as a side effect.
    max_length = {} if max_length is None else dict(max_length)

    # Initialize variables
    means_js = []
    base_hist = {}
    all_predictions = []

    for activity in activities:
        max_length.setdefault(activity, None)
        # The predictions should be saved in a directory pointed by `path`.
        # The predictions should be saved in a pickle file with the following structure:
        # {
        #     'gt': ([np.array], None), # Ground truth
        #     0: ([np.array], dict) # Predictions, dictionary for matching the predicted labels with gt
        # }
        path = f'{vectors_path}/{dataset}/{method}/{activity}/'
        jss = []
        # Sorted so that the processing order (and everything derived from
        # it) does not depend on the file system.
        for file in sorted(os.listdir(path)):
            # NOTE: unpickling can execute arbitrary code; only load files
            # from a trusted source.
            with open(os.path.join(path, file), 'rb') as f:
                segm = pickle.load(f)
            gt = segm['gt'][0]
            # Exclude background
            gt = gt[gt != -1]
            pred = segm[0][0]
            all_predictions.append(pred)

            # For YTI the background is removed from gt only, so gt and pred
            # differ in length; a common maximum keeps their bins identical.
            # NOTE(review): the value comes from the first video processed
            # and is then reused for the whole activity - confirm intended.
            if max_length[activity] is None and dataset == 'YTI':
                max_length[activity] = len(pred)

            video_name = file.removeprefix(f'{method}_')
            activity_hists = base_hist.setdefault(activity, {})
            if video_name not in activity_hists:
                activity_hists[video_name] = compute_hists(gt, bin_width, max_length[activity])
            base = activity_hists[video_name]
            hist = compute_hists(pred, bin_width, max_length[activity])

            jss.append(compute_js_divergence(base, hist))

        means_js.append(np.mean(jss))

    res = {
        'JS': means_js,
        'all_preds': np.hstack(all_predictions),
    }
    if return_base_hist:
        return res, base_hist
    return res


In [4]:
# Precomputed maximum video length for each Breakfast activity,
# passed as `max_length` when computing the distributions.
max_length_activities = {
    "coffee": 1121,
    "cereals": 1004,
    "tea": 994,
    "milk": 1812,
    "juice": 1771,
    "sandwich": 3130,
    "scrambledegg": 5944,
    "friedegg": 8138,
    "salat": 5445,
    "pancake": 9341,
}

In [5]:
metric = 'JS'
dfs = {}
dataset = "BF"
bin_width = 20

# Total number of frames per activity, for each dataset, keyed by activity
# name. The activity list passed to `compute_distributions` is taken from
# these keys, so every row of the table pairs the frame count and the scores
# of the same activity.
# NOTE(review): the YTI counts follow the order stated in the original
# comment (changing tire, coffee, cpr, jump car, repot), which differs from
# the default YTI activity order of `compute_distributions` - confirm.
frames_per_activity = {
    'BF': {
        'coffee': 97958, 'cereals': 129551, 'tea': 131782, 'milk': 177387,
        'juice': 241462, 'sandwich': 259495, 'scrambledegg': 517478,
        'friedegg': 539733, 'salat': 558928, 'pancake': 937125,
    },
    'IKEA': {
        'Kallax_Shelf_Drawer': 493635, 'Lack_Coffee_Table': 972309,
        'Lack_Side_Table': 747453, 'Lack_TV_Bench': 831693,
    },
    'YTI': {
        'changing_tire': 20158, 'coffee': 20705, 'cpr': 11220,
        'jump_car': 12546, 'repot': 12345,
    },
}
if dataset not in frames_per_activity:
    raise ValueError(f'Unknown dataset {dataset!r}')
activities = list(frames_per_activity[dataset])
total_frames = list(frames_per_activity[dataset].values())

# `max_length_activities` holds the maximum video length per activity for
# Breakfast only. Do not reuse it for other datasets (YTI also has a
# 'coffee' activity that would silently pick up the Breakfast value).
max_length = max_length_activities if dataset == 'BF' else None

# Compute distributions for CTE, TOT and TOT+TCL
# `base_hist` represents the histograms of the ground truth distribution
cte, base_hist = compute_distributions('cte', dataset, bin_width, max_length=max_length,
                                       return_base_hist=True, activities=activities)
tot = compute_distributions('tot', dataset, bin_width, max_length=max_length, activities=activities)
tot_tcl = compute_distributions('tot_tcl', dataset, bin_width, max_length=max_length, activities=activities)

# Create a DataFrame with the results (one row per activity)
df = pd.DataFrame({
    'n_frames': total_frames,
    'cte': cte[metric],
    'tot': tot[metric],
    'tot_tcl': tot_tcl[metric],
})

# Row styler: yellow background and black text on the minimum (JSD lower is better)
def highlight_min(x):
    '''
    Return one CSS string per entry of `x`: the highlight style where the
    entry equals the minimum of `x`, an empty string elsewhere.
    '''
    lowest = x.min()
    return np.where(x == lowest, 'background-color: yellow; color: black', '')

# Frame-weighted average of every method column (weights: number of frames per activity)
method_columns = ['cte', 'tot', 'tot_tcl']
weighted_avg = np.average(df[method_columns], weights=df['n_frames'], axis=0)

# Create a DataFrame for the weighted averages
weighted_avg_df = pd.DataFrame([weighted_avg], columns=method_columns)

# Append the weighted averages as the last row to the original DataFrame
# (`n_frames` has no value for that row and becomes NaN)
df = pd.concat([df, weighted_avg_df], ignore_index=True)

# Highlight the best (lowest) score of each row with Styler.apply. The
# comparison is restricted to the method columns so that `n_frames` can
# never be selected as the row minimum.
styled_df = df.style.apply(highlight_min, axis=1, subset=method_columns)

# Keep the styled table for this bin width and display it
dfs[bin_width] = styled_df
display(styled_df)

Unnamed: 0,n_frames,cte,tot,tot_tcl
0,97958.0,0.834573,0.824994,0.788244
1,129551.0,0.870401,0.848207,0.84525
2,131782.0,0.869641,0.882234,0.806441
3,177387.0,0.880183,0.862942,0.86164
4,241462.0,0.864508,0.908549,0.834595
5,259495.0,0.908274,0.928674,0.844862
6,517478.0,0.873144,0.896093,0.838682
7,539733.0,0.888757,0.902804,0.867361
8,558928.0,0.902603,0.905874,0.903815
9,937125.0,0.903957,0.917168,0.85793
