In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Load data that was cached already

In [28]:
pred_labels = np.load('/data/ddmg/frank/shuvom/pred_matrices/model_pred_labels.npy')
true_labels = np.load('/data/ddmg/frank/shuvom/pred_matrices/true_labels.npy')
selected_classes = np.load('/data/ddmg/frank/shuvom/pred_matrices/selected_classes.npy')

ds_errors = np.load('/data/ddmg/frank/shuvom/pred_matrices/ds_error_rates_all_classes.npy')

# ds_errors.shape

(100, 3, 2, 2)

Only use the selected classes

In [15]:
indices = np.where(np.isin(true_labels, selected_classes))[0]
subset_pred_labels = pred_labels[indices]
subset_true_labels = true_labels[indices]


In [79]:
from itertools import permutations, product

def get_indices(lst, value):
    return [i for i, v in enumerate(lst) if v == value]

def generate_valid_orderings(lst):
    unique_values = sorted(set(lst))
    indices_permutations = [list(permutations(get_indices(lst, value))) for value in unique_values]
    merged_permutations = product(*indices_permutations)
    valid_orderings = [sum(permutation, ()) for permutation in merged_permutations]  # Merge tuples in each permutation
    return valid_orderings

def generate_all_valid_orderings(list1, list2):
    return generate_valid_orderings(list1), generate_valid_orderings(list2)


def count_inversions(list1, list2):
    inv_count = 0
    n = len(list1)
    for i in range(n):
        for j in range(i + 1, n):
            # Check if the order of indices i and j is inverted between the two lists
            if (list1.index(i) < list1.index(j) and list2.index(i) > list2.index(j)) or \
               (list1.index(i) > list1.index(j) and list2.index(i) < list2.index(j)):
                inv_count += 1
    return inv_count

def compute_inversion_distances(list1, list2):
    # Get all valid orderings for list1 and list2
    valid_orderings1, valid_orderings2 = generate_all_valid_orderings(list1, list2)

    # print(valid_orderings1, valid_orderings2)

    # Initialize an empty list to store the inversion distances
    inversion_distances = []

    # Iterate over all pairs of orderings
    for ordering1 in valid_orderings1:
        for ordering2 in valid_orderings2:
            # Compute the inversion distance for the current pair of orderings
            inv_dist = count_inversions(ordering1, ordering2)
            # Add the inversion distance to the list
            inversion_distances.append(inv_dist)

    return inversion_distances


In [88]:
n_simulations = 1000

for j in range(n_simulations):

    tot_invs = 0
    for i, selected_class in enumerate(selected_classes):

        # Get the indices where the true labels match the selected class
        class_indices = np.where(subset_true_labels == selected_class)[0]
        class_pred_labels = subset_pred_labels[class_indices]

        # Randomly select 10 indices without replacement from the first 50 indices
        bootstrapped_indices = np.random.choice(50, size=5, replace=False)
        bootstrapped_class_pred_labels = class_pred_labels[bootstrapped_indices]

        # Create a binary mask where the predicted labels match the selected class
        binary_mask = (bootstrapped_class_pred_labels == selected_class)  
        # Sum the binary mask along the 1 axis
        binary_mask_sum = np.sum(binary_mask, axis=0)

        binary_mask_total = (class_pred_labels == selected_class) 
        binary_mask_total_sum = np.sum(binary_mask_total, axis=0)

        # ds_error_rates = ds_errors[:,:,1,1][i]
        inv_dists = compute_inversion_distances(binary_mask_sum, binary_mask_total_sum)
        tot_invs += min(inv_dists)
    print(tot_invs)

    
    
    
    


24
23
16
24
17
21
13
14
24
19
17
31
27
15
23
15
21
26
25
14
26
20
22
19
23
20
17
18
22
14
17
16
21
20
26
16
17
13
23
18
14
19
24
21
21
23
21
16
16
15
15
15
20
19
22
16
16
15
14
19
20
16
19
12
18
22
20
20
14
27
15
16
24
14
24
21
10
17
11
18
22
22
14
14
27
19
25
17
19
21
22
14
23
12
19
16
22
17
28
20
21
16
19
17
12
25
19
25
20
27
12
18
27
12
22
19
14
23
11
27
23
22
21
16
18
26
16
24
23
22
27
18
32
16
12
13
26
25
12
33
19
11
22
15
10
10
15
14
16
19
17
10
24
18
22
17
23
16
15
12
10
24
15
27
18
23
18
12
13
17
19
20
21
11
14
23
26
15
14
11
16
28
16
21
19
16
22
17
13
17
17
23
19
23
22
32
25
23
13
26
17
21
24
17
23
20
26
24
17
17
15
20
18
21
23
14
16
27
21
14
18
19
20
18
26
17
22
26
19
22
21
15
12
28
20
13
17
13
25
19
16
17
23
18
10
15
17
20
27
27
15
14
13
24
29
17
15
17
19
12
22
11
12
25
16
22
21
19
13
24
11
24
21
21
17
17
17
13
10
20
24
24
31
11
18
28
17
20
22
19
27
9
20
26
27
21
19
28
22
14
15
25
18
22
20
16
11
11
18
33
17
17
15
23
16
20
18
18
13
18
19
21
18
16
27
17
10
20
15
17
26
21
15
22

In [87]:
tot_sum = 0

for i, selected_class in enumerate(selected_classes):
    class_indices = np.where(subset_true_labels == selected_class)[0]
    class_pred_labels = subset_pred_labels[class_indices]

    binary_mask_total = (class_pred_labels == selected_class) 
    binary_mask_total_sum = np.sum(binary_mask_total, axis=0)

    ds_error_rates = ds_errors[:,:,1,1][i]
    inv_dists = compute_inversion_distances(ds_error_rates, binary_mask_total_sum)
    tot_sum += min(inv_dists)
print(tot_invs)


64


In [72]:
from scipy.stats import kendalltau



# Test Case 2:
list1 = [0, 1, 2, 3]
list2 = [3, 2, 1, 0]
print(count_inversions(list1, list2))  # Expected Output: 6


# Now you can use this function in your existing code to compute the inversion distances
list1_ordering = (0, 2, 1)
list2_ordering = (2, 0, 1)
inversion_distance = count_inversions(list1_ordering, list2_ordering)
print(inversion_distance)  # Expected Output: 1


6
1


In [69]:


# Test cases:
# Test Case 1:
list1 = [0, 1, 2]
list2 = [2, 1, 0]
print(count_inversions(list1, list2))  # Expected Output: 1

# Test Case 2:
list1 = [0, 1, 2, 3]
list2 = [3, 2, 1, 0]
print(count_inversions(list1, list2))  # Expected Output: 6

# Test Case 3:
list1 = [0, 1, 2, 3, 4]
list2 = [4, 3, 2, 1, 0]
print(count_inversions(list1, list2))  # Expected Output: 10

# Test Case 4:
list1 = [0, 1, 2]
list2 = [1, 0, 2]
print(count_inversions(list1, list2))  # Expected Output: 1

# Test Case 5:
list1 = [0, 1, 2]
list2 = [0, 1, 2]
print(count_inversions(list1, list2))  # Expected Output: 0


3
6
10
1
0


In [73]:
# # Test Case 1:
# list1 = [8, 9, 8]
# list2 = [6, 10, 5]
# result = compute_inversion_distances(list1, list2)
# print(result)  # Expected Output: 0

# # Test Case 2:
# list1 = [1, 2, 3]
# list2 = [3, 2, 1]
# result = compute_inversion_distances(list1, list2)
# print(result)  # Expected Output: 1

# # Test Case 3:
# list1 = [5, 5, 5]
# list2 = [5, 5, 5]
# result = compute_inversion_distances(list1, list2)
# print(result)  # Expected Output: 0

# # Test Case 4:
# list1 = [1, 2, 3, 4]
# list2 = [4, 3, 2, 1]
# result = compute_inversion_distances(list1, list2)
# print(result)  # Expected Output: 3

# # Test Case 5:
# list1 = [10, 20, 10, 30]
# list2 = [20, 10, 30, 10]
# result = compute_inversion_distances(list1, list2)
# print(result)  # Expected Output: 1

# # Test Case 6:
# list1 = [4, 3, 2, 1]
# list2 = [1, 2, 3, 4]
# result = compute_inversion_distances(list1, list2)
# print(result)  # Expected Output: 3


[(0, 2, 1, 3), (2, 0, 1, 3)] [(1, 3, 0, 2), (3, 1, 0, 2)]
[4, 5, 5, 6]
[(3, 2, 1, 0)] [(0, 1, 2, 3)]
[6]
