In [46]:
import pandas as pd 
import numpy as np 
import utils

In [47]:
# Settings passed to obtain_most_likely_order, which forwards the first two to
# utils.get_biomarker_stage_probability.
# NOTE(review): presumably the number of initial iterations to discard — confirm against utils.
burn_in = 1000
# NOTE(review): presumably the thinning interval ("thining" is the spelling used
# throughout this notebook) — confirm against utils.
thining = 20
# Reference order; compared position-by-position with the estimated order below.
# NOTE(review): the values appear to match the numeric suffixes of the biomarker
# column names (e.g. "FCI(HIP)-1") — confirm.
real_order = [1, 3, 5, 2, 4]

In [48]:
def obtain_most_likely_order(df, burn_in, thining):
    """Pick the most probable stage for every biomarker.

    The accepted orders in ``df`` are converted by
    ``utils.get_biomarker_stage_probability`` into a probability table that is
    expected to have one row per biomarker and one column per stage. Rows are
    visited in table order and each biomarker is given the 1-based column
    position of its largest probability.

    When several stages tie for the largest probability, the first tied stage
    that has not been given to an earlier biomarker is used. A biomarker whose
    largest probability is unique always receives that stage, even when an
    earlier biomarker already holds it, so the result may contain repeated
    stages.

    Inputs:
        - df: accepted orders as a DataFrame; anything else (for example a
          list of order dicts) is first converted with ``pd.DataFrame``
        - burn_in: forwarded to ``utils.get_biomarker_stage_probability``
        - thining: forwarded to ``utils.get_biomarker_stage_probability``
    Outputs:
        - a dictionary where key is biomarker and value is the most likely
          stage (1-based column position) for that biomarker
    Raises:
        - ValueError: if every stage tied for a biomarker's largest
          probability has already been assigned to an earlier biomarker
    """
    # Work from the argument itself rather than from any notebook-level
    # variable, so the result does not depend on leftover kernel state.
    if not isinstance(df, pd.DataFrame):
        df = pd.DataFrame(df)
    prob_df = utils.get_biomarker_stage_probability(df, burn_in, thining)

    most_likely = {}
    assigned_stages = set()
    for biomarker, prob_arr in zip(prob_df.index, prob_df.to_numpy()):
        max_prob = np.max(prob_arr)
        # 0-based column positions that share the largest probability
        tied = np.flatnonzero(prob_arr == max_prob)
        if len(tied) > 1:
            # Break the tie in favour of the first stage nobody holds yet.
            free = [idx for idx in tied if idx + 1 not in assigned_stages]
            if not free:
                raise ValueError("Something wrong here")
            stage = int(free[0]) + 1
        else:
            # Unique maximum: taken as-is, without checking assigned_stages.
            stage = int(np.argmax(prob_arr)) + 1
        most_likely[biomarker] = stage
        assigned_stages.add(stage)
    return most_likely

In [49]:
# Load the logged accepted orders and drop the "iteration" column.
df = (
    pd.read_csv(
        "logs/uniform_prior/chen_data_conjugate_priors/all_current_accepted_order_dicts.csv"
    )
    .drop(columns="iteration")
)

In [50]:
# Preview the first rows of the loaded accepted orders.
df.head()

Unnamed: 0,FCI(HIP)-1,GMI(HIP)-3,FCI(Fusi)-5,FCI(PCC)-2,GMI(FUS)-4
0,4,3,5,2,1
1,4,2,5,3,1
2,4,2,5,3,1
3,1,2,5,3,4
4,1,2,5,3,4


In [51]:
# Map each biomarker to its most likely stage, then display the mapping.
most_likely_order_dic = obtain_most_likely_order(df, burn_in, thining)
most_likely_order_dic

{'FCI(HIP)-1': 2,
 'GMI(HIP)-3': 3,
 'FCI(Fusi)-5': 5,
 'FCI(PCC)-2': 1,
 'GMI(FUS)-4': 4}

In [52]:
# Stage values in the dictionary's insertion order, as an array so they can be
# compared position-by-position with real_order.
most_likely_order = np.array(list(most_likely_order_dic.values()))
most_likely_order

array([2, 3, 5, 1, 4])

In [58]:
# Distinct stages present in the estimated order.
set(most_likely_order)

{1, 2, 3, 4, 5}

In [53]:
# Warn when the estimated order does not use exactly the same set of stages as
# the reference order.
# NOTE(review): the message assumes the mismatch comes from repeated stages; a
# stage value that is absent from real_order would trigger it as well.
if set(most_likely_order) != set(real_order):
    print("This most likelihood has repeated stages.")

In [54]:
# 1 where the estimated stage equals the reference stage at the same position,
# 0 otherwise.
matches = (most_likely_order == np.array(real_order)).astype(int)

matches

array([0, 1, 1, 0, 1])

In [55]:
# Number of positions at which the two orders agree.
np.sum(matches)

3

In [56]:

# Element-wise comparison of the estimated order with the reference order.
# This rebinds `matches` to the boolean comparison result.
matches = most_likely_order == real_order
# Count the number of matching elements
num_overlap = np.sum(matches)
# Fraction of positions at which the estimated order equals the reference order.
accuracy = num_overlap/len(real_order)

In [57]:
# Display the fraction of positions where the estimated order matches real_order.
accuracy

0.6