# Aim

This notebook calculates the log-likelihoods of the max order and the real order, for both the simulated data and the Chen data.

In [25]:
import utils
import pandas as pd 
import numpy as np 
import eval 

## Simulated Data

In [26]:
# Two hand-written candidate orderings. Entry i is paired with the i-th
# biomarker (in the order returned by `data_we_have.biomarker.unique()`)
# when the order dicts are built further down.
# NOTE(review): "max" presumably means the highest-likelihood ordering found
# elsewhere, and "real" the ground-truth ordering -- confirm.
real_order = [1, 3, 5, 2, 4]
max_order = [2, 5, 4, 3, 1]

# Load the simulated dataset through the project helper.
data_we_have = eval.get_data_we_have("Simulated Data")

In [27]:
# Basic counts derived from the long-format table.
n_participants = len(data_we_have["participant"].unique())

# Distinct biomarkers, in order of first appearance in the data.
biomarkers = data_we_have["biomarker"].unique()
n_biomarkers = len(biomarkers)

# One more stage than there are biomarkers.
# NOTE(review): presumably stage 0 is the non-diseased stage -- confirm.
n_stages = 1 + n_biomarkers

# Ids of participants flagged as not diseased.
is_non_diseased = data_we_have["diseased"].eq(False)
non_diseased_participant_ids = data_we_have.loc[
    is_non_diseased, "participant"
].unique()

# Stages 1 .. n_biomarkers (inclusive).
diseased_stages = np.arange(1, n_stages)

In [28]:
# Estimate the theta/phi parameters for the biomarkers in the simulated data.
# NOTE(review): n_clusters=2 presumably splits measurements into an affected
# and a non-affected group -- confirm against utils.get_theta_phi_estimates.
theta_phi_estimates = utils.get_theta_phi_estimates(
    data_we_have, biomarkers,
    n_clusters=2,
    method="kmeans_and_hierarchical_clustering",
)

In [29]:
# Map each biomarker to its position in the max ordering.
# zip() silently truncates to the shorter input, so fail loudly if the
# hand-written order list does not line up with the biomarkers in the data.
assert len(max_order) == len(biomarkers), (
    f"max_order has {len(max_order)} entries "
    f"but the data has {len(biomarkers)} biomarkers"
)
max_order_dict = dict(zip(biomarkers, max_order))
max_order_dict

{'HIP-FCI': 2, 'HIP-GMI': 5, 'FUS-FCI': 4, 'PCC-FCI': 3, 'FUS-GMI': 1}

In [30]:
# Log-likelihood of the simulated data under the max ordering
# (the helper returns a single scalar, displayed below).
all_participant_ln_likelihood = utils.compute_all_participant_ln_likelihood(
    data_we_have, max_order_dict, n_participants,
    non_diseased_participant_ids, theta_phi_estimates, diseased_stages,
)
all_participant_ln_likelihood

-817.697288019683

In [31]:
# Map each biomarker to its position in the real ordering.
# zip() silently truncates to the shorter input, so fail loudly if the
# hand-written order list does not line up with the biomarkers in the data.
assert len(real_order) == len(biomarkers), (
    f"real_order has {len(real_order)} entries "
    f"but the data has {len(biomarkers)} biomarkers"
)
real_order_dict = dict(zip(biomarkers, real_order))
real_order_dict

{'HIP-FCI': 1, 'HIP-GMI': 3, 'FUS-FCI': 5, 'PCC-FCI': 2, 'FUS-GMI': 4}

In [32]:
# Log-likelihood of the simulated data under the real ordering.
# Overwrites the value computed for the max ordering above.
all_participant_ln_likelihood = utils.compute_all_participant_ln_likelihood(
    data_we_have, real_order_dict, n_participants,
    non_diseased_participant_ids, theta_phi_estimates, diseased_stages,
)
all_participant_ln_likelihood

-851.6828144578428

## Chen Data

In [33]:
# Candidate orderings for the Chen dataset. Entry i is paired with the i-th
# biomarker (in the order returned by `data_we_have.biomarker.unique()`)
# when the order dicts are built further down.
# NOTE(review): "max" presumably means the highest-likelihood ordering found
# elsewhere, and "real" the reference ordering -- confirm.
real_order = [1, 3, 5, 2, 4]
max_order = [3, 5, 1, 4, 2]

# Load the Chen dataset; this rebinds `data_we_have`, replacing the simulated data.
data_we_have = eval.get_data_we_have("Chen Data")

In [34]:
# Recompute the basic counts for the Chen dataset.
n_participants = len(data_we_have["participant"].unique())

# Distinct biomarkers, in order of first appearance in the data.
biomarkers = data_we_have["biomarker"].unique()
n_biomarkers = len(biomarkers)

# One more stage than there are biomarkers.
# NOTE(review): presumably stage 0 is the non-diseased stage -- confirm.
n_stages = 1 + n_biomarkers

# Ids of participants flagged as not diseased.
is_non_diseased = data_we_have["diseased"].eq(False)
non_diseased_participant_ids = data_we_have.loc[
    is_non_diseased, "participant"
].unique()

# Stages 1 .. n_biomarkers (inclusive).
diseased_stages = np.arange(1, n_stages)

In [35]:
# Estimate the theta/phi parameters for the biomarkers in the Chen data.
# NOTE(review): n_clusters=2 presumably splits measurements into an affected
# and a non-affected group -- confirm against utils.get_theta_phi_estimates.
theta_phi_estimates = utils.get_theta_phi_estimates(
    data_we_have, biomarkers,
    n_clusters=2,
    method="kmeans_and_hierarchical_clustering",
)

In [36]:
# Map each Chen biomarker to its position in the max ordering.
# zip() silently truncates to the shorter input, so fail loudly if the
# hand-written order list does not line up with the biomarkers in the data.
assert len(max_order) == len(biomarkers), (
    f"max_order has {len(max_order)} entries "
    f"but the data has {len(biomarkers)} biomarkers"
)
max_order_dict = dict(zip(biomarkers, max_order))
max_order_dict

{'FCI(HIP)': 3, 'GMI(HIP)': 5, 'FCI(Fusi)': 1, 'FCI(PCC)': 4, 'GMI(FUS)': 2}

In [37]:
# Log-likelihood of the Chen data under the max ordering
# (the helper returns a single scalar, displayed below).
all_participant_ln_likelihood = utils.compute_all_participant_ln_likelihood(
    data_we_have, max_order_dict, n_participants,
    non_diseased_participant_ids, theta_phi_estimates, diseased_stages,
)
all_participant_ln_likelihood

-1074.8069836311095

In [38]:
# Map each Chen biomarker to its position in the real ordering.
# zip() silently truncates to the shorter input, so fail loudly if the
# hand-written order list does not line up with the biomarkers in the data.
assert len(real_order) == len(biomarkers), (
    f"real_order has {len(real_order)} entries "
    f"but the data has {len(biomarkers)} biomarkers"
)
real_order_dict = dict(zip(biomarkers, real_order))
real_order_dict

{'FCI(HIP)': 1, 'GMI(HIP)': 3, 'FCI(Fusi)': 5, 'FCI(PCC)': 2, 'GMI(FUS)': 4}

In [39]:
# Log-likelihood of the Chen data under the real ordering.
# Overwrites the value computed for the max ordering above.
all_participant_ln_likelihood = utils.compute_all_participant_ln_likelihood(
    data_we_have, real_order_dict, n_participants,
    non_diseased_participant_ids, theta_phi_estimates, diseased_stages,
)
all_participant_ln_likelihood

-1198.2782155380535