In [None]:
%%capture
pip install bipartitepandas

In [None]:
%%capture
pip install pytwoway

In [None]:
%%capture
pip install ruptures

In [None]:
# import simbipartiteTest as simTest
import matplotlib.pyplot as plt
import matplotlib.gridspec # To plot clustermap and heatmap side by side
import seaborn as sns
# import CostVisitSimTest as CostSim
import pandas as pd
import pytwoway as tw
import bipartitepandas as bpd
import numpy as np
# import PyChest
import ruptures as rpt
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
# import scipy
import time
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
# Silence the warnings below so they do not flood the notebook output.
simplefilter("ignore", category=ConvergenceWarning) # scikit-learn ConvergenceWarning; useful for the logistic regression
pd.options.mode.chained_assignment = None  # default='warn' # Turns off pandas' chained-assignment ("copy on slice") warning

In [None]:
def temporal_simulation(nb_of_periods,
                           n_patients,
                           n_doctors,
                           z,
                           alpha_law_graph=(0, 0.5),
                           psi_law_graph=(0, 0.5),
                           alpha_law_cost=(0, 0.5),
                           psi_law_cost=(0, 0.5),
                           preconditioner = 'ichol',
                           beta_age_p_graph=0.01,
                           beta_age_d_graph=0.01,
                           beta_sex_p_graph=0.5,
                           beta_sex_d_graph=0.5,
                           beta_distance_graph=0.5,
                           beta_age_p_cost=0.5,
                           beta_age_d_cost=0.5,
                           beta_sex_p_cost=0.5,
                           beta_sex_d_cost=0.5,
                           beta_distance_cost=0.5,
                           seed=None):
    """
    Simulate a patient/doctor network over several periods and estimate its fixed effects (FE).

    Patients and doctors receive random coordinates in the unit square, ages, genders,
    graph-formation FE and cost FE. Doctor index 0 is a "ghost doctor": it sits at
    distance 0 from every patient and is connected to every patient in every period.

    At each period t:
      1. connections are drawn for every (patient, real doctor) pair whose distance is
         at most z, with probability sigmoid(alpha + psi + beta' * features);
      2. a logistic regression of the connection indicator on the features plus one
         dummy per patient and per doctor is fitted on all pairs;
      3. every patient/doctor whose changepoint period equals t gets its cost FE redrawn
         from the cost law shifted by +5;
      4. the cost y is computed on the connected pairs and the cost FE are estimated
         with pytwoway's FEControlEstimator.

    Parameters
    ----------
    nb_of_periods : int
        Number of simulated periods.
    n_patients, n_doctors : int
        Number of patients and of real doctors (the ghost doctor is added on top).
    z : float
        Maximum distance at which a patient and a real doctor can be connected.
    alpha_law_graph, psi_law_graph, alpha_law_cost, psi_law_cost : (low, high)
        Bounds of the uniform laws the patient (alpha) and doctor (psi) FE are drawn from,
        for the graph-formation model and for the cost model.
    preconditioner : {'ichol', 'jacobi'}
        Forwarded to pytwoway's FE estimator parameters.
    beta_*_graph, beta_*_cost : float
        Coefficients of the observed features in the graph-formation and cost models.
    seed : None, int or anything accepted by numpy.random.default_rng
        Seed of the random generator used for every draw; None gives a fresh,
        non-reproducible stream.

    Returns
    -------
    list of dict, one per period, with keys
        'estimates'  : fe_estimator.gamma_hat_dict after fitting;
        'true_value' : DataFrame of the connected pairs (i, j, age_p, age_d, sex_p,
                       sex_d, distance, alpha, psi, y);
        'graph'      : {'coeffs': logistic-regression coefficients ordered as
                        [age_p, age_d, sex_p, sex_d, distance, p_0.., d_0..],
                        'alpha': true graph FE of patients,
                        'psi': true graph FE of doctors (index 0 = ghost doctor)}.

    Raises
    ------
    ValueError
        If preconditioner is neither 'ichol' nor 'jacobi'.
    """
    if preconditioner not in ['ichol', 'jacobi']:
        raise ValueError("preconditioner has to be 'ichol' or 'jacobi'. Prefer 'jacobi' for large datasets.")

    # Parameters of the FE estimator.
    fecontrol_params = tw.fecontrol_params(
    {
        'ho': True,
        'he': False,
        'feonly': True,
        'continuous_controls': ['distance', 'age_d', 'age_p'],
        'categorical_controls': ['sex_p', 'sex_d'],
        'attach_fe_estimates': True,
        'ncore': 8,
        'preconditioner': preconditioner # It looks like it gives better results (especially for large datasets ?)
    }
    )

    clean_params = bpd.clean_params(
    {
        'connectedness': 'leave_out_spell',
        'collapse_at_connectedness_measure': True,
        'drop_single_stayers': True,
        'drop_returns': 'returners',
        'copy': False
    }
    )

    # A single generator drives every draw so that `seed` makes a run reproducible.
    rng = np.random.default_rng(seed)
    n_slots = n_doctors + 1  # doctor index 0 is the ghost doctor
    log = LogisticRegression()

    # FE of the graph-formation model (kept as lists, as returned to the caller).
    alpha_graph = rng.uniform(alpha_law_graph[0], alpha_law_graph[1], size=n_patients).tolist()
    psi_graph = rng.uniform(psi_law_graph[0], psi_law_graph[1], size=n_slots).tolist()

    # FE of the cost model, stored as dicts so that Series.map can be used later.
    alpha_cost = dict(enumerate(rng.uniform(alpha_law_cost[0], alpha_law_cost[1], size=n_patients).tolist()))
    psi_cost = dict(enumerate(rng.uniform(psi_law_cost[0], psi_law_cost[1], size=n_slots).tolist()))

    # Period at which the cost FE of each patient / doctor is redrawn.
    changepoint_patient = rng.integers(0, nb_of_periods, size=n_patients)
    changepoint_doctor = rng.integers(0, nb_of_periods, size=n_slots)

    # Coordinates in the unit square (the ghost doctor has none).
    coor_patients = rng.uniform(0, 1, size=(n_patients, 2))
    coor_doctors = rng.uniform(0, 1, size=(n_doctors, 2))

    # Distance matrix: column 0 (ghost doctor) stays at 0, column j is real doctor j - 1.
    D = np.zeros((n_patients, n_slots))
    D[:, 1:] = np.linalg.norm(coor_patients[:, None, :] - coor_doctors[None, :, :], axis=2)

    # Random draws of ages for patients and doctors
    sim_patient_age = rng.integers(low = 1, high = 99, size = n_patients)
    sim_doctor_age = rng.integers(low = 26, high = 99, size = n_slots)

    # Random draws of genders of patients and doctors
    sim_patient_gender = rng.integers(low = 0, high = 2, size = n_patients)
    sim_doctor_gender = rng.integers(low = 0, high = 2, size = n_slots)

    # One row per (patient, doctor) pair, patients varying slowest.
    id_p = np.repeat(np.arange(n_patients), n_slots)
    id_d = np.tile(np.arange(n_slots), n_patients)

    # Observed features of each pair
    age_p_data = np.repeat(sim_patient_age, n_slots)
    age_d_data = np.tile(sim_doctor_age, n_patients)
    sex_p_data = np.repeat(sim_patient_gender, n_slots)
    sex_d_data = np.tile(sim_doctor_gender, n_patients)
    distance_data = D[id_p, id_d]

    # Connection probabilities do not depend on the period: compute them once.
    linear_index = (np.asarray(alpha_graph)[:, None]
                    + np.asarray(psi_graph)[None, :]
                    + beta_age_p_graph * sim_patient_age[:, None]
                    + beta_age_d_graph * sim_doctor_age[None, :]
                    + beta_sex_p_graph * sim_patient_gender[:, None]
                    + beta_sex_d_graph * sim_doctor_gender[None, :]
                    + beta_distance_graph * D)
    link_probability = 1 / (1 + np.exp(-linear_index))
    within_reach = D <= z  # pairs farther apart than z can never be connected

    # Design matrix of the graph-formation logistic regression. Only the response
    # changes from one period to the next, so the matrix is built once. Column order:
    # age_p, age_d, sex_p, sex_d, distance, one dummy per patient, one dummy per doctor.
    n_controls = 5
    n_rows = n_patients * n_slots
    X = np.zeros((n_rows, n_controls + n_patients + n_slots))
    X[:, 0] = age_p_data
    X[:, 1] = age_d_data
    X[:, 2] = sex_p_data
    X[:, 3] = sex_d_data
    X[:, 4] = distance_data
    row_ids = np.arange(n_rows)
    X[row_ids, n_controls + id_p] = 1
    X[row_ids, n_controls + n_patients + id_d] = 1

    estimates = []

    # At each period, determine connections
    for t in range(nb_of_periods):

        # Connection matrix: Bernoulli draws within reach, nothing beyond z,
        # and the ghost doctor (column 0) connected to everybody.
        A = np.where(within_reach, rng.binomial(1, link_probability), 0)
        A[:, 0] = 1
        relation = A.ravel()

        # Logistic regression for graph formation, fitted on every pair.
        coeffs = log.fit(X, relation).coef_[0]

        # Keep only the pairs that are actually connected.
        connected = relation == 1
        dataframe = pd.DataFrame(data={'i': id_p[connected], 'j': id_d[connected],
                                       'age_p': age_p_data[connected], 'age_d': age_d_data[connected],
                                       'sex_p': sex_p_data[connected], 'sex_d': sex_d_data[connected],
                                       'distance': distance_data[connected]})

        # Redraw the cost FE of the patients/doctors whose changepoint is this period.
        # The new value comes from the cost law shifted by +5 (a single uniform draw).
        for index_patient in np.flatnonzero(changepoint_patient == t):
            alpha_cost[int(index_patient)] = rng.uniform(alpha_law_cost[0] + 5, alpha_law_cost[1] + 5)

        for index_doctor in np.flatnonzero(changepoint_doctor == t):
            psi_cost[int(index_doctor)] = rng.uniform(psi_law_cost[0] + 5, psi_law_cost[1] + 5)

        dataframe['alpha'] = dataframe['i'].map(alpha_cost).astype(float)
        dataframe['psi'] = dataframe['j'].map(psi_cost).astype(float)

        # Compute the cost
        dataframe['y'] = (dataframe['alpha'] + dataframe['psi']
                          + beta_age_p_cost * dataframe['age_p']
                          + beta_age_d_cost * dataframe['age_d']
                          + beta_sex_p_cost * dataframe['sex_p']
                          + beta_sex_d_cost * dataframe['sex_d']
                          + beta_distance_cost * dataframe['distance'])

        # Change dtype of categorical variables
        dataframe['sex_p'] = dataframe['sex_p'].astype("category")
        dataframe['sex_d'] = dataframe['sex_d'].astype("category")

        # The ghost doctor is connected to every patient in every period, so doctor
        # ids are never re-based: j = 0 always denotes the ghost doctor and the real
        # doctors are 1..n_doctors.

        # Wrap the data (without the true FE) in a BipartiteDataFrame to estimate the FE.
        bdf = bpd.BipartiteDataFrame(dataframe.drop(['alpha', 'psi'] , axis = 1),
                                     custom_categorical_dict = {'sex_p': True,
                                                                'sex_d': True},
                                     custom_dtype_dict = {'sex_p': 'categorical',
                                                          'sex_d': 'categorical'},
                                     custom_how_collapse_dict = {'sex_p': 'first',
                                                                 'sex_d': 'first'})

        bdf.clean(clean_params)
        fe_estimator = tw.FEControlEstimator(bdf, fecontrol_params)
        print(f"Estimating FE for period {t}")
        fe_estimator.fit()

        d = {}
        d['estimates'] = fe_estimator.gamma_hat_dict # Estimates of the FE and betas of the cost model
        d['true_value'] = dataframe # True values: the connected pairs with their true FE and cost
        d['graph'] = {}
        d['graph']['coeffs'] = coeffs
        d['graph']['alpha'] = alpha_graph
        d['graph']['psi'] = psi_graph
        estimates.append(d)

    return estimates

def extract(temporal_simulation):
    """
    Turn the per-period output of a simulation into per-patient / per-doctor time series.

    Parameters
    ----------
    temporal_simulation : list of dict
        One dict per period t, where
        temporal_simulation[t]['true_value'] is a DataFrame with columns 'i', 'j', 'alpha', 'psi',
        temporal_simulation[t]['estimates'] holds the cost FE estimates under 'alpha' and 'psi'
        (the 'psi' entry includes the ghost doctor at index 0),
        temporal_simulation[t]['graph'] holds 'coeffs' (logistic-regression coefficients),
        'alpha' and 'psi' (true graph FE).

    Returns
    -------
    dict
        result[kind][model][effect][k] is the list, over periods, of the value for
        patient/doctor k, with kind in {'estimates', 'true_value'}, model in
        {'cost', 'graph'} and effect in {'alpha', 'psi'}.
        Only real doctors are extracted: doctor key k corresponds to doctor id k + 1,
        the ghost doctor (id 0) is skipped. A patient absent from a period's
        'true_value' gets no entry for that period.

    Raises
    ------
    IndexError
        If a real doctor has no row in a period's 'true_value' DataFrame, so its true
        cost FE cannot be read for that period.
    """
    # The graph coefficients are ordered as: 5 observed features, then one dummy per
    # patient, then one dummy per doctor (ghost doctor first).
    n_features = 5

    nb_of_periods = len(temporal_simulation)
    n_patients = len(temporal_simulation[0]['estimates']['alpha'])
    n_doctors = len(temporal_simulation[0]['estimates']['psi']) # includes the ghost doctor, which is never removed

    def _empty_series(count):
        return {k: [] for k in range(count)}

    estimates = {
        kind: {
            model: {
                'alpha': _empty_series(n_patients),
                'psi': _empty_series(n_doctors - 1),
            }
            for model in ('cost', 'graph')
        }
        for kind in ('estimates', 'true_value')
    }

    for t in range(nb_of_periods):
        period = temporal_simulation[t]
        df = period['true_value']
        cost_hat = period['estimates']
        graph = period['graph']

        # First row of each patient / doctor, looked up once per period instead of
        # scanning the whole frame for every patient and every doctor.
        true_alpha = df.drop_duplicates(subset='i').set_index('i')['alpha']
        true_psi = df.drop_duplicates(subset='j').set_index('j')['psi']

        for i in df['i'].unique():

            estimates['estimates']['cost']['alpha'][i].append( cost_hat['alpha'][i] )
            estimates['estimates']['graph']['alpha'][i].append( graph['coeffs'][n_features + i] )
            estimates['true_value']['cost']['alpha'][i].append( true_alpha.at[i] )
            estimates['true_value']['graph']['alpha'][i].append( graph['alpha'][i] )

        for j in range(n_doctors - 1): # We dodge the ghost doctor
            doctor_id = j + 1

            if doctor_id not in true_psi.index:
                raise IndexError(f"doctor {doctor_id} has no connection at period {t}: its true cost fixed effect cannot be extracted")

            estimates['estimates']['cost']['psi'][j].append( cost_hat['psi'][doctor_id] )
            estimates['estimates']['graph']['psi'][j].append( graph['coeffs'][n_features + n_patients + doctor_id] )
            estimates['true_value']['cost']['psi'][j].append( true_psi.at[doctor_id] )
            estimates['true_value']['graph']['psi'][j].append( graph['psi'][doctor_id] )

    return estimates

def changepoint(estimates, process_count, cost="l2", windows_width = 20):
    """
    Locate the first changepoint of every cost fixed-effect series.

    For each patient ('alpha') and each doctor ('psi'), a ruptures Window detector is run
    on the true series and on the estimated series, asking for process_count - 1
    breakpoints; the first element of the returned breakpoint list is stored.

    estimates      : output of extract (only the 'cost' series are used).
    process_count  : the detector is asked for process_count - 1 breakpoints.
    cost           : name of the ruptures cost model, e.g. "l1", "l2", "rbf", "linear", "normal", "ar".
    windows_width  : width of the sliding window handed to rpt.Window.

    Returns a dict result[kind][effect][k] with kind in {'estimates', 'true_value'}
    and effect in {'alpha', 'psi'}.
    """
    estimated_series = estimates['estimates']['cost']
    true_series = estimates['true_value']['cost']
    series_count = {effect: len(estimated_series[effect]) for effect in ('alpha', 'psi')}

    changepoint_estimates = {
        'estimates': {'alpha': {}, 'psi': {}},
        'true_value': {'alpha': {}, 'psi': {}},
    }

    # Notes kept from the original experiments: rpt.Dynp and rpt.Window were recorded as
    # the two best detectors, rpt.Binseg and rpt.BottomUp as the two worst.
    detector = rpt.Window(width=windows_width, model=cost)
    requested_breakpoints = process_count - 1

    def first_breakpoint(signal):
        return detector.fit_predict(signal, n_bkps=requested_breakpoints)[0]

    for effect in ('alpha', 'psi'):
        for k in range(series_count[effect]):
            true_signal = np.array(true_series[effect][k])
            estimated_signal = np.array(estimated_series[effect][k])

            changepoint_estimates['true_value'][effect][k] = first_breakpoint(true_signal)
            changepoint_estimates['estimates'][effect][k] = first_breakpoint(estimated_signal)

    return changepoint_estimates

def changepoint_accuracy(changepoint_estimates):
    """
    Measure how often the changepoint found on the estimated series matches the one
    found on the true series.

    Parameters
    ----------
    changepoint_estimates : dict
        changepoint_estimates[kind][effect][k] with kind in {'estimates', 'true_value'},
        effect in {'alpha' (patients), 'psi' (doctors)} and k = 0..n-1.

    Returns
    -------
    dict
        accuracy['exact'][group]  : share of entities whose two changepoints are equal;
        accuracy['almost'][group] : share of entities whose two changepoints differ by
                                    at most one period (earlier or later);
        with group in {'patient', 'doctor'}. A group with no entity gets NaN.
    """
    def _rates(effect):
        estimated = changepoint_estimates['estimates'][effect]
        truth = changepoint_estimates['true_value'][effect]
        count = len(estimated)

        if count == 0: # no entity: the share is undefined rather than a division by zero
            return float('nan'), float('nan')

        exact_hits = 0
        almost_hits = 0
        for k in range(count):
            gap = abs(estimated[k] - truth[k])
            if gap == 0: # exact accuracy
                exact_hits += 1
            if gap <= 1: # tolerance of one period on either side of the true changepoint
                almost_hits += 1

        return exact_hits / count, almost_hits / count

    patient_exact, patient_almost = _rates('alpha')
    doctor_exact, doctor_almost = _rates('psi')

    accuracy = {}
    accuracy['exact'] = {}
    accuracy['almost'] = {}
    accuracy['exact']['patient'] = patient_exact
    accuracy['exact']['doctor'] = doctor_exact
    accuracy['almost']['patient'] = patient_almost
    accuracy['almost']['doctor'] = doctor_almost

    return accuracy

In [None]:
%%capture
# Settings shared by the three "sparse" replicates: a strongly negative distance
# coefficient (-6) in the graph-formation model.
sparse_settings = dict(nb_of_periods=100,
                       n_patients=600,
                       n_doctors=200,
                       z=0.7,
                       alpha_law_graph=(0, 0.5),
                       psi_law_graph=(0, 0.5),
                       alpha_law_cost=(0, 0.5),
                       psi_law_cost=(0, 0.5),
                       preconditioner='ichol',
                       beta_age_p_graph=0.02,
                       beta_age_d_graph=0.02,
                       beta_sex_p_graph=0.001,
                       beta_sex_d_graph=0.001,
                       beta_distance_graph=-6,
                       beta_age_p_cost=0.01,
                       beta_age_d_cost=0.01,
                       beta_sex_p_cost=0.5,
                       beta_sex_d_cost=0.5,
                       beta_distance_cost=0.5)

# Replicate 1 (elapsed seconds: end_time_sparse_1 - start_time_sparse_1)
start_time_sparse_1 = time.time()
simulation_sparse_1 = temporal_simulation(**sparse_settings)
end_time_sparse_1 = time.time()

# Replicate 2 (elapsed seconds: end_time_sparse_2 - start_time_sparse_2)
start_time_sparse_2 = time.time()
simulation_sparse_2 = temporal_simulation(**sparse_settings)
end_time_sparse_2 = time.time()

# Replicate 3 (elapsed seconds: end_time_sparse_3 - start_time_sparse_3)
start_time_sparse_3 = time.time()
simulation_sparse_3 = temporal_simulation(**sparse_settings)
end_time_sparse_3 = time.time()

In [None]:
%%capture
# Settings shared by the three "mid" replicates: age coefficients of 0.01 and a
# distance coefficient of -0.5 in the graph-formation model.
mid_settings = dict(nb_of_periods=100,
                    n_patients=600,
                    n_doctors=200,
                    z=0.7,
                    alpha_law_graph=(0, 0.5),
                    psi_law_graph=(0, 0.5),
                    alpha_law_cost=(0, 0.5),
                    psi_law_cost=(0, 0.5),
                    preconditioner='ichol',
                    beta_age_p_graph=0.01,
                    beta_age_d_graph=0.01,
                    beta_sex_p_graph=0.5,
                    beta_sex_d_graph=0.5,
                    beta_distance_graph=-0.5,
                    beta_age_p_cost=0.01,
                    beta_age_d_cost=0.01,
                    beta_sex_p_cost=0.5,
                    beta_sex_d_cost=0.5,
                    beta_distance_cost=0.5)

# Replicate 1 (elapsed seconds: end_time_mid_1 - start_time_mid_1)
start_time_mid_1 = time.time()
simulation_mid_1 = temporal_simulation(**mid_settings)
end_time_mid_1 = time.time()

# Replicate 2 (elapsed seconds: end_time_mid_2 - start_time_mid_2)
start_time_mid_2 = time.time()
simulation_mid_2 = temporal_simulation(**mid_settings)
end_time_mid_2 = time.time()

# Replicate 3 (elapsed seconds: end_time_mid_3 - start_time_mid_3)
start_time_mid_3 = time.time()
simulation_mid_3 = temporal_simulation(**mid_settings)
end_time_mid_3 = time.time()

In [None]:
%%capture
# Settings shared by the three "dense" replicates: age coefficients raised to 0.035
# in the graph-formation model, distance coefficient of -0.5.
dense_settings = dict(nb_of_periods=100,
                      n_patients=600,
                      n_doctors=200,
                      z=0.7,
                      alpha_law_graph=(0, 0.5),
                      psi_law_graph=(0, 0.5),
                      alpha_law_cost=(0, 0.5),
                      psi_law_cost=(0, 0.5),
                      preconditioner='ichol',
                      beta_age_p_graph=0.035,
                      beta_age_d_graph=0.035,
                      beta_sex_p_graph=0.5,
                      beta_sex_d_graph=0.5,
                      beta_distance_graph=-0.5,
                      beta_age_p_cost=0.01,
                      beta_age_d_cost=0.01,
                      beta_sex_p_cost=0.5,
                      beta_sex_d_cost=0.5,
                      beta_distance_cost=0.5)

# Replicate 1 (elapsed seconds: end_time_dense_1 - start_time_dense_1)
start_time_dense_1 = time.time()
simulation_dense_1 = temporal_simulation(**dense_settings)
end_time_dense_1 = time.time()

# Replicate 2 (elapsed seconds: end_time_dense_2 - start_time_dense_2)
start_time_dense_2 = time.time()
simulation_dense_2 = temporal_simulation(**dense_settings)
end_time_dense_2 = time.time()

# Replicate 3 (elapsed seconds: end_time_dense_3 - start_time_dense_3)
start_time_dense_3 = time.time()
simulation_dense_3 = temporal_simulation(**dense_settings)
end_time_dense_3 = time.time()

In [None]:
# Per-patient / per-doctor time series of true and estimated fixed effects (FE), one per run.
df1_sparse = extract(simulation_sparse_1)
df2_sparse = extract(simulation_sparse_2)
df3_sparse = extract(simulation_sparse_3)
df1_mid = extract(simulation_mid_1)
df2_mid = extract(simulation_mid_2)
df3_mid = extract(simulation_mid_3)
df1_dense = extract(simulation_dense_1)
df2_dense = extract(simulation_dense_2)
df3_dense = extract(simulation_dense_3)

# Changepoints computed on the true FE series and on the series of FE estimates.
changepoint_sparse_1 = changepoint(df1_sparse, process_count=2, windows_width=20)
changepoint_sparse_2 = changepoint(df2_sparse, process_count=2, windows_width=20)
changepoint_sparse_3 = changepoint(df3_sparse, process_count=2, windows_width=20)
changepoint_mid_1 = changepoint(df1_mid, process_count=2, windows_width=20)
changepoint_mid_2 = changepoint(df2_mid, process_count=2, windows_width=20)
changepoint_mid_3 = changepoint(df3_mid, process_count=2, windows_width=20)
changepoint_dense_1 = changepoint(df1_dense, process_count=2, windows_width=20)
changepoint_dense_2 = changepoint(df2_dense, process_count=2, windows_width=20)
changepoint_dense_3 = changepoint(df3_dense, process_count=2, windows_width=20)

# Accuracy of the changepoints found on the estimates, for every run.
accuracy_sparse_1 = changepoint_accuracy(changepoint_sparse_1)
accuracy_sparse_2 = changepoint_accuracy(changepoint_sparse_2)
accuracy_sparse_3 = changepoint_accuracy(changepoint_sparse_3)
accuracy_mid_1 = changepoint_accuracy(changepoint_mid_1)
accuracy_mid_2 = changepoint_accuracy(changepoint_mid_2)
accuracy_mid_3 = changepoint_accuracy(changepoint_mid_3)
accuracy_dense_1 = changepoint_accuracy(changepoint_dense_1)
accuracy_dense_2 = changepoint_accuracy(changepoint_dense_2)
accuracy_dense_3 = changepoint_accuracy(changepoint_dense_3)


def _mean_accuracy(runs, precision, group):
    """Average accuracy[precision][group] over the replicate runs of one scenario."""
    return sum(run[precision][group] for run in runs) / len(runs)


_runs_sparse = (accuracy_sparse_1, accuracy_sparse_2, accuracy_sparse_3)
_runs_mid = (accuracy_mid_1, accuracy_mid_2, accuracy_mid_3)
_runs_dense = (accuracy_dense_1, accuracy_dense_2, accuracy_dense_3)

mean_exact_accuracy_sparse_patient = _mean_accuracy(_runs_sparse, 'exact', 'patient')
mean_exact_accuracy_sparse_doctor = _mean_accuracy(_runs_sparse, 'exact', 'doctor')
mean_exact_accuracy_mid_patient = _mean_accuracy(_runs_mid, 'exact', 'patient')
mean_exact_accuracy_mid_doctor = _mean_accuracy(_runs_mid, 'exact', 'doctor')
mean_exact_accuracy_dense_patient = _mean_accuracy(_runs_dense, 'exact', 'patient')
mean_exact_accuracy_dense_doctor = _mean_accuracy(_runs_dense, 'exact', 'doctor')

mean_almost_accuracy_sparse_patient = _mean_accuracy(_runs_sparse, 'almost', 'patient')
mean_almost_accuracy_sparse_doctor = _mean_accuracy(_runs_sparse, 'almost', 'doctor')
mean_almost_accuracy_mid_patient = _mean_accuracy(_runs_mid, 'almost', 'patient')
mean_almost_accuracy_mid_doctor = _mean_accuracy(_runs_mid, 'almost', 'doctor')
mean_almost_accuracy_dense_patient = _mean_accuracy(_runs_dense, 'almost', 'patient')
mean_almost_accuracy_dense_doctor = _mean_accuracy(_runs_dense, 'almost', 'doctor')

# One summary table per group: rows are the precision levels, columns the scenarios.
df_accuracy_patient = pd.DataFrame([[mean_exact_accuracy_sparse_patient, mean_exact_accuracy_mid_patient, mean_exact_accuracy_dense_patient],
                                    [mean_almost_accuracy_sparse_patient, mean_almost_accuracy_mid_patient, mean_almost_accuracy_dense_patient]], columns = ['sparse', 'mid', 'dense'])

df_accuracy_patient.index = ['exact', 'almost']
df_accuracy_patient.index.name = 'Precision'

df_accuracy_doctor = pd.DataFrame([[mean_exact_accuracy_sparse_doctor, mean_exact_accuracy_mid_doctor, mean_exact_accuracy_dense_doctor],
                                    [mean_almost_accuracy_sparse_doctor, mean_almost_accuracy_mid_doctor, mean_almost_accuracy_dense_doctor]], columns = ['sparse', 'mid', 'dense'])

df_accuracy_doctor.index = ['exact', 'almost']
df_accuracy_doctor.index.name = 'Precision'

# Each table goes to its own file: writing both to the same path made the doctor
# table overwrite the patient table.
df_accuracy_patient.to_csv('accuracy patient z0.7 ratio 3.csv')
df_accuracy_doctor.to_csv('accuracy doctor z0.7 ratio 3.csv')