In [1]:
import logging
import sys

import numpy as np
import pandas as pd
import torch

sys.path.append('/home/benoit.sautydechalon/deformetrica')

from deformetrica.core.estimator_tools.samplers.srw_mhwg_sampler import SrwMhwgSampler
from deformetrica.core.estimators.gradient_ascent import GradientAscent
from deformetrica.core.estimators.mcmc_saem import McmcSaem
# Estimators
from deformetrica.core.estimators.scipy_optimize import ScipyOptimize
from deformetrica.core.model_tools.manifolds.exponential_factory import ExponentialFactory
from deformetrica.core.model_tools.manifolds.generic_spatiotemporal_reference_frame import GenericSpatiotemporalReferenceFrame
from deformetrica.core.models.longitudinal_metric_learning import LongitudinalMetricLearning
from deformetrica.core.models.model_functions import create_regular_grid_of_points
from deformetrica.in_out.array_readers_and_writers import read_2D_array
from deformetrica.in_out.dataset_functions import read_and_create_scalar_dataset, read_and_create_image_dataset
from deformetrica.support.probability_distributions.multi_scalar_normal_distribution import MultiScalarNormalDistribution
from deformetrica.support.utilities.general_settings import Settings
from deformetrica import estimate_longitudinal_metric_model
from deformetrica.in_out.array_readers_and_writers import *
from deformetrica.launch.estimate_longitudinal_metric_model import instantiate_longitudinal_metric_model
import deformetrica as dfca



## 1. Loading the individual parameters and raw data

In [37]:
# Root folder of the simulated study; the loading cells below prefix their file paths with it.
path = 'simulated_study'

In [40]:
# Load the estimation outputs stored under <path>/output_5.
# NOTE(review): allow_pickle=True executes pickle on load -- only use on trusted files.
# Indexing with `[()]` unwraps the 0-d object array that wraps a pickled Python object
# (`rer` is used as a dictionary afterwards; presumably `trajectories` is too -- TODO confirm).
rer = np.load(
    f'{path}/output_5/LongitudinalMetricModel__EstimatedParameters__IndividualRandomEffectsSamples.npy',
    allow_pickle=True,
)[()]
trajectories = np.load(
    f'{path}/output_5/LongitudinalMetricModel__EstimatedParameters__Trajectory.npy',
    allow_pickle=True,
)[()]
# Unique subject identifiers, kept as the raw 2-D array returned by pandas (one row per line).
ids = pd.read_csv(
    f'{path}/output_5/LongitudinalMetricModel_subject_ids_unique.txt',
    header=None,
).values

In [41]:
# Gather the individual parameters in a dataframe for visualization purposes:
# every column is the mean over axis 0 of the corresponding sampled random effect,
# and the index is the integer subject id taken from the first column of `ids`.
ip = pd.DataFrame(
    {
        'tau': rer['onset_age'].mean(axis=0),
        'xi': rer['log_acceleration'].mean(axis=0),
        'source': rer['sources'].mean(axis=0),
    },
    index=[int(row[0]) for row in ids],
)

ip

Unnamed: 0,tau,xi,source
0,57.234959,0.013993,0.113005
1,75.403667,-0.635392,-0.013705
2,67.486395,1.067332,0.167432
3,76.939583,-0.438451,-0.009470
4,60.442647,-0.215578,0.011482
...,...,...,...
95,62.383067,-0.442946,-0.000977
96,31.692327,-0.975841,0.248329
97,84.056978,-0.876157,-0.038796
98,75.402298,-0.021838,-0.175676


In [5]:
# Then in the required format for the deformetrica model.
#
# The averages are stored in a *new* dictionary rather than written back into `rer`.
# Overwriting `rer` made this cell non-idempotent: running it twice (or re-running the
# dataframe cell after it) averaged already-averaged arrays. `dict(rer)` is a shallow copy,
# so any other key present in `rer` is still carried over unchanged.
averaged_rer = dict(rer)
averaged_rer['onset_age'] = rer['onset_age'].mean(axis=0)
averaged_rer['log_acceleration'] = rer['log_acceleration'].mean(axis=0)
# Each averaged source value is wrapped in its own row, i.e. a trailing axis of length 1
# is inserted right after the first axis (same result as the per-element [[source], ...] list).
averaged_rer['sources'] = rer['sources'].mean(axis=0)[:, np.newaxis]
averaged_rer

{'onset_age': array([ 71.45334796,  60.87557225,  56.81096268,  85.67214893,
         60.32979066,  63.44964174, 103.9245966 ,  92.83191348,
         36.41627641,  51.62714774,  95.00884024,  71.06143529,
         51.87449329,  71.86792188,  64.54125897,  63.60018842,
         71.91099723,  53.02136603,  55.70486947,  62.01726054,
         64.97403875,  77.05698696,  89.44385608,  52.77783832,
         73.22491155,  63.43902417,  49.82094629,  80.53407046,
         45.01278505,  74.89388988,  53.77607442,  73.98949744,
         78.31879818,  52.21042872,  64.98824218,  62.66350842,
         18.50273555,  70.29188591,  79.44111999,  57.98627381,
         69.40226802,  65.38654428,  71.652016  ,  72.7549246 ,
         66.83158571,  80.38764051,  63.97023588,  73.29367736,
         65.62672296,  69.71439598,  46.22178902,  62.87569493,
         78.8170029 ,  54.49066882,  75.00747538,  58.59942694,
         61.58754298,  55.84044972,  81.13334351,  59.34408474,
         61.33143898,  61.8

In [22]:
# Observations indexed by the (subject id, time) pair.
# NOTE(review): `adas_memory` and `hippocampus` are not defined by any cell visible here, and
# `times` is only defined further down -- this cell appears to rely on state from an earlier
# session; confirm it still runs after Restart & Run All.
data = pd.DataFrame(
    {
        'adas_memory': adas_memory,
        'hippocampus': hippocampus,
    },
    index=[ids, times],
)
data

Unnamed: 0,Unnamed: 1,adas_memory,hippocampus
4,74.868477,0.407333,0.377389
4,75.075706,0.518444,0.375055
4,75.282936,0.466667,0.364087
4,75.490158,0.540667,0.382969
4,76.111847,0.511111,0.429288
...,...,...,...
1425,78.461342,0.540667,0.242699
1425,79.093491,0.607333,0.267690
1430,84.350166,0.577778,0.745385
1430,84.780472,0.659333,0.774981


In [44]:
# Flat, row-aligned lists describing the simulated observations.
# Y.csv is parsed once and both score columns are sliced from the same array
# (it used to be read from disk twice, once per column).
times = list(pd.read_csv(path+'/output_5/LongitudinalMetricModel_absolute_times.txt', header=None).values[:, 0])

simulated_scores = pd.read_csv(path+'/simulated_data_5/Y.csv', header=None).values
logistic = list(simulated_scores[:, 0])       # first column of Y.csv
sum_logistic = list(simulated_scores[:, 1])   # second column of Y.csv

# Subject id of every row, cast to a plain Python int.
ids = [
    int(subject_id)
    for subject_id in pd.read_csv(path+'/output_5/LongitudinalMetricModel_subject_ids.txt', header=None).values[:, 0]
]

In [45]:
# Simulated observations, indexed by the (subject id, time) pair built from the flat lists.
data = pd.DataFrame(
    {
        'logistic': logistic,
        'sum_logistic': sum_logistic,
    },
    index=[ids, times],
)
data

Unnamed: 0,Unnamed: 1,logistic,sum_logistic
0,96.094437,0.825440,0.758157
0,96.597565,0.694294,0.864409
0,97.100685,0.733746,0.803578
0,97.603806,0.572616,0.974391
0,98.106934,0.807546,0.816188
...,...,...,...
99,75.555489,0.740701,0.581178
99,75.777214,0.596949,0.658479
99,75.998947,0.633617,0.734469
99,76.220680,0.524243,0.633501


In [58]:
# Ground-truth individual parameters of the simulation.
# The unnamed first CSV column (read by pandas as 'Unnamed: 0') becomes the index, renamed 'ID'.
individual_parameters = (
    pd.read_csv(path+'/simulated_data_5/true_individual_parameters.csv')
    .set_index('Unnamed: 0')
    .rename_axis('ID')
)
individual_parameters

Unnamed: 0_level_0,xi,tau,source
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,-0.057622,64.533047,-2.892781
1,-0.586603,68.477432,4.401125
2,0.299599,57.162128,-2.918315
3,-1.085045,75.067964,-7.672403
4,0.114910,58.931843,-0.342369
...,...,...,...
95,0.212753,58.217001,-6.283154
96,0.672398,54.109809,-3.066973
97,-0.109101,62.784147,-1.224788
98,0.494468,77.708597,-2.392265


In [85]:
# Individual parameters written by the "2_gradient_descent_on_the_mode" preprocessing step,
# laid out like the `individual_parameters` dataframe (same columns, same index).
tau = [row[0] for row in pd.read_csv(path+'/preprocessing_5/2_gradient_descent_on_the_mode/LongitudinalMetricModel_onset_ages.txt', header=None).values]
xi = [row[0] for row in pd.read_csv(path+'/preprocessing_5/2_gradient_descent_on_the_mode/LongitudinalMetricModel_log_accelerations.txt', header=None).values]
# `np.float` was a plain alias of the builtin `float`; it was deprecated in NumPy 1.20 and
# removed in 1.24. `row.item()` keeps the original requirement that every row holds exactly
# one value (it raises otherwise) before the conversion to float.
sources = [float(row.item()) for row in pd.read_csv(path+'/preprocessing_5/2_gradient_descent_on_the_mode/LongitudinalMetricModel_sources.txt', header=None).values]

individual_parameters_after_initialization = pd.DataFrame(columns=individual_parameters.columns, index=individual_parameters.index)
# Values are assigned positionally: row k of each file goes to the k-th index entry.
individual_parameters_after_initialization['tau'] = tau
individual_parameters_after_initialization['xi'] = xi
individual_parameters_after_initialization['source'] = sources

In [77]:
# Individual parameters read from the output_5 folder, laid out like the
# `individual_parameters` dataframe (same columns, same index).
tau = [row[0] for row in pd.read_csv(path+'/output_5/LongitudinalMetricModel_onset_ages.txt', header=None).values]
xi = [row[0] for row in pd.read_csv(path+'/output_5/LongitudinalMetricModel_log_accelerations.txt', header=None).values]
# `np.float` was a plain alias of the builtin `float`; it was deprecated in NumPy 1.20 and
# removed in 1.24. `row.item()` keeps the original requirement that every row holds exactly
# one value (it raises otherwise) before the conversion to float.
sources = [float(row.item()) for row in pd.read_csv(path+'/output_5/LongitudinalMetricModel_sources.txt', header=None).values]

estimated_ip = pd.DataFrame(columns=individual_parameters.columns, index=individual_parameters.index)
# Values are assigned positionally: row k of each file goes to the k-th index entry.
estimated_ip['tau'] = tau
estimated_ip['xi'] = xi
estimated_ip['source'] = sources

In [87]:
# Summary statistics of the absolute gap between `individual_parameters` and `estimated_ip`.
(individual_parameters - estimated_ip).abs().describe()

Unnamed: 0,xi,tau,source
count,100.0,100.0,100.0
mean,0.613673,6.197021,3.717563
std,0.443893,5.500334,2.648419
min,0.008004,0.050568,0.07658
25%,0.272298,2.280326,1.710237
50%,0.54516,4.485741,3.279438
75%,0.848787,8.421963,5.324163
max,1.950411,25.907011,12.640162


In [32]:
# Switch to the MDS study and load its flat, row-aligned observation lists.
path = 'mds_study'
times = list(pd.read_csv(path+'/output/LongitudinalMetricModel_absolute_times.txt', header=None).values[:, 0])

# MDS_values.csv is parsed once and both score columns are sliced from the same array
# (it used to be read from disk twice, once per column).
mds_scores = pd.read_csv(path+'/mds_data/MDS_values.csv', header=None).values
mds1 = list(mds_scores[:, 0])   # first column of MDS_values.csv
mds2 = list(mds_scores[:, 1])   # second column of MDS_values.csv

# Subject id of every row, cast to a plain Python int.
ids = [
    int(subject_id)
    for subject_id in pd.read_csv(path+'/output/LongitudinalMetricModel_subject_ids.txt', header=None).values[:, 0]
]

In [33]:
# MDS observations, indexed by the (subject id, time) pair built from the flat lists.
data = pd.DataFrame(
    {
        'mds1': mds1,
        'mds2': mds2,
    },
    index=[ids, times],
)
data

Unnamed: 0,Unnamed: 1,mds1,mds2
3001,59.910362,0.500000,0.133333
3001,59.982315,0.571429,0.133333
3001,60.215916,0.500000,0.133333
3001,60.449516,0.500000,0.200000
3001,60.683113,0.571429,0.200000
...,...,...,...
3467,71.582550,0.500000,0.733333
3467,71.708649,0.714286,0.800000
3467,71.957832,0.357143,0.733333
3467,72.333115,0.357143,0.666667


In [35]:
# Reconstructed values written to the personalize_output folder (space-separated, no header).
reconstructed = pd.read_csv(
    'mds_study/personalize_output/LongitudinalMetricModel_reconstructed_values.txt',
    sep=' ',
    header=None,
)
# Label the two columns like `data` and reuse its index so both frames can be subtracted.
# NOTE(review): the index is copied positionally -- assumes the file rows follow the same
# order as `data`; confirm.
reconstructed.columns, reconstructed.index = ['mds1', 'mds2'], data.index

In [36]:
# Summary statistics of the absolute difference between reconstructed and observed values.
(reconstructed - data).abs().describe()

Unnamed: 0,mds1,mds2
count,3000.0,3000.0
mean,0.163081,0.180404
std,0.13482,0.168752
min,0.000148,9.4e-05
25%,0.061791,0.060085
50%,0.131837,0.135601
75%,0.231493,0.250328
max,1.103094,2.036188


## 2. Evaluating the reconstruction error

In [10]:
# Settings of the personalization run on the MDS study.
path = 'mds_study'
args = {
    'verbosity': 'INFO',
    'output': 'personalize',
    'model': path + '/model_after_fit.xml',
    'dataset': path + '/data_set.xml',
    'parameters': path + '/optimization_parameters_saem.xml',
}

# Read xml files, set general settings, and call the adapted function.
# (This sentence used to be a bare string-literal statement in the middle of the cell.)
xml_parameters = dfca.io.XmlParameters()
xml_parameters.read_all_xmls(args['model'],
                             args['dataset'],
                             args['parameters'])

logger = logging.getLogger(__name__)
# Silence matplotlib's logger below ERROR.
logging.getLogger('matplotlib').setLevel(logging.ERROR)
# The level now comes from args['verbosity'] instead of a second hard-coded constant;
# Logger.setLevel accepts level names such as 'INFO', so the effective level is unchanged.
logger.setLevel(args['verbosity'])



In [11]:
# Build the scalar dataset described by the XML parameters, then instantiate the
# longitudinal metric model together with its individual random effects.
# Per the log output saved with this cell, the instantiation reads onset ages, log accelerations,
# metric parameters, interpolation points, modulation matrix and sources from mds_study/output.
dataset = read_and_create_scalar_dataset(xml_parameters)
model, individual_RER = instantiate_longitudinal_metric_model(xml_parameters, logger, dataset, observation_type='scalar')

INFO:__main__:Setting initial onset ages from mds_study/output/LongitudinalMetricModel_onset_ages.txt file
INFO:__main__:Setting initial log accelerations from mds_study/output/LongitudinalMetricModel_log_accelerations.txt file
INFO:__main__:Initializing exponential type to parametric
INFO:__main__:Loading metric parameters from file mds_study/output/LongitudinalMetricModel_metric_parameters.txt
INFO:__main__:Loading the interpolation points from file mds_study/output/LongitudinalMetricModel_interpolation_points.txt
INFO:__main__:The width for the metric interpolation is set to 0.3
INFO:__main__:I am setting the no_parallel_transport flag to False.
INFO:__main__:>> Reading 1-source initial modulation matrix from file: mds_study/output/LongitudinalMetricModel_modulation_matrix.txt
INFO:__main__:Setting initial sources from mds_study/output/LongitudinalMetricModel_sources.txt file
INFO:deformetrica.core.models.longitudinal_metric_learning:Acceleration factors max/min: (2.7182817, 57, 0.1

Patient  196  was estimated with an absurd timeshift tensor(126.3761)
In geodesic update ,  40 65.23617055599765 110


INFO:deformetrica.core.models.longitudinal_metric_learning:Tmin 40 Tmax 110 Update of the spatiotemporalframe: 1421 ms
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints above tmax
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints above tmax
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to est

In [12]:
# Turn the model's fixed effects and the averaged individual random effects into torch
# tensors (the trailing False is forwarded unchanged to both private helpers).
v0, p0, metric_parameters, modulation_matrix = model._fixed_effects_to_torch_tensors(False)
onset_ages, log_accelerations, sources = model._individual_RER_to_torch_tensors(
    averaged_rer, False
)
t0 = model.get_reference_time()

# Per-subject absolute times computed by the model from the dataset times, the log
# accelerations and the onset ages.
absolute_times = model._compute_absolute_times(
    dataset.times, log_accelerations, onset_ages
)

# Same values flattened into one list, in subject order.
absolute_times_to_write = [
    value
    for subject_times in absolute_times
    for value in subject_times.cpu().data.numpy()
]

#np.savetxt(os.path.join(Settings().output_dir, "LongitudinalMetricModel_absolute_times.txt"), np.array(absolute_times_to_write))

accelerations = log_accelerations.exp()

# Refresh the spatiotemporal reference frame so that positions can be queried at these times.
model._update_spatiotemporal_reference_frame(
    absolute_times, p0, v0, metric_parameters, modulation_matrix
)

INFO:deformetrica.core.models.longitudinal_metric_learning:Acceleration factors max/min: (2.7182817, 57, 0.17859595, 125)


Patient  196  was estimated with an absurd timeshift tensor(126.3209)
In geodesic update ,  40 65.23617055599765 110


INFO:deformetrica.core.models.longitudinal_metric_learning:Tmin 40 Tmax 110 Update of the spatiotemporalframe: 1510 ms


In [13]:
# Query the model position at every absolute time of every subject and collect:
#   predictions  - one numpy array per observation, in subject-then-visit order
#   subject_ids  - subject id of each observation
#   times        - original dataset time of each observation
#   residuals    - one entry per subject (see below)
targets = dataset.deformable_objects
number_of_subjects = dataset.number_of_subjects

predictions, subject_ids, times, residuals = [], [], [], []

for i in range(number_of_subjects):
    predictions_i = []
    for j, t in enumerate(absolute_times[i]):
        # Only pass the subject's sources when the model provides some.
        position_kwargs = {} if sources is None else {'sources': sources[i]}
        prediction = model.spatiotemporal_reference_frame.get_position(t, **position_kwargs)
        prediction_array = prediction.cpu().data.numpy()
        predictions_i.append(prediction_array)
        predictions.append(prediction_array)
        subject_ids.append(dataset.subject_ids[i])
        times.append(dataset.times[i][j])

    targets_i = targets[i].cpu().data.numpy()

    # 1-norm of the prediction errors along axis 0, divided by the subject's number of times
    # (presumably a per-feature mean absolute error -- assumes (visits, features) arrays).
    residuals.append(
        np.linalg.norm(predictions_i - targets_i, axis=0, ord=1) / len(absolute_times[i])
    )


INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints above tmax
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints above tmax
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints above tmax
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints a

In [14]:
# Copy of `data` whose two score columns are replaced by the model predictions
# (component 0 -> 'mds1', component 1 -> 'mds2'), assigned positionally.
predicted = data.assign(
    mds1=[prediction[0] for prediction in predictions],
    mds2=[prediction[1] for prediction in predictions],
)

In [15]:
# Signed prediction error, followed by summary statistics of its absolute value.
diff = predicted.sub(data)
diff.abs().describe()

Unnamed: 0,mds1,mds2
count,3000.0,3000.0
mean,0.155629,0.151568
std,0.131285,0.14538
min,5.9e-05,0.000157
25%,0.058197,0.052837
50%,0.123773,0.112176
75%,0.215987,0.203136
max,1.057793,1.719999


In [134]:
# Summary statistics of the predicted values.
# NOTE(review): the execution count (134) is out of order and the saved output below shows
# adas_memory/hippocampus columns, whereas `predicted` is built with mds1/mds2 above --
# this output appears to come from a different session state; re-run after Restart & Run All.
predicted.describe()

Unnamed: 0,adas_memory,hippocampus
count,942.0,942.0
mean,0.3741,0.437007
std,0.285419,0.619094
min,-0.323792,-0.617507
25%,0.253271,-0.11544
50%,0.474453,0.482866
75%,0.556344,0.922737
max,0.937028,1.572575


In [129]:
# Summary statistics of the observed values, reporting the 90th percentile.
data.describe(percentiles=[0.9])

Unnamed: 0,adas_memory,hippocampus
count,942.0,942.0
mean,0.483881,0.43018
std,0.175045,0.181203
min,0.066667,0.0
50%,0.488889,0.432856
90%,0.711111,0.648502
max,0.985111,1.0
