In [1]:
import logging
import sys

import numpy as np
import pandas as pd
import torch

sys.path.append('/home/benoit.sautydechalon/deformetrica')

from deformetrica.core.estimator_tools.samplers.srw_mhwg_sampler import SrwMhwgSampler
from deformetrica.core.estimators.gradient_ascent import GradientAscent
from deformetrica.core.estimators.mcmc_saem import McmcSaem
# Estimators
from deformetrica.core.estimators.scipy_optimize import ScipyOptimize
from deformetrica.core.model_tools.manifolds.exponential_factory import ExponentialFactory
from deformetrica.core.model_tools.manifolds.generic_spatiotemporal_reference_frame import GenericSpatiotemporalReferenceFrame
from deformetrica.core.models.longitudinal_metric_learning import LongitudinalMetricLearning
from deformetrica.core.models.model_functions import create_regular_grid_of_points
from deformetrica.in_out.array_readers_and_writers import read_2D_array
from deformetrica.in_out.dataset_functions import read_and_create_scalar_dataset, read_and_create_image_dataset
from deformetrica.support.probability_distributions.multi_scalar_normal_distribution import MultiScalarNormalDistribution
from deformetrica.support.utilities.general_settings import Settings
from deformetrica import estimate_longitudinal_metric_model
from deformetrica.in_out.array_readers_and_writers import *
from deformetrica.launch.estimate_longitudinal_metric_model import instantiate_longitudinal_metric_model
import deformetrica as dfca



## 1. Loading the individual parameters and raw data

In [24]:
# Root folder of the study; the loading cells below build their file paths from it.
path = 'bivariate_study'

In [136]:
# Load the estimated parameters saved under `<path>/output`.
# `allow_pickle=True` unpickles arbitrary objects: only use it on files you produced yourself.
# The trailing `[()]` indexes the loaded array with an empty tuple; `rer` is used as a
# dictionary further down, so this presumably unwraps a 0-d object array -- TODO confirm.
rer = np.load(
    f'{path}/output/LongitudinalMetricModel__EstimatedParameters__IndividualRandomEffectsSamples.npy',
    allow_pickle=True,
)[()]
trajectories = np.load(
    f'{path}/output/LongitudinalMetricModel__EstimatedParameters__Trajectory.npy',
    allow_pickle=True,
)[()]

# One row per line of the id file; the first column is read as the subject id below.
ids = pd.read_csv(
    f'{path}/output/LongitudinalMetricModel_subject_ids_unique.txt',
    header=None,
).values

In [102]:
# First we put them in a dataframe for visualization purpose.
# Every random effect is averaged over axis 0 of its stored array, and the rows are
# labelled with the integer value of the first column of `ids`.
ip = pd.DataFrame(
    {
        'tau': rer['onset_age'].mean(axis=0),
        'xi': rer['log_acceleration'].mean(axis=0),
        'source': rer['sources'].mean(axis=0),
    },
    index=[int(row[0]) for row in ids],
    columns=['tau', 'xi', 'source'],
)

ip

Unnamed: 0,tau,xi,source
4,67.606189,-0.073793,0.452786
6,74.419056,-1.080613,-0.718563
7,66.140717,-0.071428,0.466941
16,81.893425,-0.729838,-0.078377
29,60.146789,0.872884,1.235304
...,...,...,...
1402,63.308329,0.286339,1.066510
1409,58.746042,0.348766,-0.103641
1419,85.071384,0.532479,1.062330
1425,62.906911,-0.746155,1.226602


In [103]:
# Then in the required format for the deformetrica model.
#
# Build a *new* dictionary rather than overwriting the entries of `rer`: the previous
# version mutated `rer` in place (and `averaged_rer` was just another name for it), so
# running this cell twice -- or re-running the dataframe cell above afterwards --
# averaged already-averaged arrays. With a separate dictionary the cell is idempotent
# and `rer` keeps the raw arrays as loaded.
averaged_rer = dict(rer)  # shallow copy: keys not overridden below are carried over unchanged
averaged_rer['onset_age'] = rer['onset_age'].mean(axis=0)
averaged_rer['log_acceleration'] = rer['log_acceleration'].mean(axis=0)
# Wrap every per-subject mean source in its own length-1 axis
# (same result as `np.array([[s] for s in mean_sources])`).
averaged_rer['sources'] = rer['sources'].mean(axis=0)[:, np.newaxis]
averaged_rer

{'onset_age': array([ 67.60618949,  74.41905628,  66.14071748,  81.89342454,
         60.14678875,  81.62823344,  69.53361246,  82.32514039,
         84.68083294,  78.43670691,  67.49330881,  71.71732925,
         94.25442516,  73.72854262,  74.37217167,  70.91582779,
         61.9286866 ,  77.31320713,  69.77981526,  82.21828163,
         71.68515321,  78.88131915,  91.07455372,  66.01306741,
         73.59114472,  65.76306417,  80.40158112,  68.22999881,
         80.07310386,  75.81677425,  82.83848927,  73.98223395,
         88.8403536 ,  80.23788302,  71.59234461,  81.15598374,
         64.33163164,  57.80121215,  83.80203098,  63.05123313,
         80.4345927 ,  81.8298373 ,  92.41945854,  68.27427679,
         93.25661806,  93.80526621,  71.29632975,  83.51184077,
         64.11425737,  76.95369232,  82.04814597,  68.42450281,
         86.92691963,  85.49510879,  98.67072755,  89.43516852,
         92.3232145 ,  77.45635281,  71.52718607,  70.01767773,
         80.86197474,  72.2

In [9]:
# Absolute times and subject ids come from the `output_low_kernel` folder; the two
# observed scores are the first two columns of Y.csv. Only the first column of the
# times / ids files is used.
# NOTE(review): this rebinds `ids`, which the loading cell above binds to the array read
# from `output/..._subject_ids_unique.txt` -- confirm that cell order does not matter.
times = pd.read_csv(
    path + '/output_low_kernel/LongitudinalMetricModel_absolute_times.txt',
    header=None,
).iloc[:, 0].tolist()

# Parse Y.csv once and slice both columns from the same frame (it was read twice before).
observations = pd.read_csv(path + '/bivariate_data/Y.csv', header=None)
logistic = observations.iloc[:, 0].tolist()
sum_logistic = observations.iloc[:, 1].tolist()

ids = [
    int(subject_id)
    for subject_id in pd.read_csv(
        path + '/output_low_kernel/LongitudinalMetricModel_subject_ids.txt',
        header=None,
    ).iloc[:, 0]
]

In [220]:
# NOTE(review): `adas_memory` and `hippocampus` are not defined in any cell visible in
# this notebook, and the output recorded for this cell is a ValueError (942 values
# against an index of length 500). It looks like a leftover from another dataset --
# confirm whether it should be removed or given its own loading cell.
data = pd.DataFrame(index=[ids, times], columns=['adas_memory', 'hippocampus'])
data['adas_memory'] = adas_memory
data['hippocampus'] = hippocampus
data

ValueError: Length of values (942) does not match length of index (500)

In [10]:
# Observed scores, indexed by (subject id, time).
data = pd.DataFrame(
    {'logistic': logistic, 'sum_logistic': sum_logistic},
    index=pd.MultiIndex.from_arrays([ids, times]),
    columns=['logistic', 'sum_logistic'],
)
data

Unnamed: 0,Unnamed: 1,logistic,sum_logistic
0,79.701805,0.720653,0.686915
0,80.139442,0.736504,0.748917
0,80.577080,0.655216,0.670995
0,81.014717,0.589541,0.722403
0,81.452362,0.632006,0.776159
...,...,...,...
99,44.463203,0.229042,0.268949
99,45.176208,0.323663,0.265731
99,45.889214,0.314090,0.308468
99,46.602219,0.339580,0.444543


In [11]:
# Reconstructed values written by the model, relabelled with the same columns and
# index as `data` so the two frames can be compared element-wise.
# NOTE(review): the folder is hard-coded to 'simulated_study/output_4' instead of being
# built from `path` -- confirm this is the run that matches `data`.
reconstructed = pd.read_csv(
    'simulated_study/output_4/LongitudinalMetricModel_reconstructed_values.txt',
    sep=' ',
    header=None,
)
reconstructed.columns = ['logistic', 'sum_logistic']
reconstructed.index = data.index

In [23]:
# Summary statistics of the absolute difference between reconstruction and observation.
(reconstructed - data).abs().describe()

Unnamed: 0,logistic,sum_logistic
count,500.0,500.0
mean,0.051269,0.053454
std,0.040593,0.038906
min,3.5e-05,7.7e-05
25%,0.019566,0.022477
50%,0.042437,0.046694
75%,0.072438,0.074098
max,0.231824,0.239956


## 2. Evaluating the reconstruction error

In [207]:
# Switch to the simulated study and collect the settings for this run.
# NOTE(review): this rebinds `path`, which the loading cells above use with
# 'bivariate_study'.
path = 'simulated_study'
args = {
    'verbosity': 'INFO',
    'output': 'personalize',
    'model': path + '/model_after_fit.xml',
    'dataset': path + '/data_set.xml',
    'parameters': path + '/optimization_parameters_saem.xml',
}

# Read xml files, set general settings, and call the adapted function.
# (This used to be a free-floating string literal, i.e. a statement with no effect.)
xml_parameters = dfca.io.XmlParameters()
xml_parameters.read_all_xmls(args['model'], args['dataset'], args['parameters'])

# `logging` is imported explicitly in the import cell; this cell previously only worked
# when a wildcard import happened to bring the name into scope.
logger = logging.getLogger(__name__)
logging.getLogger('matplotlib').setLevel(logging.ERROR)  # silence matplotlib below ERROR
logger.setLevel(logging.INFO)



In [208]:
# Build the scalar dataset from the XML settings, then instantiate the longitudinal
# metric model on it. The second returned value is kept as `individual_RER`.
dataset = read_and_create_scalar_dataset(xml_parameters)
model, individual_RER = instantiate_longitudinal_metric_model(
    xml_parameters,
    logger,
    dataset,
    observation_type='scalar',
)

INFO:__main__:Setting initial onset ages from simulated_study/output/LongitudinalMetricModel_onset_ages.txt file
INFO:__main__:Setting initial log accelerations from simulated_study/output/LongitudinalMetricModel_log_accelerations.txt file
INFO:__main__:Initializing exponential type to parametric
INFO:__main__:Loading metric parameters from file simulated_study/output/LongitudinalMetricModel_metric_parameters.txt
INFO:__main__:Loading the interpolation points from file simulated_study/output/LongitudinalMetricModel_interpolation_points.txt
INFO:__main__:The width for the metric interpolation is set to 0.3
INFO:__main__:I am setting the no_parallel_transport flag to False.
INFO:__main__:>> Reading 1-source initial modulation matrix from file: simulated_study/output/LongitudinalMetricModel_modulation_matrix.txt
INFO:__main__:Setting initial sources from simulated_study/output/LongitudinalMetricModel_sources.txt file
INFO:deformetrica.core.models.longitudinal_metric_learning:Acceleration 

759 good iterations out of 760


INFO:deformetrica.core.models.longitudinal_metric_learning:Tmin 40 Tmax 104.85572814941406 Update of the spatiotemporalframe: 4509 ms


862 good iterations out of 863


INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints above tmax
INFO:__main__:>> Initial noise variance set to 0.22739248621836305 based on the initial mean residual value.
INFO:deformetrica.core.models.longitudinal_metric_learning:>> The time shift variance prior degrees of freedom parameter is set to 100
INFO:deformetrica.core.models.longitudinal_metric_learning:>> The log-acceleration variance prior degrees of freedom parameter is set to the number of

In [130]:
# Fetch the fixed effects and the averaged individual random effects from the model
# helpers (the meaning of the `False` argument is not visible here -- TODO confirm).
v0, p0, metric_parameters, modulation_matrix = model._fixed_effects_to_torch_tensors(False)
onset_ages, log_accelerations, sources = model._individual_RER_to_torch_tensors(averaged_rer, False)
t0 = model.get_reference_time()

absolute_times = model._compute_absolute_times(dataset.times, log_accelerations, onset_ages)

# Flatten the per-subject absolute times into a single list.
absolute_times_to_write = [
    value
    for subject_times in absolute_times
    for value in subject_times.cpu().data.numpy()
]

# Writing the flattened times to disk is left disabled, as in the original cell:
#np.savetxt(os.path.join(Settings().output_dir, "LongitudinalMetricModel_absolute_times.txt"), np.array(absolute_times_to_write))

accelerations = torch.exp(log_accelerations)

# Update the model's spatiotemporal reference frame for these absolute times.
model._update_spatiotemporal_reference_frame(
    absolute_times,
    p0,
    v0,
    metric_parameters,
    modulation_matrix,
)

INFO:deformetrica.core.models.longitudinal_metric_learning:Acceleration factors max/min: (2.7016509, 133, 0.32726613, 233)


610 good iterations out of 611


INFO:deformetrica.core.models.longitudinal_metric_learning:Tmin 45.84938049316406 Tmax 96.53067016601562 Update of the spatiotemporalframe: 2150 ms


657 good iterations out of 658


In [131]:
# Evaluate the model at every absolute time of every subject and collect:
#   - `predictions`: one numpy array per observation, in subject order,
#   - `subject_ids` / `times`: the matching subject id and dataset time,
#   - `residuals`: per subject, the sum of absolute errors over that subject's
#     observations divided by the number of observations.
# NOTE(review): this rebinds `times`, which an earlier cell uses to build the index of
# `data` -- confirm that cell order does not matter.
predictions = []
subject_ids = []
times = []
residuals = []

targets = dataset.deformable_objects
number_of_subjects = dataset.number_of_subjects

for i in range(number_of_subjects):
    predictions_i = []
    for j, t in enumerate(absolute_times[i]):
        if sources is not None:
            prediction = model.spatiotemporal_reference_frame.get_position(t, sources=sources[i])
        else:
            prediction = model.spatiotemporal_reference_frame.get_position(t)

        # Convert to numpy once and reuse the result (it was converted twice before);
        # `detach()` replaces the legacy `.data` accessor.
        prediction_np = prediction.detach().cpu().numpy()
        predictions_i.append(prediction_np)
        predictions.append(prediction_np)
        subject_ids.append(dataset.subject_ids[i])
        times.append(dataset.times[i][j])

    targets_i = targets[i].detach().cpu().numpy()

    # Make the list -> array conversion explicit before subtracting the targets.
    errors_i = np.asarray(predictions_i) - targets_i
    residuals.append(np.linalg.norm(errors_i, axis=0, ord=1) / len(absolute_times[i]))


INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints below tmin
INFO:deformetrica.core.model_tools.manifolds.generic_geodesic:Want to estimate timepoints above tmax


In [132]:
# Put the predictions in a frame shaped like `data` (same index, same columns) so that
# `predicted - data` lines up column by column.
# The column names used to be hard-coded ('adas_memory', 'hippocampus'); they are now
# taken from `data`, so the k-th component of each prediction fills the k-th column of
# `data`. With a two-column `data` named that way the result is unchanged.
prediction_matrix = np.asarray(predictions)
predicted = data.copy()
for column_position, column_name in enumerate(data.columns):
    predicted[column_name] = prediction_matrix[:, column_position]

In [133]:
# Element-wise prediction error, then summary statistics of its absolute value.
diff = predicted.sub(data)
diff.abs().describe()

Unnamed: 0,adas_memory,hippocampus
count,942.0,942.0
mean,0.223971,0.413521
std,0.220601,0.231883
min,0.000156,0.003487
25%,0.058899,0.237536
50%,0.139755,0.407358
75%,0.30619,0.592808
max,0.854399,1.018662


In [134]:
# Summary statistics of the predicted values, to compare with those of `data` below.
predicted.describe()

Unnamed: 0,adas_memory,hippocampus
count,942.0,942.0
mean,0.3741,0.437007
std,0.285419,0.619094
min,-0.323792,-0.617507
25%,0.253271,-0.11544
50%,0.474453,0.482866
75%,0.556344,0.922737
max,0.937028,1.572575


In [129]:
# Summary statistics of the observed values, reporting the 90th percentile
# (pandas adds the median on its own).
data.describe(percentiles=[0.9])

Unnamed: 0,adas_memory,hippocampus
count,942.0,942.0
mean,0.483881,0.43018
std,0.175045,0.181203
min,0.066667,0.0
50%,0.488889,0.432856
90%,0.711111,0.648502
max,0.985111,1.0
