In [None]:
import datetime
from IPython.display import display
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import pickle
%matplotlib inline
#%matplotlib tk
# Disabled logging configuration: the triple-quoted string below is an inert
# expression statement, so none of it is executed.
'''
import logging

logging.basicConfig(filename='c14.log',
                              filemode='a',
                              format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                             datefmt='%H:%M:%S',
                             level=logging.DEBUG)
'''
# Let pandas render up to 500 rows before truncating a displayed table.
pd.options.display.max_rows = 500

In [None]:
import arviz as az
from matplotlib import rcParams,rc
import seaborn as sb
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Global plot styling: seaborn "paper" context with tick-style axes, scaled by
# `fonts`, and Times New Roman as the matplotlib font family.
fonts = 1
sb.set(font_scale=fonts, style='ticks', context="paper")
plt.rcParams.update({"font.family": "Times New Roman"})

In [None]:
%load_ext autoreload
%autoreload 2
import c14
import c14.models.liver_new_mitosis as lm
import c14.corner
from c14.utils import *


In [None]:
# Read the sample table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='C14data_liver_samples_20211020.csv')

In [None]:
#Data files can be found in Zenodo 

# Directories holding the pickled fit results for each cell population.
directory_uns = 'liver_uns/'
directory_non = 'liver_non/'


def _select_liver_samples(samples_mean, cell_type):
    """Return the rows of ``samples_mean`` belonging to one cell type.

    Parameters
    ----------
    samples_mean : pandas.DataFrame
        Averaged sample table; must contain the columns ``type``,
        ``pathology`` and ``e14C``.
    cell_type : str
        Value of the ``type`` column to keep (e.g. ``"unsorted"``).

    Returns
    -------
    pandas.DataFrame
        Rows whose ``type`` equals ``cell_type`` and whose ``pathology`` is
        neither ``"Y"`` nor ``"C"``, with ``sigma14C`` added as a copy of
        ``e14C``.  The frame is an independent copy, so adding the column does
        not raise pandas' SettingWithCopyWarning.
    """
    selected = samples_mean.query(
        f'type == "{cell_type}" and pathology != ["Y", "C"]'
    ).copy()
    selected['sigma14C'] = selected['e14C']
    return selected


# Read the CSV once and average the rows sharing the same
# (type, sample, ploidy, pathology) key.  numeric_only=True spells out the
# default that pandas < 2.0 applied implicitly to GroupBy.mean, so the cell
# behaves the same on newer pandas when non-numeric columns are present.
samples_mean = (
    pd.read_csv('C14data_liver_samples_20211020.csv')
    .groupby(['type', 'sample', 'ploidy', 'pathology'])
    .mean(numeric_only=True)
    .dropna(how='all')
    .reset_index()
)
samples_mean['age'] = samples_mean['Dcoll'] - samples_mean['Dbirth']

data_uns = _select_liver_samples(samples_mean, 'unsorted')
data_non = _select_liver_samples(samples_mean, 'non-hepatocyte')

# `data` ends up bound to the non-hepatocyte table, as it did before the
# refactoring of this cell.
data = data_non



In [None]:
# Display the non-hepatocyte sample table as this cell's output.
data_non

In [None]:
# Silence numpy floating-point warnings for the rest of the session.
np.seterr(all='ignore')
NUM=0
# Container passed to the c14.utils helpers: the data set plus, per model key,
# the raw fit result and the model instance.
results= dict(data=data_uns,results={},models={})


for mm in lm.models_list :
    m = mm(dnatotal=lm.dnatotal_spline)
    model_name = m.__class__.__name__
    # Only the POP1 model is analysed here.
    if model_name not in ['POP1']:
        continue
    path = directory_uns+model_name+'spline_'+str(NUM) + '.pickle'
    try:
        with open(path, 'rb') as handle:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load result files that come from a trusted source.
            res = pickle.load(handle)
    except FileNotFoundError:
        print(model_name,path)
        continue
    except Exception as err:
        # Still best-effort, but report what went wrong and no longer swallow
        # KeyboardInterrupt/SystemExit as the former bare `except:` did.
        print(path, repr(err))
        continue
    #print('FOUND',mm.__name__)
    results['results'].update(  {model_name+'spline_' : {'raw':res} } )
    results['models'].update(   {model_name+'spline_' : m } )

get_arviz(results,burnin=1000)
ranking = get_getranking(results)
# A bare expression in the middle of a cell is not rendered, so show the
# ranking explicitly.
display(ranking)
printt = get_pointestimates(results,error_in_real=False)[['Parameter','Value','Confidence Interval']]
printt.to_excel('liver_unsorted.xlsx')
calc_c14_all(results)
printt

In [None]:
 # LaTeX labels keyed by parameter name; passed as `rename` to
 # c14.corner.corner_R in the plotting cells.
 rename = {
     'sigma': r'$\sigma$',
     'd': r'$\delta$',
 }

In [None]:
import os

# Corner plot of the posterior samples for every loaded model of the unsorted
# data, written to figs/unsorted<model key>.png.
# True reproduces the former `... or True` condition: every figure is redrawn
# even if the PNG already exists.  Set to False to reuse existing files.
force_redraw = True
# savefig does not create missing directories.
os.makedirs('figs', exist_ok=True)
for n,v in results['results'].items():
    fimag = 'figs/unsorted'+n+'.png'
    if force_redraw or not os.path.isfile(fimag):
        fitted = results['models'][n]
        c14.corner.corner_R(v['azdata'].posterior.a.values,
                            fitted.parameter_names,
                            point_estimate=fitted.transform_physical_to_fit(v['median'],mode='bayes'),
                            rename=rename,
                            logparas=fitted.logparas)
        f = plt.gcf()
        f.show();
        #f.suptitle('hepato ' + n,fontsize=30)
        f.savefig(fimag,dpi=600)


In [None]:
# Silence numpy floating-point warnings for the rest of the session.
np.seterr(all='ignore')
NUM=0
# Container passed to the c14.utils helpers: the data set plus, per model key,
# the raw fit result and the model instance.
results_non= dict(data=data_non,results={},models={})


for mm in lm.models_list :
    m = mm(dnatotal=lm.dnatotal_spline)
    model_name = m.__class__.__name__
    # Only the POP1 model is analysed here.
    if model_name not in ['POP1']:
        continue
    path = directory_non+model_name+'spline_'+str(NUM) + '.pickle'
    try:
        with open(path, 'rb') as handle:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load result files that come from a trusted source.
            res = pickle.load(handle)
    except FileNotFoundError:
        print(model_name,path)
        continue
    except Exception as err:
        # Still best-effort, but report what went wrong and no longer swallow
        # KeyboardInterrupt/SystemExit as the former bare `except:` did.
        print(path, repr(err))
        continue
    #print('FOUND',mm.__name__)
    results_non['results'].update(  {model_name+'spline_' : {'raw':res} } )
    results_non['models'].update(   {model_name+'spline_' : m } )

get_arviz(results_non,burnin=1000)
ranking_non = get_getranking(results_non)
# A bare expression in the middle of a cell is not rendered, so show the
# ranking explicitly.
display(ranking_non)
printt_non = get_pointestimates(results_non,error_in_real=False)[['Parameter','Value','Confidence Interval']]
printt_non.to_excel('liver_non.xlsx')
calc_c14_all(results_non)
printt_non

In [None]:
import os

# Corner plot of the posterior samples for every loaded model of the
# non-hepatocyte data, written to figs/non<model key>.png.
# True reproduces the former `... or True` condition: every figure is redrawn
# even if the PNG already exists.  Set to False to reuse existing files.
force_redraw = True
# savefig does not create missing directories.
os.makedirs('figs', exist_ok=True)
for n,v in results_non['results'].items():
    fimag = 'figs/non'+n+'.png'
    if force_redraw or not os.path.isfile(fimag):
        fitted = results_non['models'][n]
        c14.corner.corner_R(v['azdata'].posterior.a.values,
                            fitted.parameter_names,
                            point_estimate=fitted.transform_physical_to_fit(v['median'],mode='bayes'),
                            rename=rename,
                            logparas=fitted.logparas)
        f = plt.gcf()
        f.show();
        #f.suptitle('hepato ' + n,fontsize=30)
        f.savefig(fimag,dpi=600)


In [None]:
from collections import defaultdict


# Export, for one subject of the unsorted data set, the implicit model
# parameters over a time grid together with the flows between populations and
# their in/out probabilities: one Excel sheet per model.
subject = 85
# The workbook is written when the writer is closed; the context manager does
# that on exit.  (ExcelWriter.save() was removed in pandas 2.0.)
with pd.ExcelWriter('time_rates_flows_uns.xlsx') as excel_writer:
    for model_name, model in results['models'].items():
        print(model_name)
        # Not used below; kept because the lookup raises KeyError early when
        # `subject` is missing from the data index.
        age = results['data'].loc[subject,'age']
        T = np.arange(0,90,0.1)
        parameter_phy = {i: results['point_est'].loc[model_name].loc[('median',i)]
                         for i in model.parameter_names}
        model.set_parameters_phy(parameter_phy,mode='bayes')
        ipsd = [model.calc_implicit_parameters(t) for t in T]

        # Position of `subject` within the data index.
        a_i = [j for j,x in enumerate(results['data'].index) if x == subject][0]
        ipid = listofdict_to_dictofarray_f(ipsd,a_i)
        df = pd.DataFrame(ipid)
        # Row number / 10; presumably one row per 0.1 step of T -- TODO confirm
        # against listofdict_to_dictofarray_f.
        df.index = df.index/10
        df.index.name = 'age'

        # Constant parameters become length-1 arrays so they broadcast against
        # the per-time-point arrays.
        ipid.update({n:np.array([v]) for n,v in parameter_phy.items()})
        flow_name = defaultdict(list)
        for ind2, flows in model.flow_in.items():
            # Evaluate each (rate, pop, factor) term once and reuse it for both
            # the total inflow and the per-source columns.
            terms = []
            for rate,pop,factor in flows:
                try:
                    term = ipid[rate] * factor
                    if pop is not None:
                        term = term * ipid[pop]
                    # Full-length copy so a length-1 (constant) term can also
                    # be stored as a DataFrame column.
                    term = np.broadcast_to(term, T.shape).copy()
                except Exception as e:
                    raise RuntimeError(f'in class {model_name}') from e
                # A flow without a source population has pop None; label it
                # 'None' instead of failing on str + None below.
                terms.append((str(pop), term))

            y = np.zeros_like(T)
            for _, term in terms:
                y += term
            #df['inflow_'+ind2] = y
            for source, term in terms:
                df['probIN_'+source+'_to_'+ind2] = term/y
                df['flow_'+source+'_to_'+ind2] = term
                flow_name[source] += [ind2]
            print(ind2,flows)
        # Share of each source population's total outflow going to each target.
        for pop_source,val in flow_name.items():
            norm = 0
            for pop in val:
                norm += df['flow_'+pop_source+'_to_'+pop]
            for pop in val:
                df['probOUT_'+pop_source+'_to_'+pop] = df['flow_'+pop_source+'_to_'+pop]/norm
        df.to_excel(excel_writer=excel_writer,sheet_name=model_name)

In [None]:
from collections import defaultdict


# Export, for one subject of the non-hepatocyte data set, the implicit model
# parameters over a time grid together with the flows between populations and
# their in/out probabilities: one Excel sheet per model.
subject = 68
# The workbook is written when the writer is closed; the context manager does
# that on exit.  (ExcelWriter.save() was removed in pandas 2.0.)
with pd.ExcelWriter('time_rates_flows_non.xlsx') as excel_writer:
    for model_name, model in results_non['models'].items():
        print(model_name)
        # Not used below; kept because the lookup raises KeyError early when
        # `subject` is missing from the data index.
        age = results_non['data'].loc[subject,'age']
        T = np.arange(0,90,0.1)
        parameter_phy = {i: results_non['point_est'].loc[model_name].loc[('median',i)]
                         for i in model.parameter_names}
        model.set_parameters_phy(parameter_phy,mode='bayes')
        ipsd = [model.calc_implicit_parameters(t) for t in T]

        # Position of `subject` within the data index.
        a_i = [j for j,x in enumerate(results_non['data'].index) if x == subject][0]
        ipid = listofdict_to_dictofarray_f(ipsd,a_i)
        df = pd.DataFrame(ipid)
        # Row number / 10; presumably one row per 0.1 step of T -- TODO confirm
        # against listofdict_to_dictofarray_f.
        df.index = df.index/10
        df.index.name = 'age'

        # Constant parameters become length-1 arrays so they broadcast against
        # the per-time-point arrays.
        ipid.update({n:np.array([v]) for n,v in parameter_phy.items()})
        flow_name = defaultdict(list)
        for ind2, flows in model.flow_in.items():
            # Evaluate each (rate, pop, factor) term once and reuse it for both
            # the total inflow and the per-source columns.
            terms = []
            for rate,pop,factor in flows:
                try:
                    term = ipid[rate] * factor
                    if pop is not None:
                        term = term * ipid[pop]
                    # Full-length copy so a length-1 (constant) term can also
                    # be stored as a DataFrame column.
                    term = np.broadcast_to(term, T.shape).copy()
                except Exception as e:
                    raise RuntimeError(f'in class {model_name}') from e
                # A flow without a source population has pop None; label it
                # 'None' instead of failing on str + None below.
                terms.append((str(pop), term))

            y = np.zeros_like(T)
            for _, term in terms:
                y += term
            #df['inflow_'+ind2] = y
            for source, term in terms:
                df['probIN_'+source+'_to_'+ind2] = term/y
                df['flow_'+source+'_to_'+ind2] = term
                flow_name[source] += [ind2]
            print(ind2,flows)
        # Share of each source population's total outflow going to each target.
        for pop_source,val in flow_name.items():
            norm = 0
            for pop in val:
                norm += df['flow_'+pop_source+'_to_'+pop]
            for pop in val:
                df['probOUT_'+pop_source+'_to_'+pop] = df['flow_'+pop_source+'_to_'+pop]/norm
        df.to_excel(excel_writer=excel_writer,sheet_name=model_name)