# Initialise 

default_parameters -> fit

default_limit -> physical

parameters -> physical

In [1]:
import datetime
from IPython.display import display
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings

%matplotlib inline

# Disk-backed joblib cache; `mem.cache` is applied to `fit_model` further down.
# NOTE(review): '/scratch/tmp/' is a machine-specific absolute path — consider
# making the cache location configurable before sharing the notebook.
mem = joblib.Memory(location='/scratch/tmp/', verbose=1)

import logging

# logging.basicConfig(filename='c14.log',
#                             filemode='a',
#                             format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
#                             datefmt='%H:%M:%S',
#                             level=logging.INFO)

In [2]:
%load_ext autoreload
%autoreload 2
import c14
import c14.livermodels

In [3]:
# Read the sample table, average the rows that share the same
# (type, sample, ploidy, pathology) key, add an `age` column and keep only
# hepatocyte samples whose pathology flag is not "Y".
data = (
    pd.read_csv('../../../results/C14data_liver_samples_20190722.csv')
    .groupby(['type', 'sample', 'ploidy', 'pathology'])
    .mean()
    .dropna(how='all')
    .reset_index()
    # age = collection date minus birth date
    # NOTE(review): presumably both are in years — confirm against the CSV
    .assign(age=lambda frame: frame['Dcoll'] - frame['Dbirth'])
    .query('type == "hepatocyte" and pathology != "Y"')
)
# Wrap the filtered frame in the data container consumed by the c14 fitting code.
edata = c14.exp_data(data)

In [4]:
@mem.cache
def fit_model(edata, model, step_size=0.1, optimize_minuit_multistart_kwargs=dict(n=100,processes=8)):
    """Fit a single model class to the data with a multistart optimisation.

    The call is memoised on disk through ``mem.cache``.

    Parameters
    ----------
    edata
        Data object; passed as ``data=`` to ``c14.optimize`` and
        ``c14.visualisze``.
    model
        Model class (not an instance); it is instantiated here with no
        arguments and its ``__name__`` is used in progress messages.
    step_size
        Forwarded to ``c14.optimize``. The visualiser is always built with
        ``step_size=2``, independent of this argument.
    optimize_minuit_multistart_kwargs
        Keyword arguments unpacked into ``optimize_minuit_multistart``. The
        default dict is shared between calls but is only read, never mutated.

    Returns
    -------
    dict
        Always contains ``'model'``, ``'op'``, ``'vis'`` and
        ``'multistart_result'``. ``'point_estimate'`` and ``'cov'`` are added
        when the best multistart row can be extracted, ``'errors'`` when the
        error calculation succeeds; otherwise a message is printed and the
        key is left out.
    """
    print(f'Fitting model {model.__name__}')
    result = {'model': model()}
    result['op'] = c14.optimize(model=result['model'],data=edata,step_size=step_size)
    result['vis'] = c14.visualisze(model=result['model'],data=edata,step_size=2)
    result['multistart_result'] = result['op'].optimize_minuit_multistart(**optimize_minuit_multistart_kwargs)
    try:
        # Sort once and take the row with the smallest objective value.
        best_row = result['multistart_result'].sort_values('fval').iloc[0]
        result['point_estimate'] = best_row['values']
        result['cov'] = best_row['cov']
    except Exception:
        # `Exception` rather than a bare `except`, so Ctrl-C / SystemExit
        # still abort a long-running fit instead of being swallowed.
        print(f'Multistart found no valid results for model {model.__name__}')
    try:
        # Also lands in the handler (KeyError) when no point estimate was stored above.
        result['errors'] = result['vis'].calc_error(result['point_estimate'], result['cov'])
    except Exception:
        print(f'Could not calculate errors for {model.__name__}')
    return result

In [5]:
def fit_models(edata, models, step_size=0.1, optimize_minuit_multistart_kwargs=dict(n=100,processes=8)):
    """Run ``fit_model`` for every model class and collect the results.

    Parameters
    ----------
    edata
        Data object handed unchanged to ``fit_model``.
    models
        Iterable of model classes.
    step_size, optimize_minuit_multistart_kwargs
        Forwarded positionally to ``fit_model``.

    Returns
    -------
    dict
        Maps each model's ``__name__`` to the dict returned by ``fit_model``.
    """
    fitted_by_name = {}
    for candidate in models:
        fitted_by_name[candidate.__name__] = fit_model(
            edata, candidate, step_size, optimize_minuit_multistart_kwargs
        )
    return fitted_by_name

In [6]:
# All candidate model classes exported by c14.livermodels; the bare name on
# the last line makes the notebook display the list.
models = c14.livermodels.models_list
models

[c14.livermodels.A,
 c14.livermodels.Al4s,
 c14.livermodels.Ak0,
 c14.livermodels.Al40,
 c14.livermodels.Akl40,
 c14.livermodels.Akl40lin,
 c14.livermodels.Akl40q,
 c14.livermodels.Akl402x2n,
 c14.livermodels.Al2l4,
 c14.livermodels.B,
 c14.livermodels.C]

In [7]:
# Restrict this run to the first model in the list.
models = models[:1]

In [8]:
# Print the name of every selected model class and instantiate it once with
# no arguments; the instance is discarded (presumably a check that each model
# can be constructed before the expensive fit starts).
for model in models:
    print(model.__name__)
    model()

A


In [9]:
# Per-parameter (lower, upper) ranges handed to the multistart optimiser as
# `init_limit`.
# NOTE(review): the values look like log10 exponents in fit space — confirm
# against c14's parameter transformation.
init_limit = dict.fromkeys(
    ('r', 'lambda4', 'kappa24', 'kappa42',
     'delta2', 'delta2_0', 'delta2_100', 'delta4'),
    (-3, 0),
)
# `f` is the only parameter with a different range.
init_limit['f'] = (1, 3)

In [None]:
%%time
results = fit_models(edata, models, step_size=2.0, 
                     optimize_minuit_multistart_kwargs=dict(n=100, 
                                                            processes=7, 
                                                            init_limit=init_limit)
                    )

________________________________________________________________________________
[Memory] Calling __main__--home-fabrost-pksCloud-projects-C14_liver-source-fabian-python-__ipython-input__.fit_model...
fit_model(<c14.base.exp_data object at 0x7f08df8b1978>, <class 'c14.livermodels.A'>, 2.0, { 'init_limit': { 'delta2': (-3, 0),
                  'delta2_0': (-3, 0),
                  'delta2_100': (-3, 0),
                  'delta4': (-3, 0),
                  'f': (1, 3),
                  'kappa24': (-3, 0),
                  'kappa42': (-3, 0),
                  'lambda4': (-3, 0),
                  'r': (-3, 0)},
  'n': 100,
  'processes': 7})
Fitting model A
[(-3, 0), (-3, 0), (-3, 0), (-3, 0)]


  grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]
  grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]
  p_phy[p] = 10**p_fit[p]
  p_phy[p] = 10**p_fit[p]
  grad[k] = (f(*((xk + d,) + args)) - f0) / d[k]


In [None]:
%%time
results = fit_models(edata, models, step_size=0.5, 
                     optimize_minuit_multistart_kwargs=dict(n=100, 
                                                            processes=7, 
                                                            init_limit=init_limit)
                    )

In [None]:
%%time
results = fit_models(edata, models, step_size=0.5, 
                     optimize_minuit_multistart_kwargs=dict(n=1000, 
                                                            processes=7, 
                                                            init_limit=init_limit)
                    )

In [None]:
# One figure per fitted model: the parameter plot at the point estimate on the
# left, the objective values of all multistart runs on the right, followed by
# the transformed point-estimate parameters as text.
for name, result in results.items():
    print(name)

    # fit_model omits these keys when the multistart produced no usable
    # result or the error calculation failed; skip such models instead of
    # aborting the whole loop with a KeyError.
    if 'point_estimate' not in result or 'errors' not in result:
        print(f'Skipping plots for {name}: no point estimate or errors available')
        continue

    fig, axs = plt.subplots(1, 2, figsize=(12,6))
    result['vis'].plot_parameter(result['point_estimate'], result['errors'], no_plot=[], axis=axs[0])
    axs[0].legend()
    axs[0].set_title(name)
    axs[0].set_ylim(0,1)

    # Sort so that the x position really is the rank the axis label claims.
    # NOTE(review): 'log L' label kept as-is; confirm whether `fval` is the
    # log-likelihood or its negative.
    fval = result['multistart_result']['fval'].sort_values()
    axs[1].scatter(np.arange(len(fval)), fval.values)
    axs[1].set_xlabel('rank')
    axs[1].set_ylabel('log L')

    plt.show()

    print(pd.Series(result['model'].tranform_parameters(result['point_estimate'])))

In [None]:
# Model-comparison table, one row per fitted model, indexed by model name.
final_results = pd.Series(
    {name: result['op'].calc_aicc(result['point_estimate']) for name, result in results.items()},
    name='cAIC',
).to_frame()
# Lists are built in `results` order, which is also the index order above.
final_results['loglike'] = [fit['op'].loglike_dict(fit['point_estimate']) for fit in results.values()]
final_results['nparas'] = [fit['model'].nparas for fit in results.values()]
# delta: cAIC difference to the best (lowest-cAIC) model.
final_results['delta'] = final_results['cAIC'].sub(final_results['cAIC'].min())
# w: exp(-delta / 2) normalised to sum to one over all models.
final_results['w'] = np.exp(-0.5 * final_results['delta'])
final_results['w'] /= final_results['w'].sum()
# ER: largest weight divided by each model's weight.
final_results['ER'] = final_results['w'].max() / final_results['w']
final_results.sort_values('cAIC')