In [1]:
import os
os.environ['OMP_NUM_THREADS'] = '1'

import time
from multiprocessing import Pool
from tqdm.auto import tqdm
import re
from copy import deepcopy

import numpy as np
from scipy import integrate
from matplotlib import pyplot as plt

import noctiluca as nl
import bayesmsd

In [2]:
# Load the full trajectory data set; the HDF5 file keeps it under the 'data' key.
filename = '../data/20250411_chromatin_dynamics_all_data.h5'
data = nl.io.load.hdf5(filename)['data']

In [3]:
n_subsample = 4 # cut off the "kink" at the beginning of MINFLUX data
def subsample(traj):
    """
    Keep every `n_subsample`-th entry of a trajectory.

    The result is a fresh `nl.Trajectory` built from ``traj[::n_subsample]``.
    Its only meta entry is ``'Δt'``, set to the input's ``'Δt'`` multiplied by
    `n_subsample`; no other meta entries are copied here.
    """
    coarse = nl.Trajectory(traj[::n_subsample])
    coarse.meta['Δt'] = traj.meta['Δt'] * n_subsample
    return coarse

In [4]:
# Select the trajectories tagged 'minflux' and run `subsample` on them in place.
# NOTE(review): this cell is presumably not idempotent -- `inplace=True` overwrites
# the selected trajectories, so running it a second time would subsample the
# already subsampled data again. Reload `data` before re-running.
data.makeSelection('minflux')
data.apply(subsample, inplace=True)

# Fits

In [5]:
def chop(traj, dt=None, L=200, Fmin=2):
    """
    Cut a trajectory into pieces of at most `L` frames, at successively coarser time steps.

    The trajectory is cut into consecutive windows of `L` frames; leading frames
    containing NaN are trimmed from each window. The first frame of every
    resulting piece is then stacked into a coarse-grained trajectory (time step
    `L` times larger), which is cut in the same way. This repeats until a level
    consists of a single piece.

    Parameters
    ----------
    traj : nl.Trajectory
        trajectory to cut; ``traj.data`` is indexed as ``[:, frame, :]``
    dt : float, optional
        time step assigned to pieces whose source trajectory has no ``'Δt'``
        meta entry (a ``'Δt'`` meta entry takes precedence). Defaults to
        ``traj.meta['Δt']``.
    L : int
        window length in frames; must be at least 2
    Fmin : int
        pieces with ``F < Fmin`` are removed from the output
        (presumably `F` counts the valid frames -- TODO confirm)

    Returns
    -------
    nl.TaggedSet
        the remaining pieces of all levels, each carrying ``meta['Δt']``

    Raises
    ------
    ValueError
        if ``L < 2``. With ``L == 1`` coarse-graining never reduces the number
        of pieces and with ``L <= 0`` the window never advances, so either
        would loop forever.
    """
    if L < 2:
        raise ValueError(f"L must be at least 2 (got L={L}); otherwise chopping does not terminate")

    if dt is None:
        dt = traj.meta['Δt']

    def chop_traj(traj, dt=dt):
        # Time step stored on the trajectory wins over the fallback `dt`
        if 'Δt' in traj.meta:
            dt = traj.meta['Δt']

        pieces = []
        n_frames = len(traj)
        for i0 in range(0, n_frames, L):
            window = traj.data[:, i0:min(i0+L, n_frames), :]

            # Frames (axis 1) where no entry is NaN
            valid = np.nonzero(~np.any(np.isnan(window), axis=(0, 2)))[0]
            if len(valid) == 0: # no valid entries in this window
                # keep a single (invalid) frame as placeholder, such that the
                # coarse-grained trajectory still gets one frame per window
                piece = nl.Trajectory(window[:, [0]])
            else:
                piece = nl.Trajectory(window[:, valid[0]:])

            piece.meta['Δt'] = dt
            pieces.append(piece)

        return pieces

    chops = chop_traj(traj)
    out = nl.TaggedSet(chops, hasTags=False)
    while len(chops) > 1:
        # Next level: first frame of each piece, spaced L time steps apart
        cg_traj = nl.Trajectory(np.stack([piece.data[:, 0] for piece in chops], axis=1))
        cg_traj.meta['Δt'] = L*chops[0].meta['Δt']
        chops = chop_traj(cg_traj)
        for piece in chops:
            out.add(piece)

    # Clean out useless trajectories
    out.makeSelection(lambda traj, _: traj.F < Fmin)
    out.deleteSelection()
    return out

In [6]:
ct = 'mESC'
bar = tqdm()

# Joint fits to assemble for each treatment:
# name of the joint fit --> names of the single fits it combines
groups = {
    'minflux'       : ['minflux'],
    'SPT 100ms'     : ['SPT-100ms'],
    'SPT 2s'        : ['SPT-2s'],
    'SPT'           : ['SPT-100ms', 'SPT-2s'],
    'minflux + SPT' : ['minflux', 'SPT-100ms', 'SPT-2s'],
}

def twoGref(params):
    """Prefactor for the non-minflux fits: shared log(αΓ) + log(2) (2-loc correction)."""
    return params['log(αΓ)']+np.log(2)

fits = {}
for treatment in ['ctrl', 'DRB', 'TSA', 'ICRF']:

    cond = ', '.join(['H2B', ct, treatment])
    fits[treatment] = {
        'single' : {},
        'joints' : {},
    }

    # Minflux
    data.makeSelection(['minflux', cond], logic=all)
    dt = data[0].meta['Δt']

    fitdata = nl.TaggedSet()
    for traj in data:
        fitdata |= chop(traj.rescale(1e6, keepmeta=['Δt']))

    with nl.Parallelize():
        _ = nl.analysis.MSD(fitdata, chunksize=10, show_progress=True)

    # `subsample` multiplied Δt by n_subsample, so dt/n_subsample is the time step before subsampling
    fit = bayesmsd.lib.NPFit(fitdata, motion_blur_f=dt/n_subsample, parametrization='(log(αΓ), α)')
    fit.parameters['log(σ²) (dim 1)'].fix_to = 'log(σ²) (dim 0)'
    fit.likelihood_chunksize = 200

    fits[treatment]['single']['minflux'] = fit

    bar.update()

    # Conventional
    for dt_tag in ['100ms', '2s']:
        data.makeSelection(['SPT', dt_tag, cond], logic=all)
        dt = data[0].meta['Δt'] # not used by the fit below, which takes tau_e
        tau_e = 0.08671 # same exposure for both conditions

        fitdata = data.apply(lambda traj : traj.relative(keepmeta=['MSD', 'Δt']), inplace=False)

        fit = bayesmsd.lib.NPFit(fitdata, motion_blur_f=tau_e, parametrization='(log(αΓ), α)')
        fit.parameters['log(σ²) (dim 1)'].fix_to = 'log(σ²) (dim 0)'
        fit.likelihood_chunksize = 100

        fits[treatment]['single'][f'SPT-{dt_tag}'] = fit

        bar.update()

    # Assemble list of fit(group)s to run
    for groupname in groups:
        fits_dict = fits[treatment]['single']

        fit = bayesmsd.FitGroup({name : fits_dict[name] for name in groups[groupname]})

        # Shared parameters of the group, using the minflux fit's parameters as template
        fit.parameters['α']       = deepcopy(fits_dict['minflux'].parameters[      'α (dim 0)'])
        fit.parameters['log(αΓ)'] = deepcopy(fits_dict['minflux'].parameters['log(αΓ) (dim 0)'])

        # hacky...
        # Seed the shared parameters with the initial values of the first
        # matching per-fit parameter. `self=fit` binds the current group now,
        # since `fit` is reassigned on the next iteration.
        def patch_initial_params(self=fit):
            params = type(self).initial_params(self)
            # 'α' is a substring of 'log(αΓ)', so those keys have to be excluded
            # explicitly; otherwise the exponent could be seeded with a prefactor
            a    = [val for key, val in params.items() if 'α' in key and 'log(αΓ)' not in key][0]
            logG = [val for key, val in params.items() if 'log(αΓ)' in key][0]
            params['α'] = a
            params['log(αΓ)'] = logG
            return params
        fit.initial_params = patch_initial_params

        # Tie the parameters of the individual fits to the shared ones
        for fitname in fit.fits_dict:
            fit.parameters[f'{fitname} α (dim 0)'].fix_to = 'α'
            if fitname == 'minflux':
                fit.parameters[f'{fitname} log(αΓ) (dim 0)'].fix_to = 'log(αΓ)'
            else: # not minflux, so correct for 2-loc
                fit.parameters[f'{fitname} log(αΓ) (dim 0)'].fix_to = twoGref

        fits[treatment]['joints'][groupname] = fit

        bar.update()

bar.close()

0it [00:00, ?it/s]

  0%|          | 0/5035 [00:00<?, ?it/s]

  0%|          | 0/4146 [00:00<?, ?it/s]

  0%|          | 0/4002 [00:00<?, ?it/s]

  0%|          | 0/4858 [00:00<?, ?it/s]

In [7]:
# Run every joint fit of every treatment and print the resulting point estimates.
fitres = {}
for treatment in ['ctrl', 'DRB', 'TSA', 'ICRF']:
    banner = 17*'='
    print()
    print(banner)
    print(f'|| {ct:>5s} {treatment:<5s} ||')
    print(banner)
    print()

    joint_fits = fits[treatment]['joints']
    fitres[treatment] = {}
    for name in joint_fits:
        print(name)
        print('='*20)

        with nl.Parallelize():
            fitres[treatment][name] = joint_fits[name].run(show_progress=True)

        point_estimate = fitres[treatment][name]['params']
        for key in point_estimate:
            print(key, point_estimate[key])
        print()


||  mESC ctrl  ||

minflux


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -8.06473909569683
α 0.3237536208843953
log(αΓ) -6.5626791822568755
minflux α (dim 0) 0.3237536208843953
minflux log(αΓ) (dim 0) -6.5626791822568755

SPT 100ms


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.738444513640547
α 0.422197956029141
log(αΓ) -6.27721445857177
SPT-100ms α (dim 0) 0.422197956029141
SPT-100ms log(αΓ) (dim 0) -5.584067278011824

SPT 2s


fit iterations: 0it [00:00, ?it/s]

SPT-2s log(σ²) (dim 0) -5.891007459784242
α 0.5960185166059928
log(αΓ) -6.5007578459964765
SPT-2s α (dim 0) 0.5960185166059928
SPT-2s log(αΓ) (dim 0) -5.807610665436531

SPT


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.6945225801284955
SPT-2s log(σ²) (dim 0) -6.707645108094548
α 0.45609135467456435
log(αΓ) -6.244445211071193
SPT-100ms α (dim 0) 0.45609135467456435
SPT-2s α (dim 0) 0.45609135467456435
SPT-100ms log(αΓ) (dim 0) -5.551298030511248
SPT-2s log(αΓ) (dim 0) -5.551298030511248

minflux + SPT


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -7.975232255247852
SPT-100ms log(σ²) (dim 0) -6.840906041432038
SPT-2s log(σ²) (dim 0) -7.079005955077314
α 0.39016199166754567
log(αΓ) -6.231948966167968
minflux α (dim 0) 0.39016199166754567
minflux log(αΓ) (dim 0) -6.231948966167968
SPT-100ms α (dim 0) 0.39016199166754567
SPT-2s α (dim 0) 0.39016199166754567
SPT-100ms log(αΓ) (dim 0) -5.5388017856080225
SPT-2s log(αΓ) (dim 0) -5.5388017856080225


||  mESC DRB   ||

minflux


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -8.135505686114527
α 0.3112012363170198
log(αΓ) -6.472226188969962
minflux α (dim 0) 0.3112012363170198
minflux log(αΓ) (dim 0) -6.472226188969962

SPT 100ms


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.6826503417906835
α 0.40328062644317353
log(αΓ) -6.135039720118119
SPT-100ms α (dim 0) 0.40328062644317353
SPT-100ms log(αΓ) (dim 0) -5.441892539558173

SPT 2s


fit iterations: 0it [00:00, ?it/s]

SPT-2s log(σ²) (dim 0) -5.855187803119444
α 0.5309409470055377
log(αΓ) -6.481926281833539
SPT-2s α (dim 0) 0.5309409470055377
SPT-2s log(αΓ) (dim 0) -5.788779101273594

SPT


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.688021148215798
SPT-2s log(σ²) (dim 0) -7.029112595150025
α 0.38477787331019786
log(αΓ) -6.185827161605495
SPT-100ms α (dim 0) 0.38477787331019786
SPT-2s α (dim 0) 0.38477787331019786
SPT-100ms log(αΓ) (dim 0) -5.4926799810455496
SPT-2s log(αΓ) (dim 0) -5.4926799810455496

minflux + SPT


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -8.052644593586157
SPT-100ms log(σ²) (dim 0) -6.736923732386401
SPT-2s log(σ²) (dim 0) -7.161478230333096
α 0.36446991957762515
log(αΓ) -6.186759464389324
minflux α (dim 0) 0.36446991957762515
minflux log(αΓ) (dim 0) -6.186759464389324
SPT-100ms α (dim 0) 0.36446991957762515
SPT-2s α (dim 0) 0.36446991957762515
SPT-100ms log(αΓ) (dim 0) -5.493612283829378
SPT-2s log(αΓ) (dim 0) -5.493612283829378


||  mESC TSA   ||

minflux


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -8.229248029066703
α 0.27710943469518456
log(αΓ) -6.549132219503516
minflux α (dim 0) 0.27710943469518456
minflux log(αΓ) (dim 0) -6.549132219503516

SPT 100ms


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.695818827343953
α 0.40496639277087326
log(αΓ) -6.261994299015724
SPT-100ms α (dim 0) 0.40496639277087326
SPT-100ms log(αΓ) (dim 0) -5.568847118455778

SPT 2s


fit iterations: 0it [00:00, ?it/s]

SPT-2s log(σ²) (dim 0) -5.902062232085948
α 0.5377968400713273
log(αΓ) -6.5682737576787
SPT-2s α (dim 0) 0.5377968400713273
SPT-2s log(αΓ) (dim 0) -5.875126577118754

SPT


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.682884692772803
SPT-2s log(σ²) (dim 0) -6.815210230696745
α 0.40065943956442196
log(αΓ) -6.295134578986808
SPT-100ms α (dim 0) 0.40065943956442196
SPT-2s α (dim 0) 0.40065943956442196
SPT-100ms log(αΓ) (dim 0) -5.601987398426862
SPT-2s log(αΓ) (dim 0) -5.601987398426862

minflux + SPT


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -8.079896270354133
SPT-100ms log(σ²) (dim 0) -6.857259946700292
SPT-2s log(σ²) (dim 0) -7.422323164309047
α 0.33787086863783067
log(αΓ) -6.262739738805113
minflux α (dim 0) 0.33787086863783067
minflux log(αΓ) (dim 0) -6.262739738805113
SPT-100ms α (dim 0) 0.33787086863783067
SPT-2s α (dim 0) 0.33787086863783067
SPT-100ms log(αΓ) (dim 0) -5.5695925582451675
SPT-2s log(αΓ) (dim 0) -5.5695925582451675


||  mESC ICRF  ||

minflux


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -8.135496966210102
α 0.27465137606369855
log(αΓ) -6.925434592919514
minflux α (dim 0) 0.27465137606369855
minflux log(αΓ) (dim 0) -6.925434592919514

SPT 100ms


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.701098954382351
α 0.3980555102020379
log(αΓ) -6.406021405624678
SPT-100ms α (dim 0) 0.3980555102020379
SPT-100ms log(αΓ) (dim 0) -5.712874225064732

SPT 2s


fit iterations: 0it [00:00, ?it/s]

SPT-2s log(σ²) (dim 0) -5.8722633458619296
α 0.5624040703441333
log(αΓ) -6.620494628208836
SPT-2s α (dim 0) 0.5624040703441333
SPT-2s log(αΓ) (dim 0) -5.9273474476488905

SPT


fit iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) -6.663071663939398
SPT-2s log(σ²) (dim 0) -6.532471617057527
α 0.4297869219421949
log(αΓ) -6.374474285869804
SPT-100ms α (dim 0) 0.4297869219421949
SPT-2s α (dim 0) 0.4297869219421949
SPT-100ms log(αΓ) (dim 0) -5.681327105309858
SPT-2s log(αΓ) (dim 0) -5.681327105309858

minflux + SPT


fit iterations: 0it [00:00, ?it/s]

minflux log(σ²) (dim 0) -7.997825670216551
SPT-100ms log(σ²) (dim 0) -6.755373110811076
SPT-2s log(σ²) (dim 0) -6.68301948025095
α 0.3802994957154255
log(αΓ) -6.375910621736829
minflux α (dim 0) 0.3802994957154255
minflux log(αΓ) (dim 0) -6.375910621736829
SPT-100ms α (dim 0) 0.3802994957154255
SPT-2s α (dim 0) 0.3802994957154255
SPT-100ms log(αΓ) (dim 0) -5.682763441176884
SPT-2s log(αΓ) (dim 0) -5.682763441176884



In [8]:
# Store the results of all joint fits; the profiler cell reads this file back in.
nl.io.write.hdf5(fitres, f'../data/20250327_fitres_NPFit-aGparam_{ct}.h5')

## Profiler
Estimate credible intervals for the point estimates from the profile likelihood. __Attention: computationally expensive.__

This can also move the point estimate, if better parameters are found while exploring the likelihood.

In [None]:
# Start from the stored fit results and profile every joint fit of every treatment.
fitres = nl.io.load.hdf5(f'../data/20250327_fitres_NPFit-aGparam_{ct}.h5')
mci = {}
for treatment in ['ctrl', 'DRB', 'TSA', 'ICRF']:
    banner = 17*'='
    print()
    print(banner)
    print(f'|| {ct:>5s} {treatment:<5s} ||')
    print(banner)
    print()

    joint_fits = fits[treatment]['joints']
    mci[treatment] = {}
    for name in joint_fits:
        print(name)
        print('='*20)

        # Hand the profiler the stored point estimate for this fit
        profiler = bayesmsd.Profiler(joint_fits[name], max_restarts=50)
        profiler.point_estimate = fitres[treatment][name]

        with nl.Parallelize():
            mci[treatment][name] = profiler.find_MCI(show_progress=True)

        # One line per parameter: point estimate [lower bound, upper bound]
        intervals = mci[treatment][name]
        for key in intervals:
            m, (cil, cih) = intervals[key]
            print(f"{key:>25s} = {m:>6.3f} [{cil:>6.3f}, {cih:>6.3f}]")
        print()


||  mESC ctrl  ||

minflux


profiler iterations: 0it [00:00, ?it/s]

  minflux log(σ²) (dim 0) = -8.065 [-8.077, -8.053]
                        α =  0.324 [ 0.316,  0.331]
                  log(αΓ) = -6.563 [-6.603, -6.522]

SPT 100ms


profiler iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) = -6.738 [-6.756, -6.721]
                        α =  0.422 [ 0.411,  0.434]
                  log(αΓ) = -6.277 [-6.295, -6.258]

SPT 2s


profiler iterations: 0it [00:00, ?it/s]

   SPT-2s log(σ²) (dim 0) = -5.891 [-5.935, -5.841]
                        α =  0.596 [ 0.579,  0.613]
                  log(αΓ) = -6.501 [-6.532, -6.469]

SPT


profiler iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) = -6.695 [-6.706, -6.684]
   SPT-2s log(σ²) (dim 0) = -6.708 [-6.758, -6.660]
                        α =  0.456 [ 0.451,  0.461]
                  log(αΓ) = -6.244 [-6.253, -6.236]

minflux + SPT


profiler iterations: 0it [00:00, ?it/s]

  minflux log(σ²) (dim 0) = -7.975 [-7.982, -7.969]
SPT-100ms log(σ²) (dim 0) = -6.841 [-6.851, -6.831]
   SPT-2s log(σ²) (dim 0) = -7.079 [-7.147, -7.016]
                        α =  0.390 [ 0.388,  0.392]
                  log(αΓ) = -6.232 [-6.238, -6.226]


||  mESC DRB   ||

minflux


profiler iterations: 0it [00:00, ?it/s]

  minflux log(σ²) (dim 0) = -8.136 [-8.151, -8.121]
                        α =  0.311 [ 0.304,  0.319]
                  log(αΓ) = -6.472 [-6.515, -6.430]

SPT 100ms


profiler iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) = -6.683 [-6.701, -6.665]
                        α =  0.403 [ 0.392,  0.414]
                  log(αΓ) = -6.135 [-6.153, -6.117]

SPT 2s


profiler iterations: 0it [00:00, ?it/s]

SPT-100ms log(σ²) (dim 0) = -6.688 [-6.700, -6.676]
   SPT-2s log(σ²) (dim 0) = -7.029 [-7.102, -6.962]
                        α =  0.385 [ 0.380,  0.389]
                  log(αΓ) = -6.186 [-6.194, -6.179]

minflux + SPT


profiler iterations: 0it [00:00, ?it/s]

In [None]:
# Store the profiler results (point estimates with interval bounds) for all treatments.
nl.io.write.hdf5(mci, f'../data/20250327_mci_NPFit-aGparam_{ct}.h5')