In [None]:
import sys
sys.path.append('../../')
from chain_growth.hcg_list import make_hcl_l
from chain_growth.fragment_list import generate_fragment_list
from chain_growth.hcg_fct import hierarchical_chain_growth

In [None]:
import numpy as np
from time import perf_counter

# Run hierarchical chain growth (HCG) for the short tau K18 example

## prepare HCG
### input file and path / output path

In [None]:
## directory containing the MD fragments (handed to hierarchical_chain_growth as input path)
path0 = '..'
## output directory in which the assembled models are stored
path = 'truncated_tauK18/'
# file with the sequence of the chain to assemble; format: "fasta" or "PDB"
sequence_f = '../truncated_tauK18.fasta'

### fragment construction

In [None]:
# length of MD fragments (without the end-capping groups if present)
fragment_length = 5
# length of the residue overlap between subsequent fragments
overlap = 2

# generate the list of fragments and a dictionary of overlaps between fragments
# generating overlaps_d is necessary because, to match the full-length sequence,
# the overlap between e.g. the last two fragments can vary
fragment_l, overlaps_d = generate_fragment_list(sequence_f, fragment_length, overlap)

In [None]:
from chain_growth.fragment_list import get_sequence

In [None]:
# sequence read from sequence_f as a list of three-letter codes
# (presumably one entry per residue - confirm against chain_growth.fragment_list);
# len(s) is used further down as the chain length for the run-time estimate
s = get_sequence(sequence_f).three_letter_list()

### lists for HCG

In [None]:
# number of entries in fragment_l; the HCG lists below are built from this count
n_pairs = len(fragment_l)
# show the fragment list, the overlaps and the count as a quick sanity check
print(fragment_l, overlaps_d, n_pairs)

# hcg_l : list of paired fragments
# promo_l : list to evaluate if the last fragment of level m in hcg_l is promoted to level m+1
hcg_l, promo_l = make_hcl_l(n_pairs)

In [None]:
# maximal number of pairs/full-length models to assemble
kmax = 200
# whether the MD fragments were sampled with end-capping groups
# (True here; presumably False for fragments without capping groups)
capping_groups = True

## run HCG

In [None]:
def estimate_run_time(data):
    """Estimate the HCG run time from kmax and the chain length.

    The run time is roughly proportional to kmax * chain length, where
    kmax is the number of full-length chains to grow and the chain length
    is the number of amino acids per chain. Keeping one of the two fixed
    and varying the other, the time evolution is described by a fitted
    curve of the form ``a + b*p + c*p*log(p)``. One such curve is evaluated
    for each argument and the two results are multiplied.
    The coefficients stem from a least-squares fit.
    NOTE: The estimate may slightly overestimate the true run time
    (by at most about 1 minute).

    Parameters
    ----------
    data : numpy array
        Unpacked into exactly two entries:
        first entry  = kmax
        second entry = chain length

    Returns
    -------
    Estimated run time: the product of the two fitted curves. It is a
    float for scalar entries and an array when the entries are arrays.

    Thanks to Johannes Betz.

    """
    def _fitted_curve(p, a, b, c):
        # linear part plus a p*log(p) correction term
        linear_part = a + b*p
        log_part = c*p *np.log(p)
        return linear_part + log_part

    n_chains, chain_length = data
    time_vs_kmax = _fitted_curve(n_chains, 1.719e-01, 1.131e-02, -1.689e-05)
    time_vs_length = _fitted_curve(chain_length, 1.236e+01, -7.738e-02, 2.485e-02)
    return time_vs_kmax * time_vs_length

In [None]:
# Pack kmax and the chain length (len(s)) into a flat 1-D array, so that
# estimate_run_time unpacks two scalars and returns a single float instead of
# the one-element array a stacked (2, 1) column would produce.
data = np.array([kmax, len(s)])
expected_time = estimate_run_time(data)
print(expected_time)

In [None]:
# time the complete chain-growth run; start/end are read by the next cell
start = perf_counter()
hierarchical_chain_growth(
    hcg_l,
    promo_l,
    overlaps_d,
    path0,
    path,
    kmax=kmax,
    capping_groups=capping_groups,
    verbose=True,
)
end = perf_counter()

In [None]:
# elapsed time of the HCG run; perf_counter() differences are in (fractional) seconds
execution_time = (end - start)
execution_time