# The probe phase

# HPC Cluster: gnuparallel

## Separated *whole* simulation directories

On a cluster, *whole* simulations are organized into *whole* directories, where each *whole* directory contains all the files for a given *whole* simulation. **gnuparallel** is used to parallelize the **probe** phase at the **shell** level. For this purpose, all the python modules and scripts are separately installed and run on each core. For instance, if 32 cores are available, then the files in 32 *whole* directories are simultaneously processed. However, each *whole* directory may contain multiple topology and trajectory pairs. Thus, there is parallelization at the level of *whole* directories, not at the level of the *segment* or *whole* trajectories inside a *whole* directory. Inside each *whole* directory, a python **main_probe.py** script analyzes the trajectories sequentially.

- trj and all *segments* on a cluster

For each *whole* directory, the following script is executed by means of *gnuparallel*. See these scripts: *probe-1.7-all_trj_segments.py* and *probe-1.7-bug_trj_segments*

## Sum-rule project

#### whole bug trjs

In [None]:
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober

# Probe every whole "bug" topology/trajectory pair of the sum-rule database.
# Recorded run time: 24 minutes.
group = 'bug'
lineage = 'whole'
save_to = './'
# Alternative when the notebook sits next to the data: database = './'
database = '/Users/amirhsi_mini/research_data/sumrule/'
hierarchy = 'N*'
bug_pairs = organizer.sort_filenames(
    glob(f"{database}{hierarchy}{group}*"),
    fmts=[f".{group}.data", f".{group}.lammpstrj"],
)
print(bug_pairs)
for bug_topo, bug_trj in bug_pairs:
    print(bug_topo)
    prober.sum_rule_bug_cyl(bug_topo, bug_trj, lineage, save_to=save_to)

#### segment bug trjs

In [None]:
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import SumRuleCyl


# Probe the "bug" trajectory segments found in the current directory against
# the first "bug" topology file found there.
group = 'bug'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
bug_pattern = f"./N*{group}*"
# Every item returned by `sort_filenames` is indexed with [0] to obtain the
# trajectory path itself.
bug_trjs = [
    trj_entry[0]
    for trj_entry in organizer.sort_filenames(
        glob(bug_pattern), fmts=[f".{group}.lammpstrj"]
    )
]
bug_topo = organizer.sort_filenames(
    glob(bug_pattern), fmts=[f".{group}.data"]
)[0][0]
# It is assumed that the segments are numbered from 1 to max_segment_id.
max_segment_id = len(bug_trjs)
for bug_trj in bug_trjs:
    trj_info = SumRuleCyl(
        bug_trj, topo_lineage, 'cylindrical', group, 'linear'
    )
    # All the frames in the last segment are probed; every earlier segment is
    # passed `continuous=True`.
    segment_opts = (
        {} if trj_info.segment_id == max_segment_id else {'continuous': True}
    )
    prober.sum_rule_bug_cyl(
        bug_topo, bug_trj, lineage, save_to=save_to, **segment_opts
    )

#### segment all trjs

In [None]:
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import SumRuleCyl


# Probe the "all" trajectory segments found in the current directory against
# the first "all" topology file found there.
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
all_pattern = f"./N*{group}*"
# Every item returned by `sort_filenames` is indexed with [0] to obtain the
# trajectory path itself.
all_trjs = [
    trj_entry[0]
    for trj_entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f".{group}.lammpstrj"]
    )
]
all_topo = organizer.sort_filenames(
    glob(all_pattern), fmts=[f".{group}.data"]
)[0][0]
# It is assumed that the segments are numbered from 1 to max_segment_id.
max_segment_id = len(all_trjs)
for all_trj in all_trjs:
    trj_info = SumRuleCyl(
        all_trj, topo_lineage, 'cylindrical', group, 'linear'
    )
    # All the frames in the last segment are probed; every earlier segment is
    # passed `continuous=True`.
    segment_opts = (
        {} if trj_info.segment_id == max_segment_id else {'continuous': True}
    )
    prober.sum_rule_all_cyl(
        all_topo, all_trj, lineage, save_to=save_to, **segment_opts
    )

## Trans-Foci project

### Cylindrical
#### whole bug trjs
Each *bug topology* comes with only **one** *bug trajectory*.

In [None]:
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import TransFociCyl

# Probe every whole "bug" topology/trajectory pair in the current directory.
group = 'bug'
lineage = 'whole'
save_to = './'
bug_pairs = organizer.sort_filenames(
    glob(f"./eps*{group}*"),
    fmts=[f".{group}.data", f".{group}.lammpstrj"],
)
for bug_topo, bug_trj in bug_pairs:
    prober.trans_foci_bug_cyl(bug_topo, bug_trj, lineage, save_to=save_to)

#### segment all trjs

In [None]:
# Probe the "all" trajectory segments found in the current directory against
# the first "all" topology file found there.
# NOTE: relies on `glob`, `organizer`, `prober` and `TransFociCyl` having been
# imported by a previously executed cell.
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
all_pattern = f"./eps*{group}*"
# Every item returned by `sort_filenames` is indexed with [0] to obtain the
# trajectory path itself.
all_trjs = [
    trj_entry[0]
    for trj_entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f".{group}.lammpstrj"]
    )
]
all_topo = organizer.sort_filenames(
    glob(all_pattern), fmts=[f".{group}.data"]
)[0][0]
# It is assumed that the segments are numbered from 1 to max_segment_id.
max_segment_id = len(all_trjs)
for all_trj in all_trjs:
    trj_info = TransFociCyl(
        all_trj, topo_lineage, 'cylindrical', group, 'ring'
    )
    # All the frames in the last segment are probed; every earlier segment is
    # passed `continuous=True`.
    segment_opts = (
        {} if trj_info.segment_id == max_segment_id else {'continuous': True}
    )
    prober.trans_foci_all_cyl(
        all_topo, all_trj, lineage, save_to=save_to, **segment_opts
    )

### Cubic

#### whole bug trjs
Each *bug topology* comes with only **one** *bug trajectory*.

In [None]:
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import TransFociCub

# Dry run over the whole "bug" topology/trajectory pairs of the cubic
# trans-foci database: the pairs are only listed, the probe call is disabled.
geometry = 'cubic'
group = 'bug'
lineage = 'whole'
save_to = './'
bug_pairs = organizer.sort_filenames(
    glob(
        '/Users/amirhsi/research_data/TransFociCub-probe/al*/al*'
        f"{group}*"
    ),
    fmts=[f".{group}.data", f".{group}.lammpstrj"],
)
for bug_topo, bug_trj in bug_pairs:
    print(bug_topo)
    print(bug_trj)
    # Disabled probe call:
    # prober.trans_foci_bug_cub(bug_topo, bug_trj, lineage, save_to=save_to)

#### segment bug trjs

In [1]:
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import TransFociCub


# Probe the "bug" trajectory segments of every simulation directory of the
# cubic trans-foci database; each directory is handled on its own.
# (The `all_*` names hold "bug" files here because `group` is 'bug'.)
geometry = 'cubic'
group = 'bug'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
# Single-directory alternative: all_trjs = glob('./al*' + group + '*')
all_dirs = glob('/Users/amirhsi/research_data/TransFociCub-probe/al*/')
for all_dir in all_dirs:
    dir_pattern = f"{all_dir}al*{group}*"
    # Every item returned by `sort_filenames` is indexed with [0] to obtain
    # the trajectory path itself.
    all_trjs = [
        trj_entry[0]
        for trj_entry in organizer.sort_filenames(
            glob(dir_pattern), fmts=[f".{group}.lammpstrj"]
        )
    ]
    all_topo = organizer.sort_filenames(
        glob(dir_pattern), fmts=[f".{group}.data"]
    )[0][0]
    # It is assumed that the segments are numbered from 1 to max_segment_id.
    max_segment_id = len(all_trjs)
    print(max_segment_id)
    for all_trj in all_trjs:
        trj_info = TransFociCub(
            all_trj, topo_lineage, geometry, group, 'ring'
        )
        # All the frames in the last segment are probed; every earlier
        # segment is passed `continuous=True`.
        segment_opts = (
            {}
            if trj_info.segment_id == max_segment_id
            else {'continuous': True}
        )
        prober.trans_foci_bug_cub(
            all_topo, all_trj, lineage, save_to=save_to, **segment_opts
        )
    

In [None]:
# Re-run of the per-segment probe loop. It relies on `all_trjs`, `all_topo`,
# `max_segment_id`, `topo_lineage`, `lineage`, `group` and `save_to` left
# behind by a previously executed cell.
for all_trj in all_trjs:
    # Show which trajectory is being probed (the original printed the builtin
    # `all` instead of the loop variable).
    print(all_trj)
    trj_info = TransFociCub(all_trj, topo_lineage, 'cubic', group, 'ring')
    # all the frames in the last segment are probed:
    if trj_info.segment_id == max_segment_id:
        prober.trans_foci_all_cub(
            all_topo, all_trj, lineage, save_to=save_to
        )
    else:
        prober.trans_foci_all_cub(
            all_topo,
            all_trj,
            lineage,
            save_to=save_to,
            continuous=True
        )

In [None]:
# Probe the "all" trajectory segments found in the current directory against
# the first "all" topology file found there.
# NOTE: relies on `glob`, `organizer`, `prober` and `TransFociCub` having been
# imported by a previously executed cell.
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
all_pattern = f"./al*{group}*"
# Every item returned by `sort_filenames` is indexed with [0] to obtain the
# trajectory path itself.
all_trjs = [
    trj_entry[0]
    for trj_entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f".{group}.lammpstrj"]
    )
]
all_topo = organizer.sort_filenames(
    glob(all_pattern), fmts=[f".{group}.data"]
)[0][0]
# It is assumed that the segments are numbered from 1 to max_segment_id.
max_segment_id = len(all_trjs)
for all_trj in all_trjs:
    trj_info = TransFociCub(all_trj, topo_lineage, 'cubic', group, 'ring')
    # All the frames in the last segment are probed; every earlier segment is
    # passed `continuous=True`.
    segment_opts = (
        {} if trj_info.segment_id == max_segment_id else {'continuous': True}
    )
    prober.trans_foci_all_cub(
        all_topo, all_trj, lineage, save_to=save_to, **segment_opts
    )

#### segment all trjs

In [None]:
# Cubic trans-foci project, "all" group: probe each trajectory segment of the
# current directory together with the first matching topology file.
# NOTE: `glob`, `organizer`, `prober` and `TransFociCub` must already be in
# the notebook namespace.
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
segment_pattern = f"./al*{group}*"
sorted_trjs = organizer.sort_filenames(
    glob(segment_pattern), fmts=[f".{group}.lammpstrj"]
)
# Keep only the first member of every sorted entry: the trajectory path.
all_trjs = [entry[0] for entry in sorted_trjs]
sorted_topos = organizer.sort_filenames(
    glob(segment_pattern), fmts=[f".{group}.data"]
)
all_topo = sorted_topos[0][0]
# It is assumed that the segments are numbered from 1 to max_segment_id.
max_segment_id = len(all_trjs)
for all_trj in all_trjs:
    trj_info = TransFociCub(all_trj, topo_lineage, 'cubic', group, 'ring')
    # The last segment is probed with the default settings (all its frames);
    # the others are passed `continuous=True`.
    if trj_info.segment_id != max_segment_id:
        prober.trans_foci_all_cub(
            all_topo, all_trj, lineage, save_to=save_to, continuous=True
        )
    else:
        prober.trans_foci_all_cub(
            all_topo, all_trj, lineage, save_to=save_to
        )

# PC Serial scheme

There are 4 different types of directories from which only one type can be in a *space* directory.

There are separated **whole** directories in each of which there are **all** and **bug** **whole** trajectories; or, there are again separated **whole** directories in each of which there are **all** and **bug** **segment** trajectories. Below there are two groups of scripts for **serial** and **parallel** running schemes.


On a PC, the *whole* directories are located in a master *space-trjs* directory; however, one main python script probes all the *whole* directories in a parallel scheme via Dask. This is different from the *gnuparallel*-based approach in which each *whole* directory has its own copy of the required scripts and a main python script is run to probe that directory individually.

## TwoMonDep project
#### whole bug trjs

In [1]:
import warnings
from typing import Optional, Dict, Any, Union
import numpy as np
import MDAnalysis as mda
from MDAnalysis import transformations as mda_trans
from MDAnalysis.analysis import distances as mda_dist
from polyphys.manage.parser import (
    SumRuleCyl, TransFociCyl, TransFociCub, HnsCub, HnsCyl,
    SumRuleCubHeteroLinear, SumRuleCubHeteroRing, TwoMonDep
    )
from polyphys.manage.typer import ParserT
from polyphys.manage.organizer import invalid_keyword
from polyphys.analyze import clusters, correlations
from polyphys.analyze.measurer import transverse_size, fsd, end_to_end, pair_distance, apply_pbc_orthogonal


Due to the on going maintenance burden of keeping command line application
wrappers up to date, we have decided to deprecate and eventually remove these
modules.

We instead now recommend building your command line and invoking it directly
with the subprocess module.


In [2]:
def stamps_report_with_measures(
    report_name: str,
    sim_info: Union[TransFociCub, TransFociCyl],
    n_frames: int,
    measures: Dict[str, float],
) -> None:
    """
    Writes a summary of stamps (properties and attributes) of a simulation,
    together with some measured quantities, to file.

    `stamps_report_with_measures` writes a two-line comma-separated file
    called `report_name`: a header line followed by one line of values. The
    columns are, in order, the names in `sim_info.genealogy`, the names in
    `sim_info.attributes`, the keys of `measures`, and finally `n_frames`.
    The last line is not terminated by a newline.

    Parameters
    ----------
    report_name: str
        Name of the report.
    sim_info: ParserT
        A ParserT instance that contains information about the name,
        parents, and physical attributes of a simulation. Its `genealogy`
        and `attributes` must be iterables of attribute names defined on
        `sim_info`.
    n_frames: int
        Number of frames/snapshots/configurations in a simulation.
    measures: Dict
        A dictionary of measures where a key and value pair is the name and
        value of a physical property.
    """
    # The header and the values are built from the SAME name lists so the
    # columns cannot drift apart (the header previously iterated
    # `sim_info._genealogy` while the values iterated `sim_info.genealogy`).
    lineage_names = list(sim_info.genealogy)
    attr_names = list(sim_info.attributes)
    header = [*lineage_names, *attr_names, *measures.keys(), "n_frames"]
    values = [
        *(getattr(sim_info, name) for name in lineage_names),
        *(getattr(sim_info, name) for name in attr_names),
        *measures.values(),
        n_frames,
    ]
    with open(report_name, mode='w') as report:
        report.write(",".join(f"{name}" for name in header) + "\n")
        report.write(",".join(f"{value}" for value in values))


def stamps_report(
    report_name: str,
    sim_info: ParserT,
    n_frames: int
) -> None:
    """
    Writes a summary of stamps (properties and attributes) of a simulation
    to file.

    The report is a two-line comma-separated file: a header line followed by
    one line of values. The columns are, in order, the names listed in
    `sim_info.genealogy`, the names listed in `sim_info.attributes`, and
    `n_frames`. The last line is not terminated by a newline.

    Parameters
    ----------
    report_name: str
        Name of the report.
    sim_info: ParserT
        An object that contains information about the name, parents, and
        physical attributes of a simulation.
    n_frames: int
        Number of frames/snapshots/configurations in a simulation.
    """
    with open(report_name, mode='w') as report:
        # Header: lineage names, attribute names, then the frame counter.
        column_names = [*sim_info.genealogy, *sim_info.attributes]
        header_cells = [f"{name}" for name in column_names]
        report.write(",".join([*header_cells, "n_frames"]) + "\n")
        # Values: looked up on `sim_info` in the same order as the header.
        value_names = [*sim_info.genealogy, *sim_info.attributes]
        value_cells = [f"{getattr(sim_info, name)}" for name in value_names]
        report.write(",".join([*value_cells, f"{n_frames}"]))


def bin_create(
    sim_name: str,
    edge_name: str,
    bin_size: float,
    lmin: float,
    lmax: float,
    save_to: str
) -> tuple[np.ndarray, np.ndarray]:
    """
    Builds the bin edges of a histogram and an empty histogram to go with
    them, and saves the edges to file.

    Parameters
    ----------
    sim_name: str
        Name of the simulation.
    edge_name: str
        Name of the variable for which the histogram is computed.
    bin_size : float
        Size of each bin.
    lmin : float
        Lower bound of the system in the direction of interest.
    lmax : float
        Upper bound of the system in the direction of interest.
    save_to : str
        Path prefix to which the edges are saved; the file is named
        `save_to + sim_name + '-' + edge_name + '.npy'`.

    Return
    ------
    bin_edges : numpy array of float
        The edges to pass into a histogram, generated with `numpy.arange`
        from `lmin` to `lmax + bin_size` (exclusive) in steps of `bin_size`.
        They are always saved to file.
    hist: array of int
        An empty (all-zero) histogram of dtype int16 with one entry per bin.
    """
    edges = np.arange(lmin, lmax + bin_size, bin_size)
    n_bins = len(edges) - 1
    empty_hist = np.zeros(n_bins, dtype=np.int16)
    edges_file = "".join((save_to, sim_name, '-', edge_name, '.npy'))
    np.save(edges_file, edges)
    return edges, empty_hist


def fixedsize_bins(
    sim_name: str,
    edge_name: str,
    bin_size: float,
    lmin: float,
    lmax: float,
    bin_type: str = 'ordinary',
    save_to: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Generates arrays of bins and histograms, ensuring that the `bin_size`
    is guaranteed. To achieve this, it extends the `lmin` and `lmax` limits.

    To-do List
    ----------
    1. Following the idea used:
    https://docs.mdanalysis.org/1.1.1/_modules/MDAnalysis/lib/util.html#fixedwidth_bins
    Makes input array-like so bins can be calculated for 1D data (then all
    parameters are simple floats) or nD data (then parameters are supplied
    as arrays, with each entry corresponding to one dimension).
    2. Eliminate the if-statement for the periodic_bin_edges.

    Parameters
    ----------
    sim_name: str
        Name of the simulation.
    edge_name: str
        Name of the variable for which the histogram is computed.
    bin_size : float
        Size of each bin; must be positive.
    lmin : float
        Lower bound of the system in the direction of interest.
    lmax : float
        Upper bound of the system in the direction of interest.
    bin_type: {'ordinary', 'nonnegative', 'periodic'}, default 'ordinary'
        The type of bin in a given direction in a given coordinate system:

        'ordinary'
            A bounded or unbounded coordinate such as any of the cartesian
            coordinates or the polar coordinate in the spherical coordinate
            system. For such coordinates, the `lmin` and `lmax` limits are
            equally extended to ensure the `bin_size`.

        'nonnegative'
            A nonnegative coordinate such as the r direction in the polar
            or spherical coordinate system. `lmin` is either 0.0 or a
            positive number smaller than `lmax`. The limits are first
            extended equally; if the extended lower limit is not positive,
            it is set to 0.0 and the whole excess is added to the upper
            limit so that the `bin_size` is still ensured.

        'periodic'
            A periodic coordinate such as the azimuthal direction in the
            spherical coordinate. It is assumed that 'period'=`lmax`-`lmin`.
            The bin edges are generated from `lmin` in steps of `bin_size`
            and a UserWarning is always emitted because the reported
            `n_bins` may exceed the actual number of bins.

    save_to : str, default None
        If not None, the path prefix to which `bin_edges` is saved as
        `save_to + sim_name + '-' + edge_name + '.npy'`.

    Return
    ------
    results : dict
        A dictionary with the following keys:

        'n_bins' : int
            Number of bins computed from the range and `bin_size`.
        'bin_edges' : numpy array of float
            The edges to pass into a histogram.
        'collector' : numpy array of int
            An empty (all-zero) histogram.
        'collector_std' : numpy array of int
            An empty (all-zero) array with the shape of 'collector'.
        'range' : tuple of float
            The (possibly extended) lower and upper limits.

    Raises
    ------
    ValueError
        If `lmin` is not smaller than `lmax`, or if `bin_size` is not
        positive.

    Reference:
    https://docs.mdanalysis.org/1.1.1/documentation_pages/lib/util.html#MDAnalysis.analysis.density.fixedwidth_bins
    """
    hist_collectors = 0
    bin_edges = 0
    bin_types = ['ordinary', 'nonnegative', 'periodic']
    if lmin >= lmax:
        raise ValueError('Boundaries are not sane: should be xmin < xmax.')
    if bin_size <= 0:
        raise ValueError("'bin_size' must be a positive number.")
    _delta = bin_size
    _lmin = lmin
    _lmax = lmax
    _length = _lmax - _lmin
    n_bins: int = 0
    if bin_type == 'ordinary':
        n_bins = int(np.ceil(_length / _delta))
        dl = 0.5 * (n_bins * _delta - _length)  # excess length
        # add half of the excess to each end:
        _lmin = _lmin - dl
        _lmax = _lmax + dl
        # create empty grid with the right dimensions (and get the edges)
        hist_collectors, bin_edges = np.histogram(
            np.zeros(1),
            bins=n_bins,
            range=(_lmin, _lmax)
        )
    elif bin_type == 'nonnegative':
        n_bins = int(np.ceil(_length / _delta))
        dl = 0.5 * (n_bins * _delta - _length)  # half of the excess length
        _lmin = _lmin - dl
        _lmax = _lmax + dl
        if _lmin <= 0.0:
            # The lower limit cannot go below zero, so the whole excess goes
            # to the upper end: the range [0, n_bins * bin_size] keeps every
            # bin exactly `bin_size` wide.
            _lmin = 0.0
            _lmax = n_bins * _delta
        hist_collectors, bin_edges = np.histogram(
            np.zeros(1),
            bins=n_bins,
            range=(_lmin, _lmax)
        )
    elif bin_type == 'periodic':  # Assuming that the _length=period:
        n_bins = int(np.ceil(_length / _delta))
        warnings.warn(
            f"Number of bins (n_bins='{n_bins}')"
            " is more than or equal to the actual number of bins in "
            f"'periodic' bin type because the 'period=lmax-min={_length}'"
            f"and delta='{_delta}'"
            ",not 'n_bins', are used to created 'bin_edges'.",
            UserWarning
            )
        bin_edges = np.arange(_lmin, _lmax + _delta, _delta)
        # `range` is ignored by numpy when `bins` is a sequence of edges.
        hist_collectors, bin_edges = np.histogram(
            np.zeros(1),
            bins=bin_edges,
            range=(_lmin, _lmax)
        )
    else:
        invalid_keyword(bin_type, bin_types)
    # np.histogram counted the dummy sample; reset the collectors to zero.
    hist_collectors = hist_collectors * 0
    hist_collectors_std = hist_collectors * 0
    if save_to is not None:
        np.save(save_to + sim_name + '-' + edge_name + '.npy', bin_edges)
    results = {
        'n_bins': n_bins,
        'bin_edges': bin_edges,
        'collector': hist_collectors,
        'collector_std': hist_collectors_std,
        'range': (_lmin, _lmax)
    }
    return results


def write_hists(
    hist_infos: Dict[str, Any],
    sim_name: str,
    save_to: str,
    std: bool = False
) -> None:
    """
    Saves one histogram per species per direction to file.

    Parameters
    ----------
    hist_infos : Dict[str, Any]
        A dict of dicts: the outer keys are directions, the inner keys are
        species, and each inner value is a dict holding the histogram under
        'collector' (and under 'collector_std' when `std` is used).
    sim_name: str
        The name of simulation file to which the `hist_infos` belongs.
    save_to: str
        The absolute/relative path of a directory to which outputs are saved.
    std : bool, default False
        If True, the 'collector_std' array of every histogram is also saved,
        in a file whose name has 'Std' inserted between the direction and
        the species.
    """
    for dir_, species_hists in hist_infos.items():
        # Common start of every file name written for this direction.
        stem = "".join((save_to, sim_name, '-', dir_))
        for species, hist in species_hists.items():
            np.save("".join((stem, species, '.npy')), hist['collector'])
            if std is not True:
                continue
            np.save(
                "".join((stem, 'Std', species, '.npy')),
                hist['collector_std']
            )

In [3]:
def adjusted_pair_distance(
    positions: np.ndarray,
    pbc: np.ndarray
) -> np.ndarray:
    """
    Returns the separation vector between two particles, component by
    component in the Cartesian coordinate system, after applying the minimum
    image convention (MIC) so that jumps across periodic boundaries are
    corrected.

    Parameters
    ----------
    positions : numpy.ndarray
        A (2, n_dim) array containing the coordinates of the two atoms.
        The first row corresponds to the first atom and the second row to
        the second atom; the result is "second minus first".

    pbc : numpy.ndarray
        An array indexed by dimension (0 for x, 1 for y, and 2 for z) whose
        values are the lengths of the simulation box in those dimensions.

    Returns
    -------
    np.ndarray
        A 1D numpy array containing the adjusted pair distances along each
        axis.

    Raises
    ------
    ValueError
        If `positions` does not contain exactly two atoms.
    """
    atom_count, _ = positions.shape
    inverse_box = 1 / pbc
    if atom_count != 2:
        raise ValueError("'adjusted_pair_distance' only works for two atoms.")

    # Work relative to the center of geometry of the pair.
    centered = positions - np.mean(positions, axis=0)

    # Raw separation (second atom minus first atom).
    raw = centered[1] - centered[0]
    raw_size = np.abs(raw)

    # A component no longer than half the box (|dr| <= L - |dr|) is kept as
    # is; otherwise the nearest periodic image is used.
    within_half_box = raw_size <= pbc - raw_size
    nearest_image = raw - pbc * np.around(inverse_box * raw)
    return np.where(within_half_box, raw, nearest_image)

In [None]:
# Scratch check of a hand-written minimum-image expression on a cubic box of
# side 25 with a sample separation vector.
lengths = np.array([25, 25, 25])
dr = np.array([13, 10, 24])

# Components below half the box length are kept; the others are replaced by
# sign(dr) * (box length - |dr|). The array is shown as the cell output.
np.where(dr < lengths / 2, dr, np.sign(dr) * (lengths - np.abs(dr)))

In [None]:
# Scratch check: element-wise sign of `dr`, which must already be defined by
# a previously executed cell.
np.sign(dr) 

In [29]:
def two_mon_dep_cub_bug(
    topology: str,
    trajectory: str,
    lineage: str,
    save_to: str = './',
    continuous: bool = False
) -> None:
    """Probes a `lineage` simulation of the 'bug' atom group in the cubic
    geometry and saves the resulting time series to file.

    For every probed frame, the radius of gyration of the 'bug' group and
    the x, y and z components of the adjusted pair distance between its two
    atoms are recorded. The four time series are saved as '.npy' files and a
    stamps report is written as a '.csv' file, all with names starting with
    `save_to` followed by the simulation name.

    Note
    ----
    In this project, coordinates are wrapped and unscaled in a
    trajectory or topology file; moreover, LAMMPS recenter is used to
    restrict the center of mass of "bug" (monomers) to the center of
    simulation box; and consequently, coordinates of all the particles in a
    trajectory or topology file is recentered to fulfill this constraint.

    In MDAnalysis, selections by `universe.select_atoms` always return an
    AtomGroup with atoms sorted according to their index in the topology.
    The pair distance below relies on this ordering of the two atoms.

    Parameters
    ----------
    topology: str
        Name of the topology file.
    trajectory: str
        Name of the trajectory file.
    lineage: {'segment', 'whole'}
        Type of the input file.
    save_to: str, default './'
        The absolute/relative path of a directory to which outputs are saved.
    continuous: bool, default False
        Whether a `trajectory` file is a part of a sequence of trajectory
        segments or not. If True, the last frame of the trajectory is not
        probed.
    """
    if lineage == 'segment' and continuous is False:
        warnings.warn(
            f"lineage is '{lineage}' and 'continuous' is '{continuous}. "
            f"Please ensure the '{trajectory}' "
            "is NOT part of a sequence of trajectories.",
            UserWarning
        )
    print("Setting the name of analyze file...")
    sim_info = TwoMonDep(trajectory, lineage, 'cubic', 'bug', 'atom')
    sim_name = "-".join((sim_info.lineage_name, sim_info.group))
    print(f"\n{sim_name} is analyzing...\n")
    # Real time between two consecutive frames: number of steps between
    # dumps times the integration step times the LJ time unit.
    # NOTE(review): the LJ time unit is conventionally sigma * sqrt(m / eps);
    # here the mass and the energy are multiplied -- confirm this is intended.
    time_unit = sim_info.dcrowd * np.sqrt(
        sim_info.mcrowd * sim_info.eps_others)
    sim_real_dt = sim_info.bdump * sim_info.dt * time_unit
    cell = mda.Universe(
        topology,
        trajectory,
        topology_format='DATA',
        format='LAMMPSDUMP',
        lammps_coordinate_convention='unscaled',
        atom_style="id type x y z",
        dt=sim_real_dt
    )
    # Box lengths along x, y and z; the box is a cube of side `lcube`.
    cell_pbc = np.array([sim_info.lcube] * 3)
    # A continuous segment is probed without its last frame.
    sliced_trj = cell.trajectory[0: -1] if continuous else cell.trajectory
    n_frames = cell.trajectory.n_frames - (1 if continuous else 0)
    # The 'bug' group: atoms of type 1.
    bug: mda.AtomGroup = cell.select_atoms('type 1')
    print(len(bug.atoms))
    # Collectors: one time series per measured quantity.
    gyr_t = np.zeros(n_frames)
    pair_t = {axis: np.zeros(n_frames) for axis in ('x', 'y', 'z')}
    for idx, _ in enumerate(sliced_trj):
        gyr_t[idx] = bug.radius_of_gyration()
        dr_ij = adjusted_pair_distance(bug.positions, cell_pbc)
        for component, axis in enumerate(('x', 'y', 'z')):
            pair_t[axis][idx] = dr_ij[component]
    # Saving the collectors to file.
    np.save("".join((save_to, sim_name, '-gyrTMon.npy')), gyr_t)
    for axis in ('x', 'y', 'z'):
        np.save(
            "".join((save_to, sim_name, '-', axis, 'TMon.npy')),
            pair_t[axis]
        )
    # Simulation stamps:
    outfile = "".join((save_to, sim_name, "-stamps.csv"))
    stamps_report(outfile, sim_info, n_frames)
    print('done.')

In [None]:
# analyzing bug files.

from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
import os

# Probe every whole "bug" topology/trajectory pair of the TwoMonDep
# simulations; the outputs of a pair go to a sub-directory of `save_to` named
# after the directory that holds the pair.
group = 'bug'
lineage = 'whole'
save_to = '/Users/amirhsi/research_data/TwoMonDep-probes/'
# Previously used input:
# "/Users/amirhsi_mini/trjs/epss5.0epsl5.0r10.5al5.0nl5ml125ns200ac1.0nc*lz77.0dt0.005bdump5000adump5000ens1ring/*.bug*"
path = "/Users/amirhsi/research_data/TwoMonDep-all_simulations/am*"
bug_pairs = organizer.sort_filenames(
    glob(f"{path}/am*{group}*"),
    fmts=[f".{group}.data", f".{group}.lammpstrj"],
)
for bug_topo, bug_trj in bug_pairs:
    # Name of the directory that contains the topology file.
    parent = os.path.basename(os.path.dirname(os.path.abspath(bug_topo)))
    print(parent)
    two_mon_dep_cub_bug(
        bug_topo,
        bug_trj,
        lineage,
        save_to="".join((save_to, parent, '/')),
        continuous=False,
    )

## Sumrule project
### Cylindrical
#### whole bug trjs

In [None]:
# analyzing bug files.

from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober


# Probe every whole "bug" topology/trajectory pair found under `path`.
group = 'bug'
lineage = 'whole'
save_to = './'
# Previously used input:
# "/Users/amirhsi_mini/trjs/epss5.0epsl5.0r10.5al5.0nl5ml125ns200ac1.0nc*lz77.0dt0.005bdump5000adump5000ens1ring/*.bug*"
path = "/Users/amirhsi/research_data/N*/N*"
bug_pairs = organizer.sort_filenames(
    glob(f"{path}/N*{group}*"),
    fmts=[f".{group}.data", f".{group}.lammpstrj"],
)
for bug_topo, bug_trj in bug_pairs:
    prober.sum_rule_bug_cyl(
        bug_topo, bug_trj, lineage, save_to=save_to, continuous=False
    )

#### segment bug trjs

In [None]:
# Probe the "bug" trajectory segments of one simulation directory against the
# first "bug" topology file found there.
# Recorded run time: 1 min and 45 s for ~30000 particles with one all trj.
# NOTE: relies on `glob`, `organizer`, `prober` and `SumRuleCyl` having been
# imported by a previously executed cell.
input_path = "/Users/amirhsi/Downloads/N2000epsilon5.0r15.5lz379.5sig6.0nc1779dt0.005bdump1000adump5000ens7"
group = 'bug'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
bug_pattern = f"{input_path}/N*{group}*"
# Every item returned by `sort_filenames` is indexed with [0] to obtain the
# trajectory path itself.
bug_trjs = [
    trj_entry[0]
    for trj_entry in organizer.sort_filenames(
        glob(bug_pattern), fmts=[f".{group}.lammpstrj"]
    )
]
bug_topo = organizer.sort_filenames(
    glob(bug_pattern), fmts=[f".{group}.data"]
)[0][0]
# It is assumed that the segments are numbered from 1 to max_segment_id.
max_segment_id = len(bug_trjs)
for bug_trj in bug_trjs:
    trj_info = SumRuleCyl(
        bug_trj, topo_lineage, 'cylindrical', group, 'linear'
    )
    # All the frames in the last segment are probed; every earlier segment is
    # passed `continuous=True`.
    segment_opts = (
        {} if trj_info.segment_id == max_segment_id else {'continuous': True}
    )
    prober.sum_rule_bug_cyl(
        bug_topo, bug_trj, lineage, save_to=save_to, **segment_opts
    )

#### segment all trjs: hist_1d and hist_2d

In [None]:
# Probe the "all" trajectory segments found under `parent` against the first
# "all" topology file found there.
# Recorded run time: 4 mins for ~4000 particles with one all trj.
# NOTE: relies on `glob`, `organizer`, `prober` and `SumRuleCyl` having been
# imported by a previously executed cell.
parent = "/Users/amirhsi_mini/research_data/trjs"
# Previously used input:
# "/Users/amirhsi/Downloads/N2000epsilon5.0r15.5lz379.5sig6.0nc1779dt0.005bdump1000adump5000ens7"
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
all_pattern = f"{parent}/N*{group}*"
# Every item returned by `sort_filenames` is indexed with [0] to obtain the
# trajectory path itself.
all_trjs = [
    trj_entry[0]
    for trj_entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f".{group}.lammpstrj"]
    )
]
print(all_trjs)
# The raw glob result is printed before the topology is singled out.
all_topo = glob(all_pattern)
print(all_topo)
all_topo = organizer.sort_filenames(
    all_topo, fmts=[f".{group}.data"]
)[0][0]
# It is assumed that the segments are numbered from 1 to max_segment_id.
max_segment_id = len(all_trjs)
for all_trj in all_trjs:
    trj_info = SumRuleCyl(
        all_trj, topo_lineage, 'cylindrical', group, 'linear'
    )
    # All the frames in the last segment are probed; every earlier segment is
    # passed `continuous=True`.
    segment_opts = (
        {} if trj_info.segment_id == max_segment_id else {'continuous': True}
    )
    prober.sum_rule_all_cyl(
        all_topo, all_trj, lineage, save_to=save_to, **segment_opts
    )

#### segment all trjs: hist_2d

In [None]:
%%time
# Timing note: 10 mins for N2000epsilon5.0r13.0lz259.0sig1.0nc133547dt0.005bdump1000adump5000ens1 2 all trjs
trjs_db = "/Users/amirhsi_mini/research_data/trjs/N2000epsilon5.0r13.0lz259.0sig1.0nc133547dt0.005bdump1000adump5000ens1"
# Alternative dataset:
#trjs_db = "/Users/amirhsi/Downloads/N2000epsilon5.0r15.5lz379.5sig6.0nc1779dt0.005bdump1000adump5000ens7"
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
all_pattern = f'{trjs_db}/N*{group}*'
# Trajectory segments: keep the first member of every sorted entry.
all_trjs = [
    entry[0]
    for entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f'.{group}.lammpstrj']
    )
]
print(all_trjs)
# The raw glob matches are printed before being narrowed to one topology.
all_topo = glob(all_pattern)
print(all_topo)
all_topo = organizer.sort_filenames(
    all_topo, fmts=[f'.{group}.data']
)[0][0]
max_segment_id = len(all_trjs)
# Analyzing all files; the segments are assumed to be numbered from 1 to
# max_segment_id.
for all_trj in all_trjs:
    trj_info = SumRuleCyl(
        all_trj, topo_lineage, 'cylindrical', group, 'linear'
    )
    # All the frames in the last segment are probed, so only the earlier
    # segments are passed `continuous=True`.
    is_last = trj_info.segment_id == max_segment_id
    extra = {} if is_last else {'continuous': True}
    prober.sum_rule_all_cyl_hist2d(
        all_topo, all_trj, lineage, save_to=save_to, **extra
    )

## SumRuleCubHeteroLinear

### whole bug trjs

In [None]:
# Analyzing `whole` bug files with the hetero-linear cubic prober.
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
group = 'bug'
lineage = 'whole'
save_to = './'
# Alternative dataset:
#macmini_path = "/Users/amirhsi_mini/trjs/epss5.0epsl5.0r10.5al5.0nl5ml125ns200ac1.0nc*lz77.0dt0.005bdump5000adump5000ens1ring/*.bug*"
macbookpro_path = "/Users/amirhsi/research_data/"
bug_suffixes = [f'.{group}.data', f'.{group}.lammpstrj']
bug_pairs = organizer.sort_filenames(
    glob(f'{macbookpro_path}/al*{group}*'),
    fmts=bug_suffixes
)
print(bug_pairs)
# Each sorted entry is unpacked as a (topology, trajectory) pair.
for bug_topo, bug_trj in bug_pairs:
    prober.sum_rule_hetero_linear_bug_cub(
        bug_topo, bug_trj, lineage, save_to=save_to
    )

## Trans-Foci project

### Cylindrical

#### whole bug trjs

In [None]:
# Analyzing `whole` bug files of the Trans-Foci cylindrical project.
# NOTE: `glob`, `organizer` and `prober` are not imported in this cell; it
# relies on an earlier cell having imported them.
group = 'bug'
# This name was previously misspelled as `linage`, so the loop below picked
# up whatever `lineage` an earlier cell had left behind (or raised NameError
# on a fresh kernel).
lineage = 'whole'
save_to = './'
# Alternative dataset:
#macmini_path = "/Users/amirhsi_mini/trjs/epss5.0epsl5.0r10.5al5.0nl5ml125ns200ac1.0nc*lz77.0dt0.005bdump5000adump5000ens1ring/*.bug*"
macbookpro_path = "/Users/amirhsi/Downloads/epss5epsl5r10.5al5nl5ml125ns400ac1nc27720lz77dt0.005bdump2000adump5000ens8"
bug_pairs = glob(macbookpro_path + '/eps*' + group + '*')
bug_pairs = organizer.sort_filenames(
    bug_pairs,
    fmts=['.' + group + '.data', '.' + group + '.lammpstrj']
)
# Each sorted entry is unpacked as a (topology, trajectory) pair.
for (bug_topo, bug_trj) in bug_pairs:
    prober.trans_foci_bug_cyl(
        bug_topo,
        bug_trj,
        lineage,
        save_to=save_to
    )

#### segment all trjs

In [None]:
# Timing note: 1 min and 45 s for ~30000 particles with one all trj
# NOTE(review): `TransFociCyl` is not imported in this cell; presumably an
# earlier cell provides it -- confirm before running on a fresh kernel.
# Alternative dataset:
#macbookpro_path = "/Users/amirhsi/Downloads/epss5epsl5r10.5al5nl5ml125ns400ac1nc27720lz77dt0.005bdump2000adump5000ens8"
macmini_path = '/Users/amirhsi_mini/research_data/test'
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
all_pattern = f'{macmini_path}/eps*{group}*'
# Trajectory segments: keep the first member of every sorted entry.
all_trjs = [
    entry[0]
    for entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f'.{group}.lammpstrj']
    )
]
# One topology is used for all segments: the first sorted '.data' match.
all_topo = organizer.sort_filenames(
    glob(all_pattern), fmts=[f'.{group}.data']
)[0][0]
max_segment_id = len(all_trjs)
# Analyzing all files; the segments are assumed to be numbered from 1 to
# max_segment_id.
for all_trj in all_trjs:
    trj_info = TransFociCyl(
        all_trj, topo_lineage, 'cylindrical', group, 'ring'
    )
    # All the frames in the last segment are probed, so only the earlier
    # segments are passed `continuous=True`.
    is_last = trj_info.segment_id == max_segment_id
    extra = {} if is_last else {'continuous': True}
    prober.trans_foci_all_cyl(
        all_topo, all_trj, lineage, save_to=save_to, **extra
    )

### Cubic

#### whole bug trjs

In [None]:
# Analyzing `whole` bug files of the Trans-Foci cubic project.
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
group = 'bug'
lineage = 'whole'
save_to = './'
# Alternative dataset:
#macmini_path = "/Users/amirhsi_mini/trjs/epss5.0epsl5.0r10.5al5.0nl5ml125ns200ac1.0nc*lz77.0dt0.005bdump5000adump5000ens1ring/*.bug*"
macbookpro_path = "/Users/amirhsi_mini/research_data/"
bug_suffixes = [f'.{group}.data', f'.{group}.lammpstrj']
bug_pairs = organizer.sort_filenames(
    glob(f'{macbookpro_path}/al*{group}*'),
    fmts=bug_suffixes
)
print(bug_pairs)
# Each sorted entry is unpacked as a (topology, trajectory) pair.
for bug_topo, bug_trj in bug_pairs:
    prober.trans_foci_bug_cub(
        bug_topo, bug_trj, lineage, save_to=save_to
    )

#### segment all trjs

In [None]:
%%time
# Timing note: 1 min and 45 s for ~30000 particles with one all trj
# NOTE(review): `TransFociCub` is not imported in this cell; presumably an
# earlier cell provides it -- confirm before running on a fresh kernel.
macbookpro_path = "/Users/amirhsi_mini/research_data/al1nl5ml1ns400ac1nc160392l36dt0.005bdump2000adump5000ens1.ring"
# Alternative dataset:
#macmini_path = '/Users/amirhsi_mini/research_data/test'
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = './'
all_pattern = f'{macbookpro_path}/al*{group}*'
# Gzipped trajectory segments: keep the first member of every sorted entry.
all_trjs = [
    entry[0]
    for entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f'.{group}.lammpstrj.gz']
    )
]
# One topology is used for all segments: the first sorted '.data' match.
all_topo = organizer.sort_filenames(
    glob(all_pattern), fmts=[f'.{group}.data']
)[0][0]
max_segment_id = len(all_trjs)
# Analyzing all files; the segments are assumed to be numbered from 1 to
# max_segment_id.
for all_trj in all_trjs:
    trj_info = TransFociCub(all_trj, topo_lineage, 'cubic', group, 'ring')
    # All the frames in the last segment are probed, so only the earlier
    # segments are passed `continuous=True`.
    is_last = trj_info.segment_id == max_segment_id
    extra = {} if is_last else {'continuous': True}
    prober.trans_foci_all_cub(
        all_topo, all_trj, lineage, save_to=save_to, **extra
    )

## HNS project
### Cubic

#### whole bug trjs

In [None]:
%%time
# Timing note: 3 min per nucleoid
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import HnsCub
# Analyzing `whole` nucleoid files.
group = 'nucleoid'
lineage = 'whole'
# Alternative output location:
#save_to = './'
save_to = '/Users/amirhsi_mini/research_data/probes/'
# Alternative datasets:
#probe_path = "/Users/amirhsi_mini/research_data/hns_cubic-trjs/N*/N*"
#probe_path = "../../Datasets/N200epshm29kbmm2nh48ac1nc95493l25dt0.005ndump2000adump5000ens1.ring"
#probe_path = "../../Datasets/TestTrajectories/N200epshm29kbmm2nh48ac1nc95493l25dt0.005ndump2000adump5000ens1.ring-truncated_for_tests-No_backup-Keep_it"
probe_path = "/Users/amirhsi_mini/research_data/trjs/N*"
nuc_suffixes = [f'.{group}.data', f'.{group}.lammpstrj']
nuc_pairs = organizer.sort_filenames(
    glob(f'{probe_path}/N*{group}*'),
    fmts=nuc_suffixes
)
# Each sorted entry is unpacked as a (topology, trajectory) pair.
for nuc_topo, nuc_trj in nuc_pairs:
    prober.hns_nucleoid_cub(
        nuc_topo, nuc_trj, lineage, save_to=save_to
    )

#### segment all trjs

In [None]:
%%time
# Timing note: 4 min the largest.
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import HnsCub
# Timing note: ~6 min for 3 trjs, each with 100 frames
probe_path = "/Users/amirhsi_mini/research_data/trjs/N*"
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
# Alternative output location:
#save_to = './'
save_to = '/Users/amirhsi_mini/research_data/probes/'
all_pattern = f'{probe_path}/N*{group}*'
# One topology is used for all segments: the first sorted '.data' match.
all_topo = organizer.sort_filenames(
    glob(all_pattern), fmts=[f'.{group}.data']
)[0][0]

# Trajectory segments: keep the first member of every sorted entry.
all_trjs = [
    entry[0]
    for entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f'.{group}.lammpstrj']
    )
]
max_segment_id = len(all_trjs)
# Analyzing all files; the segments are assumed to be numbered from 1 to
# max_segment_id.
for all_trj in all_trjs:
    trj_info = HnsCub(all_trj, topo_lineage, 'cubic', group, 'ring')
    # All the frames in the last segment are probed, so only the earlier
    # segments are passed `continuous=True`.
    is_last = trj_info.segment_id == max_segment_id
    extra = {} if is_last else {'continuous': True}
    prober.hns_all_cub(
        all_topo, all_trj, lineage, save_to=save_to, **extra
    )

### Cylindrical

#### whole bug trjs

In [None]:
%%time
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import HnsCyl
# Analyzing `whole` nucleoid files.
group = 'nucleoid'
lineage = 'whole'
# Alternative output location:
#save_to = './'
save_to = '/Users/amirhsi_mini/research_data/HnsCyl-trjs/'
probe_path = "/Users/amirhsi_mini/research_data/HnsCyl-trjs/N*"
# Alternative datasets:
#probe_path = "../../Datasets/N200kbmm2r4.5nh12ac2lz75nc552ens2.ring"
#probe_path = "../../Datasets/TestTrajectories/N200epshm29kbmm2nh48ac1nc95493l25dt0.005ndump2000adump5000ens1.ring-truncated_for_tests-No_backup-Keep_it"
nuc_suffixes = [f'.{group}.data', f'.{group}.lammpstrj']
nuc_pairs = organizer.sort_filenames(
    glob(f'{probe_path}/N*{group}*'),
    fmts=nuc_suffixes
)
print(nuc_pairs)
# Each sorted entry is unpacked as a (topology, trajectory) pair.
for nuc_topo, nuc_trj in nuc_pairs:
    prober.hns_nucleoid_cyl(
        nuc_topo, nuc_trj, lineage, save_to=save_to
    )

##### hns_nucleoid_cyl_dis_matrix

In [None]:
%%time
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import HnsCyl
# Analyzing `whole` nucleoid files with the `dis_matrix` prober.
group = 'nucleoid'
lineage = 'whole'
# Alternative output location:
#save_to = './'
save_to = '/Users/amirhsi_mini/research_data/HnsCyl-trjs/'
probe_path = "/Users/amirhsi_mini/research_data/HnsCyl-trjs/N*"
# Alternative datasets:
#probe_path = "../../Datasets/N200kbmm2r4.5nh12ac2lz75nc552ens2.ring"
#probe_path = "../../Datasets/TestTrajectories/N200epshm29kbmm2nh48ac1nc95493l25dt0.005ndump2000adump5000ens1.ring-truncated_for_tests-No_backup-Keep_it"
nuc_suffixes = [f'.{group}.data', f'.{group}.lammpstrj']
nuc_pairs = organizer.sort_filenames(
    glob(f'{probe_path}/N*{group}*'),
    fmts=nuc_suffixes
)
print(nuc_pairs)
# Each sorted entry is unpacked as a (topology, trajectory) pair.
for nuc_topo, nuc_trj in nuc_pairs:
    prober.hns_nucleoid_cyl_dis_matrix(
        nuc_topo, nuc_trj, lineage, save_to=save_to
    )

#### segment all trj

In [None]:
%%time
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import HnsCyl
# Timing note: ~6 min for 3 trjs, each with 100 frames
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = '/Users/amirhsi_mini/research_data/hns_cyl-trjs/'
probe_path = "/Users/amirhsi_mini/research_data/hns_cyl-trjs/N*"
all_pattern = f'{probe_path}/N*{group}*'
# Trajectory segments: keep the first member of every sorted entry.
all_trjs = [
    entry[0]
    for entry in organizer.sort_filenames(
        glob(all_pattern), fmts=[f'.{group}.lammpstrj']
    )
]
# One topology is used for all segments: the first sorted '.data' match.
all_topo = organizer.sort_filenames(
    glob(all_pattern), fmts=[f'.{group}.data']
)[0][0]
max_segment_id = len(all_trjs)
# Analyzing all files; the segments are assumed to be numbered from 1 to
# max_segment_id.

for all_trj in all_trjs:
    print(all_trj)
    trj_info = HnsCyl(all_trj, topo_lineage, 'cylindrical', group, 'ring')
    # All the frames in the last segment are probed, so only the earlier
    # segments are passed `continuous=True`.
    is_last = trj_info.segment_id == max_segment_id
    extra = {} if is_last else {'continuous': True}
    prober.hns_all_cyl(
        all_topo, all_trj, lineage, save_to=save_to, **extra
    )

##### Loop on all topos

In [None]:
%%time
import os
from glob import glob
from polyphys.manage import organizer
from polyphys.probe import prober
from polyphys.manage.parser import HnsCyl
# Timing note: ~6 min for 3 trjs, each with 100 frames
# Probe the `all` trajectory segments of every simulation directory matched
# by `probe_path`, one topology (i.e. one directory) at a time.
group = 'all'
topo_lineage = 'whole'
lineage = 'segment'
save_to = '/Users/amirhsi_mini/research_data/HnsCyl-trjs/'
probe_path = "/Users/amirhsi_mini/research_data/HnsCyl-trjs/N*"
topo_matches = glob(probe_path + '/N*' + group + '*')
all_topos = organizer.sort_filenames(
    topo_matches, fmts=['.' + group + '.data']
)
# Keep the first member of every sorted entry: one topology per entry.
all_topos = [topo_entry[0] for topo_entry in all_topos]
# Analyzing all files; within each directory the segments are assumed to be
# numbered from 1 to max_segment_id.
for all_topo in all_topos:
    print(all_topo)
    # Look for the trajectories next to the topology file itself. The
    # directory used to be rebuilt from the topology *file name* and
    # `probe_path[:-2]`, which produced '//' in the paths and broke whenever
    # `probe_path` did not end in exactly 'N*'.
    sim_dir = os.path.dirname(all_topo)
    all_trjs = glob(os.path.join(sim_dir, 'N*' + group + '*'))
    all_trjs = organizer.sort_filenames(
        all_trjs,
        fmts=['.' + group + '.lammpstrj']
    )
    all_trjs = [trj_entry[0] for trj_entry in all_trjs]
    max_segment_id = len(all_trjs)
    for all_trj in all_trjs:
        trj_info = HnsCyl(all_trj, topo_lineage, 'cylindrical', group, 'ring')
        # All the frames in the last segment are probed, so only the earlier
        # segments are passed `continuous=True`.
        if trj_info.segment_id == max_segment_id:
            prober.hns_all_cyl(all_topo, all_trj, lineage, save_to=save_to)
        else:
            prober.hns_all_cyl(
                all_topo, all_trj, lineage, save_to=save_to, continuous=True
            )

In [None]:
import MDAnalysis as mda

In [None]:
# Display the version string of the MDAnalysis package imported as `mda`
# (the import lives in the previous cell).
mda.__version__