# Example WE Notebook for lpath

This notebook walks through the complete lpath analysis of a weighted ensemble (WE) simulation, one step at a time (discretize, extract, match) and then all at once. Check our [Sphinx documentation](https://lpath.readthedocs.io) for more up-to-date information about each function.

By Jeremy Leung
Last updated: May 15th, 2023

## Common cells

In [1]:
# Imports for all steps
import argparse
from lpath import discretize, extract, match, lpath

  from .autonotebook import tqdm as notebook_tqdm


## Discretize

In [8]:
# Arguments for the `discretize` step.
# For a WE simulation this step runs w_assign; `assign_args` holds the w_assign options.
discretize_args = argparse.Namespace(
    # Common and I/O
    we=True,  # Analyzing a WE simulation
    stats=True,  # Output results statistics
    debug=False,  # Debug mode
    out_dir='succ_traj',  # Name of directory to output the trajectories
    stride=1,  # Number of frames to output per WE tau interval
    west_name='west.h5',  # Name of input HDF5 file (e.g., west.h5)
    assign_name='ANALYSIS/C7_EQ/assign.h5',  # Name of the assign.h5 file; used as the w_assign output file name
    rcfile='west.cfg',  # west.cfg file
    input_name='west.h5',  # Input data for state assignment. Ignored for WE.
    extract_input='assign.h5',  # Output file name for the state assignment. Ignored for WE.
    assign_func='default_assign',  # Assign function that dictates how to assign states
    assign_args=argparse.Namespace(  # These are arguments for w_assign
        verbosity='verbose',  # Verbose or debug
        rcfile='west.cfg',  # west.cfg. Matches rcfile above.
        max_queue_length=None,
        we_h5filename='west.h5',  # west.h5 path. Should match west_name
        construct_dataset=None,  # If you need some custom auxiliary dataset
        dsspecs=None,
        # Kept identical to `assign_name`; a mismatching value is replaced by
        # `assign_name` at run time (lpath logs an INFO message when it does so).
        output='ANALYSIS/C7_EQ/assign.h5',  # Output file. Should match assign_name.
        subsample=None,
        config_from_file=True,  # Read config from rcfile
        scheme='TEST',  # Scheme name
    ),
)

In [9]:
# Run the `discretize` step with the arguments defined in the cell above.
# The file named by `west_name` must exist in the working directory; the
# recorded output shows the FileNotFoundError raised when 'west.h5' is missing.
discretize.main(discretize_args)

lpath.discretize — INFO — Replaced parameter output file name with ``assign_name``


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'west.h5', errno = 2, error message = 'No such file or directory', flags = 1, o_flags = 2)

## Extract

In [12]:
# Argument namespace consumed by the `extract` step.
# Written as a mapping unpacked into the Namespace; every key becomes an attribute.
extract_args = argparse.Namespace(**{
    # ---- Common and I/O ----
    'we': True,            # Input is a WE simulation
    'stats': True,         # Report statistics of the results
    'debug': False,        # Debug mode switch
    'out_dir': 'succ_traj',  # Directory the trajectories are written to
    'stride': 1,           # Frames to output per WE tau interval
    'exclude_short': 0,    # Trajectories shorter than this are excluded during matching; 0 keeps all
    'west_name': 'west.h5',  # Input HDF5 file (e.g., west.h5)
    'assign_name': 'ANALYSIS/C7_EQ/assign.h5',  # Input assign.h5 file
    'rcfile': 'west.cfg',  # west.cfg file

    # ---- Extract-specific options ----
    'source_state_num': 0,  # Source state index, as defined in assign.h5
    'target_state_num': 1,  # Target state index, as defined in assign.h5
    'first_iter': 1,        # First iteration analyzed (inclusive)
    'last_iter': 200,       # Last iteration analyzed (inclusive); 0 means all labeled iterations
    'trace_basis': True,    # Trace each successful trajectory back to its basis state
    'out_traj': False,      # Write trajectory files into `out_dir` (takes much longer)
    'out_traj_ext': '.nc',  # Segment file extension. Defaults to `seg{out_traj_ext}`.
    'out_state_ext': '.ncrst',  # Restart file extension. Defaults to `seg{out_state_ext}`.
    'out_top': 'system.prmtop',  # Parameter file, relative to `$WEST_SIM_ROOT/common_files`
    'extract_output': 'succ_traj/output.pickle',  # Pickle file written by this step
    'hdf5': False,          # Enable if trajectories were saved with WESTPA's HDF5 Framework
    'rewrite_weights': False,  # Zero out the weights of segments not in a successful trajectory
    'pcoord': True,         # Store the pcoord in `output.pickle`
    'auxdata': ['phi', 'psi'],  # Extra auxiliary datasets stored in `output.pickle`
    'use_ray': False,       # Enable Ray
    'threads': 0,           # Ray threads/actors to use; 0 (default) will use all auto-detected resources
})

In [13]:
# Run the `extract` step with the arguments defined in the cell above.
# Reads the file named by `west_name`; the recorded output shows the
# FileNotFoundError raised when 'west.h5' is missing.
extract.main(extract_args)

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'west.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

## Pattern Match

In [22]:
# Arguments for the `match` step.
# This step consumes the pickle written by `extract` (`extract_output`) and
# writes the reassigned pathways to `output_pickle`.
pattern_args = argparse.Namespace(
    we=True,  # Analyzing a WE simulation
    stats=True,  # Output results statistics
    debug=False,  # Debug mode (same common option as in the other steps)
    out_dir='succ_traj',  # Name of directory to output the trajectories
    extract_output='succ_traj/output.pickle',  # Input file name of the pickle from `lpath.extract`
    output_pickle='succ_traj/pathways.pickle',  # Output file name of the new reassigned pathways from `lpath.match`
    west_name='west.h5',  # Name of input HDF5 file (e.g., west.h5)
    # Same assign.h5 as used by the `discretize` and `extract` cells, so the
    # three steps of this notebook operate on one consistent state assignment.
    assign_name='ANALYSIS/C7_EQ/assign.h5',  # Name of input assign.h5 file
    reassign_method='reassign_identity',  # Reassign method. Could be a module to be loaded
    match_metric='longest_common_subsequence',  # Use the longest common subsequence metric.
    match_vanilla=False,  # Whether to use the metric with a correction term
    dmatrix_remake=True,  # Enable to remake the distance matrix. If false, will proceed to load `dmatrix_save`.
    dmatrix_save='distmap.npy',  # If dmatrix_remake is False, load this file instead. Assumed located in {out_dir}.
    dmatrix_parallel=-1,  # Number of jobs to submit for distance matrix calculation. Set to -1 to use everything.
    dendrogram_threshold=0.5,  # Threshold for the Dendrogram
    dendrogram_show=True,  # Show the Dendrogram using plt.show()
    clusters=None,  # Cluster index to output... otherwise None --> All
    cl_output='succ_traj/cluster_labels.npy',  # Output path for cluster labels
    export_h5=True,  # Export H5 files
    file_pattern='west_succ_c{}.h5',  # Pattern to name cluster files
)

In [23]:
# Run the `match` step with the arguments defined in the cell above.
# Needs the pickle named by `extract_output`; the recorded output shows the
# FileNotFoundError raised when 'succ_traj/output.pickle' does not exist.
match.main(pattern_args)

FileNotFoundError: succ_traj/output.pickle not found.

## Run All

In [25]:
# For calling all steps directly. Note all parameters are specified manually here.
# `argparse` and `lpath` come from the imports cell at the top of the notebook.
all_args = argparse.Namespace(
    # Common Parameters
    out_dir="succ_traj",  # Name of directory to output the trajectories.
    debug=False,  # Debug mode
    stats=True,  # Output results statistics (same value as in the per-step cells)
    stride=1,  # Number of frames to output per WE tau interval (same value as in the per-step cells)
    west_name='west.h5',  # Name of input HDF5 file (e.g., west.h5)
    assign_name='ANALYSIS/TEST/assign.h5',  # Name of output assign.h5 file
    rcfile='west.cfg',  # west.cfg file
    we=True,  # Analyzing a WE simulation.

    # Discretize Parameters
    input_name='dihedral.npy',  # Input data for state assignment. Something like 'dihedral.npy'.
    extract_input='discretized.npy',  # Output file name for the state assignment.
    assign_func='assign_func',  # Assign function that dictates how to assign states
    assign_args=argparse.Namespace(  # These are arguments for w_assign
        verbosity='verbose',  # Verbose or debug
        rcfile='west.cfg',  # west.cfg
        max_queue_length=None,
        we_h5filename='west.h5',  # west.h5 path
        construct_dataset=None,  # If you need some custom auxiliary dataset
        dsspecs=None,
        output='assign.h5',  # Output file
        subsample=None,
        config_from_file=True,  # Read config from rcfile
        scheme='TEST',  # Scheme name
    ),

    # Extract Parameters
    # Note west_name and assign_name are repeated from above and removed
    source_state_num=0,  # Index of the source state as defined in assign.h5.
    target_state_num=1,  # Index of the target state as defined in assign.h5.
    first_iter=1,  # First iteration to analyze. Inclusive
    last_iter=200,  # Last iteration to analyze. Inclusive. 0 implies it will analyze all labeled iterations.
    trace_basis=True,  # Option to analyze each successful trajectory up till its basis state.
    out_traj=False,  # Option to output trajectory files into `out_dir`. Will take much longer.
    out_traj_ext=".nc",  # Extension of the segment files. Defaults to `seg{out_traj_ext}`.
    out_state_ext=".ncrst",  # Extension of the restart files. Defaults to `seg{out_state_ext}`.
    out_top="system.prmtop",  # Name of the parameter file. Name relative to `$WEST_SIM_ROOT/common_files`.
    hdf5=False,  # Enable if trajectories are saved with the HDF5 Framework in WESTPA.
    rewrite_weights=False,  # Option to zero out the weights of all segments that are not a successful trajectory.
    pcoord=True,  # Option to output the pcoord into the `output.pickle`.
    auxdata=['phi', 'psi'],  # Additional auxiliary data to save into `output.pickle`.
    use_ray=False,  # Enable Ray.
    threads=0,  # How many Ray threads/actors to use. Defaults to 0, which will use all auto-detected resources.
    # Must differ from `match_output` below: the match step reads this pickle
    # and writes its own, so sharing one path would overwrite the extract output.
    extract_output="succ_traj/output.pickle",  # Name of the pickle file to be outputted.
    exclude_short=0,  # Exclude trajectories shorter than provided value during matching. 0 excludes none.

    # Match Parameters
    # Note west_name, assign_name and out_dir are repeated from above and removed
    match_output='succ_traj/pathways.pickle',  # Output file name of the new reassigned pathways from `lpath.match`
    reassign_method='reassign_identity',  # Reassign method. Could be a module to be loaded
    match_metric='longest_common_subsequence',  # Use the longest common subsequence metric.
    match_vanilla=False,  # Whether to use the metric with a correction term
    dmatrix_remake=True,  # Enable to remake the distance matrix. If false, will proceed to load `dmatrix_save`.
    dmatrix_save='distmap.npy',  # If dmatrix_remake is False, load this file instead. Assumed located in {out_dir}.
    dmatrix_parallel=-1,  # Number of jobs to submit for distance matrix calculation. Set to -1 to use everything.
    dendrogram_threshold=0.5,  # Threshold for the Dendrogram
    dendrogram_show=True,  # Show the Dendrogram using plt.show()
    clusters=None,  # Cluster index to output... otherwise None --> All
    cl_output='succ_traj/cluster_labels.npy',  # Output path for cluster labels
    export_h5=True,  # Export H5 files
    file_pattern='west_succ_c{}.h5',  # Pattern to name cluster files
)

In [26]:
# Call all steps in one go with the combined arguments from the cell above.
# `lpath` here is the module imported in the imports cell at the top of the notebook.
lpath.main(all_args)

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'west.h5', errno = 2, error message = 'No such file or directory', flags = 1, o_flags = 2)