# Example WE Notebook for mPHAT

This notebook walks through the steps for running the full mPHAT analysis on a weighted ensemble (WE) simulation. See our [Sphinx documentation](https://mphat.readthedocs.io) for the most up-to-date information about each function.

By Jeremy Leung
Last updated: Apr 13th, 2023

## Common cells

In [None]:
# Imports for all steps
import argparse
from mphat import discretize, extract, match

## Discretize

In [None]:
# Settings consumed by the `discretize` step, collected into a Namespace.
discretize_args = argparse.Namespace(**{
    # Input data for state assignment (something like 'dihedral.npy').
    'input_name': 'west.h5',
    # File name the state assignment is written to.
    'output_name': 'assign.h5',
    # Assign function that dictates how states are assigned.
    'assign_func': 'default_assign',
    # Name of the input HDF5 file (e.g., west.h5).
    'west_name': 'west.h5',
    # Name of the output assign.h5 file.
    'assign_name': 'ANALYSIS/TEST/assign.h5',
    # The west.cfg file.
    'rcfile': 'west.cfg',
    # Nested namespace: these are the arguments for w_assign.
    'assign_args': argparse.Namespace(**{
        'verbosity': 'verbose',  # Verbose or debug.
        'rcfile': "west.cfg",  # west.cfg
        'max_queue_length': None,
        'we_h5filename': 'west.h5',  # Path to west.h5.
        'construct_dataset': None,  # Set if a custom auxiliary dataset is needed.
        'dsspecs': None,
        'output': 'assign.h5',  # Output file.
        'subsample': None,
        'config_from_file': True,  # Read the configuration from rcfile.
        'scheme': 'TEST',  # Scheme name.
    }),
})

In [None]:
# Run the `discretize` step, passing it the `discretize_args` Namespace
# defined in the cell above.
discretize.main(discretize_args)

## Extract

In [None]:
# Settings consumed by the `extract` step, collected into a Namespace.
extract_args = argparse.Namespace(**{
    # Name of the input HDF5 file (e.g., west.h5).
    'west_name': 'west.h5',
    # Name of the input assign.h5 file.
    # NOTE(review): the discretize cell writes to 'ANALYSIS/TEST/assign.h5';
    # confirm this path points at the assignment file you intend to analyze.
    'assign_name': 'ANALYSIS/C7_EQ/assign.h5',
    # Index of the source state as defined in assign.h5.
    'source_state_num': 0,
    # Index of the target state as defined in assign.h5.
    'target_state_num': 1,
    # First iteration to analyze (inclusive).
    'first_iter': 1,
    # Last iteration to analyze (inclusive). 0 implies all labeled iterations are analyzed.
    'last_iter': 200,
    # Analyze each successful trajectory up until its basis state.
    'trace_basis': True,
    # Output trajectory files into `out_dir`. Will take much longer.
    'out_traj': False,
    # Extension of the segment files. Defaults to `seg{out_traj_ext}`.
    'out_traj_ext': '.nc',
    # Extension of the restart files. Defaults to `seg{out_state_ext}`.
    'out_state_ext': '.ncrst',
    # Name of the parameter file, relative to `$WEST_SIM_ROOT/common_files`.
    'out_top': 'system.prmtop',
    # Directory the trajectories are written to.
    'out_dir': 'succ_traj',
    # Enable if trajectories are saved with the HDF5 Framework in WESTPA.
    'hdf5': False,
    # Zero out the weights of all segments that are not part of a successful trajectory.
    'rewrite_weights': False,
    # Output the pcoord into `output.pickle`.
    'pcoord': True,
    # Additional auxiliary data to save into `output.pickle`.
    'auxdata': ['phi', 'psi'],
    # Enable Ray.
    'use_ray': False,
    # Number of Ray threads/actors to use. Defaults to 0, which will use all auto-detected resources.
    'threads': 0,
})

In [None]:
# Run the `extract` step, passing it the `extract_args` Namespace
# defined in the cell above.
extract.main(extract_args)

## Pattern Match

In [None]:
# Settings consumed by the `match` (pattern matching) step, collected into a Namespace.
pattern_args = argparse.Namespace(**{
    # Input file name of the pickle from `extract.py`.
    'input_pickle': 'succ_traj/output.pickle',
    # Name of the input HDF5 file (e.g., west.h5).
    # NOTE(review): the earlier cells use 'west.h5'; confirm 'multi.h5' is intended here.
    'west_name': 'multi.h5',
    # Name of the input assign.h5 file.
    # NOTE(review): differs from the assign.h5 paths used in the earlier cells; confirm.
    'assign_name': 'ANALYSIS/ALL/assign.h5',
    # Enable to remake the distance matrix.
    'dmatrix_remake': True,
    # If dmatrix_remake is False, load this file instead. Assumed located in {out_dir}.
    'dmatrix_save': 'distmap.npy',
    # Threshold for the dendrogram.
    'dendrogram_threshold': 0.5,
    # Show the dendrogram using plt.show().
    'dendrogram_show': True,
    # Output directory for the distance matrix.
    'out_dir': 'succ_traj',
    # Output path for the cluster labels.
    'cl_output': 'succ_traj/cluster_labels.npy',
    # Pattern used to name the cluster files.
    'file_pattern': 'west_succ_c{}.h5',
    # Cluster index to output; None means all clusters.
    'clusters': None,
    # Reassign method. Could be a module to be loaded.
    'reassign_method': 'reassign_identity',
})

In [None]:
# Run the `match` step, passing it the `pattern_args` Namespace defined in
# the cell above. (No `match_args` variable exists in this notebook, so
# referencing it would raise a NameError on a fresh kernel.)
match.main(pattern_args)