In [5]:
import pandas as pd
import os
import subprocess

## Check out the commands

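What are the commands that we're running? Each row of `data/dms-viz/datasets.csv` holds the arguments for one `configure-dms-viz format` run.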

In [6]:
# Load the table of datasets and preview its first five rows.
commands = pd.read_csv(filepath_or_buffer='data/dms-viz/datasets.csv')
commands.head(n=5)

Unnamed: 0,input,sitemap,name,metric,metric_name,structure,included_chains,excluded_chains,title,exclude_amino_acids
0,data/dms-viz/input/ZIKV_NS2B_NS3_DMS.csv,data/dms-viz/sitemap/5GJ4_sitemap.csv,ZIKV NS2B-NS3 (Open),log2effect,Log2(Effect),5GJ4,polymer,none,ZIKV NS2B-NS3 (Open) DMS,*
1,data/dms-viz/input/ZIKV_NS2B_NS3_DMS.csv,data/dms-viz/sitemap/5LC0_sitemap.csv,ZIKV NS2B-NS3 (Closed),log2effect,Log2(Effect),5LC0,polymer,none,ZIKV NS2B-NS3 (Closed) DMS,*


## Run `configure-dms-viz`

Run the `configure-dms-viz format` command for each structure that we want to visualize, then merge the resulting JSON files into a single file with `configure-dms-viz join`.

In [7]:

def create_viz_json(input_df, sitemap_df, output_path, **kwargs):
    """
    Creates a visualization JSON file for a given experiment/dataset.

    Runs ``configure-dms-viz format`` as a subprocess. The command is passed
    as an argument list (no shell), so values containing spaces, quotes,
    ``$``, backticks or other shell metacharacters reach the CLI unchanged.

    Parameters
    ----------
    input_df : str
        Path to the input dataframe CSV (--input).
    sitemap_df : str
        Path to the sitemap dataframe CSV (--sitemap).
    output_path : str
        Path to the output path JSON (--output).
    **kwargs
        Extra options forwarded to the CLI. Each ``key=value`` pair is
        appended as ``--key value``; the key is used verbatim and the value
        is converted with ``str()``.

    Returns
    -------
    None
        Executes a subprocess command and does not return any value.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status (``check=True``).
    """
    command = [
        "configure-dms-viz",
        "format",
        "--input",
        str(input_df),
        "--sitemap",
        str(sitemap_df),
        "--output",
        str(output_path),
    ]
    for key, value in kwargs.items():
        command += [f"--{key}", str(value)]

    # No shell=True: each list element is delivered as exactly one argument,
    # so no quoting/escaping of the values is needed.
    subprocess.run(command, check=True)


def combine_jsons(input_files, output_file, description_file):
    """
    Combines multiple JSON files into a single file.

    Runs ``configure-dms-viz join`` as a subprocess. The command is passed as
    an argument list (no shell), so paths containing spaces, quotes or other
    shell metacharacters reach the CLI unchanged.

    Parameters
    ----------
    input_files : list of str
        List of paths to the input JSON files. They are joined with ``", "``
        and passed as a single ``--input`` value.
    output_file : str
        Path where the combined JSON should be saved (--output).
    description_file : str
        Path passed to the CLI as ``--description``.

    Returns
    -------
    None
        Executes a subprocess command and does not return any value.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status (``check=True``).
    """
    command = [
        "configure-dms-viz",
        "join",
        "--input",
        ", ".join(input_files),
        "--output",
        str(output_file),
        "--description",
        str(description_file),
    ]

    # No shell=True: each list element is delivered as exactly one argument,
    # so no quoting/escaping of the paths is needed.
    subprocess.run(command, check=True)

In [8]:
# Generate JSONs for each command
output_dir = "data/dms-viz/output"
# Create the output directory up front so this cell does not depend on it
# already existing.
os.makedirs(output_dir, exist_ok=True)

viz_jsons = []
for row in commands.itertuples():
    # Every column except the input/sitemap paths (and the tuple's Index) is
    # forwarded as a CLI option, with underscores turned into hyphens
    # (e.g. `metric_name` -> `--metric-name`).
    # Empty CSV cells are read by pandas as NaN; skip them instead of passing
    # the literal string "nan" as an option value.
    arguments = {
        key.replace("_", "-"): value
        for key, value in row._asdict().items()
        if key not in ["input", "sitemap", "Index"] and pd.notna(value)
    }
    output_path = f"{output_dir}/{row.name}.json"
    create_viz_json(row.input, row.sitemap, output_path, **arguments)
    viz_jsons.append(output_path)

# Combine JSONs into a single file
combine_jsons(viz_jsons, f"{output_dir}/ZIKV-NS2B-NS3-DMS.json", "data/dms-viz/README.md")


Formatting data for visualization using the 'log2effect' column from 'data/dms-viz/input/ZIKV_NS2B_NS3_DMS.csv'...

Using sitemap from 'data/dms-viz/sitemap/5GJ4_sitemap.csv'.
About 63.31% of the wildtype residues in the data match the corresponding residues in the structure.
About 36.36% of the data sites are missing from the structure.

Success! The visualization JSON was written to 'data/dms-viz/output/ZIKV NS2B-NS3 (Open).json'

Formatting data for visualization using the 'log2effect' column from 'data/dms-viz/input/ZIKV_NS2B_NS3_DMS.csv'...

Using sitemap from 'data/dms-viz/sitemap/5LC0_sitemap.csv'.
About 61.36% of the wildtype residues in the data match the corresponding residues in the structure.
About 37.66% of the data sites are missing from the structure.

Success! The visualization JSON was written to 'data/dms-viz/output/ZIKV NS2B-NS3 (Closed).json'

Success! 2 JSON files were merged and saved to 'data/dms-viz/output/ZIKV-NS2B-NS3-DMS.json'
