In [1]:
import modules.secret
from modules.Graph import Graph
import modules.ScoreDistribution as sc_dist
from modules.Style import Style
import os
from typing import Dict

# Location passed as `scoring_path` when preparing the score-distribution traces.
SCORING_PATH = modules.secret.PRODUCTION_RUNS_PATH + "5. Scoring/scored_dataframes/"

# Export directories live under ./exports relative to the current working directory.
# The trailing "" component makes os.path.join end each path with a separator.
_EXPORTS_ROOT = os.path.join(os.getcwd(), "exports")
LIGAND_EXPORT_PATH = os.path.join(_EXPORTS_ROOT, "ligand_distribution", "")
CLUSTER_EXPORT_PATH = os.path.join(_EXPORTS_ROOT, "cluster_distribution", "")

# Ligand Score Distribution


## No Filters


In [5]:
# Ligand-level score distributions for the unfiltered "model7" runs: one metrics
# table and one figure are produced per entry in `configs`.
# Each entry is (descriptors_type, n_clusters, n_iters, channel, xshift, yshift);
# xshift / yshift are forwarded to the plot as threshold_xshift / threshold_yshift.
configs = [
    ("mix", 100, 5, "softsub", 5, 0),
    ("mix", 100, 5, "softdiv", 5, 0),
    ("mix", 100, 5, "linear", 5, 0),
    ("mix", 100, 5, "diffusion", 5, 0),
    ("mix", 10, 5, "softsub", 5, 0),
    ("mqn", 100, 5, "softsub", 5, 0),
    ("mqn", 100, 5, "diffusion", 5, 0),
    ("mqn", 10, 5, "softsub", 5, 0),
    ("", None, 5, "random", 5, 0),
]
clusterize = False
# Defined locally so the cell runs on a fresh kernel. Previously `agg_mode` was
# only bound by the loops of the cluster cells, so this cell raised NameError
# unless one of them had been executed first. Only used when `clusterize` is True.
agg_mode = "mean"
prefix = "model7"
threshold = 11
export_path = CLUSTER_EXPORT_PATH if clusterize else LIGAND_EXPORT_PATH
mode = f"clusters_{agg_mode}" if clusterize else "ligands"
# The aggregation mode is forwarded only when clusterizing, matching the
# ligand-level "Filter Runs" cell, which calls the same function without it.
aggregation_kwargs = {"aggregation_mode": agg_mode} if clusterize else {}

for descriptors_type, n_clusters, n_iters, channel, xshift, yshift in configs:
    fnames, labels = sc_dist.prepare_cycle_config(
        prefix=prefix,
        descriptors_type=descriptors_type,
        n_clusters=n_clusters,
        n_iters=n_iters,
        channel=channel,
    )
    traces, metrics = sc_dist.prepare_score_distribution_traces(
        scoring_path=SCORING_PATH,
        fnames=fnames,
        labels=labels,
        colors=["dark_grey", "teal", "purple", "orange", "red", "green"],
        clusterize=clusterize,
        threshold=threshold,
        bin_step=1 if clusterize else 2,
        trace_opacity=0.7,
        show_bars=False,
        show_density=True,
        density_line_opacity=0.8,
        density_fill="tozeroy",
        density_fill_opacity=0.2,
        **aggregation_kwargs,
    )
    sc_dist.export_metrics_to_table(
        data_list=metrics,
        threshold=threshold,
        save_path=export_path,
        save_fname=f"{mode}_{prefix}_{descriptors_type}_k{n_clusters}_{channel}_al{n_iters}",
    )
    fig = sc_dist.plot_hist_density(
        traces=traces,
        threshold=threshold,
        threshold_xshift=xshift,
        threshold_yshift=yshift,
    )
    graph = Graph()
    graph.update_parameters(
        dict(
            xaxis_title="Cluster Score"
            if clusterize
            else "Attractive Interaction Score",
            yaxis_title="Relative Frequency",
            title_size=36,
            axis_title_size=28,
            tick_font_size=24,
            yaxis_standoff=10,
            title_ycoord=0.97,
            t_margin=60,
            annotation_size=24,
            annotation_color=Style.biscale["dark_grey"][0](0.6),
            width=1280,
            height=600,
            yrange=[0, 0.1],
            xrange=[0, 50],
        )
    )
    graph.style_figure(fig)
    # Legend placement is applied after styling so it is not overridden by it.
    # NOTE(review): assumes style_figure may touch the legend - confirm.
    fig.update_layout(
        legend=dict(
            x=0.9,
            y=0.55,
            xanchor="left",
            yanchor="middle",
            font=dict(size=32),
        )
    )
    graph.save_figure(
        figure=fig,
        path=export_path,
        fname=f"{descriptors_type}_k{n_clusters}_{channel}_al{n_iters}",
        svg=True,
    )

## Filter Runs


In [3]:
# Ligand-level score distributions for the filtered runs.
# Each entry is (prefix, protein, filters, channel, xshift, yshift);
# xshift / yshift are forwarded to the plot as threshold_xshift / threshold_yshift.
configs = [
    # Disabled runs, kept for reference:
    # ("model7_hnh_admet", "HNH", "ADMET", "softsub", 5, 0),
    # ("model7_hnh_admetfg", "HNH", "ADMET+FGs", "softsub", 5, 0),
    # ("model2_hnh", "HNH", "ADMET+FGs", "admetfg_softsub", 5, 0),
    # ("model7_1iep_admet", "1IEP", "ADMET", "softsub", 10, 0.02),
    # ("model7_1iep_admetfg", "1IEP", "ADMET+FGs", "softsub", 10, 0.02),
    # ("model2_1iep", "1IEP", "ADMET+FGs", "admetfg_softsub", 20, 0),
    ("model7_1iep_admetfg", "1IEP", "ADMET+FGs", "random", 5, 0),
    ("model7_1iep_admetfg", "1IEP", "ADMET+FGs", "randomwsampling", 5, 0),
    ("model7_1iep_admetfg", "1IEP", "ADMET+FGs", "diffusion", 5, 0),
]
# Per-target plot settings, indexed as target_to_params[parameter][protein].
# Values are an int (threshold) or a [min, max] list (axis ranges), so the inner
# value type is not `int` as previously annotated.
target_to_params: Dict[str, Dict[str, object]] = {
    "threshold": {"HNH": 11, "1IEP": 37},
    "xrange": {"HNH": [0, 50], "1IEP": [0, 100]},
    "yrange": {"HNH": [0, 0.13], "1IEP": [0, 0.07]},
}
clusterize = False
mode = "clusters" if clusterize else "ligands"
# One export directory for both the metrics table and the figure; the table was
# previously always written to LIGAND_EXPORT_PATH regardless of `clusterize`.
export_path = CLUSTER_EXPORT_PATH if clusterize else LIGAND_EXPORT_PATH

for prefix, protein, filters, channel, xshift, yshift in configs:
    threshold = target_to_params["threshold"][protein]
    fnames, labels = sc_dist.prepare_cycle_config(
        prefix=prefix,
        descriptors_type="mix",
        n_iters=5,
        channel=channel,
        n_clusters=100,
        filters=filters,
        target=protein,
    )
    traces, metrics = sc_dist.prepare_score_distribution_traces(
        scoring_path=SCORING_PATH,
        fnames=fnames,
        labels=labels,
        colors=["dark_grey", "teal", "purple", "orange", "red", "green"],
        clusterize=clusterize,
        threshold=threshold,
        bin_step=1 if clusterize else 2,
        trace_opacity=0.7,
        show_bars=False,
        show_density=True,
        density_line_opacity=0.8,
        density_fill="tozeroy",
        density_fill_opacity=0.2,
    )
    sc_dist.export_metrics_to_table(
        data_list=metrics,
        threshold=threshold,
        save_path=export_path,
        save_fname=f"{mode}_{prefix}_k100_{channel}_al5",
    )
    fig = sc_dist.plot_hist_density(
        traces=traces,
        threshold=threshold,
        threshold_xshift=xshift,
        threshold_yshift=yshift,
    )
    graph = Graph()
    graph.update_parameters(
        dict(
            xaxis_title="Cluster Score"
            if clusterize
            else "Attractive Interaction Score",
            yaxis_title="Relative Frequency",
            title_size=36,
            axis_title_size=28,
            tick_font_size=24,
            yaxis_standoff=10,
            title_ycoord=0.97,
            t_margin=60,
            annotation_size=24,
            annotation_color=Style.biscale["dark_grey"][0](0.6),
            width=1280,
            height=600,
            yrange=target_to_params["yrange"][protein],
            xrange=target_to_params["xrange"][protein],
        )
    )
    graph.style_figure(fig)
    fig.update_layout(
        legend=dict(
            x=0.9,
            y=0.55,
            xanchor="left",
            yanchor="middle",
            font=dict(size=32),
        )
    )
    graph.save_figure(
        figure=fig,
        path=export_path,
        fname=f"{prefix}_{protein}_{filters}_{channel}_al5",
        svg=True,
    )

# Cluster Score Distribution


## No Filters


In [8]:
# Cluster-level score distributions for the unfiltered "model7" runs, produced
# once per aggregation mode ("mean", "median") and per entry in `configs`.
# Each entry is (descriptors_type, n_clusters, n_iters, channel, xshift, yshift);
# xshift / yshift are forwarded to the plot as threshold_xshift / threshold_yshift.
configs = [
    ("mix", 100, 5, "softsub", 5, 0),
    ("mix", 100, 5, "softdiv", 5, 0),
    ("mix", 100, 5, "linear", 5, 0),
    ("mix", 100, 5, "diffusion", 5, 0),
    ("mix", 10, 5, "softsub", 5, 0),
    ("mqn", 100, 5, "softsub", 5, 0),
    ("mqn", 100, 5, "diffusion", 5, 0),
    ("mqn", 10, 5, "softsub", 5, 0),
]
# Upper y-axis limit keyed by (n_clusters, descriptors_type == "mix").
# A table replaces the former if/elif chain, which had no fallback branch and
# therefore left `yrange` unbound (or stale from the previous iteration) for
# any cluster count other than 100 or 10.
yrange_upper = {
    (100, True): 0.18,
    (100, False): 0.2,
    (10, True): 0.22,
    (10, False): 0.4,
}
clusterize = True
prefix = "model7"
threshold = 11
export_path = CLUSTER_EXPORT_PATH if clusterize else LIGAND_EXPORT_PATH

for agg_mode in ("mean", "median"):
    mode = f"clusters_{agg_mode}" if clusterize else "ligands"
    for descriptors_type, n_clusters, n_iters, channel, xshift, yshift in configs:
        # Resolve the y-range first so an unsupported config fails before any
        # table or figure is written.
        yrange_key = (n_clusters, descriptors_type == "mix")
        if yrange_key not in yrange_upper:
            raise ValueError(
                f"No y-axis range defined for n_clusters={n_clusters!r}"
            )
        yrange = [0, yrange_upper[yrange_key]]

        fnames, labels = sc_dist.prepare_cycle_config(
            prefix=prefix,
            descriptors_type=descriptors_type,
            n_clusters=n_clusters,
            n_iters=n_iters,
            channel=channel,
        )
        traces, metrics = sc_dist.prepare_score_distribution_traces(
            scoring_path=SCORING_PATH,
            fnames=fnames,
            labels=labels,
            colors=["dark_grey", "teal", "purple", "orange", "red", "green"],
            clusterize=clusterize,
            aggregation_mode=agg_mode,
            threshold=threshold,
            bin_step=1 if clusterize else 2,
            trace_opacity=0.7,
            show_bars=False,
            show_density=True,
            density_line_opacity=0.8,
            density_fill="tozeroy",
            density_fill_opacity=0.2,
        )
        sc_dist.export_metrics_to_table(
            data_list=metrics,
            threshold=threshold,
            save_path=export_path,
            save_fname=f"{mode}_{prefix}_{descriptors_type}_k{n_clusters}_{channel}_al{n_iters}",
        )
        fig = sc_dist.plot_hist_density(
            traces=traces,
            threshold=threshold,
            threshold_xshift=xshift,
            threshold_yshift=yshift,
        )
        graph = Graph()
        graph.update_parameters(
            dict(
                xaxis_title="Cluster Score"
                if clusterize
                else "Attractive Interaction Score",
                yaxis_title="Relative Frequency",
                title_size=36,
                axis_title_size=28,
                tick_font_size=24,
                yaxis_standoff=10,
                title_ycoord=0.97,
                t_margin=60,
                annotation_size=24,
                annotation_color=Style.biscale["dark_grey"][0](0.6),
                width=1280,
                height=600,
                yrange=yrange,
                xrange=[0, 50],
            )
        )
        graph.style_figure(fig)
        fig.update_layout(
            legend=dict(
                x=0.9,
                y=0.55,
                xanchor="left",
                yanchor="middle",
                font=dict(size=32),
            )
        )
        graph.save_figure(
            figure=fig,
            path=export_path,
            fname=f"k{n_clusters}_{descriptors_type}_{agg_mode}_{channel}_al{n_iters}",
            svg=True,
        )

## Filter Runs


In [4]:
# Cluster-level score distributions for the filtered runs, produced once per
# aggregation mode ("mean", "median") and per entry in `configs`.
# Each entry is (prefix, protein, filters, channel, xshift, yshift);
# xshift / yshift are forwarded to the plot as threshold_xshift / threshold_yshift.
configs = [
    # Disabled runs, kept for reference:
    # ("model7_hnh_admet", "HNH", "ADMET", "softsub", 5, 0),
    # ("model7_hnh_admetfg", "HNH", "ADMET+FGs", "softsub", 5, 0),
    # ("model2_hnh", "HNH", "ADMET+FGs", "admetfg_softsub", 5, 0),
    # ("model7_1iep_admet", "1IEP", "ADMET", "softsub", 10, 0.02),
    # ("model7_1iep_admetfg", "1IEP", "ADMET+FGs", "softsub", 10, 0.02),
    # ("model2_1iep", "1IEP", "ADMET+FGs", "admetfg_softsub", 20, 0),
    ("model7_1iep_admetfg", "1IEP", "ADMET+FGs", "diffusion", 5, 0),
]
# Per-target plot settings, indexed as target_to_params[parameter][protein].
# Values are an int (threshold) or a [min, max] list (axis ranges), so the inner
# value type is not `int` as previously annotated.
target_to_params: Dict[str, Dict[str, object]] = {
    "threshold": {"HNH": 11, "1IEP": 37},
    "xrange": {"HNH": [0, 35], "1IEP": [0, 70]},
    "yrange": {"HNH": [0, 0.35], "1IEP": [0, 0.13]},
}
clusterize = True
export_path = CLUSTER_EXPORT_PATH if clusterize else LIGAND_EXPORT_PATH

for agg_mode in ("mean", "median"):
    # The aggregation mode is part of the table name, as in the unfiltered
    # cluster cell. Without it both passes of this loop handed the same
    # save_path / save_fname to export_metrics_to_table.
    mode = f"clusters_{agg_mode}" if clusterize else "ligands"
    for prefix, protein, filters, channel, xshift, yshift in configs:
        threshold = target_to_params["threshold"][protein]
        fnames, labels = sc_dist.prepare_cycle_config(
            prefix=prefix,
            descriptors_type="mix",
            n_iters=5,
            channel=channel,
            n_clusters=100,
            filters=filters,
            target=protein,
        )
        traces, metrics = sc_dist.prepare_score_distribution_traces(
            scoring_path=SCORING_PATH,
            fnames=fnames,
            labels=labels,
            colors=["dark_grey", "teal", "purple", "orange", "red", "green"],
            clusterize=clusterize,
            aggregation_mode=agg_mode,
            threshold=threshold,
            bin_step=1 if clusterize else 2,
            trace_opacity=0.7,
            show_bars=False,
            show_density=True,
            density_line_opacity=0.8,
            density_fill="tozeroy",
            density_fill_opacity=0.2,
        )
        sc_dist.export_metrics_to_table(
            data_list=metrics,
            threshold=threshold,
            save_path=export_path,
            save_fname=f"{mode}_{prefix}_k100_{channel}_al5",
        )
        fig = sc_dist.plot_hist_density(
            traces=traces,
            threshold=threshold,
            threshold_xshift=xshift,
            threshold_yshift=yshift,
        )
        graph = Graph()
        graph.update_parameters(
            dict(
                xaxis_title="Cluster Score"
                if clusterize
                else "Attractive Interaction Score",
                yaxis_title="Relative Frequency",
                title_size=36,
                axis_title_size=28,
                tick_font_size=24,
                yaxis_standoff=10,
                title_ycoord=0.97,
                t_margin=60,
                annotation_size=24,
                annotation_color=Style.biscale["dark_grey"][0](0.6),
                width=1280,
                height=600,
                yrange=target_to_params["yrange"][protein],
                xrange=target_to_params["xrange"][protein],
            )
        )
        graph.style_figure(fig)
        fig.update_layout(
            legend=dict(
                x=0.9,
                y=0.55,
                xanchor="left",
                yanchor="middle",
                font=dict(size=32),
            )
        )
        graph.save_figure(
            figure=fig,
            path=export_path,
            fname=f"{prefix}_{protein}_{filters}_{channel}_{agg_mode}",
            svg=True,
        )