In [6]:
import os
import json
import glob
from pathlib import Path
import pandas as pd

In [7]:
# Output directory for figures saved by this notebook (used by the recall /
# p-value bar chart cell below).
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
plot_dir = "/home/bbc8731/HSV/6_drug_repurposing/data/plot/"

In [8]:
def load_validation_metrics(json_path, experiment):
    """Read one drug-validation JSON file and return its summary row.

    Parameters
    ----------
    json_path : str or pathlib.Path
        Location of a ``uniprot_ppi_drug_validation.json`` file.
    experiment : str
        Label stored under the ``"experiment"`` key of the returned row.

    Returns
    -------
    dict or None
        A dict with the keys ``"experiment"``, ``"p_value"`` and ``"recall"``
        taken from the file, or ``None`` when ``json_path`` does not exist.

    Raises
    ------
    KeyError
        If the file lacks ``"empirical DCG-based p-value"`` or ``"recall"``.
    """
    json_path = Path(json_path)
    if not json_path.exists():
        return None

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(json_path, encoding="utf-8") as f:
        metrics = json.load(f)

    return {
        "experiment": experiment,
        "p_value": metrics["empirical DCG-based p-value"],
        "recall": metrics["recall"],
    }


path = Path("/home/bbc8731/HSV/3_module_expansion/data/categories_methods/")
# rglob yields matches in filesystem order; sorting makes the row order (and
# therefore tie-breaking in later stable sorts) reproducible across reruns.
dirs = sorted(path.rglob("drug_repurposing/trustrank/uniprot_ppi/validation"))

approved_rows = []
random_rows = []

for d in dirs:
    # Matched layout: <experiment>/drug_repurposing/trustrank/uniprot_ppi/validation,
    # so the fourth ancestor of the validation directory is the experiment folder.
    experiment = d.parents[3].name

    approved_path = d / "approved_drugs" / "uniprot_ppi_drug_validation.json"
    # NOTE(review): these rows are collected under the name "random", but the
    # files come from "approved_experimental_drugs" — confirm the intended name.
    random_path = d / "approved_experimental_drugs" / "uniprot_ppi_drug_validation.json"

    # Same extraction for both drug sets; a missing file is skipped silently,
    # exactly as before.
    for json_path, rows in ((approved_path, approved_rows), (random_path, random_rows)):
        row = load_validation_metrics(json_path, experiment)
        if row is not None:
            rows.append(row)


In [10]:
import matplotlib.pyplot as plt

# Horizontal bar chart of recall per experiment for the approved-drug
# validation, annotated with each experiment's empirical p-value, saved as PDF.

# Rank experiments by recall, best first (sorted() is stable, so ties keep
# their order from approved_rows).
data = sorted(approved_rows, key=lambda x: x['recall'], reverse=True)

experiments = [row['experiment'] for row in data]
recalls = [row['recall'] for row in data]
pvals = [row['p_value'] for row in data]

# Use the configured default size as a floor and grow the height with the
# number of bars so y tick labels do not overlap for many experiments.
base_width, base_height = plt.rcParams["figure.figsize"]
fig, ax = plt.subplots(
    figsize=(base_width, max(base_height, 0.3 * len(experiments)))
)

positions = list(range(len(experiments)))
bars = ax.barh(positions, recalls)

ax.set_yticks(positions)
ax.set_yticklabels(experiments)
ax.set_xlabel("Recall")

# Invert y-axis so highest recall is on top
ax.invert_yaxis()

# Annotate p-values just to the right of each bar's end
for i, (recall, pval) in enumerate(zip(recalls, pvals)):
    ax.text(recall + 0.005, i, f"p={pval:.4g}", va='center')

# Keep only left and bottom axes
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

fig.tight_layout()

# savefig does not create missing directories; make sure the target exists.
os.makedirs(plot_dir, exist_ok=True)
fig.savefig(os.path.join(plot_dir, "recall_pval_DR_validation.pdf"), format="pdf")
# Close this specific figure so it is released even if others are open.
plt.close(fig)


### 1️⃣ CC_Host_Nucleus → Highest relevance

HSV is a DNA virus. After entry, the capsid docks at the nuclear pore and the viral genome is released into the host nucleus, where:

- Viral DNA replication occurs
- Viral transcription is carried out using host RNA polymerase II
- Capsid assembly initiates

Anything modulating:

- Nuclear import/export
- Chromatin regulation
- DNA replication machinery
- Transcription factors

is directly relevant to HSV biology.
From a mechanistic standpoint, the nucleus is the central compartment of HSV infection.

### 2️⃣ CC_Host_Membrane → Strong for entry-focused strategies

HSV entry depends on:

- Host membrane receptors (e.g., nectins, HVEM)
- Membrane fusion machinery

If the repurposing strategy aims to block infection at the earliest step (viral entry), membrane-associated proteins are highly relevant.

### 3️⃣ CC_Host_Cytoplasm → Moderate relevance

The cytoplasm is involved in:

- Capsid transport along microtubules
- Tegument protein activity
- Innate immune signaling

These processes are relevant, but less central to HSV infection than those in the nucleus.

### 4️⃣ CC_Virion_Components → Virus-specific, not cellular

Important for direct antiviral targeting, but this is not a host compartment. Repurposing approved drugs against virion structural proteins is harder unless the target protein has enzymatic activity.