In [1]:
import os
import glob
import json
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# --- Paths -------------------------------------------------------------------
# The project root is taken to be the parent of the current working directory.
workdir = os.path.dirname(os.getcwd())
input_dir = Path(f"{workdir}/9_drug_protein_moa/data/")
# NOTE(review): absolute, user-specific path — presumably this lives under
# `workdir` as well; confirm and derive it from `workdir` if so.
module_path = Path("/home/bbc8731/HSV/3_module_expansion/data/categories_methods")

# --- Drug-target annotations --------------------------------------------------
# Build a lookup table keyed by "<drugbank_id>|<uniprot_id>":
#   1. strip the "drugbank." / "uniprot." prefixes from the source/target ids,
#   2. concatenate both ids into the "drug|protein" join key,
#   3. keep only the key, the interaction label and the actions,
#   4. drop rows whose actions value is the literal string "[]",
#   5. turn the remaining list-like strings into plain text by removing the
#      surrounding brackets and the single quotes.
drug_target = (
    pd.read_csv(input_dir / "drug_target_nedrex.csv")
    .assign(
        drugbank_id=lambda edges: edges["sourceDomainId"].str.replace("drugbank.", "", regex=False),
        uniprot_id=lambda edges: edges["targetDomainId"].str.replace("uniprot.", "", regex=False),
    )
    .assign(
        **{
            "drug|protein": lambda edges: (
                edges["drugbank_id"].astype(str) + "|" + edges["uniprot_id"].astype(str)
            )
        }
    )
    .loc[:, ["drug|protein", "interaction_label", "actions"]]
    .loc[lambda edges: edges["actions"] != "[]"]
    .assign(
        actions=lambda edges: edges["actions"].str.strip("[]").str.replace("'", "", regex=False)
    )
)

# Cell output: the distinct interaction labels that remain after filtering.
drug_target["interaction_label"].unique()

array(['target_negativeEffect', 'target', 'target_positiveEffect'],
      dtype=object)

In [13]:
# Module directories to process: only the BP_* and CC_* categories.
# The explicit is_dir() check matters because Path.glob("*/") restricts its
# matches to directories only on Python >= 3.11; older interpreters also yield
# plain files, which would then fail when used as a directory below.
files = sorted(
    p
    for p in module_path.glob("*/")
    if p.is_dir() and p.name.startswith(("BP_", "CC_"))
)

for p in files:
    # Candidate drugs for this module. Columns are renamed with a "drug_"
    # prefix so they remain unambiguous once protein columns are joined in.
    candidate_drugs = pd.read_csv(
        p
        / "drug_repurposing"
        / "trustrank"
        / "uniprot_ppi"
        / "validation"
        / "approved_drugs"
        / "candidate_drugs_scored_atc_code.csv"
    )
    candidate_drugs = candidate_drugs.rename(
        columns={
            "score": "drug_score",
            "neg_log_score": "drug_neg_log_score",
            "cmpdname": "drug_name",
            "atc_code": "drug_atc_code",
            "atc_label": "drug_atc_label",
        }
    )

    # Proteins of this module, restricted to druggability_rank == 1
    # (extract proteins with approved drugs) and to the identifier columns.
    protein_modules = pd.read_csv(p / "drugability" / "protein_drugability_cutoff_4.csv")
    protein_modules = protein_modules.loc[
        protein_modules["druggability_rank"] == 1,
        ["uniprot_id", "symbol", "ensembl_id"],
    ]

    # Identify all the drug-target combinations (full cross product), then
    # annotate each pair with the known interaction label / actions.
    # The left join keeps pairs without an annotation (NaN in both columns).
    drug_protein_df = candidate_drugs.merge(protein_modules, how="cross")
    drug_protein_df["drug|protein"] = (
        drug_protein_df["drugbank_id"].astype(str)
        + "|"
        + drug_protein_df["uniprot_id"].astype(str)
    )
    drug_protein = drug_protein_df.merge(drug_target, on="drug|protein", how="left")

    # NOTE(review): because of the left join, this frame is empty only when the
    # cross product itself is empty (no candidate drugs or no rank-1 proteins),
    # not when no pair has a known action — the message below suggests the
    # latter was intended; confirm which behaviour is wanted.
    if drug_protein.empty:
        print(f"Skipping {p.name} — no valid drug-target actions.")
        continue

    # Persist the annotated pairs next to the module's other results.
    output_dir = p / "repurposing_candidates"
    output_dir.mkdir(parents=True, exist_ok=True)
    drug_protein.to_csv(output_dir / "drug_protein_candidates.csv", index=False)

In [11]:
# Display the annotated drug/protein pairs. The name is reassigned on every
# iteration of the module loop, so it holds only the frame built for the last
# module directory that loop reached.
drug_protein

Unnamed: 0,drugbank_id,drug_score,drug_neg_log_score,trustrank,drug_name,drug_atc_code,drug_atc_label,uniprot_id,symbol,ensembl_id,drug|protein,interaction_label,actions
0,DB12010,0.000060,4.220054,1,Fostamatinib,B02,Antihemorrhagics,Q13547,HDAC1,ENSG00000116478,DB12010|Q13547,,
1,DB01268,0.000053,4.278910,2,Sunitinib,L01,Antineoplastic agents,Q13547,HDAC1,ENSG00000116478,DB01268|Q13547,,
2,DB09079,0.000045,4.347601,3,Nintedanib,L01,Antineoplastic agents,Q13547,HDAC1,ENSG00000116478,DB09079|Q13547,,
3,DB06616,0.000042,4.381922,4,Bosutinib,L01,Antineoplastic agents,Q13547,HDAC1,ENSG00000116478,DB06616|Q13547,,
4,DB08865,0.000032,4.491486,5,Crizotinib,L01,Antineoplastic agents,Q13547,HDAC1,ENSG00000116478,DB08865|Q13547,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,DB08868,0.000002,5.614416,61,,,,Q13547,HDAC1,ENSG00000116478,DB08868|Q13547,target_negativeEffect,inhibitor
66,DB05223,0.000002,5.645760,62,,,,Q13547,HDAC1,ENSG00000116478,DB05223|Q13547,,
67,DB11830,0.000002,5.676191,63,,,,Q13547,HDAC1,ENSG00000116478,DB11830|Q13547,,
68,DB00061,0.000002,5.712086,64,,,,Q13547,HDAC1,ENSG00000116478,DB00061|Q13547,,


In [6]:
# Display the druggability_rank == 1 proteins; like drug_protein, this name is
# overwritten per module in the loop and reflects only the last one processed.
protein_modules

Unnamed: 0,uniprot_id,symbol,ensembl_id
0,Q13547,HDAC1,ENSG00000116478
