*this notebook uses a venv created by using uv*
- https://docs.astral.sh/uv/guides/integration/jupyter/#using-jupyter-from-vs-code

In [1]:
import pandas as pd

In [2]:
data = pd.read_csv("CYP3A4_strong_substrates")
data

Unnamed: 0,generic_drug_name,cyp_strength_of_evidence,drug_class,common_adverse_effects^^,less_common_adverse_effects^,first_ref,second_ref,date_checked
0,carbamazepine,strong,antiepileptics,"constipation^^, leucopenia^^, dizziness^^, som...","eosinophilia^, thrombocytopenia^, neutropenia^...",drugs.com,nzf,211024
1,eliglustat,strong,metabolic_agents,"diarrhea^^, oropharyngeal_pain^^, arthralgia^^...","rash^, flatulence^, dyspepsia^, gastroesophage...",drugs.com,emc,151124
2,flibanserin,strong,CNS_agents,"dizziness^^, somnolence^^","sedation^, fatigue^, vertigo^, accidental_inju...",drugs.com,Drugs@FDA,161124
3,imatinib,strong,tyrosine_kinase_inhibitor,"rash^^, diarrhea^^, abdominal_pain^^, constipa...","flushing^, pruritus^, face_edema^, dry skin^, ...",drugs.com,nzf,181124
4,ibrutinib,strong,tyrosine_kinase_inhibitor,"hypertension^^, atrial_fibrillation^^, sinus_t...","atrial_flutter^, cardiac_failure(pm)^, ventric...",drugs.com,nzf,191124
5,neratinib,strong,tyrosine_kinase_inhibitor,"diarrhea^^, abdominal_pain^^, stomatitis^^, dy...","abdominal_distention^, dry_mouth^, nail_disord...",drugs.com,nzf,201124
6,esomeprazole,strong,proton_pump_inhibitors,"headache^^, flatulence^^","dizziness^, somnolence^, taste_disturbance/per...",drugs.com,emc,161124
7,omeprazole,strong,proton_pump_inhibitors,"fever^^, otitis_media^^, respiratory_system_re...","accidental_injury^, asthenia^, pain(pm), fatig...",drugs.com,nzf,181124
8,ivacaftor,strong,CFTR_potentiator,"rash^^, oropharyngeal_pain^^, abdominal_pain^^...","acne^, increased_hepatic_enzymes^, increased_b...",drugs.com,nzf,201124
9,naloxegol,strong,peripheral_opioid_receptor_antagonists,abdominal pain^^,"possible_opioid_withdrawal_syndrome^, diarrhea...",drugs.com,emc,211124


In [3]:
# drop some columns
df = data.drop([
    "cyp_strength_of_evidence", 
    "drug_class", 
    "less_common_adverse_effects^", 
    "first_ref", 
    "second_ref", 
    "date_checked"
    ], axis=1)
df

Unnamed: 0,generic_drug_name,common_adverse_effects^^
0,carbamazepine,"constipation^^, leucopenia^^, dizziness^^, som..."
1,eliglustat,"diarrhea^^, oropharyngeal_pain^^, arthralgia^^..."
2,flibanserin,"dizziness^^, somnolence^^"
3,imatinib,"rash^^, diarrhea^^, abdominal_pain^^, constipa..."
4,ibrutinib,"hypertension^^, atrial_fibrillation^^, sinus_t..."
5,neratinib,"diarrhea^^, abdominal_pain^^, stomatitis^^, dy..."
6,esomeprazole,"headache^^, flatulence^^"
7,omeprazole,"fever^^, otitis_media^^, respiratory_system_re..."
8,ivacaftor,"rash^^, oropharyngeal_pain^^, abdominal_pain^^..."
9,naloxegol,abdominal pain^^


In [5]:
# Change data types for ADRs
df = df.astype({"generic_drug_name": "string", "common_adverse_effects^^": "string"})
df.dtypes

generic_drug_name           string[python]
common_adverse_effects^^    string[python]
dtype: object

In [6]:
# Expand the common ADR column
# alternative way is to explode:
#data["common_adverse_effects^^"].explode()

adr = df["common_adverse_effects^^"].str.split(expand=True)
adr


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,58,59,60,61,62,63,64,65,66,67
0,"constipation^^,","leucopenia^^,","dizziness^^,","somnolence^^,","ataxia^^,",elevated,"GGT^^,",allergic_skin_reactions^^,,,...,,,,,,,,,,
1,"diarrhea^^,","oropharyngeal_pain^^,","arthralgia^^,","back_pain^^,","pain_in_extremity^^,","upper_abdominal_pain^^,","headache^^,","migraine^^,",fatigue^^,,...,,,,,,,,,,
2,"dizziness^^,",somnolence^^,,,,,,,,,...,,,,,,,,,,
3,"rash^^,","diarrhea^^,","abdominal_pain^^,","constipation^^,","dyspepsia^^,","hemorrhage^^,","neutropenia^^,","thrombocytopenia^^,","anemia^^,","influenza^^,",...,,,,,,,,,,
4,"hypertension^^,","atrial_fibrillation^^,","sinus_tachycardia^^,","rash^^,","skin_infection^^,","pruritus^^,","diarrhea^^,","stomatitis^^,","abdominal_pain^^,","constipation^^,",...,"upper_RTI^^,","cough^^,","pneumonia^^,","dyspnea^^,","sinusitis^^,","oropharyngeal_pain^^,","bronchitis^^,","nasopharyngitis^^,","influenza^^,",viral_upper_RTI^^
5,"diarrhea^^,","abdominal_pain^^,","stomatitis^^,","dyspepsia^^,","fatigue^^,","rash^^,","decreased_appetite^^,",muscle_spasms^^,,,...,,,,,,,,,,
6,"headache^^,",flatulence^^,,,,,,,,,...,,,,,,,,,,
7,"fever^^,","otitis_media^^,","respiratory_system_reactions^^,","taste_disturbance^^,",diarrhea^^,,,,,,...,,,,,,,,,,
8,"rash^^,","oropharyngeal_pain^^,","abdominal_pain^^,","diarrhea^^,","transaminase_elevations^^,","headache^^,","pyrexia^^,","upper_RTI^^,","nasal_congestion^^,","nasopharyngitis^^,",...,,,,,,,,,,
9,abdominal,pain^^,,,,,,,,,...,,,,,,,,,,


In [7]:
# Merge dfs
df = df.join(adr)
df

Unnamed: 0,generic_drug_name,common_adverse_effects^^,0,1,2,3,4,5,6,7,...,58,59,60,61,62,63,64,65,66,67
0,carbamazepine,"constipation^^, leucopenia^^, dizziness^^, som...","constipation^^,","leucopenia^^,","dizziness^^,","somnolence^^,","ataxia^^,",elevated,"GGT^^,",allergic_skin_reactions^^,...,,,,,,,,,,
1,eliglustat,"diarrhea^^, oropharyngeal_pain^^, arthralgia^^...","diarrhea^^,","oropharyngeal_pain^^,","arthralgia^^,","back_pain^^,","pain_in_extremity^^,","upper_abdominal_pain^^,","headache^^,","migraine^^,",...,,,,,,,,,,
2,flibanserin,"dizziness^^, somnolence^^","dizziness^^,",somnolence^^,,,,,,,...,,,,,,,,,,
3,imatinib,"rash^^, diarrhea^^, abdominal_pain^^, constipa...","rash^^,","diarrhea^^,","abdominal_pain^^,","constipation^^,","dyspepsia^^,","hemorrhage^^,","neutropenia^^,","thrombocytopenia^^,",...,,,,,,,,,,
4,ibrutinib,"hypertension^^, atrial_fibrillation^^, sinus_t...","hypertension^^,","atrial_fibrillation^^,","sinus_tachycardia^^,","rash^^,","skin_infection^^,","pruritus^^,","diarrhea^^,","stomatitis^^,",...,"upper_RTI^^,","cough^^,","pneumonia^^,","dyspnea^^,","sinusitis^^,","oropharyngeal_pain^^,","bronchitis^^,","nasopharyngitis^^,","influenza^^,",viral_upper_RTI^^
5,neratinib,"diarrhea^^, abdominal_pain^^, stomatitis^^, dy...","diarrhea^^,","abdominal_pain^^,","stomatitis^^,","dyspepsia^^,","fatigue^^,","rash^^,","decreased_appetite^^,",muscle_spasms^^,...,,,,,,,,,,
6,esomeprazole,"headache^^, flatulence^^","headache^^,",flatulence^^,,,,,,,...,,,,,,,,,,
7,omeprazole,"fever^^, otitis_media^^, respiratory_system_re...","fever^^,","otitis_media^^,","respiratory_system_reactions^^,","taste_disturbance^^,",diarrhea^^,,,,...,,,,,,,,,,
8,ivacaftor,"rash^^, oropharyngeal_pain^^, abdominal_pain^^...","rash^^,","oropharyngeal_pain^^,","abdominal_pain^^,","diarrhea^^,","transaminase_elevations^^,","headache^^,","pyrexia^^,","upper_RTI^^,",...,,,,,,,,,,
9,naloxegol,abdominal pain^^,abdominal,pain^^,,,,,,,...,,,,,,,,,,


In [8]:
df = df.drop(["common_adverse_effects^^"], axis=1)
df

Unnamed: 0,generic_drug_name,0,1,2,3,4,5,6,7,8,...,58,59,60,61,62,63,64,65,66,67
0,carbamazepine,"constipation^^,","leucopenia^^,","dizziness^^,","somnolence^^,","ataxia^^,",elevated,"GGT^^,",allergic_skin_reactions^^,,...,,,,,,,,,,
1,eliglustat,"diarrhea^^,","oropharyngeal_pain^^,","arthralgia^^,","back_pain^^,","pain_in_extremity^^,","upper_abdominal_pain^^,","headache^^,","migraine^^,",fatigue^^,...,,,,,,,,,,
2,flibanserin,"dizziness^^,",somnolence^^,,,,,,,,...,,,,,,,,,,
3,imatinib,"rash^^,","diarrhea^^,","abdominal_pain^^,","constipation^^,","dyspepsia^^,","hemorrhage^^,","neutropenia^^,","thrombocytopenia^^,","anemia^^,",...,,,,,,,,,,
4,ibrutinib,"hypertension^^,","atrial_fibrillation^^,","sinus_tachycardia^^,","rash^^,","skin_infection^^,","pruritus^^,","diarrhea^^,","stomatitis^^,","abdominal_pain^^,",...,"upper_RTI^^,","cough^^,","pneumonia^^,","dyspnea^^,","sinusitis^^,","oropharyngeal_pain^^,","bronchitis^^,","nasopharyngitis^^,","influenza^^,",viral_upper_RTI^^
5,neratinib,"diarrhea^^,","abdominal_pain^^,","stomatitis^^,","dyspepsia^^,","fatigue^^,","rash^^,","decreased_appetite^^,",muscle_spasms^^,,...,,,,,,,,,,
6,esomeprazole,"headache^^,",flatulence^^,,,,,,,,...,,,,,,,,,,
7,omeprazole,"fever^^,","otitis_media^^,","respiratory_system_reactions^^,","taste_disturbance^^,",diarrhea^^,,,,,...,,,,,,,,,,
8,ivacaftor,"rash^^,","oropharyngeal_pain^^,","abdominal_pain^^,","diarrhea^^,","transaminase_elevations^^,","headache^^,","pyrexia^^,","upper_RTI^^,","nasal_congestion^^,",...,,,,,,,,,,
9,naloxegol,abdominal,pain^^,,,,,,,,...,,,,,,,,,,


In [9]:
df.stack(future_stack=True)

0   generic_drug_name      carbamazepine
    0                    constipation^^,
    1                      leucopenia^^,
    2                       dizziness^^,
    3                      somnolence^^,
                              ...       
12  63                              <NA>
    64                              <NA>
    65                              <NA>
    66                              <NA>
    67                              <NA>
Length: 897, dtype: string

More ideas needed re. **ADRs <-> dense vectors <-> 2D drug structures**
(SAR: drug activities <-> 2d drug structures)

1. may try to achieve ADRs -> dense vectors part first...
- do a ADRs EDA & classifications initially
- separate into diff. ADR lists for diff. drugs? -> convert into tensors

2. 2D drug structures part - graph neural networks (GNN): molecules as undirected graphs where the connections between nodes (atoms) and edges (bonds) don't matter (i.e. don't need to be in particular orders or sequences)