In [39]:
import re

import numpy as np
import pandas as pd

# Load the drug -> indication table and preview its first 30 rows.
ruiz_df = pd.read_excel(io='../data/indication_gene_link/ruiz.xlsx')
ruiz_df.head(n=30)

Unnamed: 0,drug,drug_name,indication,indication_name
0,DB00001,Lepirudin,C0040038,Thromboembolism
1,DB00001,Lepirudin,C0002965,"Angina, Unstable"
2,DB00001,Lepirudin,C0040053,thrombosis
3,DB00002,Cetuximab,C0007102,Malignant Tumor Of Colon
4,DB00002,Cetuximab,C1168401,head and neck squamous cell carcinoma (HNSCC)
5,DB00004,Denileukin diftitox,C0079773,cutaneous T-cell lymphoma (CTCL)
6,DB00005,Etanercept,C0003873,rheumatoid arthritis
7,DB00005,Etanercept,C3495559,Juvenile Arthritis
8,DB00005,Etanercept,C0003872,psoriatic arthritis
9,DB00005,Etanercept,C0038013,ankylosing spondylitis


In [40]:
# Main disease category -> search terms looked for inside an indication name.
# Order matters: find_main_disease returns the first category (and, within a
# category, the first term) that hits, so earlier entries win ties.
# NOTE(review): "depression" also captures "respiratory depression" (visible in
# the grouped output further down) -- confirm whether that is intended.
disease_synonyms = {
    "Attention Deficit Hyperactivity Disorder": [
        "ADHD",
        "Attention",
        "Hyperkinetic Syndrome",
        "Brain Dysfunction",
    ],
    "Alzheimer’s Disease": [
        "Alzheimer",
    ],
    "Anxiety Disorder": [
        "Anxiety",
        "Hypervigilance",
        "Nervousness",
        "Anxiousness",
        "Anxieties",
    ],
    "Autism Spectrum Disorder": [
        "ASD",
        "Autism",
        "Autistic",
    ],
    "Bipolar Disorder": [
        "Manic Depression",
        "Bipolar",
        "BPAD",
        "Bipolar disorder",
    ],
    "Eating Disorders": [
        "Eating",
        "Feeding",
    ],
    "Major Depressive Disorder": [
        "Depressive",
        "MDD",
        "Involutional Paraphrenia",
        "depression",
    ],
    "OCD & Tourette Syndrome": [
        "Obsessive",
        "OCD",
        "Tourette",
        "Compulsive",
    ],
    "Post Traumatic Stress Disorder": [
        "Post Traumatic Stress Disorder",
        "PTSD",
        "Post-Traumatic Stress",
    ],
    "Schizophrenia": [
        "Schizoaffective Disorder",
        "Schizophrenia",
    ],
    "Substance Use Disorders": [
        "Substance Use Disorders",
        "SUD",
        "Drug Use",
        "Drug Abuse",
        "Substance Dependence",
        "Opioid dependence",
        "Opioid use",
        "Cannabis use disorder",
        "Alcohol use",
        "Alcohol dependence",
    ],
}

def find_main_disease(indication_name, synonyms_map=None):
    """Map a free-text indication name to its main disease category.

    Each synonym is matched case-insensitively as a whole word or phrase, so
    a synonym that merely occurs inside a longer word (e.g. "Eating" inside
    "sweating", or "SUD" inside "sudden") is not counted as a hit.
    Categories and their synonyms are tried in dictionary order and the
    first hit wins.

    Parameters
    ----------
    indication_name : str
        Indication label to classify. Non-string values (e.g. NaN from an
        empty spreadsheet cell) are treated as "no match" instead of raising.
    synonyms_map : dict[str, list[str]], optional
        Mapping of main disease name to its synonyms. Defaults to the
        notebook-level ``disease_synonyms``.

    Returns
    -------
    str or None
        The matching main disease name, or None when nothing matches.
    """
    # Missing cells arrive as float NaN / None, which have no .lower().
    if not isinstance(indication_name, str):
        return None

    if synonyms_map is None:
        synonyms_map = disease_synonyms

    indication_name_lower = indication_name.lower()
    for main_disease, synonyms in synonyms_map.items():
        for synonym in synonyms:
            # Lookarounds (rather than \b) keep the whole-word rule valid even
            # when a synonym itself starts or ends with punctuation.
            pattern = r"(?<!\w)" + re.escape(synonym.lower()) + r"(?!\w)"
            if re.search(pattern, indication_name_lower):
                return main_disease
    return None

# Tag every row with its main disease category; find_main_disease yields None
# when no synonym matches the indication name.
ruiz_df['Disease'] = ruiz_df['indication_name'].apply(find_main_disease)

# Keep only the rows that received a category.
filtered_df = ruiz_df[ruiz_df['Disease'].notna()]

print(filtered_df)

         drug        drug_name indication  \
264   DB00130    glutamine-(l)   C0003467   
326   DB00163        Vitamin E   C0002395   
354   DB00176      fluvoxamine   C0028768   
379   DB00182      Amphetamine   C1263846   
385   DB00186        lorazepam   C0003467   
...       ...              ...        ...   
5887  DB01622  thioproperazine   C0036341   
5910  DB00150       tryptophan   C0003467   
5922  DB09225         zotepine   C0036341   
5923  DB01624   zuclopenthixol   C0036341   
5924  DB01624   zuclopenthixol   C0005586   

                                      indication_name  \
264                                           anxiety   
326                               Alzheimer's disease   
354               obsessive compulsive disorder (OCD)   
379   attention-deficit/hyperactivity disorder (ADHD)   
385                                           anxiety   
...                                               ...   
5887                                    schizophrenia   
591

In [41]:
# Persist the name-matched rows (row index omitted from the file).
filtered_df.to_csv(
    path_or_buf='../data/indication_gene_link/ruiz_filtered_data.csv',
    index=False,
)

In [42]:
# Disease label -> concept identifiers matched against ruiz_df['indication'].
# NOTE(review): some labels here ('ADHD', 'MDD', 'Tourette syndrome') differ
# from the category names used in disease_synonyms -- confirm before joining
# results from the two approaches on the label.
# NOTE(review): 'CN372383' does not share the 'C' + digits shape of the other
# identifiers -- verify it can occur in the indication column.
specific_ids = {
    "ADHD": ["C1263846"],
    "Alzheimer’s Disease": [
        "C0002395",
        "C1843013",
        "C1847200",
        "C1863051",
    ],
    "Anxiety Disorder": ["C0003469", "C0270549"],
    "Autism Spectrum Disorder": ["C1510586", "C4014538", "C3809910"],
    "Bipolar Disorder": [
        "C1852197",
        "C0005586",
        "C2700438",
        "C0853193",
        "C0236788",
    ],
    "Eating Disorders": [
        "C0003125",
        "C1847492",
        "C5568567",
        "CN372383",
        "C1843776",
    ],
    "MDD": ["C1269683"],
    "Tourette syndrome": ["C0040517", "C0028768"],
    "Post Traumatic Stress Disorder": ["C0038436", "C5539757"],
    "Schizophrenia": ["C0036341", "C1833247", "C3151380"],
    "Substance Use Disorders": [
        "C1864733",
        "C5419030",
        "C4751523",
        "C0001973",
    ],
}

# Flatten every identifier group into one list; a value that is not already a
# list is treated as a single identifier.
all_ids = [
    cui
    for cui_group in specific_ids.values()
    for cui in (cui_group if isinstance(cui_group, list) else [cui_group])
]

# Rows whose indication identifier is one of the curated identifiers.
filtered_df2 = ruiz_df.loc[ruiz_df['indication'].isin(all_ids)]

print(filtered_df2)

         drug        drug_name indication  \
326   DB00163        Vitamin E   C0002395   
354   DB00176      fluvoxamine   C0028768   
379   DB00182      Amphetamine   C1263846   
470   DB00215       citalopram   C1269683   
517   DB00234       reboxetine   C1269683   
...       ...              ...        ...   
5868  DB00382          tacrine   C0002395   
5887  DB01622  thioproperazine   C0036341   
5922  DB09225         zotepine   C0036341   
5923  DB01624   zuclopenthixol   C0036341   
5924  DB01624   zuclopenthixol   C0005586   

                                      indication_name  \
326                               Alzheimer's disease   
354               obsessive compulsive disorder (OCD)   
379   attention-deficit/hyperactivity disorder (ADHD)   
470                         Major Depressive Disorder   
517                         Major Depressive Disorder   
...                                               ...   
5868                              Alzheimer's disease   
588

In [43]:
# A disease counts as "not found" when none of its identifiers occurs in the
# indication column, i.e. every row fails the membership test.
diseases_not_found = [
    disease_label
    for disease_label, cui_group in specific_ids.items()
    if (
        ~ruiz_df['indication'].isin(
            cui_group if isinstance(cui_group, list) else [cui_group]
        )
    ).all()
]

print("Diseases not found:", diseases_not_found)

Diseases not found: ['Autism Spectrum Disorder', 'Eating Disorders', 'Post Traumatic Stress Disorder']


In [44]:
# Persist the identifier-matched rows (row index omitted from the file).
filtered_df2.to_csv(
    path_or_buf='../data/indication_gene_link/ruiz_filtered_data_2.csv',
    index=False,
)

In [45]:
# Collapse the name-matched rows to one row per disease, joining the distinct
# indication names of each disease with "; ".
indication_names_by_disease = (
    filtered_df
    .groupby('Disease')['indication_name']
    .apply(lambda names: '; '.join(names.unique()))
    .reset_index()
)

# reset_index() already returns a DataFrame, so build the export table with
# rename() (which returns a new frame) rather than re-wrapping it in
# pd.DataFrame and overwriting .columns. The resulting columns are still
# ['Disease', 'Unique Indication Names'].
indication_names_df = indication_names_by_disease.rename(
    columns={'indication_name': 'Unique Indication Names'}
)

# The former mid-cell `indication_names_df.head(7)` displayed nothing (only a
# cell's last expression is rendered); the preview lives in the next cell.
indication_names_df.to_csv('../data/indication_gene_link/dictionary_ruiz.csv', index=False)

In [46]:
# Preview the first seven disease rows of the exported dictionary.
indication_names_df.head(n=7)

Unnamed: 0,Disease,Unique Indication Names
0,Alzheimer’s Disease,Alzheimer's disease
1,Anxiety Disorder,anxiety; Mixed anxiety and depressive disorder...
2,Attention Deficit Hyperactivity Disorder,attention-deficit/hyperactivity disorder (ADHD...
3,Autism Spectrum Disorder,Autistic Disorder
4,Bipolar Disorder,"bipolar disorder; Depression, Bipolar"
5,Major Depressive Disorder,respiratory depression; Major Depressive Disor...
6,OCD & Tourette Syndrome,obsessive compulsive disorder (OCD); Tourette'...
