In [1]:
import pandas as pd

In [2]:
# loading the SIDER indiciations data
indications = pd.read_csv('data/meddra_all_indications.tsv', sep='\t') # drug indications contains which drug treats which symptom
indications.columns = ['CID', 'Concept_Code', 'Source_Type', 'Concept_Name', 'Concept_Code_2', 'Concept_Name_2', 'Concept_Name_3']
indications_filtered = indications[["CID", "Concept_Name"]]
print("Head of Indications\n", indications_filtered.head())

Head of Indications
             CID             Concept_Name
0  CID100000085        Failure to Thrive
1  CID100000085             Hypoglycemia
2  CID100000085             Hypoglycemia
3  CID100000085  Kidney Failure, Chronic
4  CID100000085  Kidney Failure, Chronic


In [3]:
# loading the SIDER side effects data
sideeffects = pd.read_csv('data/meddra_all_se.tsv', sep='\t') # drug side effects contains which drug causes which symptom
sideeffects.columns = ['CID', 'Drug_Identifier', 'MedDRA_Concept_ID', 'MedDRA_Concept_Type', 'MedDRA_Concept_ID_Type', 'Side_Effect_Name']
sideeffects_filtered = sideeffects[["CID", "Side_Effect_Name"]]
print("\n\nHead of Side effects\n", sideeffects_filtered.head())



Head of Side effects
             CID       Side_Effect_Name
0  CID100000085         Abdominal pain
1  CID100000085         Abdominal pain
2  CID100000085  Gastrointestinal pain
3  CID100000085         Abdominal pain
4  CID100000085              Amblyopia


In [4]:
drug_names  = pd.read_csv('data/drug_names.tsv', sep='\t') # drug names contains the drug names and their respective IDs
drug_names.columns = ['CID', 'Drug_name']
print("\n\nHead of Drug Names\n", drug_names.head())



Head of Drug Names
             CID                 Drug_name
0  CID100000119        gamma-aminobutyric
1  CID100000137          5-aminolevulinic
2  CID100000143                leucovorin
3  CID100000146  5-methyltetrahydrofolate
4  CID100000158                      PGE2


In [10]:
'''
loading DDInter data
(1) A - Interaction involving alimentary tract and metabolism drugs
(2) B - Interaction involving blood and blood forming organs drugs
(3) D - Interaction involving dermatologicals drugs
(4) H - Interaction involving systemic hormonal preparations, excluding sex hormones and insulins drugs
(5) L - Interaction involving antineoplastic and immunomodulating agents drugs
(6) P - Interaction involving antiparasitic products, insecticides and repellents drugs
(7) R - Interaction involving respiratory system drugs 
(8) V - Interaction involving various drugs
'''
letters = ['A', 'B', 'D', 'H', 'L', 'P', 'R', 'V']
ddi_dfs = []
for letter in letters:
    ddi_dfs.append(pd.read_csv(f'data/ddinter_downloads_code_{letter}.csv'))
ddi_data = pd.concat(ddi_dfs)
print(ddi_data.shape)
ddi_data = ddi_data[ddi_data['Level'] != 'Unknown']
print(ddi_data.shape)
print("\n\nHead of DDI Data\n", ddi_data.head())

(222383, 5)
(175201, 5)


Head of DDI Data
    DDInterID_A              Drug_A  DDInterID_B        Drug_B     Level
0  DDInter1263          Naltrexone     DDInter1      Abacavir  Moderate
1     DDInter1            Abacavir  DDInter1348      Orlistat  Moderate
2    DDInter58  Aluminum hydroxide   DDInter582  Dolutegravir     Major
3   DDInter112          Aprepitant   DDInter582  Dolutegravir     Minor
4   DDInter138         Attapulgite   DDInter582  Dolutegravir     Major


In [None]:
# Get unique drugs from both Drug_A and Drug_B columns
unique_drugs_A = ddi_data['Drug_A'].unique()
unique_drugs_B = ddi_data['Drug_B'].unique()

# Combine the unique drugs into a single set to avoid duplicates
unique_drugs = set(unique_drugs_A).union(set(unique_drugs_B))
print(len(unique_drugs))

# Count how many of these drugs are in drug_names
count = 0
for drug in unique_drugs:
    if drug in drug_names['Drug_name'].values:
        count += 1

print(count)

14
