In [None]:
%pip install numpy
%pip install pandas
%pip install sklearn


In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the adverse-event table; the path is relative to the notebook's working directory.
df = pd.read_csv("drug-adverse-effect.csv")
# Preview the first rows.
df.head()

In [None]:

# Summary statistics for the columns pandas treats as describable.
df.describe()

In [None]:
# (rows, columns) of the raw table.
df.shape

In [None]:
# Number of missing values per column in the raw table.
df.isna().sum()

In [None]:
# Work on an independent copy restricted to the columns used below, so that
# later column additions do not touch the raw frame.
selected_columns = [
    "safetyreportid",
    "patientonsetage",
    "patientsex",
    "medicinalproduct",
    "activesubstancename",
    "reactionmeddrapt",
]
new_df = df.loc[:, selected_columns].copy()

In [None]:
# Preview the reduced frame.
new_df.head()

In [None]:
# Missing values per selected column; the reaction loop below calls .split() on
# each reactionmeddrapt value directly, so that column needs to be free of NaN.
new_df.isna().sum()

In [None]:
# Tally how often each reaction term occurs. A cell of "reactionmeddrapt" may
# list several terms separated by ", "; terms are compared case-insensitively.
reaction = {}
for effect in new_df['reactionmeddrapt']:
    for term in map(str.lower, effect.split(", ")):
        reaction[term] = reaction.get(term, 0) + 1

In [None]:
# Dump the full term -> count mapping for inspection.
print(reaction)

In [None]:
# Reaction vocabulary: every term seen at least 20 times, minus a hand-picked
# exclusion list, numbered from 1 in first-seen order. A final "others" entry
# takes the next free number.
unw = {
    'device issue', 'incorrect dose administered',
    'wrong technique in product usage process', 'prostate cancer',
    'off label use', 'product dose omission issue', 'death', 'covid-19',
    'therapy interrupted', 'inappropriate schedule of product administration',
    'fall',
}
frequent_reactions = [
    term for term, count in reaction.items()
    if count >= 20 and term not in unw
]
reactionList = {term: code for code, term in enumerate(frequent_reactions, start=1)}
i = len(frequent_reactions) + 1
reactionList["others"] = i

In [None]:
# Tally how often each active substance occurs. Values are passed through
# str() first, so a missing value is counted under its string form.
drug = {}
for med in new_df['activesubstancename']:
    for substance in map(str.lower, str(med).split(", ")):
        drug[substance] = drug.get(substance, 0) + 1

In [None]:
# Drug vocabulary: every active substance seen at least 10 times, numbered
# from 1 in first-seen order, followed by a catch-all "others" entry.
frequent_drugs = [name for name, count in drug.items() if count >= 10]
drugList = {name: code for code, name in enumerate(frequent_drugs, start=1)}
i = len(frequent_drugs) + 1
drugList["others"] = i

In [None]:
# Show the drug vocabulary (name -> 1-based code).
print(drugList)

In [None]:
# Show the retained reaction terms, including the trailing "others" bucket.
print(reactionList.keys())

In [None]:
# Multi-hot encode the reactions: one 0/1 list per vocabulary entry, one slot
# per report. A report with no term in the vocabulary is flagged as "others".
n_reports = len(new_df)
reactiondict = {label: [0] * n_reports for label in reactionList}
for row, effect in enumerate(new_df["reactionmeddrapt"]):
    known_terms = [
        term.lower() for term in effect.split(", ")
        if term.lower() in reactionList
    ]
    for label in known_terms:
        reactiondict[label][row] = 1
    if not known_terms:
        reactiondict["others"][row] = 1

In [None]:
# Number of reports flagged in the "others" reaction column.
print(sum(reactiondict["others"]))

In [None]:
# Multi-hot encode the drugs: one 0/1 list per vocabulary entry, one slot per
# report. A report with no substance in the vocabulary is flagged as "others".
#
# The vocabulary (drugList) was counted from "activesubstancename", so the rows
# are encoded from that same column; matching product names against a
# substance vocabulary would leave most rows in "others".
# str() mirrors the counting loop, so a missing value does not crash .split().
# The loop variables deliberately avoid the name `drug`, which holds the
# substance -> count dict that the drugList cell reads.
n_reports = len(new_df)
drugdict = {name: [0] * n_reports for name in drugList}
for row, substances in enumerate(new_df["activesubstancename"]):
    matched = False
    for token in str(substances).split(", "):
        name = token.lower()
        if name in drugList:
            drugdict[name][row] = 1
            matched = True
    if not matched:
        drugdict["others"][row] = 1

In [None]:
# Number of reports flagged for albuterol. drugdict only has keys for
# vocabulary entries (count >= 10), so this raises KeyError if "albuterol"
# did not make it into drugList.
print(sum(drugdict["albuterol"]))

In [None]:
# Attach one 0/1 indicator column per drug-vocabulary entry to new_df.
for column_name in drugdict:
    new_df[column_name] = drugdict[column_name]

In [None]:
# Feature matrix: everything in new_df except the identifier, demographic and
# raw text columns, i.e. the drug indicator columns.
non_feature_columns = [
    "patientonsetage",
    "patientsex",
    "safetyreportid",
    "medicinalproduct",
    "activesubstancename",
    "reactionmeddrapt",
]
X = new_df.drop(columns=non_feature_columns)

In [None]:
# (reports, drug indicator columns).
X.shape

In [None]:
# Preview the feature matrix.
X.head()

In [None]:
# Label matrix: one 0/1 column per reaction-vocabulary entry, one row per report.
y = pd.DataFrame(reactiondict)

In [None]:
# (reports, reaction indicator columns).
y.shape

In [None]:
# Preview the label matrix.
y.head()

In [None]:
# Place features and labels side by side so duplicates can be removed jointly.
# join='inner' keeps only index values present in both frames.
# NOTE(review): X and y each contain a column named "others", so `d` ends up
# with a duplicated column label.
d = pd.concat([X, y], axis=1, join='inner')

In [None]:
# Keep a single copy of each distinct (features, labels) row.
d = d.drop_duplicates()

In [None]:
# Split the de-duplicated frame back into features and labels BY POSITION.
# Both halves own a column labelled "others", so label-based selection
# (d[X.columns] / d[y.columns]) returns both "others" columns for each half:
# the reaction "others" label would leak into the features and the feature
# count would no longer match len(set(X.columns)).
# `d` was built as concat([X, y], axis=1), so its first X.shape[1] columns are
# the features and the remainder are the labels.
n_feature_cols = X.shape[1]
X = d.iloc[:, :n_feature_cols]
y = d.iloc[:, n_feature_cols:]

In [None]:
%pip install scikit-learn

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, precision_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [None]:
# Hold out 10% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [None]:
# Convert the test labels to a plain array so rows can be indexed by position.
# np.asarray accepts both a DataFrame and an ndarray, so re-running this cell
# is harmless (`.values` would fail the second time, on the ndarray).
Y_test = np.asarray(Y_test)

In [None]:
# One independent logistic-regression classifier per reaction column.
lr_base = LogisticRegression(solver='lbfgs', max_iter=400)
multi_target_lr = MultiOutputClassifier(lr_base).fit(X_train, Y_train)
multi_target_lr

In [None]:
Y_pred = multi_target_lr.predict(X_test)

# Pair true and predicted label vectors row by row on plain arrays.
y_true_rows = np.asarray(Y_test)

# Per-report scores: each list holds one value per test row (not a running
# total). zero_division=0 scores a report with no positive label as 0 recall
# without emitting a warning.
Accuracy_lr = [accuracy_score(truth, pred) for truth, pred in zip(y_true_rows, Y_pred)]
Recall_lr = [recall_score(truth, pred, zero_division=0) for truth, pred in zip(y_true_rows, Y_pred)]

# Mean over the test rows, kept as fractions in [0, 1].
acc_score = sum(Accuracy_lr) / len(Accuracy_lr)
rec_score = sum(Recall_lr) / len(Recall_lr)

# The format string appends a percent sign, so scale the fractions to percentages.
print('Accuracy : %.4f%%, \t, Recall : %.4f%%' %(100 * acc_score, 100 * rec_score))

In [None]:
import joblib

In [None]:
# Persist the fitted logistic-regression model with joblib.
# NOTE(review): the KNN cell further down dumps to the same "model.h5" path,
# so this file is overwritten when the notebook runs top to bottom. The ".h5"
# suffix also suggests HDF5, while joblib.dump writes its own pickle-based
# format; consider a distinct, more accurate file name.
joblib.dump(multi_target_lr, "model.h5")

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# One k-nearest-neighbours classifier (default settings) per reaction column,
# fitted on plain arrays rather than DataFrames.
knn_base = KNeighborsClassifier()
multi_target_knn = MultiOutputClassifier(knn_base).fit(X_train.values, Y_train.values)
multi_target_knn

In [None]:
Y_pred = multi_target_knn.predict(X_test.values)

# Pair true and predicted label vectors row by row on plain arrays.
y_true_rows = np.asarray(Y_test)

# Per-report scores: each list holds one value per test row (not a running
# total). zero_division=0 scores a report with no positive label as 0 recall
# without emitting a warning.
# The *_lr names are kept from the original cell although the scores belong
# to the KNN model.
Accuracy_lr = [accuracy_score(truth, pred) for truth, pred in zip(y_true_rows, Y_pred)]
Recall_lr = [recall_score(truth, pred, zero_division=0) for truth, pred in zip(y_true_rows, Y_pred)]

# Mean over the test rows, kept as fractions in [0, 1].
acc_score = sum(Accuracy_lr) / len(Accuracy_lr)
rec_score = sum(Recall_lr) / len(Recall_lr)

# The format string appends a percent sign, so scale the fractions to percentages.
print('Accuracy : %.4f%%, \t, Recall : %.4f%%' %(100 * acc_score, 100 * rec_score))

In [85]:
import joblib
# Persist the fitted KNN model; this writes to the same "model.h5" path as the
# logistic-regression dump above and therefore replaces that file.
joblib.dump(multi_target_knn, "model.h5")

In [None]:
# Reload the persisted estimator (whichever model was dumped to this path last).
# joblib.load unpickles the file, so only load files from a trusted source.
model = joblib.load("model.h5")

In [None]:
import requests

def getReactions(drugName):
    """Predict likely adverse reactions for a medicinal product name.

    Looks the product up in the openFDA drug adverse-event endpoint, collects
    the active substance names attached to the returned reports, encodes them
    against the training feature columns and asks the loaded model for a
    prediction.

    Relies on the notebook globals ``model`` (fitted multi-output classifier),
    ``X`` (training features, one column per known substance plus "others")
    and ``y`` (training labels, one column per reaction).

    Parameters
    ----------
    drugName : str
        Product name to search for, e.g. ``"humira"``.

    Returns
    -------
    list of str
        Names of the reaction columns the model predicts as 1.

    Raises
    ------
    requests.RequestException
        If the HTTP request fails or times out.
    """
    # Pass the query through `params` so requests URL-encodes spaces, quotes
    # and other special characters in the product name.
    response = requests.get(
        "https://api.fda.gov/drug/event.json",
        params={"search": f'patient.drug.medicinalproduct:"{drugName}"'},
        timeout=30,
    )
    # A payload without a "results" key (for example an error payload) is
    # treated as "no reports" instead of raising KeyError.
    results = response.json().get("results", [])

    substancename = set()
    for res in results:
        for drug in res.get("patient", {}).get("drug", []):
            name = (drug.get("activesubstance") or {}).get("activesubstancename")
            if name:  # skip drug entries without a usable substance name
                substancename.add(name)
    print(substancename)

    # Encode exactly like the training features: split on ", ", lowercase the
    # whole substance name, and look it up among the feature columns.
    feature_names = list(X.columns)
    column_of = {name: pos for pos, name in enumerate(feature_names)}
    xres = [0] * len(feature_names)
    matched = False
    for sub in substancename:
        for token in sub.split(", "):
            pos = column_of.get(token.lower())
            if pos is not None:
                xres[pos] = 1
                matched = True
    if not matched:
        # Training flags "others" only when no known substance was found;
        # fall back to the last column if no column carries that name.
        xres[column_of.get("others", len(feature_names) - 1)] = 1
    print(xres)

    yres = model.predict([xres])[0]

    reactions = y.columns.to_list()
    return [reactions[i] for i in range(len(yres)) if yres[i] == 1]

# Example call; this performs a live HTTP request to api.fda.gov.
getReactions("humira")

In [None]:
# Spot-check: predicted label vector at position 145. Y_pred holds the
# predictions of whichever evaluation cell ran last.
Y_pred[145]

In [None]:
# Spot-check: true label vector at position 145, for comparison with the prediction above.
Y_test[145]

In [None]:
# One random forest (300 trees, depth capped at 60) per reaction column.
# random_state is fixed, matching the seed used for the train/test split, so
# that re-running the notebook reproduces the same forests and scores.
clf = RandomForestClassifier(n_estimators=300, criterion='gini', max_depth=60, random_state=42)
multi_target_forest = MultiOutputClassifier(clf)
multi_target_forest.fit(X_train, Y_train)

In [None]:
import warnings
Y_pred = multi_target_forest.predict(X_test)

# Pair true and predicted label vectors row by row on plain arrays.
y_true_rows = np.asarray(Y_test)

# Per-report scores: each list holds one value per test row (not a running
# total). zero_division=0 scores an undefined precision/recall as 0 without a
# warning, so no process-wide warning filter is needed.
Accuracy_RF = [accuracy_score(truth, pred) for truth, pred in zip(y_true_rows, Y_pred)]
Precision_RF = [precision_score(truth, pred, zero_division=0) for truth, pred in zip(y_true_rows, Y_pred)]
Recall_RF = [recall_score(truth, pred, zero_division=0) for truth, pred in zip(y_true_rows, Y_pred)]

# Mean over the test rows, kept as fractions in [0, 1].
acc_score = sum(Accuracy_RF) / len(Accuracy_RF)
prec_score = sum(Precision_RF) / len(Precision_RF)
rec_score = sum(Recall_RF) / len(Recall_RF)

# The format string appends a percent sign, so scale the fractions to percentages.
print('Accuracy : %.4f%%, \t Precision : %.4f%%, \t, Recall : %.4f%%' %(100 * acc_score, 100 * prec_score, 100 * rec_score))