In [1]:
#Importing Important Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib

### Loading Dataset

In [2]:
df= pd.read_csv('Drug.csv')

In [3]:
df.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Male,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,Male,23
2,ACGEL CL NANO Gel 15gm,Acne,Male,23
3,ACGEL NANO Gel 15gm,Acne,Male,23
4,Acleen 1% Lotion 25ml,Acne,Male,23


In [4]:
df.shape

(4955, 4)

In [5]:
#Checking the missing values
df.isnull().sum()

Drug       0
Disease    0
Gender     0
Age        0
dtype: int64

In [6]:
df.dtypes

Drug       object
Disease    object
Gender     object
Age         int64
dtype: object

### Data Preprocessing

In [7]:
# Gender_mapping = {'Feamle':0,'Male':1}
# df['Gender'] = df['Gender'].replace(Gender_mapping,inplace = True)
df.replace({'Gender':{'Female':0,'Male':1}},inplace = True)

In [8]:
df.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,1,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,1,23
2,ACGEL CL NANO Gel 15gm,Acne,1,23
3,ACGEL NANO Gel 15gm,Acne,1,23
4,Acleen 1% Lotion 25ml,Acne,1,23


In [9]:
x = df[['Disease']]


In [10]:
x.Disease.unique()

array(['Acne', 'Allergy', 'Diabetes', 'Fungal infection',
       'Urinary tract infection', 'Malaria', 'Migraine', 'Hepatitis B',
       'AIDS'], dtype=object)

In [11]:
df.replace({'Disease':{'Acne':0, 'Allergy':1, 'Diabetes':2, 'Fungal infection':3,
       'Urinary tract infection':4, 'Malaria':5, 'Migraine':6, 'Hepatitis B':7,
       'AIDS':8}},inplace = True)

In [12]:
df.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,A CN Gel(Topical) 20gmA CN Soap 75gm,0,1,23
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,0,1,23
2,ACGEL CL NANO Gel 15gm,0,1,23
3,ACGEL NANO Gel 15gm,0,1,23
4,Acleen 1% Lotion 25ml,0,1,23


### Fitting the Model

In [13]:
df_x = df[['Disease','Gender','Age']]
df_y = df[['Drug']]

In [14]:
df_x.head()

Unnamed: 0,Disease,Gender,Age
0,0,1,23
1,0,1,23
2,0,1,23
3,0,1,23
4,0,1,23


In [15]:
df_y.head()

Unnamed: 0,Drug
0,A CN Gel(Topical) 20gmA CN Soap 75gm
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...
2,ACGEL CL NANO Gel 15gm
3,ACGEL NANO Gel 15gm
4,Acleen 1% Lotion 25ml


In [16]:
#Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
     df_x, df_y, test_size=0.2, random_state=0)

In [17]:
X_train.head()

Unnamed: 0,Disease,Gender,Age
2150,2,1,23
167,0,0,20
3188,2,0,23
844,1,0,23
4129,3,0,37


In [18]:
X_test.head()

Unnamed: 0,Disease,Gender,Age
2540,2,0,23
446,1,1,23
4771,8,0,20
541,1,0,20
1270,2,0,23


In [19]:
y_train.head()

Unnamed: 0,Drug
2150,Glycinorm Total 30mg Tablet 10'SGlycinorm Tota...
167,Dersol BH Ointment 30gm
3188,Wosulin New 30/70 100IU Cartridge 3X3ml
844,Lejet M JR Tablet 10'S
4129,Zocon 50mg Tablet 4'SZocon Transgel 15gmZocon ...


In [20]:
#Fitting Random Forest
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier()
rf = rf.fit(df_x,np.ravel(df_y))

In [21]:
#Model Accuracy
from sklearn.metrics import accuracy_score
y_pred = rf.predict(X_test)
print(accuracy_score(y_test,y_pred))
print(accuracy_score(y_test,y_pred,normalize = False))

0.034308779011099896
34


In [22]:
#Score
rf.score(X_test,y_test)

0.034308779011099896

In [23]:
prediction = rf.predict(X_test)
print(prediction[0:10])

["Adglim 2mg Tablet 10'SAdglim 1mg Tablet 10'S" "Alergo 120mg Tablet 10'S"
 "Axovir 800mg Tablet 10'SAxovir 400mg Tablet 10'SAxovir 250mg Injection 1'SAxovir 500mg Injection 1'SAxovir 200mg Tablet 10'S"
 "BILANIX Tablet 10's" "Adglim 2mg Tablet 10'SAdglim 1mg Tablet 10'S"
 'Jucan 150mg Tablet'
 "Axovir 800mg Tablet 10'SAxovir 400mg Tablet 10'SAxovir 250mg Injection 1'SAxovir 500mg Injection 1'SAxovir 200mg Tablet 10'S"
 "PAXOQUIN Tablet 10's" "PAXOQUIN Tablet 10's"
 "Axovir 800mg Tablet 10'SAxovir 400mg Tablet 10'SAxovir 250mg Injection 1'SAxovir 500mg Injection 1'SAxovir 200mg Tablet 10'S"]


In [24]:
#Making the prediction of a patient with disease Malaria, age 24,Gender Male
test = [5,1,24]
test = np.array(test)#List to numpy array
print(test.shape)#Shape of the array in 1D
test = np.array(test).reshape(1,-1)#Convert 1D to 2D array
print(test.shape)

(3,)
(1, 3)


In [25]:
#Recommend Drug Name based on Disease , Gender and Age
prediction = rf.predict(test)
print(prediction[0])

Combither Forte 80/480mg Tablet 6'S




In [53]:
#Dumping the model in the disk
import pickle
with open('medical_rf.pickle','wb') as f:
    pickle.dump(rf,f)

In [54]:
#Loading the model
rfc = joblib.load('medical_rf.pickle')

In [26]:
# Fitting the model by Naive Bayes

from sklearn.naive_bayes import GaussianNB

In [27]:
gnb = GaussianNB()
gnb.fit(df_x,np.ravel(df_y))

GaussianNB()

In [29]:
y_pred = gnb.predict(X_test)
print(accuracy_score(y_test,y_pred))
print(accuracy_score(y_test,y_pred,normalize = False))

0.039354187689202826
39


In [32]:
gnb.score(X_test,y_test)

0.039354187689202826

In [34]:
# Recommedation of Drug
result = gnb.predict(test)
print(result[0])

Combither Forte 80/480mg Tablet 6'S




In [35]:
#Dumping the model in the disk
import pickle
with open('medical_nb.pickle','wb') as f:
    pickle.dump(gnb,f)

In [36]:
#Loading the model
nb = joblib.load('medical_nb.pickle')