In [1]:
#  Importing necessary libraries
import pandas as pd
from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the drug dataset from the CSV file (path is relative to the working
# directory) and preview the first rows
filename = 'Drug.csv'
data = pd.read_csv(filename)
print(data.head())

     Drug  Disease  Gender  Age
0  apples  aasthma    Male   23
1  apples  aasthma    Male   33
2  apples  aasthma    Male   43
3  apples  aasthma  Female   23
4  apples  aasthma  Female   33


In [3]:
# Inspect the dtype pandas assigned to each column of the raw frame
data.dtypes

Drug       object
Disease    object
Gender     object
Age         int64
dtype: object

In [4]:
# (rows, columns) of the loaded frame
data.shape

(12, 4)

In [5]:
# Count missing values per column (check only; nothing is imputed or dropped here)
data.isna().sum()

Drug       0
Disease    0
Gender     0
Age        0
dtype: int64

### Data Preprocessing

In [6]:
# Encode Gender as integers (Female -> 0, Male -> 1).
# Rebind `data` instead of mutating in place, and cast explicitly to int64 so the
# resulting dtype does not depend on replace()'s implicit object -> int downcasting.
# Re-running this cell is safe: already-encoded values are left unchanged.
data = data.replace({'Gender': {'Female': 0, 'Male': 1}}).astype({'Gender': 'int64'})

In [7]:
# Single-column frame holding only the Disease values, used to inspect its categories
x = data.loc[:, ['Disease']]

In [8]:
# Distinct disease labels present in the dataset
x['Disease'].unique()

array(['aasthma', 'tuberculosis'], dtype=object)

In [9]:
# Encode Disease as integers (aasthma -> 0, tuberculosis -> 1).
# Rebind `data` instead of mutating in place, and cast explicitly to int64 so the
# resulting dtype does not depend on replace()'s implicit object -> int downcasting.
# Re-running this cell is safe: already-encoded values are left unchanged.
data = data.replace({'Disease': {'aasthma': 0, 'tuberculosis': 1}}).astype({'Disease': 'int64'})

In [10]:
# Preview the frame after the Gender and Disease encoding steps
data.head()

Unnamed: 0,Drug,Disease,Gender,Age
0,apples,0,1,23
1,apples,0,1,33
2,apples,0,1,43
3,apples,0,0,23
4,apples,0,0,33


In [11]:
# Re-check column dtypes after the encoding steps
data.dtypes

Drug       object
Disease     int64
Gender      int64
Age         int64
dtype: object

### Fitting the Model

In [12]:
# Feature Selection
# Predictors: the two encoded categorical columns plus Age.
df_x = data.loc[:, ['Disease', 'Gender', 'Age']]
# Target: the drug label, kept as a one-column DataFrame.
df_y = data.loc[:, ['Drug']]

In [13]:
# Preview the feature matrix
df_x.head()

Unnamed: 0,Disease,Gender,Age
0,0,1,23
1,0,1,33
2,0,1,43
3,0,0,23
4,0,0,33


In [14]:
# Preview the target column
df_y.head()

Unnamed: 0,Drug
0,apples
1,apples
2,apples
3,apples
4,apples


### Train/Test Split

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.2,
    random_state=0,
)

In [16]:
# Preview the training features (row labels show the shuffled original indices)
X_train.head()

Unnamed: 0,Disease,Gender,Age
10,1,0,33
2,0,1,43
8,1,1,43
1,0,1,33
7,1,1,33


In [17]:
# Preview the training targets; row labels line up with X_train
y_train.head()

Unnamed: 0,Drug
10,bananas
2,apples
8,bananas
1,apples
7,bananas


In [18]:
# Fitting Random Forest Classifier
# Train on the training split only: fitting on the full df_x/df_y would let the
# model see the test rows and make the accuracy reported below meaningless.
# random_state is fixed so the fitted forest is reproducible across runs.
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=0)
# np.ravel flattens the one-column target frame into the 1-D array fit() expects
rf = rf.fit(X_train, np.ravel(y_train))

In [19]:
# Model Accuracy
# First line printed: fraction of test rows predicted correctly.
# Second line printed: raw count of correct predictions (normalize=False).
from sklearn.metrics import accuracy_score
y_pred = rf.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    accuracy_score(y_test, y_pred, normalize=False),
    sep='\n',
)

1.0
3


In [20]:
# Cross-check with the estimator's own score method on the same test split
rf.score(X_test, y_test)

1.0

In [21]:
# Making Predictions
# Predict drug labels for the held-out rows and show the first two
prediction = rf.predict(X_test)
print(prediction[:2])

['bananas' 'bananas']


In [22]:
# Hand-built sample to score: Disease=0 (aasthma), Gender=1 (Male), Age=23.
# Built as a one-row DataFrame whose columns match the training features, so the
# value-to-feature mapping is explicit and predict() receives the same column
# names the models were fitted with (a bare ndarray carries no feature names).
test = pd.DataFrame([[0, 1, 23]], columns=['Disease', 'Gender', 'Age'])
print(test.shape)

(1, 3)


In [23]:
# Recommend a drug for the single hand-built sample in `test`
prediction = rf.predict(test)
print(prediction[0])

apples




In [24]:
# Dumping Model
# Persist the trained Random Forest so it can be reloaded without retraining.
import os
import joblib
# Create the output directory first; writing into a missing directory would fail.
os.makedirs('model', exist_ok=True)
joblib.dump(rf, 'model/medical_rf.pkl')

['model/medical_rf.pkl']

In [25]:
# Loading the Model
# Reload the Random Forest that was just written to disk to confirm the file round-trips.
# NOTE(review): joblib files are pickle-based; only load files from a trusted source.
clf = joblib.load('model/medical_rf.pkl')

In [26]:
# Score the same hand-built sample with the reloaded model; the result is
# expected to match the in-memory model's prediction
prediction = clf.predict(test)
print(prediction[0])

apples




### Gaussian Naive Bayes

In [27]:
# Fit Gaussian Naive Bayes on the training split only, so the test rows scored
# below are unseen by the model (fitting on the full df_x/df_y leaks them).
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
# np.ravel flattens the one-column target frame into the 1-D array fit() expects
gnb = gnb.fit(X_train, np.ravel(y_train))

In [28]:
# Check Model Accuracy
# First line printed: fraction of test rows predicted correctly.
# Second line printed: raw count of correct predictions (normalize=False).
y_pred = gnb.predict(X_test)
print(
    accuracy_score(y_test, y_pred),
    accuracy_score(y_test, y_pred, normalize=False),
    sep='\n',
)

1.0
3


In [29]:
# Making Recommendation of Drug Name
# Predict with the Naive Bayes model for the same hand-built sample in `test`
result = gnb.predict(test)
print(result[0])

apples




In [30]:
# Dump the Model in the disk
# Writes the Naive Bayes model into the same 'model' directory as the Random
# Forest file; assumes that directory already exists.
joblib.dump(gnb, 'model/medical_nb.pkl')

['model/medical_nb.pkl']