In [7]:
# Importing necessary libraries
import pandas as pd 
from pandas import read_csv
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the training CSV into a DataFrame and preview its first rows
filename = 'data/Training.csv'
data = pd.read_csv(filename)
data.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


#### Feature Selection And Split Dataset

In [10]:
from sklearn.model_selection import train_test_split

# Separate the features from the label by column name rather than by a
# hard-coded position, so the selection stays correct if the number of
# symptom columns in the CSV ever changes.
TARGET_COLUMN = 'prognosis'
df_x = data.drop(columns=[TARGET_COLUMN])  # every column except the label (the symptom columns)
df_y = data[TARGET_COLUMN]                 # label to predict

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.2, random_state=0)

#### Fitting the model

In [14]:
# GaussianNB
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import joblib

# Train a Gaussian Naive Bayes classifier on the training split
gnb = GaussianNB().fit(X_train, np.ravel(y_train))

# Score on the held-out split: fraction correct first, then the raw count
y_pred = gnb.predict(X_test)
gnb_accuracy = accuracy_score(y_test, y_pred)
gnb_correct = accuracy_score(y_test, y_pred, normalize=False)
print(gnb_accuracy)
print(gnb_correct)

# Persist the fitted model to disk
# (reload it later with: nb = joblib.load("model/naive_bayes.pkl"))
joblib.dump(gnb, 'model/naive_bayes.pkl')

1.0
984


['model/naive_bayes.pkl']

#### Create patient input data for testing model  
We write a function, `symptoms_match`, to build the input vector for the Naive Bayes model.

In [84]:
# Build an all-zero, single-row template whose columns are the symptom names
symptoms_list = list(data)[:-1]  # every column except the last one (prognosis)
# Size the row from the symptom list instead of hard-coding 132, so the
# template always matches the columns actually present in `data`
patient_template = pd.DataFrame(np.zeros((1, len(symptoms_list))), columns=symptoms_list)

def symptoms_match(symp, patient_template):
    '''
    Encode a patient's symptoms as a single-row 0/1 feature vector.

    Parameters
    ----------
    symp : str or iterable of str
        Symptom name(s) reported by the patient. A bare string is treated as
        one symptom name. Names that are not columns of `patient_template`
        are ignored.
    patient_template : pandas.DataFrame
        Frame whose column names define the feature order. Only the column
        names are used; its values are not read.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (1, number of template columns) holding 1 where
        the column name was reported in `symp` and 0 elsewhere.
    '''
    # A bare string would otherwise be substring-matched by `in`
    # (e.g. 'pain' in 'back_pain'), so wrap it as a one-item list.
    # Any other iterable is materialised so that a one-shot iterator is not
    # exhausted by the first lookup and a pandas Series is matched on its
    # values rather than on its index.
    if isinstance(symp, str):
        reported = [symp]
    else:
        reported = list(symp)

    # Iterating a DataFrame yields its column names in order
    patient_record = [1 if symptom in reported else 0 for symptom in patient_template]

    return np.array(patient_record, dtype=int).reshape(1, -1)


In [85]:
# Two example patients, each described by a list of reported symptoms
moses = ["continuous_sneezing", "stomach_pain", "small_dents_in_nails"]
james = ['shivering', 'headache', 'vomiting', 'muscle_pain', 'diarrhoea']

# Encode each symptom list as a single model-ready row
moses_input = symptoms_match(moses, patient_template)
james_input = symptoms_match(james, patient_template)

# Check the encoded shape, then predict both patients with the Naive Bayes model
print(moses_input.shape)
for patient_input in (moses_input, james_input):
    print(gnb.predict(patient_input))

(1, 132)
['Allergy']
['Malaria']




#### Random Forest

In [81]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so it is rebuilt identically on every run; without a seed
# the fitted trees (and therefore the predictions) can change between runs.
# The value matches the random_state used for the train/test split.
rf = RandomForestClassifier(random_state=0)
rf.fit(X_train, np.ravel(y_train))
y_pred = rf.predict(X_test)

print(accuracy_score(y_test, y_pred))                   # fraction of test rows predicted correctly
print(accuracy_score(y_test, y_pred, normalize=False))  # number of test rows predicted correctly
print(rf.score(X_test, y_test))                         # the estimator's own accuracy on the same split

# dump model
joblib.dump(rf, "model/random_forest.pkl") #you can load model using joblib.load()

1.0
984
1.0


['model/random_forest.pkl']

In [86]:
# Predict the same two example patients with the random forest
# (its answer for moses can differ from the GaussianNB one)
for patient_input in (moses_input, james_input):
    print(rf.predict(patient_input))

['Psoriasis']
['Malaria']


