## Import Datasets and Libraries

In [1]:
import pandas as pd

In [2]:
data = pd.read_csv('data/Training.csv')

In [3]:
data.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [4]:
data.shape

(4920, 133)

In [5]:
data['prognosis'].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

## Train-Test Split

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [7]:
X = data.drop('prognosis',axis=1)
y = data['prognosis']

In [8]:
lb =LabelEncoder()
lb.fit(y)
Y = lb.transform(y)
Y


array([15, 15, 15, ..., 38, 35, 27])

In [9]:
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size = 0.3, random_state =20)

In [10]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((3444, 132), (1476, 132), (3444,), (1476,))

## Training Top Models

In [11]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
import numpy as np

from sklearn.metrics import accuracy_score, confusion_matrix  

#create a dic to store models
models = {
    'SVC': SVC(kernel= 'linear'),
    'RandomForestClassifier': RandomForestClassifier(n_estimators=100,random_state=42),
    'GradientBoostingClassifier':GradientBoostingClassifier(n_estimators=100,random_state=42),
    'KNeighbors':KNeighborsClassifier(n_neighbors=5),
    'MultinomialNB': MultinomialNB()
}

for model_name , model in models.items():
    model.fit(x_train,y_train)
    predict = model.predict(x_test)

    accuracy = accuracy_score(y_test,predict)

    cm = confusion_matrix(y_test,predict)

    print(f"{model_name} : {accuracy}")
    print(f"{model_name} Confusion matrix")
    print(np.array2string(cm,separator=","))

SVC : 1.0
SVC Confusion matrix
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]
RandomForestClassifier : 1.0
RandomForestClassifier Confusion matrix
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]
GradientBoostingClassifier : 1.0
GradientBoostingClassifier Confusion matrix
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]
KNeighbors : 1.0
KNeighbors Confusion matrix
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]
MultinomialNB : 1.0
MultinomialNB Confusion matrix
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 

## Single Prediction

In [12]:
svc = SVC(kernel = 'linear')
svc.fit(x_train,y_train)
ypred = svc.predict(x_test)
accuracy_score(y_test,ypred)

1.0

In [13]:
import pickle
# pickle.dump(svc,open("models/svc.pkl","wb"))

In [14]:
svc = pickle.load(open("models/svc.pkl","rb"))

In [15]:
print("Predicted Label: ",svc.predict(x_test.iloc[15].values.reshape(1,-1)))
print("Actual Label: ",y_test[15])

Predicted Label:  [11]
Actual Label:  11




## Recommendation System and Prediction

### Load database

In [16]:
sym_data = pd.read_csv('data/symtoms_df.csv')
precautions_data = pd.read_csv('data/precautions_df.csv')
workout_data = pd.read_csv('data/workout_df.csv')
description_data = pd.read_csv('data/description.csv')
medi_data = pd.read_csv('data/medications.csv')
diet_data = pd.read_csv('data/diets.csv')

In [17]:
# helper function
def helper(disease):
    descr = description_data[description_data['Disease']== disease]['Description']
    descr =  " ".join([w for w in descr])

    precau = precautions_data[precautions_data['Disease']== disease][['Precaution_1','Precaution_2','Precaution_3','Precaution_4']]
    precau = [col for col in precau.values]

    med = medi_data[medi_data['Disease'] == disease]['Medication']
    med = [med for med in med.values]

    diet = diet_data[diet_data['Disease'] == disease]['Diet']
    diet = [diet for diet in diet.values]

    workout = workout_data[workout_data['disease'] == disease]['workout']

    return descr,precau,med,diet,workout


symptoms_dict = {'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of urine': 90, 'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic _patches': 102, 'watering_from_eyes': 103, 'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131}
diseases_list = {15: 'Fungal infection', 4: 'Allergy', 16: 'GERD', 9: 'Chronic cholestasis', 14: 'Drug Reaction', 33: 'Peptic ulcer diseae', 1: 'AIDS', 12: 'Diabetes ', 17: 'Gastroenteritis', 6: 'Bronchial Asthma', 23: 'Hypertension ', 30: 'Migraine', 7: 'Cervical spondylosis', 32: 'Paralysis (brain hemorrhage)', 28: 'Jaundice', 29: 'Malaria', 8: 'Chicken pox', 11: 'Dengue', 37: 'Typhoid', 40: 'hepatitis A', 19: 'Hepatitis B', 20: 'Hepatitis C', 21: 'Hepatitis D', 22: 'Hepatitis E', 3: 'Alcoholic hepatitis', 36: 'Tuberculosis', 10: 'Common Cold', 34: 'Pneumonia', 13: 'Dimorphic hemmorhoids(piles)', 18: 'Heart attack', 39: 'Varicose veins', 26: 'Hypothyroidism', 24: 'Hyperthyroidism', 25: 'Hypoglycemia', 31: 'Osteoarthristis', 5: 'Arthritis', 0: '(vertigo) Paroymsal  Positional Vertigo', 2: 'Acne', 38: 'Urinary tract infection', 35: 'Psoriasis', 27: 'Impetigo'}

In [18]:
# model prediction function
def get_predicted_values(patient_symptoms):
    input_vector = np.zeros(len(symptoms_dict))

    for item in patient_symptoms:
        input_vector[symptoms_dict[item]] = 1

    return diseases_list[svc.predict([input_vector])[0]]

In [19]:
symptoms = input("Enter your Symptoms: ")
user_symptoms = [s.strip() for s in symptoms.split(',')]
user_symptoms = [sym.strip("[]'") for sym in user_symptoms] 
predicted_disease = get_predicted_values(user_symptoms)
descr,precau,med,diet,workout = helper(predicted_disease)

print("================Disease================")
print(predicted_disease)
print("==============Description==============")
print(descr)
print("==============precaution===============")
i =1
for pi in precau[0]:
    print(i,": ",pi)
    i+=1
print("=============Medication================")
i=1
for mi in med:
    print(i,": ",mi)
    i+=1

print("================Diet===================")
i=1
for d in diet:
    print(i,": ",d)
    i+=1
print("==============Workout==================")
i=1
for wi in workout:
    print(i,": ",wi)
    i+=1


Enter your Symptoms:  itching,skin_rash


Fungal infection
Fungal infection is a common skin condition caused by fungi.
1 :  bath twice
2 :  use detol or neem in bathing water
3 :  keep infected area dry
4 :  use clean cloths
1 :  ['Antifungal Cream', 'Fluconazole', 'Terbinafine', 'Clotrimazole', 'Ketoconazole']
1 :  ['Antifungal Diet', 'Probiotics', 'Garlic', 'Coconut oil', 'Turmeric']
1 :  Avoid sugary foods
2 :  Consume probiotics
3 :  Increase intake of garlic
4 :  Include yogurt in diet
5 :  Limit processed foods
6 :  Stay hydrated
7 :  Consume green tea
8 :  Eat foods rich in zinc
9 :  Include turmeric in diet
10 :  Eat fruits and vegetables




In [20]:
import sklearn
print(sklearn.__version__)

1.5.1
