# Import Libraries

In [1]:
import numpy as np
import pandas as pd

# Load the dataset

In [2]:
# Read the training table from a CSV given by relative path, so it is resolved
# against the directory the notebook kernel is running in.
dataset = pd.read_csv("Training.csv")

In [3]:
# Preview the first rows to sanity-check which columns were parsed.
dataset.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [4]:
# (rows, columns) of the loaded frame; the column count includes the `prognosis` target.
dataset.shape

(4920, 133)

In [5]:
# Number of distinct target labels. dropna=False so that a missing label,
# if one were present, would be counted as its own value as well.
dataset['prognosis'].nunique(dropna=False)

41

In [6]:
# List the raw label strings exactly as stored in the file.
# NOTE(review): in the output recorded below, some labels carry trailing spaces
# ('Diabetes ', 'Hypertension ') and one starts lower-case ('hepatitis A'), so
# any later lookup by disease name has to use these exact strings.
pd.unique(dataset['prognosis'])

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

# Splitting into train and test sets

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [8]:
# Target: the disease label strings.
y = dataset["prognosis"]
# Features: every remaining column of the table.
X = dataset.drop(columns="prognosis")

In [9]:
# Map each disease name to an integer class id. The fitted encoder is kept in
# `le` so ids can be translated back to names (le.inverse_transform / le.classes_).
# Note the naming: `y` holds the raw strings, `Y` the encoded integers.
le = LabelEncoder()
Y = le.fit_transform(y)

In [10]:
# Hold out 30% of the rows for testing; random_state pins the shuffle so the
# split is identical on every run. No `stratify` argument is passed, so the
# per-class proportions in each part are whatever the shuffle produces.
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size=0.3 , random_state = 20 )

In [11]:
# Shapes of the four split parts, in order: X_train, X_test, y_train, y_test.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((3444, 132), (1476, 132), (3444,), (1476,))

 # Training the models

In [12]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB


In [13]:
from sklearn.metrics import accuracy_score , confusion_matrix

In [15]:
# Candidate classifiers, keyed by the name used in the printed report.
# Insertion order is the order in which they are trained and reported.
models = dict(
    SVC=SVC(kernel='linear'),
    RandomForest=RandomForestClassifier(n_estimators=100, random_state=42),
    GradientBoosting=GradientBoostingClassifier(n_estimators=100, random_state=42),
    KNeighbors=KNeighborsClassifier(n_neighbors=5),
    MultinomialNB=MultinomialNB(),
)

# Fit every candidate on the training split and report how it does on the
# held-out split.
# NOTE(review): in the recorded output every model scores 1.0 -- worth confirming
# that identical symptom rows are not present in both train and test.
for model_name, model in models.items():
    # fit() returns the estimator itself, so training and prediction chain.
    predictions = model.fit(X_train, y_train).predict(X_test)

    # Fraction of held-out rows classified correctly.
    accuracy = accuracy_score(y_test, predictions)

    # Rows are true classes, columns are predicted classes.
    cm = confusion_matrix(y_test, predictions)

    # One report per model, followed by a blank separator line.
    print(
        f"{model_name} accuracy : {accuracy}",
        f"{model_name} Confusion Matrix:",
        np.array2string(cm, separator=','),
        "",
        sep="\n",
    )

SVC accuracy : 1.0
SVC Confusion Matrix:
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]

RandomForest accuracy : 1.0
RandomForest Confusion Matrix:
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]

GradientBoosting accuracy : 1.0
GradientBoosting Confusion Matrix:
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]

KNeighbors accuracy : 1.0
KNeighbors Confusion Matrix:
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..., 0, 0, 0],
 ...,
 [ 0, 0, 0,...,34, 0, 0],
 [ 0, 0, 0,..., 0,41, 0],
 [ 0, 0, 0,..., 0, 0,31]]

MultinomialNB accuracy : 1.0
MultinomialNB Confusion Matrix:
[[40, 0, 0,..., 0, 0, 0],
 [ 0,43, 0,..., 0, 0, 0],
 [ 0, 0,28,..

# Single prediction

In [16]:
# Train a stand-alone linear-kernel SVC (the model that gets saved below) and
# score it on the held-out split; the accuracy is the cell's displayed value.
svc = SVC(kernel='linear').fit(X_train, y_train)
ypred = svc.predict(X_test)
accuracy_score(y_true=y_test, y_pred=ypred)

1.0

In [17]:
# saving the model
import pickle

# Write through a context manager so the file handle is flushed and closed as
# soon as the dump finishes, instead of being left open until garbage collection.
with open("svc.pkl", "wb") as model_file:
    pickle.dump(svc, model_file)

In [18]:
# load model
# NOTE: unpickling can execute arbitrary code, so only load a file this
# notebook (or another trusted source) wrote.
# The context manager closes the file handle once the model has been read.
with open("svc.pkl", "rb") as model_file:
    svc = pickle.load(model_file)

In [19]:
# test1
# iloc[[0]] (list index) selects the first held-out row as a one-row DataFrame,
# so the column names travel with it. Passing a bare reshaped array to a model
# fitted on a DataFrame makes scikit-learn warn about missing feature names.
print("Predicted Label :", svc.predict(X_test.iloc[[0]]))
# y_test is the encoded label array, indexed by position, so [0] is the same row.
print("Actual Label :", y_test[0])

Predicted Label : [40]
Actual Label : 40




In [20]:
# test2
# iloc[[10]] (list index) selects the 11th held-out row as a one-row DataFrame,
# so the column names travel with it. Passing a bare reshaped array to a model
# fitted on a DataFrame makes scikit-learn warn about missing feature names.
print("Predicted Label :", svc.predict(X_test.iloc[[10]]))
# y_test is the encoded label array, indexed by position, so [10] is the same row.
print("Actual Label :", y_test[10])

Predicted Label : [20]
Actual Label : 20




# Load the supporting datasets and build the prediction logic for the medical diagnosis system

In [26]:
# Auxiliary lookup tables, each read from a CSV given by relative path.
# NOTE(review): the first file name is spelled "symtoms" -- presumably that is
# the actual name on disk; confirm before "correcting" it.
symptom_description = pd.read_csv("symtoms_df.csv")
precautions = pd.read_csv("precautions_df.csv")
workout = pd.read_csv("workout_df.csv")

In [27]:
# Display the workout table to inspect its columns.
# NOTE(review): the rendered output shows several "Unnamed: ..." columns that
# look like index columns saved into the CSV -- TODO confirm and drop if unused.
workout

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,disease,workout
0,0,0,Fungal infection,Avoid sugary foods
1,1,1,Fungal infection,Consume probiotics
2,2,2,Fungal infection,Increase intake of garlic
3,3,3,Fungal infection,Include yogurt in diet
4,4,4,Fungal infection,Limit processed foods
...,...,...,...,...
405,405,405,Impetigo,Consult a healthcare professional
406,406,406,Impetigo,Follow medical recommendations
407,407,407,Impetigo,Avoid scratching
408,408,408,Impetigo,Take prescribed antibiotics
