Import Libraries

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_classification
from sklearn.svm import SVC


Read the data set

In [6]:
data = pd.read_csv("realistic_training.csv")

In [7]:
data.head()

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,Arthritis
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Hypothyroidism
2,0,0,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Common Cold
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Hyperthyroidism
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Heart attack


In [8]:
data.shape


(1000, 133)

In [9]:
data = data.dropna(axis=0, how='any')

In [10]:
data['prognosis'].unique() #number of all unique diseases not repeated

array(['Arthritis', 'Hypothyroidism', 'Common Cold', 'Hyperthyroidism',
       'Heart attack', 'Psoriasis', 'GERD', 'Chicken pox', 'Hypoglycemia',
       'Alcoholic hepatitis', 'Acne', 'Dengue', 'Peptic ulcer disease',
       'Drug Reaction', 'Diabetes ', 'Osteoarthristis',
       'Fungal infection', 'Pneumonia', 'Cervical spondylosis',
       'Gastroenteritis', 'Hepatitis C', 'Bronchial Asthma',
       'Dimorphic hemmorhoids(piles)', 'Allergy', 'AIDS',
       'Varicose veins', 'Hepatitis D', 'Migraine', 'Hypertension ',
       'Typhoid', 'hepatitis A', 'Chronic cholestasis',
       'Urinary tract infection', 'Malaria',
       'Paralysis (brain hemorrhage)',
       '(vertigo) Paroymsal  Positional Vertigo', 'Impetigo',
       'Hepatitis E', 'Jaundice', 'Tuberculosis', 'Hepatitis B'],
      dtype=object)

In [11]:
len(data['prognosis'].unique()) #number of unique diseases not repeated

41

In [12]:
data=data.dropna(axis=0, how='any') #drop rows with any NaN values

Train and Split dataset

"from sklearn.preprocessing import LabelEncoder"
This is for the prognosis columns
we have to encode the output column which is Prognosis

In [13]:
X = data.drop("prognosis", axis=1) #132 input columns in this x
y = data['prognosis']
X.shape, y.shape

((1000, 132), (1000,))

Transform the values of "Prognosis" into the numpy array.

In [14]:
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)
Y


array([ 5, 26, 10, 24, 18, 35, 16,  8, 25, 25, 18,  8, 35,  3,  2, 11, 35,
       24, 10, 33,  3, 14, 12, 12, 31, 15, 34,  7,  7, 17, 18,  5, 20,  6,
       13, 13,  4,  1, 18, 39, 17, 21, 30, 21,  6, 23, 37, 11, 35, 40,  9,
       38, 15, 29,  8, 32, 18, 31, 17, 37,  0, 33, 31, 29, 17, 39,  2, 31,
        3,  0, 13, 11,  9, 31, 18, 10, 23,  6, 12, 14, 18, 38, 31, 30, 21,
        0, 27, 31,  1, 24, 23, 34, 10,  0, 33, 29,  8, 24, 26, 40, 15, 26,
       33, 22,  3,  4, 16, 16,  2, 38,  4,  3, 31, 33,  5, 37, 31, 32, 20,
       29, 24,  4, 13,  9, 32, 34,  8, 16, 28, 20,  8, 32, 28,  8, 25, 34,
        1, 28,  9, 12, 36, 32, 37, 34, 24, 14, 12, 28,  7, 22, 17, 38,  7,
        9, 36, 25, 10, 26, 13, 11, 37, 30, 15,  4, 29, 19,  6, 39, 27, 38,
       17, 34, 21,  8, 29,  7, 19, 31,  8,  2, 25,  4, 30, 14, 16,  7,  9,
       24, 24, 33, 16, 23,  4, 15, 30, 13,  7, 17,  9,  0, 19, 19, 39, 22,
        0,  3, 37, 25, 13,  4, 18, 37, 35, 22,  7, 38,  2, 18, 12, 17, 18,
        5, 25,  4, 28,  2

In [15]:
X_train,X_test,Y_train,Y_test  = train_test_split(X, Y, test_size=0.3, random_state=20)


In [16]:
X_train.shape,X_test.shape,Y_train.shape,Y_test.shape

((700, 132), (300, 132), (700,), (300,))

Training Model (Classification)

In [17]:
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB

from sklearn.metrics import accuracy_score,confusion_matrix

#create a dictionary to store models
models = {
    "SVC":SVC(kernel='linear'),
    "RandomForest":RandomForestClassifier(n_estimators=100,random_state=42),
    "GradientBoosting": GradientBoostingClassifier(n_estimators=100,random_state=42),
    "KNeighbors":KNeighborsClassifier(n_neighbors=5),
    "MultinomialNB":MultinomialNB()
}

for model_name, model in models.items():
    #print(model_name, " : ",model)
    #trin model
    model.fit(X_train,Y_train)

    #test model
    predictions = model.predict(X_test)
    
    #calculate accuracy
    accuracy = accuracy_score(Y_test,predictions)

    #calculate confusion matrix
    cm = confusion_matrix(Y_test,predictions)

    print(f"{model_name} accuracy : {accuracy}")
    print(f"{model_name} Confusion Matrix : ")
    print(np.array2string(cm, separator=', '))


SVC accuracy : 0.9966666666666667
SVC Confusion Matrix : 
[[ 5,  0,  0, ...,  0,  0,  0],
 [ 0,  4,  0, ...,  0,  0,  0],
 [ 0,  0,  5, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 10,  0,  0],
 [ 0,  0,  0, ...,  0,  4,  0],
 [ 0,  0,  0, ...,  0,  0,  9]]
RandomForest accuracy : 0.9966666666666667
RandomForest Confusion Matrix : 
[[ 5,  0,  0, ...,  0,  0,  0],
 [ 0,  4,  0, ...,  0,  0,  0],
 [ 0,  0,  5, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 10,  0,  0],
 [ 0,  0,  0, ...,  0,  4,  0],
 [ 0,  0,  0, ...,  0,  0,  9]]
GradientBoosting accuracy : 0.9966666666666667
GradientBoosting Confusion Matrix : 
[[ 5,  0,  0, ...,  0,  0,  0],
 [ 0,  4,  0, ...,  0,  0,  0],
 [ 0,  0,  5, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, ..., 10,  0,  0],
 [ 0,  0,  0, ...,  0,  4,  0],
 [ 0,  0,  0, ...,  0,  0,  8]]
KNeighbors accuracy : 0.9966666666666667
KNeighbors Confusion Matrix : 
[[ 5,  0,  0, ...,  0,  0,  0],
 [ 0,  4,  0, ...,  0,  0,  0],
 [ 0,  0,  5, ...,  0,  0,  0],
 ...,
 [ 0,  0,  0, 

Single Prediction

In [18]:
svc = SVC(kernel='linear')
svc.fit(X_train,Y_train)
ypred = svc.predict(X_test)
accuracy_score(Y_test,ypred)

0.9966666666666667

In [19]:
#saving models write bind
import pickle
pickle.dump(svc,open("svc.pkl",'wb'))

In [20]:
#load model read bind
svc = pickle.load(open("svc.pkl","rb"))

In [21]:
#test 1
print("Predicted Label : ", svc.predict(X_test.iloc[0].values.reshape(1,-1)))
print("Actual Label : ",Y_test[0])

Predicted Label :  [33]
Actual Label :  33




In [22]:
#test 2
print("Predicted Label : ", svc.predict(X_test.iloc[10].values.reshape(1,-1)))
print("Actual Label : ",Y_test[10])

Predicted Label :  [24]
Actual Label :  24




load dataset of symtomps description

In [23]:
# sym_des = pd.read_csv("symtoms_df.csv")
precautions = pd.read_csv("updated_precautions.csv")
workout = pd.read_csv("updated_workouts.csv")
description = pd.read_csv("updated_description.csv")
medication = pd.read_csv("medications_clean.csv")
diets = pd.read_csv("updated_diets.csv")

In [24]:
# columns_dict = {column: index for index, column in enumerate(sym_des.columns)}

# # Print the dictionary
# print(columns_dict)

In [25]:
def helper(dis):
    # Description (as a string)
    desc_row = description[description['Disease'] == dis]['Description']
    desc = desc_row.iloc[0] if not desc_row.empty else ""

    # Precautions (correctly handled as a list of strings)
    pre_row = precautions[precautions['Disease'] == dis][['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']]
    pre = pre_row.iloc[0].tolist() if not pre_row.empty else []



    # Medications (as a list)
    med_row = medication[medication['Disease'] == dis]['Medication']
    med = [med_row.iloc[0]] if not med_row.empty else []

    # Diet (as a list)
    die_row = diets[diets['Disease'] == dis]['Diet']
    die = [die_row.iloc[0]] if not die_row.empty else []

    # Workout (as a list, not string!)
    wrkout_row = workout[workout['Disease'] == dis]['Workout']
    wrkout = [wrkout_row.iloc[0]] if not wrkout_row.empty else []

    return desc, pre, med, die, wrkout


symptoms_dict={'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and_distorted_vision': 49, 'phlegm': 50, 'throat_irritation': 51, 'redness_of_eyes': 52, 'sinus_pressure': 53, 'runny_nose': 54, 'congestion': 55, 'chest_pain': 56, 'weakness_in_limbs': 57, 'fast_heart_rate': 58, 'pain_during_bowel_movements': 59, 'pain_in_anal_region': 60, 'bloody_stool': 61, 'irritation_in_anus': 62, 'neck_pain': 63, 'dizziness': 64, 'cramps': 65, 'bruising': 66, 'obesity': 67, 'swollen_legs': 68, 'swollen_blood_vessels': 69, 'puffy_face_and_eyes': 70, 'enlarged_thyroid': 71, 'brittle_nails': 72, 'swollen_extremeties': 73, 'excessive_hunger': 74, 'extra_marital_contacts': 75, 'drying_and_tingling_lips': 76, 'slurred_speech': 77, 'knee_pain': 78, 'hip_joint_pain': 79, 'muscle_weakness': 80, 'stiff_neck': 81, 'swelling_joints': 82, 'movement_stiffness': 83, 'spinning_movements': 84, 'loss_of_balance': 85, 'unsteadiness': 86, 'weakness_of_one_body_side': 87, 'loss_of_smell': 88, 'bladder_discomfort': 89, 'foul_smell_of urine': 90, 'continuous_feel_of_urine': 91, 'passage_of_gases': 92, 'internal_itching': 93, 'toxic_look_(typhos)': 94, 'depression': 95, 'irritability': 96, 'muscle_pain': 97, 'altered_sensorium': 98, 'red_spots_over_body': 99, 'belly_pain': 100, 'abnormal_menstruation': 101, 'dischromic _patches': 102, 'watering_from_eyes': 103, 'increased_appetite': 104, 'polyuria': 105, 'family_history': 106, 'mucoid_sputum': 107, 'rusty_sputum': 108, 'lack_of_concentration': 109, 'visual_disturbances': 110, 'receiving_blood_transfusion': 111, 'receiving_unsterile_injections': 112, 'coma': 113, 'stomach_bleeding': 114, 'distention_of_abdomen': 115, 'history_of_alcohol_consumption': 116, 'fluid_overload.1': 117, 'blood_in_sputum': 118, 'prominent_veins_on_calf': 119, 'palpitations': 120, 'painful_walking': 121, 'pus_filled_pimples': 122, 'blackheads': 123, 'scurring': 124, 'skin_peeling': 125, 'silver_like_dusting': 126, 'small_dents_in_nails': 127, 'inflammatory_nails': 128, 'blister': 129, 'red_sore_around_nose': 130, 'yellow_crust_ooze': 131}
diseases_list={15: 'Fungal infection',4: 'Allergy',16:  'GERD',9:  'Chronic cholestasis',14: 'Drug Reaction',33: 'Peptic ulcer disease',1: 'AIDS',12: 'Diabetes ',17: 'Gastroenteritis',6: 'Bronchial Asthma',23:  'Hypertension ',30: 'Migraine',7: 'Cervical spondylosis',32: 'Paralysis (brain hemorrhage)',28: 'Jaundice',29: 'Malaria',8: 'Chicken pox',11: 'Dengue',37: 'Typhoid',40: 'hepatitis A',19:'Hepatitis B',20: 'Hepatitis C',21: 'Hepatitis D',22: 'Hepatitis E',3:'Alcoholic hepatitis',36: 'Tuberculosis',10: 'Common Cold',34: 'Pneumonia',13:'Dimorphic hemmorhoids(piles)',18: 'Heart attack',39: 'Varicose veins',26:'Hypothyroidism',24: 'Hyperthyroidism',25:  'Hypoglycemia',31: 'Osteoarthristis',5:  'Arthritis',0:'(vertigo) Paroymsal  Positional Vertigo',2: 'Acne',38:'Urinary tract infection',35: 'Psoriasis',27: 'Impetigo'}

def get_predicted_value(patient_symptoms):
    input_vector=np.zeros(len(symptoms_dict))

    for item in patient_symptoms:
        input_vector[symptoms_dict[item]]=1
    return diseases_list[svc.predict([input_vector])[0]]

In [26]:
symptoms=input("Enter your symptoms: ")
print(symptoms)




In [27]:
# Ensure the cell containing the definition of `get_predicted_value` (CELL INDEX: 26) is executed before running this cell.
user_symptoms=[s.strip() for s in symptoms.split(',')]
user_symptoms=[sym.strip("[]' ") for sym in user_symptoms]
predicted_disease=get_predicted_value(user_symptoms)
desc,pre,med,die,wrkout=helper(predicted_disease)

KeyError: ''

In [None]:
print("*************Predicted Disease*****************")
print(predicted_disease)

print("*************Descrtiption**********************")
print(desc)

print("************* Precaution**********************")
if pre:
    for i, p_i in enumerate(pre, 1):
        print(i, ":", p_i)
else:
    print("No precautions found.")

print("************* Medications**********************")
i=1
for m_i in med:
    print(i,":",m_i)
    i=i+1

print("************* Workout**********************")
i=1
for w_i in wrkout:
    print(i,":",w_i)
    i=i+1

print("************* Diet**********************")
i=1
for d_i in die:
    print(i,":",d_i)
    i=i+1

*************Predicted Disease*****************
Peptic ulcer disease
*************Descrtiption**********************
Peptic ulcer disease involves open sores in the stomach or duodenal lining caused by acid and Helicobacter pylori, presenting with burning epigastric pain and potential bleeding.
************* Precaution**********************
No precautions found.
************* Medications**********************
1 : Antibiotics, Proton Pump Inhibitors (PPIs), H2 Blockers, Antacids, Cytoprotective agents
************* Workout**********************
1 : Opt for gentle exercises like walking or light yoga; avoid core-intensive or high-impact workouts that strain the abdomen.
************* Diet**********************
1 : Consume small, frequent meals of bland foods like oatmeal, bananas, mashed potatoes, and yogurt; avoid spicy foods, caffeine, alcohol, and acidic items.


In [None]:
# import joblib
# joblib.dump(svc, 'model.pkl')