In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,KFold,cross_val_score,GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix,classification_report,confusion_matrix,precision_score,roc_curve
import seaborn as sns
from sklearn.utils import shuffle
# from pandas_profiling import ProfileReport
from sklearn.linear_model import LogisticRegression, Perceptron, RidgeClassifier, SGDClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier 
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, VotingClassifier 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

**Read and shuffle the dataset**

In [2]:
df = pd.read_csv('/kaggle/input/disease-symptom-description-dataset/dataset.csv')
df = shuffle(df,random_state=42)
df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
373,Acne,skin_rash,blackheads,scurring,,,,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
1550,Hyperthyroidism,fatigue,mood_swings,weight_loss,restlessness,sweating,diarrhoea,fast_heart_rate,excessive_hunger,muscle_weakness,irritability,abnormal_menstruation,,,,,,
3081,AIDS,muscle_wasting,patches_in_throat,high_fever,extra_marital_contacts,,,,,,,,,,,,,
3857,Chronic cholestasis,itching,vomiting,yellowish_skin,nausea,loss_of_appetite,abdominal_pain,yellowing_of_eyes,,,,,,,,,,


**Removing Hyphen from strings**

In [3]:
for col in df.columns:
    
    df[col] = df[col].str.replace('_',' ')
df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
373,Acne,skin rash,blackheads,scurring,,,,,,,,,,,,,,
4916,Acne,skin rash,pus filled pimples,blackheads,scurring,,,,,,,,,,,,,
1550,Hyperthyroidism,fatigue,mood swings,weight loss,restlessness,sweating,diarrhoea,fast heart rate,excessive hunger,muscle weakness,irritability,abnormal menstruation,,,,,,
3081,AIDS,muscle wasting,patches in throat,high fever,extra marital contacts,,,,,,,,,,,,,
3857,Chronic cholestasis,itching,vomiting,yellowish skin,nausea,loss of appetite,abdominal pain,yellowing of eyes,,,,,,,,,,


**Dataset characteristics**

In [4]:
df.describe()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
count,4920,4920,4920,4920,4572,3714,2934,2268,1944,1692,1512,1194,744,504,306,240,192,72
unique,41,34,48,54,50,38,32,26,21,22,21,18,11,8,4,3,3,1
top,Acne,vomiting,vomiting,fatigue,high fever,headache,nausea,abdominal pain,abdominal pain,yellowing of eyes,yellowing of eyes,irritability,malaise,stomach bleeding,chest pain,chest pain,loss of smell,muscle pain
freq,120,822,870,726,378,348,390,264,276,228,198,120,126,72,96,144,72,72


**Check for null and NaN values**

In [5]:
null_checker = df.apply(lambda x: sum(x.isnull())).to_frame(name='count')
print(null_checker)

            count
Disease         0
Symptom_1       0
Symptom_2       0
Symptom_3       0
Symptom_4     348
Symptom_5    1206
Symptom_6    1986
Symptom_7    2652
Symptom_8    2976
Symptom_9    3228
Symptom_10   3408
Symptom_11   3726
Symptom_12   4176
Symptom_13   4416
Symptom_14   4614
Symptom_15   4680
Symptom_16   4728
Symptom_17   4848


**Remove the trailing space from the symptom columns**

In [6]:
cols = df.columns
data = df[cols].values.flatten()

s = pd.Series(data)
s = s.str.strip()
s = s.values.reshape(df.shape)

df = pd.DataFrame(s, columns=df.columns)
df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Acne,skin rash,blackheads,scurring,,,,,,,,,,,,,,
1,Acne,skin rash,pus filled pimples,blackheads,scurring,,,,,,,,,,,,,
2,Hyperthyroidism,fatigue,mood swings,weight loss,restlessness,sweating,diarrhoea,fast heart rate,excessive hunger,muscle weakness,irritability,abnormal menstruation,,,,,,
3,AIDS,muscle wasting,patches in throat,high fever,extra marital contacts,,,,,,,,,,,,,
4,Chronic cholestasis,itching,vomiting,yellowish skin,nausea,loss of appetite,abdominal pain,yellowing of eyes,,,,,,,,,,


**Fill the NaN values with zero**

In [7]:
df = df.fillna(0)
df.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Acne,skin rash,blackheads,scurring,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Acne,skin rash,pus filled pimples,blackheads,scurring,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Hyperthyroidism,fatigue,mood swings,weight loss,restlessness,sweating,diarrhoea,fast heart rate,excessive hunger,muscle weakness,irritability,abnormal menstruation,0,0,0,0,0,0
3,AIDS,muscle wasting,patches in throat,high fever,extra marital contacts,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Chronic cholestasis,itching,vomiting,yellowish skin,nausea,loss of appetite,abdominal pain,yellowing of eyes,0,0,0,0,0,0,0,0,0,0


In [8]:
df1 = pd.read_csv('/kaggle/input/disease-symptom-description-dataset/Symptom-severity.csv')
x=df1['Symptom']
x

0                   itching
1                 skin_rash
2      nodal_skin_eruptions
3       continuous_sneezing
4                 shivering
               ...         
128      inflammatory_nails
129                 blister
130    red_sore_around_nose
131       yellow_crust_ooze
132               prognosis
Name: Symptom, Length: 133, dtype: object

In [9]:
dfx=pd.DataFrame()
dfx["Disease"]=df["Disease"]
y=0
dfx[x]=0
for index, row in df.iterrows():
    for symptom in df.columns[1:]:
        if row[symptom] != 0:
            dfx.loc[index, row[symptom]] = 1
dfx = dfx.fillna(0)
dfx[dfx.columns[1:]]=dfx[dfx.columns[1:]].astype('int')

  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx[x]=0
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.loc[index, row[symptom]] = 1
  dfx.l

In [10]:
dfx.columns = dfx.columns.str.strip()

In [11]:
dfx

Unnamed: 0,Disease,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,...,yellow crust ooze,ulcers on tongue,spotting urination,pain behind the eyes,red spots over body,internal itching,movement stiffness,knee pain,hip joint pain,dischromic patches
0,Acne,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Acne,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hyperthyroidism,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,AIDS,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Chronic cholestasis,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,Psoriasis,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4916,Peptic ulcer diseae,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4917,Dengue,0,0,0,0,0,1,0,0,0,...,0,0,0,1,1,0,0,0,0,0
4918,Fungal infection,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [12]:

dfx.drop(dfx.columns[-4:], axis=1, inplace=True)
dfx

Unnamed: 0,Disease,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,...,weakness of one body side,altered sensorium,nodal skin eruptions,red sore around nose,yellow crust ooze,ulcers on tongue,spotting urination,pain behind the eyes,red spots over body,internal itching
0,Acne,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Acne,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Hyperthyroidism,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,AIDS,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Chronic cholestasis,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,Psoriasis,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4916,Peptic ulcer diseae,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4917,Dengue,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,1,1,0
4918,Fungal infection,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [13]:
columns_to_drop = ['foul_smell_ofurine', 'dischromic_patches', 'spotting_urination']
dfx = dfx.drop(columns=columns_to_drop)


In [14]:

dfx[dfx.columns[1:]].sum(axis=0).sort_values()

swelling_joints        0
muscle_weakness        0
hip_joint_pain         0
knee_pain              0
slurred_speech         0
                    ... 
nausea              1146
loss of appetite    1152
high fever          1362
vomiting            1914
fatigue             1932
Length: 222, dtype: int64

In [15]:
y=df['Disease'].unique()
y

array(['Acne', 'Hyperthyroidism', 'AIDS', 'Chronic cholestasis',
       'Hypertension', 'Hypoglycemia', 'Arthritis', 'Hepatitis B',
       'Migraine', 'Urinary tract infection', 'Diabetes', 'Hepatitis D',
       'Psoriasis', 'Alcoholic hepatitis', 'Dimorphic hemmorhoids(piles)',
       'Hepatitis E', 'Cervical spondylosis', 'Bronchial Asthma',
       'hepatitis A', 'Allergy', 'Hepatitis C', 'Pneumonia',
       'Hypothyroidism', 'Gastroenteritis', 'Varicose veins', 'Jaundice',
       'Drug Reaction', '(vertigo) Paroymsal  Positional Vertigo',
       'Heart attack', 'Tuberculosis', 'Typhoid', 'Common Cold',
       'Peptic ulcer diseae', 'Paralysis (brain hemorrhage)',
       'Fungal infection', 'Impetigo', 'GERD', 'Dengue', 'Malaria',
       'Chicken pox', 'Osteoarthristis'], dtype=object)

In [16]:
data = dfx.iloc[:,1:].values
labels = dfx['Disease'].values

In [17]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, train_size = 0.7,random_state=42)
x_train, x_val, y_train,y_val=train_test_split(data,labels,test_size=0.3,random_state=42)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, x_val.shape,y_val.shape)

(3444, 222) (1476, 222) (3444,) (1476,) (1476, 222) (1476,)


In [18]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
y_val=le.transform(y_val)


In [19]:
y=le.classes_
y

array(['(vertigo) Paroymsal  Positional Vertigo', 'AIDS', 'Acne',
       'Alcoholic hepatitis', 'Allergy', 'Arthritis', 'Bronchial Asthma',
       'Cervical spondylosis', 'Chicken pox', 'Chronic cholestasis',
       'Common Cold', 'Dengue', 'Diabetes',
       'Dimorphic hemmorhoids(piles)', 'Drug Reaction',
       'Fungal infection', 'GERD', 'Gastroenteritis', 'Heart attack',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Hypertension', 'Hyperthyroidism', 'Hypoglycemia',
       'Hypothyroidism', 'Impetigo', 'Jaundice', 'Malaria', 'Migraine',
       'Osteoarthristis', 'Paralysis (brain hemorrhage)',
       'Peptic ulcer diseae', 'Pneumonia', 'Psoriasis', 'Tuberculosis',
       'Typhoid', 'Urinary tract infection', 'Varicose veins',
       'hepatitis A'], dtype=object)

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
import pickle
# Assuming you've already loaded your dataset into DataFrames x_train, x_test, x_val, y_train, y_test, y_val

classifiers = {
    'Random Forest': RandomForestClassifier(),
    'XGBoost': XGBClassifier(),
    'LightGBM': LGBMClassifier(verbose=-1),
    'CatBoost': CatBoostClassifier(silent=True),
    'GradientBoost': GradientBoostingClassifier(),
    'ExtraTrees': ExtraTreesClassifier()
}

# Define the K-fold Cross Validator
kfold = KFold(n_splits=10, shuffle=True, random_state=1)

# K-fold Cross Validation model evaluation
for name, clf in classifiers.items():
    cv_scores = cross_val_score(clf, x_train, y_train, cv=kfold, scoring='f1_weighted')
    print(f'{name} cross-validation mean F1 score: %.3f' % cv_scores.mean())
    
    # Train and test each classifier
    clf.fit(x_train, y_train)
    
    test_predictions = clf.predict(x_test)
    test_f1 = f1_score(y_test, test_predictions, average='weighted')
    test_roc = roc_auc_score(y_test, clf.predict_proba(x_test), multi_class='ovr')
    print(f'{name} test F1 Score: {test_f1:.4f}, AUC-ROC Score: {test_roc:.4f}')
    
    val_predictions = clf.predict(x_val)
    val_f1 = f1_score(y_val, val_predictions, average='weighted')
    val_roc = roc_auc_score(y_val, clf.predict_proba(x_val), multi_class='ovr')
    print(f'{name} validation F1 Score: {val_f1:.4f}, AUC-ROC Score: {val_roc:.4f}')
    pickle.dump(clf, open(f"{name}", "wb"))


Random Forest cross-validation mean F1 score: 1.000
Random Forest test F1 Score: 1.0000, AUC-ROC Score: 1.0000
Random Forest validation F1 Score: 1.0000, AUC-ROC Score: 1.0000
XGBoost cross-validation mean F1 score: 0.999
XGBoost test F1 Score: 1.0000, AUC-ROC Score: 1.0000
XGBoost validation F1 Score: 1.0000, AUC-ROC Score: 1.0000
LightGBM cross-validation mean F1 score: 0.999
LightGBM test F1 Score: 1.0000, AUC-ROC Score: 1.0000
LightGBM validation F1 Score: 1.0000, AUC-ROC Score: 1.0000
CatBoost test F1 Score: 1.0000, AUC-ROC Score: 1.0000
CatBoost validation F1 Score: 1.0000, AUC-ROC Score: 1.0000
GradientBoost cross-validation mean F1 score: 0.999
GradientBoost test F1 Score: 1.0000, AUC-ROC Score: 1.0000
GradientBoost validation F1 Score: 1.0000, AUC-ROC Score: 1.0000
ExtraTrees cross-validation mean F1 score: 1.000
ExtraTrees test F1 Score: 1.0000, AUC-ROC Score: 1.0000
ExtraTrees validation F1 Score: 1.0000, AUC-ROC Score: 1.0000


In [None]:
desc = pd.read_csv("/kaggle/input/disease-symptom-description-dataset/symptom_Description.csv")

In [23]:
desc.head()

Unnamed: 0,Disease,Description
0,Drug Reaction,An adverse drug reaction (ADR) is an injury ca...
1,Malaria,An infectious disease caused by protozoan para...
2,Allergy,An allergy is an immune system response to a f...
3,Hypothyroidism,"Hypothyroidism, also called underactive thyroi..."
4,Psoriasis,Psoriasis is a common skin disorder that forms...


In [24]:
prec = pd.read_csv("/kaggle/input/disease-symptom-description-dataset/symptom_precaution.csv")

In [25]:
prec.head()

Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths


In [26]:
def predd(m, X):
    # Get probabilities for each class
    proba = m.predict_proba(X)

    # Get the indices and probabilities of the top 5 classes
    top5_idx = np.argsort(proba[0])[-5:][::-1]
    top5_proba = np.sort(proba[0])[-5:][::-1]

    # Get the names of the top 5 diseases
    top5_diseases = y[top5_idx]

    for i in range(5):
        
        disease = top5_diseases[i]
        probability = top5_proba[i]
        # print(f"{disease}={probability}" )
        
        print("Disease Name: ", disease)
        print("Probability: ", probability)
        if(disease in desc["Disease"].unique()):
            disp = desc[desc['Disease'] == disease]
            disp = disp.values[0][1]
            print("Disease Description: ", disp)
        
        if(disease in prec["Disease"].unique()):
            c = np.where(prec['Disease'] == disease)[0][0]
            precuation_list = []
            for j in range(1, len(prec.iloc[c])):
                precuation_list.append(prec.iloc[c, j])
            print("Recommended Things to do at home: ")
            for precaution in precuation_list:
                print(precaution)
        
        print("\n")


In [27]:
prec

Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths
5,GERD,avoid fatty spicy food,avoid lying down after eating,maintain healthy weight,exercise
6,Chronic cholestasis,cold baths,anti itch medicine,consult doctor,eat healthy
7,hepatitis A,Consult nearest hospital,wash hands through,avoid fatty spicy food,medication
8,Osteoarthristis,acetaminophen,consult nearest hospital,follow up,salt baths
9,(vertigo) Paroymsal Positional Vertigo,lie down,avoid sudden change in body,avoid abrupt head movment,relax


In [28]:
x=dfx.columns[1:]


In [29]:
x


Index(['itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing',
       'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity',
       'ulcers_on_tongue',
       ...
       'weakness of one body side', 'altered sensorium',
       'nodal skin eruptions', 'red sore around nose', 'yellow crust ooze',
       'ulcers on tongue', 'spotting  urination', 'pain behind the eyes',
       'red spots over body', 'internal itching'],
      dtype='object', length=222)

In [30]:
y

array(['(vertigo) Paroymsal  Positional Vertigo', 'AIDS', 'Acne',
       'Alcoholic hepatitis', 'Allergy', 'Arthritis', 'Bronchial Asthma',
       'Cervical spondylosis', 'Chicken pox', 'Chronic cholestasis',
       'Common Cold', 'Dengue', 'Diabetes',
       'Dimorphic hemmorhoids(piles)', 'Drug Reaction',
       'Fungal infection', 'GERD', 'Gastroenteritis', 'Heart attack',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Hypertension', 'Hyperthyroidism', 'Hypoglycemia',
       'Hypothyroidism', 'Impetigo', 'Jaundice', 'Malaria', 'Migraine',
       'Osteoarthristis', 'Paralysis (brain hemorrhage)',
       'Peptic ulcer diseae', 'Pneumonia', 'Psoriasis', 'Tuberculosis',
       'Typhoid', 'Urinary tract infection', 'Varicose veins',
       'hepatitis A'], dtype=object)

In [31]:
import pickle

In [35]:
t=pd.Series([0]*222, index=x)
m=ExtraTreesClassifier()
with open("ExtraTrees", 'rb') as f:
    m =  pickle.load(f)
t.loc["chest_pain"]=0
t.loc["phlegm"]=1
t.loc["runny_nose"]=1
t.loc["high_fever"]=1
t.loc["throat_irritation"]=1
t.loc["congestion"]=1
t.loc["redness_of_eyes"]=1
t=t.to_numpy()
print(t.shape)
t=t.reshape(1,-1)
predd(m,t)


<_io.BufferedReader name='ExtraTrees'>
(222,)
Disease Name:  Pneumonia
Probability:  0.14
Disease Description:  Pneumonia is an infection in one or both lungs. Bacteria, viruses, and fungi cause it. The infection causes inflammation in the air sacs in your lungs, which are called alveoli. The alveoli fill with fluid or pus, making it difficult to breathe.
Recommended Things to do at home: 
consult doctor
medication
rest
follow up


Disease Name:  Common Cold
Probability:  0.11
Disease Description:  The common cold is a viral infection of your nose and throat (upper respiratory tract). It's usually harmless, although it might not feel that way. Many types of viruses can cause a common cold.
Recommended Things to do at home: 
drink vitamin c rich drinks
take vapour
avoid cold food
keep fever in check


Disease Name:  Fungal infection
Probability:  0.1
Disease Description:  In humans, fungal infections occur when an invading fungus takes over an area of the body and is too much for the im

ExtraTreesClassifier()


# WEBAPP

from flask import Flask, request, jsonify
import pickle
import numpy as np
import pandas as pd

# Load the model
model = pickle.load(open('ExtraTrees_1_000.pkl', 'rb'))

# Define the symptoms
diseases = [
    '(vertigo) Paroymsal  Positional Vertigo', 'AIDS', 'Acne', 'Alcoholic hepatitis', 'Allergy', 
    'Arthritis', 'Bronchial Asthma', 'Cervical spondylosis', 'Chicken pox', 'Chronic cholestasis', 
    'Common Cold', 'Dengue', 'Diabetes', 'Dimorphic hemmorhoids(piles)', 'Drug Reaction', 
    'Fungal infection', 'GERD', 'Gastroenteritis', 'Heart attack', 'Hepatitis B', 'Hepatitis C', 
    'Hepatitis D', 'Hepatitis E', 'Hypertension', 'Hyperthyroidism', 'Hypoglycemia', 'Hypothyroidism', 
    'Impetigo', 'Jaundice', 'Malaria', 'Migraine', 'Osteoarthristis', 'Paralysis (brain hemorrhage)', 
    'Peptic ulcer diseae', 'Pneumonia', 'Psoriasis', 'Tuberculosis', 'Typhoid', 
    'Urinary tract infection', 'Varicose veins', 'hepatitis A'
]

symptoms = [ 'itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing', 'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity', 'ulcers_on_tongue', 'muscle_wasting', 'vomiting', 'burning_micturition', 'fatigue', 'weight_gain', 'anxiety', 'cold_hands_and_feets', 'mood_swings', 'weight_loss', 'restlessness', 'lethargy', 'patches_in_throat', 'irregular_sugar_level', 'cough', 'high_fever', 'sunken_eyes', 'breathlessness', 'sweating', 'dehydration', 'indigestion', 'headache', 'yellowish_skin', 'dark_urine', 'nausea', 'loss_of_appetite', 'pain_behind_the_eyes', 'back_pain', 'constipation', 'abdominal_pain', 'diarrhoea', 'mild_fever', 'yellow_urine', 'yellowing_of_eyes', 'acute_liver_failure', 'fluid_overload', 'swelling_of_stomach', 'swelled_lymph_nodes', 'malaise', 'blurred_and_distorted_vision', 'phlegm', 'throat_irritation', 'redness_of_eyes', 'sinus_pressure', 'runny_nose', 'congestion', 'chest_pain', 'weakness_in_limbs', 'fast_heart_rate', 'pain_during_bowel_movements', 'pain_in_anal_region', 'bloody_stool', 'irritation_in_anus', 'neck_pain', 'dizziness', 'cramps', 'bruising', 'obesity', 'swollen_legs', 'swollen_blood_vessels', 'puffy_face_and_eyes', 'enlarged_thyroid', 'brittle_nails', 'swollen_extremeties', 'excessive_hunger', 'extra_marital_contacts', 'drying_and_tingling_lips', 'slurred_speech', 'knee_pain', 'hip_joint_pain', 'muscle_weakness', 'stiff_neck', 'swelling_joints', 'movement_stiffness', 'spinning_movements', 'loss_of_balance', 'unsteadiness', 'weakness_of_one_body_side', 'loss_of_smell', 'bladder_discomfort', 'continuous_feel_of_urine', 'passage_of_gases', 'internal_itching', 'toxic_look_(typhos)', 'depression', 'irritability', 'muscle_pain', 'altered_sensorium', 'red_spots_over_body', 'belly_pain', 'abnormal_menstruation', 'watering_from_eyes', 'increased_appetite', 'polyuria', 'family_history', 'mucoid_sputum', 'rusty_sputum', 'lack_of_concentration', 'visual_disturbances', 'receiving_blood_transfusion', 'receiving_unsterile_injections', 'coma', 'stomach_bleeding', 'distention_of_abdomen', 'history_of_alcohol_consumption', 'blood_in_sputum', 'prominent_veins_on_calf', 'palpitations', 'painful_walking', 'pus_filled_pimples', 'blackheads', 'scurring', 'skin_peeling', 'silver_like_dusting', 'small_dents_in_nails', 'inflammatory_nails', 'blister', 'red_sore_around_nose', 'yellow_crust_ooze']


print(len(symptoms))
desc=pd.read_csv("/kaggle/input/disease-symptom-description-dataset/symptom_Description.csv")
prec=pd.read_csv("/kaggle/input/disease-symptom-description-dataset/symptom_precaution.csv")
app = Flask(__name__)

@app.route('/', methods=["GET"])
def home():
    return app.send_static_file('index.html')

@app.route('/predict', methods=['POST'])
def predict():
    # Get the data from the POST request
    data = request.get_json(force=True)

    # Create a list of zeros
    features = [0]*len(symptoms)

    # Set the corresponding indices to 1 for the symptoms present in the data
    for symptom in data:
        if symptom in symptoms:
            index = symptoms.index(symptom)
            features[index] = 1

    # Make prediction using the model
    proba = model.predict_proba([features])

    # Get the indices and probabilities of the top 5 classes
    top5_idx = np.argsort(proba[0])[-5:][::-1]
    top5_proba = np.sort(proba[0])[-5:][::-1]

    # Get the names of the top 5 diseases
    top5_diseases = [diseases[i] for i in top5_idx]

    # Prepare the response
    response = []
    for i in range(5):
        disease = top5_diseases[i]
        probability = top5_proba[i]
        
        # Get the disease description
        disp = desc[desc['Disease'] == disease].values[0][1] if disease in desc["Disease"].unique() else "No description available"
        
        # Get the precautions
        precautions = []
        if disease in prec["Disease"].unique():
            c = np.where(prec['Disease'] == disease)[0][0]
            for j in range(1, len(prec.iloc[c])):
                precautions.append(prec.iloc[c, j])
        
        # Add the disease prediction to the response
        response.append({
            'disease': disease,
            'probability': float(probability),
            'description': disp,
            'precautions': precautions
        })

    # Send back to the client
    return jsonify(response)

if __name__ == '__main__':
    app.run(port=5000, debug=True)


## PUT THIS IN HTML FILE

<!DOCTYPE html>
<html>
<head>
    <title>Medical Diagnosis App</title>
    <link rel="stylesheet" href="https://code.jquery.com/ui/1.12.1/themes/base/jquery-ui.css">
    <script src="https://code.jquery.com/jquery-1.12.4.js"></script>
    <script src="https://code.jquery.com/ui/1.12.1/jquery-ui.js"></script>
</head>
<body>
    <h1>Medical Diagnosis App</h1>
    <input type="text" id="symptom" placeholder="Enter a symptom">
    <button id="add">Add Symptom</button>
    <button id="predict">Predict Disease</button>
    <ul id="symptoms"></ul>
    <div id="result"></div>

    <script>
        var symptoms = [ 'itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing', 'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity', 'ulcers_on_tongue', 'muscle_wasting', 'vomiting', 'burning_micturition', 'fatigue', 'weight_gain', 'anxiety', 'cold_hands_and_feets', 'mood_swings', 'weight_loss', 'restlessness', 'lethargy', 'patches_in_throat', 'irregular_sugar_level', 'cough', 'high_fever', 'sunken_eyes', 'breathlessness', 'sweating', 'dehydration', 'indigestion', 'headache', 'yellowish_skin', 'dark_urine', 'nausea', 'loss_of_appetite', 'pain_behind_the_eyes', 'back_pain', 'constipation', 'abdominal_pain', 'diarrhoea', 'mild_fever', 'yellow_urine', 'yellowing_of_eyes', 'acute_liver_failure', 'fluid_overload', 'swelling_of_stomach', 'swelled_lymph_nodes', 'malaise', 'blurred_and_distorted_vision', 'phlegm', 'throat_irritation', 'redness_of_eyes', 'sinus_pressure', 'runny_nose', 'congestion', 'chest_pain', 'weakness_in_limbs', 'fast_heart_rate', 'pain_during_bowel_movements', 'pain_in_anal_region', 'bloody_stool', 'irritation_in_anus', 'neck_pain', 'dizziness', 'cramps', 'bruising', 'obesity', 'swollen_legs', 'swollen_blood_vessels', 'puffy_face_and_eyes', 'enlarged_thyroid', 'brittle_nails', 'swollen_extremeties', 'excessive_hunger', 'extra_marital_contacts', 'drying_and_tingling_lips', 'slurred_speech', 'knee_pain', 'hip_joint_pain', 'muscle_weakness', 'stiff_neck', 'swelling_joints', 'movement_stiffness', 'spinning_movements', 'loss_of_balance', 'unsteadiness', 'weakness_of_one_body_side', 'loss_of_smell', 'bladder_discomfort', 'continuous_feel_of_urine', 'passage_of_gases', 'internal_itching', 'toxic_look_(typhos)', 'depression', 'irritability', 'muscle_pain', 'altered_sensorium', 'red_spots_over_body', 'belly_pain', 'abnormal_menstruation', 'watering_from_eyes', 'increased_appetite', 'polyuria', 'family_history', 'mucoid_sputum', 'rusty_sputum', 'lack_of_concentration', 'visual_disturbances', 'receiving_blood_transfusion', 'receiving_unsterile_injections', 'coma', 'stomach_bleeding', 'distention_of_abdomen', 'history_of_alcohol_consumption', 'blood_in_sputum', 'prominent_veins_on_calf', 'palpitations', 'painful_walking', 'pus_filled_pimples', 'blackheads', 'scurring', 'skin_peeling', 'silver_like_dusting', 'small_dents_in_nails', 'inflammatory_nails', 'blister', 'red_sore_around_nose', 'yellow_crust_ooze'];
        var selectedSymptoms = [];

        $("#symptom").autocomplete({
            source: symptoms
        });

        $("#add").click(function() {
            var symptom = $("#symptom").val();
            selectedSymptoms.push(symptom);
            $("#symptoms").append("<li>" + symptom + "</li>");
            $("#symptom").val("");
        });

        $("#predict").click(function() {
            $.ajax({
                url: 'http://127.0.0.1:5000/predict',
                contentType: 'application/json',
                data: JSON.stringify(selectedSymptoms),
                method: 'POST',
                success: function(response) {
                    $("#result").empty();
                    response.forEach(function(item) {
                        $("#result").append("<p><strong>Disease:</strong> " + item.disease + "<br><strong>Probability:</strong> " + item.probability + "<br><strong>Description:</strong> " + item.description + "<br><strong>Precautions:</strong> " + item.precautions.join(", ") + "</p>");
                    });
                }
            });
        });
    </script>
</body>
</html>
