In [11]:
import numpy as np
import pandas as pd
from scipy.stats import mode
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
 
# --- Load the training set -------------------------------------------------
# Any column that contains a missing value is discarded.
data = pd.read_csv('Training.csv').dropna(axis=1)

# Number of rows per prognosis, as a two-column frame.
disease_counts = data['prognosis'].value_counts()
temp_df = pd.DataFrame(
    {'Disease': disease_counts.index, 'Counts': disease_counts.values}
)

# --- Encode the target -----------------------------------------------------
# The string prognosis column is replaced in place by integer codes;
# `encoder` keeps the mapping back to the original class names.
encoder = LabelEncoder()
data["prognosis"] = encoder.fit_transform(data["prognosis"])

y = data['prognosis']
X = data.drop(columns=['prognosis'])

# An 80/20 split is produced here, but the models below are fitted on the
# full X / y; the split variables are not used for fitting in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=24
)

# --- Fit the three classifiers on the complete training data ---------------
svm_model = SVC().fit(X, y)
nb_model = GaussianNB().fit(X, y)
rf_model = RandomForestClassifier(random_state=18).fit(X, y)

# --- Predict on the separate testing file ----------------------------------
test_data = pd.read_csv('Testing.csv').dropna(axis=1)

# NOTE(review): test_y holds the raw prognosis column from the file, whereas
# the predictions below are the integer codes produced by `encoder` — encode
# one side (or decode the other) before comparing them.
test_y = test_data['prognosis']
test_X = test_data.drop(columns=['prognosis'])

svm_preds = svm_model.predict(test_X)
nb_preds = nb_model.predict(test_X)
rf_preds = rf_model.predict(test_X)

# Majority vote per test row across the three models' predictions.
final_preds = [
    mode(list(votes), keepdims=True)[0][0]
    for votes in zip(svm_preds, nb_preds, rf_preds)
]
 

In [2]:
# Feature column names, in the order the models were trained on them.
symptoms = X.columns.values

# Map a display name to the column position. The display name is the column
# name split on "_" with each piece capitalized and re-joined with spaces,
# e.g. "skin_rash" -> "Skin Rash".
symptom_index = {
    " ".join(piece.capitalize() for piece in column.split("_")): position
    for position, column in enumerate(symptoms)
}

# Lookup tables used when predicting: display-name -> feature position, and
# the encoder's class array for turning predicted codes back into names.
data_dict = dict(
    symptom_index=symptom_index,
    predictions_classes=encoder.classes_,
)

def predictDisease(symptoms):
    """Predict a disease from a comma-separated string of symptom names.

    Each symptom is looked up in ``data_dict["symptom_index"]`` to build a
    one-row 0/1 feature vector, which is passed to ``rf_model``, ``nb_model``
    and ``svm_model``. The class predicted by the most models wins; if all
    three disagree, the smallest predicted class index is used.

    Parameters
    ----------
    symptoms : str
        Symptom display names separated by commas, e.g.
        ``"Itching,Skin Rash"``. Whitespace around each name is ignored.

    Returns
    -------
    dict
        ``{"Prediction": <class name>}`` where the class name is taken from
        ``data_dict["predictions_classes"]``.

    Raises
    ------
    KeyError
        If a symptom is not a key of ``data_dict["symptom_index"]``.
    """
    symptom_index = data_dict["symptom_index"]
    class_names = data_dict["predictions_classes"]

    # One slot per known symptom; set to 1 for every symptom supplied.
    input_data = [0] * len(symptom_index)
    for symptom in symptoms.split(","):
        # Tolerate "A, B" style input instead of failing on " B".
        symptom = symptom.strip()
        if symptom not in symptom_index:
            raise KeyError(f"Unknown symptom: {symptom!r}")
        input_data[symptom_index[symptom]] = 1
    input_data = np.array(input_data).reshape(1, -1)

    # Vote on the integer class indices rather than on class-name strings:
    # scipy.stats.mode does not accept non-numeric input in newer SciPy.
    votes = [
        model.predict(input_data)[0]
        for model in (rf_model, nb_model, svm_model)
    ]
    # np.unique returns sorted values, and argmax picks the first maximum,
    # so ties resolve to the smallest class index.
    candidates, counts = np.unique(votes, return_counts=True)
    winner = candidates[np.argmax(counts)]

    final_prediction = class_names[winner]
    predictions = {"Prediction": final_prediction}

    return predictions
