In [1]:


from pathlib import Path

import numpy as np
import pandas as pd
from joblib import dump, load
# Ensemble Approach
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, cross_val_score


# Read both dataset splits from disk; the paths are relative to the notebook's
# working directory.
diseases_training, diseases_testing = (
    pd.read_csv(csv_path)
    for csv_path in ('./dataset/training_data.csv', './dataset/test_data.csv')
)




In [2]:
# Split the training frame into the feature matrix and the target column.
# The last two columns are excluded from the features; 'prognosis' is the label.
# NOTE(review): the second excluded column is presumably a trailing artifact of the
# CSV export -- confirm against ./dataset/training_data.csv.
cols = diseases_training.columns[:-2]
train_features = diseases_training[cols]
train_labels = diseases_training['prognosis']

# Check for data sanity. These checks look at the frame's shape rather than at row 0,
# so an empty frame fails the assertion with a readable message instead of raising
# IndexError from .iloc[0].
assert 'prognosis' not in cols, "label column leaked into the training features"
assert train_features.shape[1] == 132, (
    f"expected 132 feature columns, found {train_features.shape[1]}"
)
assert len(train_labels) == train_features.shape[0], (
    "training features and labels have different row counts"
)



In [3]:
# Split the test frame into the feature matrix and the target column.
# Only the last column is excluded from the features here; 'prognosis' is the label.
cols_test = diseases_testing.columns[:-1]
test_features = diseases_testing[cols_test]
test_labels = diseases_testing['prognosis']

# Check for data sanity. These checks look at the frame's shape rather than at row 0,
# so an empty frame fails the assertion with a readable message instead of raising
# IndexError from .iloc[0].
assert 'prognosis' not in cols_test, "label column leaked into the test features"
assert test_features.shape[1] == 132, (
    f"expected 132 feature columns, found {test_features.shape[1]}"
)
assert len(test_labels) == test_features.shape[0], (
    "test features and labels have different row counts"
)

In [4]:
def _train_val_split(features=None, labels=None, test_size=0.4, random_state=101):
    """Split features and labels into training and validation subsets.

    Parameters
    ----------
    features : optional
        Feature matrix to split. When omitted, the notebook-level
        ``train_features`` is used.
    labels : optional
        Target values aligned row-for-row with ``features``. When omitted, the
        notebook-level ``train_labels`` is used.
    test_size : default 0.4
        Forwarded to ``train_test_split`` as the size of the validation subset.
    random_state : default 101
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_val, y_val)``. Note that this order differs from
        the ``(X_train, X_val, y_train, y_val)`` order that ``train_test_split``
        itself returns.
    """
    # Resolve the defaults at call time so the function follows whatever the
    # notebook currently holds in train_features / train_labels.
    if features is None:
        features = train_features
    if labels is None:
        labels = train_labels

    X_train, X_val, y_train, y_val = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )
    return X_train, y_train, X_val, y_val



In [11]:
X_train, y_train, X_val, y_val = _train_val_split()

# Seed the forest so the fitted model and every score below are repeatable
# across kernel restarts (an unseeded forest is built differently on each run).
classifier = RandomForestClassifier(n_estimators=10, random_state=101)
# Training the Model
classifier = classifier.fit(X_train, y_train)
# Trained Model Evaluation on Validation Dataset (mean accuracy)
confidence = classifier.score(X_val, y_val)
# Validation Data Prediction
y_pred = classifier.predict(X_val)
# Model Validation Accuracy
accuracy = accuracy_score(y_val, y_pred)

# Model Confusion Matrix
conf_mat = confusion_matrix(y_val, y_pred)

# Model Classification Report
clf_report = classification_report(y_val, y_pred)

# Model Cross Validation Score
# NOTE: cross_val_score fits fresh clones of the estimator on folds of the data
# it is given, so this is a 3-fold score over the validation subset and does not
# evaluate the classifier fitted above.
score = cross_val_score(classifier, X_val, y_val, cv=3)
print(score)

# Make sure the export directory exists; dump() does not create missing folders.
Path("./exports").mkdir(parents=True, exist_ok=True)
dump(classifier, "./exports/RandomForest.joblib")


[1. 1. 1.]


['./exports/RandomForest.joblib']

In [6]:
# Reload the exported model from disk and score it on the separate test set.
# NOTE: joblib files are pickle-based -- only load artifacts from a trusted source.
model = load("./exports/RandomForest.joblib")

result = model.predict(test_features)
accuracy = accuracy_score(test_labels, result)
report = classification_report(test_labels, result)

print("Accuracy: ", accuracy)
# Print the report below its label: the report is a pre-aligned multi-line table,
# and a prefix on its first line pushes the header row out of alignment.
print("Report:")
print(report)


Accuracy:  1.0
Report:                                           precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00         1
                                   AIDS       1.00      1.00      1.00         1
                                   Acne       1.00      1.00      1.00         1
                    Alcoholic hepatitis       1.00      1.00      1.00         1
                                Allergy       1.00      1.00      1.00         1
                              Arthritis       1.00      1.00      1.00         1
                       Bronchial Asthma       1.00      1.00      1.00         1
                   Cervical spondylosis       1.00      1.00      1.00         1
                            Chicken pox       1.00      1.00      1.00         1
                    Chronic cholestasis       1.00      1.00      1.00         1
                            Common Cold       1.00      1.00      1.00         1
   

In [7]:
# Symptom vector for a single sample, keyed by symptom name. Every symptom starts
# at 0 and the reported ones are then set to 1. The key order is significant: the
# columns of the frame built from this dict follow it.
symptoms = dict.fromkeys(
    [
        'itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing',
        'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity', 'ulcers_on_tongue',
        'muscle_wasting', 'vomiting', 'burning_micturition', 'spotting_ urination', 'fatigue',
        'weight_gain', 'anxiety', 'cold_hands_and_feets', 'mood_swings', 'weight_loss',
        'restlessness', 'lethargy', 'patches_in_throat', 'irregular_sugar_level', 'cough',
        'high_fever', 'sunken_eyes', 'breathlessness', 'sweating', 'dehydration',
        'indigestion', 'headache', 'yellowish_skin', 'dark_urine', 'nausea', 'loss_of_appetite',
        'pain_behind_the_eyes', 'back_pain', 'constipation', 'abdominal_pain', 'diarrhoea', 'mild_fever',
        'yellow_urine', 'yellowing_of_eyes', 'acute_liver_failure', 'fluid_overload', 'swelling_of_stomach',
        'swelled_lymph_nodes', 'malaise', 'blurred_and_distorted_vision', 'phlegm', 'throat_irritation',
        'redness_of_eyes', 'sinus_pressure', 'runny_nose', 'congestion', 'chest_pain', 'weakness_in_limbs',
        'fast_heart_rate', 'pain_during_bowel_movements', 'pain_in_anal_region', 'bloody_stool',
        'irritation_in_anus', 'neck_pain', 'dizziness', 'cramps', 'bruising', 'obesity', 'swollen_legs',
        'swollen_blood_vessels', 'puffy_face_and_eyes', 'enlarged_thyroid', 'brittle_nails', 'swollen_extremeties',
        'excessive_hunger', 'extra_marital_contacts', 'drying_and_tingling_lips', 'slurred_speech',
        'knee_pain', 'hip_joint_pain', 'muscle_weakness', 'stiff_neck', 'swelling_joints', 'movement_stiffness',
        'spinning_movements', 'loss_of_balance', 'unsteadiness', 'weakness_of_one_body_side', 'loss_of_smell',
        'bladder_discomfort', 'foul_smell_of urine', 'continuous_feel_of_urine', 'passage_of_gases', 'internal_itching',
        'toxic_look_(typhos)', 'depression', 'irritability', 'muscle_pain', 'altered_sensorium',
        'red_spots_over_body', 'belly_pain', 'abnormal_menstruation', 'dischromic _patches', 'watering_from_eyes',
        'increased_appetite', 'polyuria', 'family_history', 'mucoid_sputum', 'rusty_sputum', 'lack_of_concentration',
        'visual_disturbances', 'receiving_blood_transfusion', 'receiving_unsterile_injections', 'coma',
        'stomach_bleeding', 'distention_of_abdomen', 'history_of_alcohol_consumption', 'fluid_overload.1',
        'blood_in_sputum', 'prominent_veins_on_calf', 'palpitations', 'painful_walking', 'pus_filled_pimples',
        'blackheads', 'scurring', 'skin_peeling', 'silver_like_dusting', 'small_dents_in_nails', 'inflammatory_nails',
        'blister', 'red_sore_around_nose', 'yellow_crust_ooze',
    ],
    0,
)
# Flag the symptoms reported for this sample. Updating existing keys keeps the
# insertion order established above.
symptoms.update(dict.fromkeys(
    (
        'malaise', 'dizziness', 'stiff_neck', 'continuous_feel_of_urine',
        'altered_sensorium', 'coma', 'red_sore_around_nose',
    ),
    1,
))

    # Prepare Test Data
# Build the one-row frame directly from the dict: columns follow the dict's key
# order and get a numeric dtype. Enlarging an empty frame with .loc can leave
# every column as generic object dtype.
test = pd.DataFrame([symptoms])

# Load pre-trained model
# NOTE: joblib files are pickle-based -- only load artifacts from a trusted source.
model_from_disk = load("./exports/RandomForest.joblib")
result = model_from_disk.predict(test)
print(f"Predicted Disease: {result}")

Predicted Disease: ['Hepatitis E']
