# **1. Import Libraries**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder


# **2. Load the Data**

In [2]:
# Load the training and testing data
train_data = pd.read_csv('/content/Training.csv', sep=",")
test_data = pd.read_csv('/content/Testing.csv', sep=",")


# **3. Explore the Data**

In [3]:
# Display the first few rows of the training data
print(train_data.head())


   itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  shivering  \
0        1          1                     1                    0          0   
1        0          1                     1                    0          0   
2        1          0                     1                    0          0   
3        1          1                     0                    0          0   
4        1          1                     1                    0          0   

   chills  joint_pain  stomach_pain  acidity  ulcers_on_tongue  ...  scurring  \
0       0           0             0        0                 0  ...         0   
1       0           0             0        0                 0  ...         0   
2       0           0             0        0                 0  ...         0   
3       0           0             0        0                 0  ...         0   
4       0           0             0        0                 0  ...         0   

   skin_peeling  silver_like_dusting  

# **4. Data Cleaning**

In [4]:
# Drop columns with NaN values
train_data.dropna(axis=1, inplace=True)
test_data.dropna(axis=1, inplace=True)


# **5. Prepare Features and Target Variable**

In [5]:
# Separate features and target variable in the training set
X_train = train_data.drop('prognosis', axis=1)
y_train = train_data['prognosis']

# Encode the target variable if it's categorical
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)


# **6. Train the Model**

In [6]:
# Train the RandomForestClassifier model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train_encoded)


# **7. Prepare Testing Data**

In [7]:
# Prepare the testing data
X_test = test_data.drop('prognosis', axis=1)
y_test = test_data['prognosis']
y_test_encoded = label_encoder.transform(y_test)


# **8. Make Predictions and Evaluate the Model**

In [8]:
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(classification_report(y_test_encoded, y_pred))
print('Accuracy:', accuracy_score(y_test_encoded, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         1
           4       1.00      1.00      1.00         1
           5       1.00      1.00      1.00         1
           6       1.00      1.00      1.00         1
           7       1.00      1.00      1.00         1
           8       1.00      1.00      1.00         1
           9       1.00      1.00      1.00         1
          10       1.00      1.00      1.00         1
          11       1.00      1.00      1.00         1
          12       1.00      1.00      1.00         1
          13       1.00      1.00      1.00         1
          14       1.00      1.00      1.00         1
          15       1.00      0.50      0.67         2
          16       1.00      1.00      1.00         1
          17       1.00    

# **9. Save the Model**

In [9]:
import joblib

# Save the trained model to a file
model_filename = 'random_forest_model.joblib'
joblib.dump(model, model_filename)

# Save the label encoder as well
label_encoder_filename = 'label_encoder.joblib'
joblib.dump(label_encoder, label_encoder_filename)


['label_encoder.joblib']

# **10. Load and Test the Model with random symptoms**

In [15]:
import joblib
import pandas as pd
import numpy as np
import random

# Load the saved model and label encoder
loaded_model = joblib.load('random_forest_model.joblib')
loaded_label_encoder = joblib.load('label_encoder.joblib')

# List all possible symptoms (features)
all_symptoms = [
    "itching", "skin rash", "nodal skin eruptions", "continuous sneezing", "shivering",
    "chills", "joint pain", "stomach pain", "acidity", "ulcers on tongue",
    "muscle wasting", "vomiting", "burning micturition", "spotting urination", "fatigue",
    "weight gain", "anxiety", "cold hands and feets", "mood swings", "weight loss",
    "restlessness", "lethargy", "patches in throat", "irregular sugar level", "cough",
    "high fever", "sunken eyes", "breathlessness", "sweating", "dehydration",
    "indigestion", "headache", "yellowish skin", "dark urine", "nausea",
    "loss of appetite", "pain behind the eyes", "back pain", "constipation", "abdominal pain",
    "diarrhoea", "mild fever", "yellow urine", "yellowing of eyes", "acute liver failure",
    "fluid overload", "swelling of stomach", "swelled lymph nodes", "malaise", "blurred and distorted vision",
    "phlegm", "throat irritation", "redness of eyes", "sinus pressure", "runny nose",
    "congestion", "chest pain", "weakness in limbs", "fast heart rate", "pain during bowel movements",
    "pain in anal region", "bloody stool", "irritation in anus", "neck pain", "dizziness",
    "cramps", "bruising", "obesity", "swollen legs", "swollen blood vessels",
    "puffy face and eyes", "enlarged thyroid", "brittle nails", "swollen extremeties", "excessive hunger",
    "extra marital contacts", "drying and tingling lips", "slurred speech", "knee pain", "hip joint pain",
    "muscle weakness", "stiff neck", "swelling joints", "movement stiffness", "spinning movements",
    "loss of balance", "unsteadiness", "weakness of one body side", "loss of smell", "bladder discomfort",
    "foul smell of urine", "continuous feel of urine", "passage of gases", "internal itching", "toxic look (typhos)",
    "depression", "irritability", "muscle pain", "altered sensorium", "red spots over body",
    "belly pain", "abnormal menstruation", "dischromic patches", "watering from eyes", "increased appetite",
    "polyuria", "family history", "mucoid sputum", "rusty sputum", "lack of concentration",
    "visual disturbances", "receiving blood transfusion", "receiving unsterile injections", "coma", "stomach bleeding",
    "distention of abdomen", "history of alcohol consumption", "fluid overload", "blood in sputum", "prominent veins on calf",
    "palpitations", "painful walking", "pus filled pimples", "blackheads", "scurring",
    "skin peeling", "silver like dusting", "small dents in nails", "inflammatory nails", "blister",
    "red sore around nose", "yellow crust ooze"
]

# Randomly select n symptoms from the list
n = random.randint(5, len(all_symptoms))
random_symptoms = random.sample(all_symptoms, n)

# List all symptom columns (features)
symptom_columns = X_train.columns

# Create an input array with zeros
input_symptoms = pd.DataFrame([0] * len(symptom_columns)).T
input_symptoms.columns = symptom_columns

# Set the corresponding symptom columns to 1 based on random selection
for symptom in random_symptoms:
    symptom = symptom.strip()
    if symptom in input_symptoms.columns:
        input_symptoms[symptom] = 1

# Make a prediction
predicted_prognosis_encoded = loaded_model.predict(input_symptoms)
predicted_prognosis = loaded_label_encoder.inverse_transform(predicted_prognosis_encoded)

# Display the randomly chosen symptoms and the prediction
print(f"Randomly Chosen Symptoms: {', '.join(random_symptoms)}")
print(f"Predicted Disease: {predicted_prognosis[0]}")


Randomly Chosen Symptoms: muscle weakness, weight loss, obesity, increased appetite, prominent veins on calf, chills, inflammatory nails, drying and tingling lips, continuous feel of urine, yellowing of eyes, irregular sugar level, sunken eyes, loss of smell, high fever, swelling joints, vomiting, blackheads, stiff neck, pain behind the eyes, lack of concentration, dizziness
Predicted Disease: Acne


# **11. Load and Test the Model with User Input**

In [11]:
# Load the saved model and label encoder
loaded_model = joblib.load(model_filename)
loaded_label_encoder = joblib.load(label_encoder_filename)

# List all symptom columns (features)
symptom_columns = X_train.columns

# Create an input array with zeros
input_symptoms = pd.DataFrame([0] * len(symptom_columns)).T
input_symptoms.columns = symptom_columns

# Get symptoms from the user
user_symptoms = input("Enter the symptoms separated by commas: ").split(',')

# Set the corresponding symptom columns to 1 based on user input
for symptom in user_symptoms:
    symptom = symptom.strip()
    if symptom in input_symptoms.columns:
        input_symptoms[symptom] = 1

# Make a prediction
predicted_prognosis_encoded = loaded_model.predict(input_symptoms)
predicted_prognosis = loaded_label_encoder.inverse_transform(predicted_prognosis_encoded)

# Display the prediction
print(f"Predicted Disease: {predicted_prognosis[0]}")


Enter the symptoms separated by commas: joint pain,stomach pain,acidity,ulcers on tongue,muscle wasting,vomiting,burning micturition,irregular sugar level,cough,high fever,sunken eyes,breathlessness,sweating,dehydration,indigestion
Predicted Disease: Heart attack
