In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load datasets
try:
    symptom_severity = pd.read_csv('Symptom-severity.csv')
    symptoms_df = pd.read_csv('symtoms_df (1).csv')
    doctor_df=pd.read_csv('doctor.csv')
except Exception as e:
    print("Error loading datasets:", e)
# Clean and preprocess data
symptom_severity['Symptom'] = symptom_severity['Symptom'].str.strip().str.lower()
symptoms_df.columns = symptoms_df.columns.str.strip().str.lower()
    

ModuleNotFoundError: No module named 'sklearn'

In [None]:
# Confirm if columns have been properly loaded and aligned
print("Symptom Severity Columns:", symptom_severity.columns)
print("Symptoms DF Columns:", symptoms_df.columns)
print("Specialist DF Columns:", doctor_df.columns)

Symptom Severity Columns: Index(['Symptom', 'weight'], dtype='object')
Symptoms DF Columns: Index(['unnamed: 0', 'disease', 'symptom_1', 'symptom_2', 'symptom_3',
       'symptom_4'],
      dtype='object')
Specialist DF Columns: Index(['Disease', 'Specialty'], dtype='object')


In [None]:
# Standardize symptom formatting in symptom_severity
symptom_severity['Symptom'] = symptom_severity['Symptom'].str.strip().str.lower()

# Standardize symptom formatting in symptoms_df and combine symptom columns
symptoms_df['symptom_1'] = symptoms_df['symptom_1'].str.strip().str.lower()
symptoms_df['symptom_2'] = symptoms_df['symptom_2'].str.strip().str.lower()
symptoms_df['symptom_3'] = symptoms_df['symptom_3'].str.strip().str.lower()
symptoms_df['symptom_4'] = symptoms_df['symptom_4'].str.strip().str.lower()

# Combine symptom columns into a single list for each row
symptoms_df['combined_symptoms'] = symptoms_df[['symptom_1', 'symptom_2', 'symptom_3','symptom_4']].values.tolist()

# Create the symptom list and encode using MultiLabelBinarizer
symptom_list = symptom_severity['Symptom'].unique()
mlb = MultiLabelBinarizer(classes=symptom_list)

# Check if all symptoms in symptoms_df are in symptom_list
all_symptoms_in_df = set(sum(symptoms_df['combined_symptoms'], []))  # Flatten the list of lists
missing_symptoms = all_symptoms_in_df - set(symptom_list)

if missing_symptoms:
    print("Missing symptoms in symptom_severity dataset:", missing_symptoms)
else:
    print("No missing symptoms, proceeding with encoding.")

# Encoding with MultiLabelBinarizer if there are no mismatches
if not missing_symptoms:
    symptoms_encoded = mlb.fit_transform(symptoms_df['combined_symptoms'])
    print("Symptoms successfully encoded.")
else:
    print("Please update the symptom list in symptom_severity.csv to include missing symptoms.")

Missing symptoms in symptom_severity dataset: {'dischromic _patches', 'spotting_ urination', 'foul_smell_of urine', nan}
Please update the symptom list in symptom_severity.csv to include missing symptoms.


In [None]:
# Filter out symptoms not in symptom_severity list
symptoms_df['filtered_symptoms'] = symptoms_df['combined_symptoms'].apply(
    lambda x: [symptom for symptom in x if symptom in symptom_list]
)

# Encode with MultiLabelBinarizer using only the filtered symptoms
symptoms_encoded = mlb.fit_transform(symptoms_df['filtered_symptoms'])
print("Symptoms successfully encoded, with missing symptoms ignored.")

Symptoms successfully encoded, with missing symptoms ignored.


In [None]:
# Encode disease labels
label_encoder = LabelEncoder()
try:
    disease_labels = label_encoder.fit_transform(symptoms_df['disease'])
except KeyError as e:
    print("Error encoding disease labels:", e)
    raise e

In [None]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(symptoms_encoded, disease_labels, test_size=0.2, random_state=42)

# Build the neural network model
model = Sequential([
    Dense(128, input_shape=(X_train.shape[1],), activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the model
try:
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=16)
except Exception as e:
    print("Error during model training:", e)
    raise e

Epoch 1/50
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.3004 - loss: 3.0663 - val_accuracy: 0.9197 - val_loss: 0.2572
Epoch 2/50
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9514 - loss: 0.2601 - val_accuracy: 0.9858 - val_loss: 0.0684
Epoch 3/50
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9761 - loss: 0.1132 - val_accuracy: 0.9949 - val_loss: 0.0409
Epoch 4/50
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9860 - loss: 0.0777 - val_accuracy: 0.9949 - val_loss: 0.0347
Epoch 5/50
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9831 - loss: 0.0725 - val_accuracy: 0.9949 - val_loss: 0.0282
Epoch 6/50
[1m246/246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9864 - loss: 0.0512 - val_accuracy: 0.9949 - val_loss: 0.0250
Epoch 7/50
[1m246/246[0m 

In [None]:
def predict_disease(symptom_input):
    # Clean and check input symptoms
    recognized_symptoms = [symptom.strip().lower() for symptom in symptom_input if symptom.strip().lower() in symptom_list]
    if not recognized_symptoms:
        return "No recognized symptoms provided. Please check the symptoms and try again.", None
    
    try:
        symptoms_array = mlb.transform([recognized_symptoms])
        disease_prediction = model.predict(symptoms_array)
        disease_index = np.argmax(disease_prediction)
        disease_name = label_encoder.inverse_transform([disease_index])[0]
        
        # Find specialist
        specialist = doctor_df[doctor_df['Disease'] == disease_name]['Specialty'].values[0]
        return disease_name, specialist
    except KeyError as e:
        print("KeyError encountered during prediction:", e)
        print("Symptom input:", symptom_input)
        print("Recognized symptoms after filtering:", recognized_symptoms)
        raise e
        

In [None]:
user_symptoms = ['fatigue', 'weight_loss']  # replace with user input symptoms
disease, specialist = predict_disease(user_symptoms)

if specialist:
    print(f"Predicted Disease: {disease}")
    print(f"Recommended Specialist: {specialist}")
else:
    print(disease)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step


IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
def predict_disease(symptom_input):
    # Clean and check input symptoms
    recognized_symptoms = [symptom.strip().lower() for symptom in symptom_input if symptom.strip().lower() in symptom_list]
    if not recognized_symptoms:
        return "No recognized symptoms provided. Please check the symptoms and try again.", None
    
    try:
        symptoms_array = mlb.transform([recognized_symptoms])
        disease_prediction = model.predict(symptoms_array)
        disease_index = np.argmax(disease_prediction)
        disease_name = label_encoder.inverse_transform([disease_index])[0]
        
        # Check if disease exists in doctor_df
        if disease_name in doctor_df['Disease'].values:
            # Find specialist
            specialist = doctor_df[doctor_df['Disease'] == disease_name]['Specialty'].values[0]
            return disease_name, specialist
        else:
            return disease_name, "No specialist found for this disease."
    
    except KeyError as e:
        print("KeyError encountered during prediction:", e)
        print("Symptom input:", symptom_input)
        print("Recognized symptoms after filtering:", recognized_symptoms)
        return "Error during prediction. Please check the input symptoms and try again.", None
    except IndexError as e:
        print("IndexError encountered:", e)
        print("Disease name:", disease_name)
        return "Error: Specialist information is unavailable for this disease.", None
    except Exception as e:
        print("Unexpected error:", e)
        return "An unexpected error occurred. Please try again.", None

# Sample user input
user_symptoms = ['lethargy','lethargy','weight_loss']  # Replace with user input symptoms
disease, specialist = predict_disease(user_symptoms)

if specialist:
    print(f"Predicted Disease: {disease}")
    print(f"Recommended Specialist: {specialist}")
else:
    print(disease)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303ms/step
Predicted Disease: Diabetes 
Recommended Specialist: No specialist found for this disease.
