<h1>Assignment 1 - Healthcare Recommendation System</h1>

<h3>Step1: Importing Modules</h3>

In [31]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

<h3>Step2: Load the Dataset</h3>

In [32]:
# Read the symptoms/diagnosis/treatment table; the path is relative to the
# directory the notebook kernel was started in
df = pd.read_csv(filepath_or_buffer='Downloads/health_symptoms_diagnosis_treatment.csv')
# Preview the first five rows
df.head(n=5)

Unnamed: 0,symptom1,symptom2,symptom3,diagnosis,treatment
0,nausea,fever,headache,stomach flu,anti-nausea medication
1,shortness of breath,fever,fatigue,COVID-19,anti-nausea medication
2,fever,sore throat,shortness of breath,food poisoning,antibiotics
3,diarrhea,diarrhea,sore throat,common cold,antiviral medication
4,fatigue,headache,runny nose,stomach flu,antibiotics


<h3>Step3: Data Preprocessing</h3>

In [33]:
# Replace missing entries with empty strings so the string join below never
# encounters NaN
df.fillna(value='', inplace=True)

# Build one "symptom1, symptom2, symptom3" string per row
df['combined_symptoms'] = df[['symptom1', 'symptom2', 'symptom3']].agg(', '.join, axis=1)

# Map every distinct combined-symptom string to an integer id.
# NOTE(review): this yields a single ordinal feature whose numeric order is
# just the sort order of the strings -- presumably a per-symptom (multi-hot)
# encoding would carry more signal; confirm before relying on the model.
symptom_encoder = LabelEncoder().fit(df['combined_symptoms'])
df['encoded_symptoms'] = symptom_encoder.transform(df['combined_symptoms'])

# Map every distinct diagnosis label to an integer class id
diagnosis_encoder = LabelEncoder().fit(df['diagnosis'])
df['encoded_diagnosis'] = diagnosis_encoder.transform(df['diagnosis'])

# Model input: one column holding the encoded symptom combination
X = df[['encoded_symptoms']].to_numpy()
# Model target: the encoded diagnosis
y = df['encoded_diagnosis'].to_numpy()

<h3>Step4: Splitting the dataset into training and testing sets</h3>

In [23]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

<h3>Step5: Building the ANN Model</h3>

In [24]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable when the notebook is re-run from a fresh kernel
set_random_seed(42)

# One output unit per diagnosis known to the encoder. Counting the classes in
# y_train instead would under-size the layer whenever a diagnosis happens to
# be missing from the training split, leaving its label id out of range.
num_classes = len(diagnosis_encoder.classes_)

model = Sequential()
model.add(Input(shape=(1,)))  # Single feature: the scaled symptom-combination id
model.add(Dense(64, activation='relu'))  # Hidden layer 1
model.add(Dense(32, activation='relu'))  # Hidden layer 2
model.add(Dense(16, activation='relu'))  # Hidden layer 3
model.add(Dense(num_classes, activation='softmax'))  # Output layer (one probability per diagnosis class)

# Compiling the model; the sparse loss matches the integer-encoded targets
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

<h3>Step6: Training the model</h3>

In [25]:
# Fit for 50 epochs in mini-batches of 16; the held-out split is passed as
# validation data so val_loss / val_accuracy are reported after every epoch
model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    validation_data=(X_test_scaled, y_test),
)

Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 132ms/step - accuracy: 0.0941 - loss: 2.2013 - val_accuracy: 0.1000 - val_loss: 2.1997
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.1326 - loss: 2.1894 - val_accuracy: 0.1000 - val_loss: 2.1951
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.1868 - loss: 2.1807 - val_accuracy: 0.1000 - val_loss: 2.1901
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.1703 - loss: 2.1716 - val_accuracy: 0.1000 - val_loss: 2.1869
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.1653 - loss: 2.1618 - val_accuracy: 0.1000 - val_loss: 2.1847
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.1299 - loss: 2.1520 - val_accuracy: 0.1000 - val_loss: 2.1844
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7117465aa1a0>

<h3>Step7: Evaluating the model</h3>

In [26]:
# For every test row keep the index of the most probable class
y_pred = model.predict(X_test_scaled).argmax(axis=1)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step


<h3>Step8: Model Performance</h3>

In [28]:
# Model performance
print("Accuracy: ", accuracy_score(y_test, y_pred))
# zero_division=0 reports 0.00 for classes the model never predicts (the same
# value as the default) without emitting an undefined-metric warning per average
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))

Accuracy:  0.15
Classification Report:
               precision    recall  f1-score   support

           0       0.12      1.00      0.21         2
           1       0.00      0.00      0.00         1
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         2
           6       0.50      0.33      0.40         3
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         4

    accuracy                           0.15        20
   macro avg       0.07      0.15      0.07        20
weighted avg       0.09      0.15      0.08        20



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


<h3>Step9: Making predictions with new input</h3>

In [29]:

def predict_diagnosis(symptoms):
    """Predict the diagnosis label for one combination of symptoms.

    Parameters
    ----------
    symptoms : str or iterable of str
        Either a comma-separated string such as "nausea, fever, headache", or
        a sequence of individual symptom names, which is joined with ', ' in
        the order given. The combination must match one seen by
        ``symptom_encoder`` (same symptoms, same order).

    Returns
    -------
    The decoded diagnosis label for the most probable class, or the message
    "New symptoms not recognized in training data." when the combination is
    unknown to ``symptom_encoder``.
    """
    # Accept a list/tuple of symptoms as well as a pre-joined string
    if not isinstance(symptoms, str):
        symptoms = ', '.join(symptoms)

    if symptoms not in symptom_encoder.classes_:
        # Retry with canonical ", " separators so inputs that differ only in
        # spacing around the commas are not rejected
        normalized = ', '.join(part.strip() for part in symptoms.split(','))
        if normalized not in symptom_encoder.classes_:
            return "New symptoms not recognized in training data."
        symptoms = normalized

    # Encode and scale with the objects fitted on the training data
    encoded_symptoms = symptom_encoder.transform([symptoms])
    scaled_symptoms = scaler.transform(np.array(encoded_symptoms).reshape(-1, 1))

    # Predict the diagnosis and map the winning class index back to its label
    predicted_diagnosis = model.predict(scaled_symptoms)
    diagnosis_label = diagnosis_encoder.inverse_transform([np.argmax(predicted_diagnosis)])
    return diagnosis_label[0]

<h3>Step10: Suggest treatment</h3>

In [30]:
def suggest_treatment(diagnosis, data=None):
    """Look up a treatment for a diagnosis.

    Parameters
    ----------
    diagnosis : str
        Diagnosis label to look up in the 'diagnosis' column.
    data : pandas.DataFrame, optional
        Frame with 'diagnosis' and 'treatment' columns. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    The 'treatment' value of the first row whose diagnosis matches. A
    diagnosis can appear with several different treatments in the data; only
    the first occurrence is returned.

    Raises
    ------
    IndexError
        If no row has the requested diagnosis.
    """
    # Fall back to the notebook-level frame when none is supplied
    source = df if data is None else data
    matches = source.loc[source['diagnosis'] == diagnosis, 'treatment']
    if matches.empty:
        raise IndexError(f"No treatment found for diagnosis {diagnosis!r}.")
    return matches.iloc[0]

# Demo: run the symptoms -> diagnosis -> treatment pipeline on one input
new_symptoms = "fever, cough, headache"  # Example symptoms
predicted_diagnosis = predict_diagnosis(new_symptoms)
print(f"Predicted Diagnosis: {predicted_diagnosis}")

# predict_diagnosis signals an unknown symptom combination with this exact
# message, so only look up a treatment when a real diagnosis came back
if predicted_diagnosis == "New symptoms not recognized in training data.":
    print("Recommended Treatment: No treatment available for unrecognized symptoms.")
else:
    recommended_treatment = suggest_treatment(predicted_diagnosis)
    print(f"Recommended Treatment: {recommended_treatment}")


Predicted Diagnosis: New symptoms not recognized in training data.
Recommended Treatment: No treatment available for unrecognized symptoms.
