In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Example Dataset
data = {
    "Symptom_1": [1, 0, 1, 1, 0, 1, 0, 1, 1, 0],  # 1 = Present, 0 = Absent
    "Symptom_2": [0, 1, 1, 0, 1, 0, 1, 1, 0, 1],
    "Symptom_3": [1, 1, 1, 0, 1, 0, 0, 1, 1, 1],
    "Symptom_4": [0, 1, 0, 1, 1, 0, 1, 0, 1, 1],
    "Disease": [0, 1, 0, 0, 1, 0, 1, 0, 1, 1]  # 0 = Disease A, 1 = Disease B
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Step 2: Preprocessing
# Features (X) and Target (y)
X = df.drop("Disease", axis=1)  # Drop the target column
y = df["Disease"]  # Target column

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train)

# Step 4: Make Predictions
y_pred = model.predict(X_test)
probabilities = model.predict_proba(X_test)  # Get probability predictions

# Step 5: Evaluate the Model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)

# Optional: Display predictions with probabilities
predictions = pd.DataFrame({
    "Symptoms": X_test.values.tolist(),
    "Actual Disease": y_test.values,
    "Predicted Disease": y_pred,
    "Disease A Probability": probabilities[:, 0],  # Probability of Disease A
    "Disease B Probability": probabilities[:, 1]   # Probability of Disease B
})

print("\nPredictions:\n", predictions)


Accuracy: 0.5

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2


Predictions:
        Symptoms  Actual Disease  Predicted Disease  Disease A Probability  \
0  [1, 0, 1, 1]               1                  0               0.721328   
1  [0, 1, 1, 1]               1                  1               0.342414   

   Disease B Probability  
0               0.278672  
1               0.657586  


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
