In [23]:
# import pandas as pd
# import numpy as np
# import joblib
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import AdaBoostClassifier
# from sklearn.metrics import accuracy_score
# from sklearn.preprocessing import StandardScaler
# from imblearn.over_sampling import SMOTE

# # Load dataset
# df = pd.read_csv("./PD_dataset.csv")

# # Selecting features and target
# features = ["UPDRS", "Tremor", "FunctionalAssessment", "MoCA", "Rigidity"]
# X = df[features]
# y = df["Diagnosis"]

# # Handle class imbalance
# smote = SMOTE(random_state=42)
# X_resampled, y_resampled = smote.fit_resample(X, y)

# # Split dataset
# X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# # Normalize data
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # Initialize and train AdaBoost model
# model = AdaBoostClassifier(n_estimators=50, learning_rate=0.2, random_state=42)
# model.fit(X_train, y_train)

# # Make predictions
# y_pred = model.predict(X_test)
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Model Accuracy: {accuracy:.2f}")

# # Save the model and scaler
# joblib.dump(model, "model.pkl")
# joblib.dump(scaler, "scaler.pkl")
# print("Model and scaler saved successfully.")


In [24]:
import pandas as pd
import numpy as np
import joblib  # For saving and loading models
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from imblearn.over_sampling import SMOTE  # Handle class imbalance

# Load dataset
df = pd.read_csv("PD_dataset.csv")

# Define input features and target
features = ["UPDRS", "Tremor", "FunctionalAssessment", "MoCA", "Rigidity"]
X = df[features]
y = df["Diagnosis"]  # Target (0 = Not Affected, 1 = Affected)

# 🔹 Check class distribution
print("Original Class Distribution:\n", y.value_counts())

# 🛠 Apply SMOTE to balance classes
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# 🔹 Check new class distribution
print("New Class Distribution After SMOTE:\n", pd.Series(y_resampled).value_counts())

# 🔹 Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# 🔹 Normalize data (Important for AdaBoost)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 🔹 Train AdaBoost Model
ada_model = AdaBoostClassifier(n_estimators=100, random_state=42)
ada_model.fit(X_train, y_train)

# 🔹 Train Random Forest Model (Alternative)
rf_model = RandomForestClassifier(n_estimators=100, class_weight="balanced", random_state=42)
rf_model.fit(X_train, y_train)

# Save models and scaler
joblib.dump(ada_model, "model_ada.pkl")
joblib.dump(rf_model, "model_rf.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Models and scaler saved successfully.")

# 🔹 Evaluate AdaBoost
ada_preds = ada_model.predict(X_test)
ada_probs = ada_model.predict_proba(X_test)

print("\n🔹 AdaBoost Evaluation:")
print("Accuracy:", accuracy_score(y_test, ada_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, ada_preds))
print("Classification Report:\n", classification_report(y_test, ada_preds))

# 🔹 Adjust Prediction Threshold (AdaBoost)
threshold = 0.4  # Try lowering from 0.5
adjusted_preds = (ada_probs[:, 1] >= threshold).astype(int)

print("\n🔹 Adjusted Predictions (Threshold = 0.4):", adjusted_preds)
print("Confusion Matrix (Adjusted):\n", confusion_matrix(y_test, adjusted_preds))

# 🔹 Evaluate Random Forest
rf_preds = rf_model.predict(X_test)
print("\n🔹 Random Forest Evaluation:")
print("Accuracy:", accuracy_score(y_test, rf_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, rf_preds))
print("Classification Report:\n", classification_report(y_test, rf_preds))

# 🔹 Function for User Input Prediction
def predict_diagnosis(model_name, user_input):
    """Predicts diagnosis based on user input and selected model."""
    # Load models
    if model_name == "ada":
        model = joblib.load("model_ada.pkl")
    elif model_name == "rf":
        model = joblib.load("model_rf.pkl")
    else:
        return "Invalid model selection."

    scaler = joblib.load("scaler.pkl")  # Load scaler
    user_input_scaled = scaler.transform([user_input])  # Scale input
    prediction = model.predict(user_input_scaled)
    prediction_prob = model.predict_proba(user_input_scaled)

    print("\n🔹 Prediction Probabilities:", prediction_prob)
    
    # Apply custom threshold if using AdaBoost
    if model_name == "ada":
        threshold = 0.4  # Adjust threshold
        prediction = [1 if prediction_prob[:, 1] >= threshold else 0]
    
    return f"Predicted Diagnosis: {'Affected (1)' if prediction[0] == 1 else 'Not Affected (0)'}"

# Example Prediction
user_input = [37.30, 0, 4.78, 12.33, 1]  # Example user input
print(predict_diagnosis("ada", user_input))  # Change to "rf" for RandomForest


Original Class Distribution:
 Diagnosis
1    1304
0     801
Name: count, dtype: int64
New Class Distribution After SMOTE:
 Diagnosis
0    1304
1    1304
Name: count, dtype: int64
Models and scaler saved successfully.

🔹 AdaBoost Evaluation:
Accuracy: 0.8908045977011494
Confusion Matrix:
 [[236  16]
 [ 41 229]]
Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.94      0.89       252
           1       0.93      0.85      0.89       270

    accuracy                           0.89       522
   macro avg       0.89      0.89      0.89       522
weighted avg       0.89      0.89      0.89       522


🔹 Adjusted Predictions (Threshold = 0.4): [1 1 1 0 0 1 0 1 1 0 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 0 1 0
 1 1 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1
 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1
 1 1 1 0 1 0 0 1 1 1 0 0 1 0 1 1 1 1 1 0 1 0 1 1 1 1 0 1 1 1 1 0 0 1 

