In [None]:
# ✅ Step 1: Setup & Imports
import sys
sys.path.append('../src')  # Import from src folder

from data_preprocessing import load_and_preprocess_data
from model_training import train_and_evaluate
from model_explainability import load_model, explain_model

import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# ✅ Step 2: Load and Preprocess Data
# Path of the combined feature table, relative to this notebook.
file_path = "../data/processed/features_combined.csv"

# The loader's return value is unpacked into six notebook-level names that
# the later cells rely on.
preprocessed = load_and_preprocess_data(file_path)
X_train, X_test, y_train, y_test, scaler, feature_names = preprocessed

# Quick sanity check of the training split: feature-matrix shape and the
# distribution of the training labels.
train_shape = X_train.shape
print("X_train shape:", train_shape)
train_label_counts = y_train.value_counts()
print("y_train value counts:\n", train_label_counts)

In [None]:
# ✅ Step 3: Train Model and Save
# Path handed to the training routine as its second argument; Step 4 passes
# the same path to load_model.
model_save_path = "../models/xgb_pd_model.pkl"

# The training routine is given the CSV path (not the split from Step 2).
# Left as the cell's last expression so any return value is displayed.
train_and_evaluate(file_path, model_save_path)

In [None]:
# ✅ Step 4: Load Trained Model
# Binds the notebook-level name `model`, which Steps 5 and 6 use.
# NOTE(review): the path ends in .pkl; if load_model unpickles it, only load
# files produced by your own training run — confirm in model_explainability.
model = load_model(model_save_path)

In [None]:
# ✅ Step 5: SHAP Global Explanation
# Path passed to explain_model as `save_summary_path`.
shap_summary_path = '../outputs/shap_summary.png'

# Explains the `model` loaded in Step 4 against the training features.
explain_model(
    model,
    X_train,
    feature_names,
    save_summary_path=shap_summary_path,
)

In [None]:
# ✅ Step 6: Manual Single Prediction
# Raw (unscaled) feature values for one hypothetical subject.
manual_input = {
    'nqScore': 3.0,
    'Typing speed': 2.5,
    'afTap': 1.5,
    'sTap': 1.0
}

# Put the columns in the order reported by the preprocessing step instead of
# relying on the insertion order of the dict above, and fail loudly when a
# feature is missing rather than scoring a row that contains NaN.
feature_order = list(feature_names)
missing_features = [col for col in feature_order if col not in manual_input]
if missing_features:
    raise KeyError(f"manual_input is missing features: {missing_features}")

input_df = pd.DataFrame([manual_input], columns=feature_order)

# Apply the same scaler that Step 2 returned alongside X_train, so the model
# is scored on inputs preprocessed the same way as the data it is explained
# against in Step 5.
# NOTE(review): assumes `scaler` was fitted on exactly `feature_names` and
# that the saved model was trained on scaled features — confirm in
# data_preprocessing / model_training.
if scaler is not None:
    scaled_values = scaler.transform(input_df)
    input_df = pd.DataFrame(scaled_values, columns=feature_order)

# Predict probability: first (only) row, second column.
# NOTE(review): column 1 is presumably the PD class (label 1) — confirm.
proba = model.predict_proba(input_df)[0][1]
print(f"\nPredicted PD Probability: {proba*100:.2f}%")

# Predict class
pred_class = model.predict(input_df)[0]
print("Prediction:", "Likely PD" if pred_class == 1 else "No PD")

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Candidate classifiers, keyed by the label used in the printed reports.
# Every estimator that accepts a seed shares the same one.
RANDOM_STATE = 42

models = {
    'Logistic Regression': LogisticRegression(
        class_weight='balanced',
        random_state=RANDOM_STATE,
        max_iter=1000,
    ),
    'Random Forest': RandomForestClassifier(
        n_estimators=100,
        random_state=RANDOM_STATE,
    ),
    'Gradient Boosting': GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=0.1,
        random_state=RANDOM_STATE,
    ),
    # probability=True so the SVMs expose predict_proba.
    'SVM (Linear)': SVC(kernel='linear', probability=True, random_state=RANDOM_STATE),
    'SVM (RBF)': SVC(kernel='rbf', probability=True, random_state=RANDOM_STATE),
    'KNN': KNeighborsClassifier(n_neighbors=5),
}

# Fit each candidate on the training split and report its test-set metrics.
# The loop variable is deliberately NOT named `model`: that notebook-level
# name holds the model loaded in Step 4, and rebinding it here would make any
# re-run of Steps 5-6 silently use the last classifier in `models` instead.
for name, clf in models.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"\n🔍 {name} Accuracy: {acc:.2%}")
    # target_names maps the two label values, in sorted order, to readable names.
    print(classification_report(y_test, y_pred, target_names=['Healthy', 'PD']))


In [None]:
# Soft-voting ensemble built from three of the candidates in `models`.
ensemble_members = [
    ('lr', models['Logistic Regression']),
    ('rf', models['Random Forest']),
    ('svm', models['SVM (RBF)']),
]
voting = VotingClassifier(estimators=ensemble_members, voting='soft')
voting.fit(X_train, y_train)

# Score the ensemble on the held-out split.
y_pred = voting.predict(X_test)
voting_accuracy = accuracy_score(y_test, y_pred)
print("Voting Classifier Accuracy:", voting_accuracy)
