In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.neural_network import MLPClassifier

# Read the measurements table. The path below is a placeholder and has to be
# replaced with the real CSV file before this cell is run.
df = pd.read_csv(" ", engine="python", encoding="cp1252")  # Replace with your filename

# Model inputs: five individually named columns, followed by the same six
# summary statistics for each of five signal prefixes. The order is fixed:
# prefixes in the outer loop, statistics in the inner loop.
base_columns = ["Power (W)", "Speed (mm/s)", "Hatch (mm)", "Layer (mm)", "Laser Focus (mm)"]
signal_prefixes = [
    "tomography",
    "polarimetry_begin_aop",
    "polarimetry_end_aop",
    "polarimetry_begin_dolp",
    "polarimetry_end_dolp",
]
stat_suffixes = ["avg", "median", "max", "min", "std", "roughness"]
features = base_columns + [
    f"{prefix}_{suffix}" for prefix in signal_prefixes for suffix in stat_suffixes
]

# Binary target: cut the source column at its median. With quantile edges
# [0, .5, 1] and labels=False, qcut returns the bin index, i.e. 0 for the
# lower half and 1 for the upper half.
target_source = df['PF at 77oC, mW/m K2']
df['PF'] = pd.qcut(target_source, q=[0, .5, 1], labels=False, precision=0)
X, y = df[features].values, df['PF'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardise the features. The scaler is fitted on the training rows only
# and the same fitted transform is then applied to both splits.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

# Candidate classifiers: display name -> unfitted estimator.
# Dict insertion order is the order in which the models are iterated.
SEED = 42          # shared by every estimator that accepts a random_state
N_ESTIMATORS = 50  # ensemble size for the forest, boosting and bagging models
models = {
    "Naive Bayes": GaussianNB(),
    "Logistic Regression": LogisticRegression(random_state=SEED, max_iter=1000),
    # probability=True makes the SVMs expose predict_proba.
    "Linear SVM": SVC(kernel='linear', random_state=SEED, probability=True),
    "Polynomial SVM": SVC(kernel='poly', degree=3, random_state=SEED, probability=True),
    "RBF SVM": SVC(kernel='rbf', random_state=SEED, probability=True),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Random Forest": RandomForestClassifier(random_state=SEED, n_estimators=N_ESTIMATORS),
    "AdaBoost": AdaBoostClassifier(random_state=SEED, n_estimators=N_ESTIMATORS),
    "Bagging": BaggingClassifier(random_state=SEED, n_estimators=N_ESTIMATORS),
    "MLP": MLPClassifier(random_state=SEED, max_iter=1000, hidden_layer_sizes=(100,)),
}

# Evaluate all models
# Each classifier is fitted on the training split and scored on the held-out
# split; one block of metrics is printed per model, in dictionary order.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # roc_auc_score needs a continuous score for the positive class (label 1).
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_test)[:, 1]
    else:
        # ROC AUC depends only on how the scores rank the samples, so the raw
        # decision_function output is passed through unchanged. Min-max
        # rescaling would not alter the AUC, and it divides by zero (yielding
        # NaN scores) whenever every test sample gets the same score.
        y_proba = model.decision_function(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)

    print(f"\n{name}")
    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"AUC      : {auc:.4f}")


In [None]:
# Rank the input features by permutation importance for the Bagging
# classifier (taken here as the best classifier) and report the top 15.

from sklearn.inspection import permutation_importance

# Train a fresh Bagging ensemble; fit() returns the fitted estimator itself.
bagging = BaggingClassifier(random_state=42, n_estimators=50).fit(X_train, y_train)

# Shuffle each feature 30 times on the test split and measure the accuracy change.
result = permutation_importance(
    bagging,
    X_test,
    y_test,
    scoring='accuracy',
    n_repeats=30,
    random_state=42,
)

# One row per feature; result arrays are in the same order as `features`.
importances_df = pd.DataFrame({
    'Feature': features,
    'Mean': result.importances_mean,
    'Std': result.importances_std,
})

# Human-readable "mean ± std" column, three decimals each.
importances_df['Mean ± Std'] = [
    f"{mean:.3f} ± {std:.3f}"
    for mean, std in zip(importances_df['Mean'], importances_df['Std'])
]

# Keep the 15 features with the largest mean importance.
ranked = importances_df.sort_values(by='Mean', ascending=False)
top15 = ranked.head(15)

# Show the table without the index column.
print("\nTop 15 Features by Permutation Importance (Bagging Classifier):")
report = top15[['Feature', 'Mean ± Std']]
print(report.to_string(index=False))
