In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [6]:
# Load dataset
df = pd.read_csv("Employee Attrition.csv")  # Change file path if needed

In [8]:
# Drop irrelevant columns
df.drop(columns=["EmployeeNumber", "Over18", "StandardHours", "EmployeeCount"], inplace=True)

In [10]:
# Encode categorical variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [12]:
# Define target variable and selected independent variables
selected_features = [
    "OverTime", "MonthlyIncome", "YearsAtCompany", "YearsInCurrentRole",
    "YearsWithCurrManager", "TotalWorkingYears", "JobSatisfaction",
    "EnvironmentSatisfaction", "DistanceFromHome", "WorkLifeBalance"
]

In [14]:
X = df[selected_features]  # Independent variables
y = df["Attrition"]  # Dependent variable

In [16]:
# Standardize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [18]:
# Split dataset into training (80%) and testing (20%)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [20]:
# Train and evaluate models
models = {
    "Logistic Regression": LogisticRegression(),
    "k-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Support Vector Classifier": SVC(kernel='linear', probability=True)
}

In [22]:
for model_name, model in models.items():
    model.fit(X_train, y_train)  # Train model
    y_pred = model.predict(X_test)  # Predict on test data
    print(f"\n🔹 Classification Report for {model_name} 🔹")
    print(classification_report(y_test, y_pred))


🔹 Classification Report for Logistic Regression 🔹
              precision    recall  f1-score   support

           0       0.87      0.98      0.92       255
           1       0.38      0.08      0.13        39

    accuracy                           0.86       294
   macro avg       0.62      0.53      0.53       294
weighted avg       0.81      0.86      0.82       294


🔹 Classification Report for k-Nearest Neighbors 🔹
              precision    recall  f1-score   support

           0       0.87      0.95      0.91       255
           1       0.25      0.10      0.15        39

    accuracy                           0.84       294
   macro avg       0.56      0.53      0.53       294
weighted avg       0.79      0.84      0.81       294


🔹 Classification Report for Support Vector Classifier 🔹
              precision    recall  f1-score   support

           0       0.87      1.00      0.93       255
           1       0.00      0.00      0.00        39

    accuracy           

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
