In [1]:
# =========================================
# STEP 1: Import Required Libraries
# =========================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# =========================================
# STEP 2: Load HR Dataset
# =========================================

# Read the attrition CSV from the notebook's working directory into `data`,
# the frame every later cell operates on.
data = pd.read_csv(filepath_or_buffer="WA_Fn-UseC_-HR-Employee-Attrition.csv")

# Report (rows, columns), then preview the first five records as the cell output.
print("Dataset Shape:", data.shape)
data.head(n=5)

Dataset Shape: (1470, 35)


Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeNumber,...,RelationshipSatisfaction,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,1102,Sales,1,2,Life Sciences,1,1,...,1,80,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,279,Research & Development,8,1,Life Sciences,1,2,...,4,80,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,1373,Research & Development,2,2,Other,1,4,...,2,80,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,1392,Research & Development,3,4,Life Sciences,1,5,...,3,80,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,591,Research & Development,2,1,Medical,1,7,...,4,80,1,6,3,3,2,2,2,2


In [3]:
# Per-column count of missing values (`isna` is the canonical name for `isnull`).
data.isna().sum()

Unnamed: 0,0
Age,0
Attrition,0
BusinessTravel,0
DailyRate,0
Department,0
DistanceFromHome,0
Education,0
EducationField,0
EmployeeCount,0
EmployeeNumber,0


In [4]:
# =========================================
# STEP 3: Encode Categorical Variables
# =========================================

# Replace every text column of `data` with integer codes, in place, so the
# regressors fitted further down receive a purely numeric frame.
#
# Each column gets its OWN fitted LabelEncoder, stored in `label_encoders`
# (column name -> encoder). Re-fitting a single shared encoder would keep only
# the last column's classes, so earlier columns could not be decoded with
# `inverse_transform` or inspected through `classes_`.
#
# NOTE(review): LabelEncoder assigns codes by sorted class order, so nominal
# categories acquire an arbitrary numeric order in the linear models below.
# Consider one-hot encoding if that ordering is not intended.
# NOTE: this cell mutates `data`; re-running it without reloading the CSV finds
# no text columns and leaves `label_encoders` empty.

label_encoder = LabelEncoder()  # stays defined even if no column needs encoding
label_encoders = {}

for col in data.columns:
    # Match classic object columns and pandas' dedicated string dtype, which a
    # plain `dtype == 'object'` comparison does not match.
    if pd.api.types.is_object_dtype(data[col]) or pd.api.types.is_string_dtype(data[col]):
        label_encoder = LabelEncoder()
        data[col] = label_encoder.fit_transform(data[col])
        label_encoders[col] = label_encoder

# As before, `label_encoder` ends up holding the encoder fitted on the last
# encoded column.

In [5]:
# =========================================
# STEP 4: Split Features & Target
# =========================================

# Target: the MonthlyIncome column. Features: every remaining column of `data`.
y = data["MonthlyIncome"]
X = data.drop(columns=["MonthlyIncome"])

In [6]:
# =========================================
# STEP 5: Train-Test Split
# =========================================

# Hold out 20% of the rows for evaluation; the fixed seed keeps the shuffle
# (and therefore every metric below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# =========================================
# STEP 6: Multiple Linear Regression
# =========================================

# Ordinary least squares fitted on the training split (`fit` returns the
# estimator itself, so construction and fitting can be chained).
mlr = LinearRegression().fit(X_train, y_train)

# Predictions on the held-out rows; reused by the comparison table later on.
y_pred_mlr = mlr.predict(X_test)

print("Multiple Linear Regression")
print("MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred_mlr))
print("R2 Score:", r2_score(y_true=y_test, y_pred=y_pred_mlr))

Multiple Linear Regression
MSE: 2267362.809261191
R2 Score: 0.8962562153972032


In [10]:
# =========================================
# STEP 7: Lasso Regression
# =========================================

# L1-regularised linear model with penalty strength alpha=0.1.
# NOTE(review): no feature scaling is visible before this fit, and the L1
# penalty is scale-sensitive. Confirm whether standardising X is intended.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# Predictions on the held-out rows; reused by the comparison table later on.
y_pred_lasso = lasso.predict(X_test)

print("\nLasso Regression")
print("MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred_lasso))
print("R2 Score:", r2_score(y_true=y_test, y_pred=y_pred_lasso))


Lasso Regression
MSE: 2267196.0655066795
R2 Score: 0.8962638448017614


In [11]:
# =========================================
# STEP 8: Ridge Regression
# =========================================

# L2-regularised linear model with penalty strength alpha=1.0.
# NOTE(review): no feature scaling is visible before this fit, and the L2
# penalty is scale-sensitive. Confirm whether standardising X is intended.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)

# Predictions on the held-out rows; reused by the comparison table later on.
y_pred_ridge = ridge.predict(X_test)

print("\nRidge Regression")
print("MSE:", mean_squared_error(y_true=y_test, y_pred=y_pred_ridge))
print("R2 Score:", r2_score(y_true=y_test, y_pred=y_pred_ridge))


Ridge Regression
MSE: 2265542.1561786793
R2 Score: 0.8963395198601931


In [12]:
# =========================================
# STEP 9: Compare Models
# =========================================

# One row per model, built from the test-set predictions produced by the three
# cells above; columns are the model label, its MSE and its R2 score.
results = pd.DataFrame(
    [
        {
            "Model": model_name,
            "MSE": mean_squared_error(y_test, predicted),
            "R2 Score": r2_score(y_test, predicted),
        }
        for model_name, predicted in (
            ("Multiple Linear", y_pred_mlr),
            ("Lasso", y_pred_lasso),
            ("Ridge", y_pred_ridge),
        )
    ]
)

results

Unnamed: 0,Model,MSE,R2 Score
0,Multiple Linear,2267363.0,0.896256
1,Lasso,2267196.0,0.896264
2,Ridge,2265542.0,0.89634
