In [22]:
import pandas as pd
import numpy as np
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [23]:
nltk.download('vader_lexicon')
sen=SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\gupta\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [24]:
data=pd.read_csv("employee_feedback.csv")

In [25]:
data

Unnamed: 0,Employee_ID,Feedback,Department,Job_Role,Salary,Work_Hours_Per_Week,Attrition
0,1,I feel valued and enjoy my work.,HR,Manager,70000,40,No
1,2,Workload is overwhelming.,IT,Engineer,60000,50,Yes
2,3,Great team and work culture!,Marketing,Executive,50000,42,No
3,4,No growth opportunities here.,IT,Developer,65000,45,Yes
4,5,I get recognition for my work.,Finance,Analyst,72000,38,No
5,6,Toxic work environment.,IT,Engineer,58000,52,Yes
6,7,"Flexible work hours, I love it.",HR,HR Coordinator,55000,36,No
7,8,Salary is too low for my efforts.,Finance,Consultant,63000,48,Yes


In [26]:
def get_sentiment_score(text):
    if pd.isna(text):
        return 0
    return sen.polarity_scores(text)["compound"]

In [27]:
data["sentiment_score"]=data["Feedback"].apply(get_sentiment_score)

In [28]:
data

Unnamed: 0,Employee_ID,Feedback,Department,Job_Role,Salary,Work_Hours_Per_Week,Attrition,sentiment_score
0,1,I feel valued and enjoy my work.,HR,Manager,70000,40,No,0.7269
1,2,Workload is overwhelming.,IT,Engineer,60000,50,Yes,0.0
2,3,Great team and work culture!,Marketing,Executive,50000,42,No,0.6588
3,4,No growth opportunities here.,IT,Developer,65000,45,Yes,0.4588
4,5,I get recognition for my work.,Finance,Analyst,72000,38,No,0.0
5,6,Toxic work environment.,IT,Engineer,58000,52,Yes,0.0
6,7,"Flexible work hours, I love it.",HR,HR Coordinator,55000,36,No,0.7269
7,8,Salary is too low for my efforts.,Finance,Consultant,63000,48,Yes,-0.2732


In [29]:
encoder=LabelEncoder()

In [30]:
data["Department"]=encoder.fit_transform(data["Department"])
data["Job_Role"]=encoder.fit_transform(data["Job_Role"])

In [31]:
data

Unnamed: 0,Employee_ID,Feedback,Department,Job_Role,Salary,Work_Hours_Per_Week,Attrition,sentiment_score
0,1,I feel valued and enjoy my work.,1,6,70000,40,No,0.7269
1,2,Workload is overwhelming.,2,3,60000,50,Yes,0.0
2,3,Great team and work culture!,3,4,50000,42,No,0.6588
3,4,No growth opportunities here.,2,2,65000,45,Yes,0.4588
4,5,I get recognition for my work.,0,0,72000,38,No,0.0
5,6,Toxic work environment.,2,3,58000,52,Yes,0.0
6,7,"Flexible work hours, I love it.",1,5,55000,36,No,0.7269
7,8,Salary is too low for my efforts.,0,1,63000,48,Yes,-0.2732


In [32]:
scaler = StandardScaler()
data[["Salary", "Work_Hours_Per_Week"]] = scaler.fit_transform(data[["Salary", "Work_Hours_Per_Week"]])

In [33]:
X = data[["Department", "Job_Role", "Salary", "Work_Hours_Per_Week", "sentiment_score"]]
y = data["Attrition"].map({"Yes": 1, "No": 0})  # Convert Attrition to binary (1 = Yes, 0 = No)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Model Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Model Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

