In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import pickle
print("✅ All libraries loaded successfully!")

✅ All libraries loaded successfully!


In [25]:
# Load dataset
data = pd.read_csv("Employee_Performance_Dataset.csv")


In [26]:
data.columns

Index(['Employee ID', 'Name', 'Department', 'Job Role', 'Performance Score',
       'KPI Score', 'Attendance (%)', 'Peer Rating', 'Task Completion (%)',
       'Work Hours Logged', 'Manager Feedback', 'Training Hours',
       'Promotion Eligibility'],
      dtype='object')

In [31]:
# Drop unnecessary columns
data.drop(['Manager Feedback','Work Hours Logged','Training Hours' ], axis=1,inplace=True)


In [32]:
# Define features and target

y = data['Promotion Eligibility']
X = data.drop('Promotion Eligibility', axis=1)


In [33]:
y

0        No
1       Yes
2        No
3       Yes
4        No
       ... 
4995     No
4996    Yes
4997     No
4998     No
4999     No
Name: Promotion Eligibility, Length: 5000, dtype: object

In [34]:
# Convert Yes/No → 1/0
y = y.map({'Yes': 1, 'No': 0})


In [35]:
y

0       0
1       1
2       0
3       1
4       0
       ..
4995    0
4996    1
4997    0
4998    0
4999    0
Name: Promotion Eligibility, Length: 5000, dtype: int64

In [36]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [37]:
X_train

Unnamed: 0,Department,Job Role,Performance Score,KPI Score,Attendance (%),Peer Rating,Task Completion (%)
4227,Finance,Accountant,94,69.05,92.21,4.6,83.13
4676,Marketing,Marketing Executive,79,68.11,82.26,4.0,96.26
800,Sales,Sales Executive,63,69.68,87.01,3.6,81.33
3671,HR,Employee Relations,91,89.31,92.50,4.5,84.69
4193,Finance,Financial Analyst,89,91.90,87.17,4.2,80.09
...,...,...,...,...,...,...,...
4426,HR,Recruitment Specialist,88,84.68,87.00,4.5,98.13
466,HR,Employee Relations,67,70.79,79.98,3.2,95.85
3092,Finance,Accountant,97,81.01,92.40,4.2,83.24
3772,HR,Recruitment Specialist,74,78.15,84.55,3.3,99.75


In [38]:
X_train.columns


Index(['Department', 'Job Role', 'Performance Score', 'KPI Score',
       'Attendance (%)', 'Peer Rating', 'Task Completion (%)'],
      dtype='object')

In [39]:
y_train

4227    0
4676    0
800     0
3671    1
4193    1
       ..
4426    1
466     0
3092    0
3772    0
860     0
Name: Promotion Eligibility, Length: 4000, dtype: int64

In [40]:
# Separate columns
categorical_cols = ['Department', 'Job Role']
numeric_cols = [col for col in X.columns if col not in categorical_cols]


In [41]:
categorical_cols

['Department', 'Job Role']

In [42]:
numeric_cols

['Performance Score',
 'KPI Score',
 'Attendance (%)',
 'Peer Rating',
 'Task Completion (%)']

In [43]:
# Preprocessor: Encode categorical columns
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols)
    ],
    remainder='passthrough'
)


In [44]:
# Define full pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])


In [45]:
# Train model
model.fit(X_train, y_train)


In [46]:
# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))


Accuracy: 0.844
F1 Score: 0.36065573770491804


In [47]:
# Save trained pipeline
pickle.dump(model, open('promotion_pipeline.pkl', 'wb'))
print("✅ Model saved as 'promotion_pipeline.pkl'")

✅ Model saved as 'promotion_pipeline.pkl'


In [20]:
!pip install streamlit_jupyter

Defaulting to user installation because normal site-packages is not writeable
Collecting streamlit_jupyter
  Downloading streamlit_jupyter-0.3.1-py3-none-any.whl.metadata (6.4 kB)
Collecting fastcore (from streamlit_jupyter)
  Downloading fastcore-1.8.14-py3-none-any.whl.metadata (3.7 kB)
Collecting stqdm (from streamlit_jupyter)
  Downloading stqdm-0.0.5-py3-none-any.whl.metadata (3.0 kB)
Collecting tabulate (from streamlit_jupyter)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading streamlit_jupyter-0.3.1-py3-none-any.whl (13 kB)
Downloading fastcore-1.8.14-py3-none-any.whl (86 kB)
Downloading stqdm-0.0.5-py3-none-any.whl (11 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Installing collected packages: tabulate, fastcore, stqdm, streamlit_jupyter
Successfully installed fastcore-1.8.14 stqdm-0.0.5 streamlit_jupyter-0.3.1 tabulate-0.9.0



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
