# Employee Turnover Prediction AI
This AI will be tasked with first determining employee sentiment based on the filled out feedback forms. This sentiment analysis would then feed into the turnover analysis and predictions.

## Step 1: Setup and Connections

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.metrics import AUC, Accuracy

## Step 2: Cleaning the Dataset

In [21]:
df = pd.read_csv('employee_turnover_dataset_2.csv')
# df = pd.read_csv('Employee_Attrition_Dataset.csv')

df['Start Date'] = pd.to_datetime(df['Start Date'])
df['Tenure'] = datetime.now().year - df['Start Date'].dt.year
    
df = pd.get_dummies(df, columns=['Department'], drop_first=True)

df.drop(['Employee ID', 'Start Date'], axis=1, inplace=True)

X = df.drop('Resigned', axis=1)
y = df['Resigned']
    
df.head()

Unnamed: 0,Satisfaction Level,Age,Years of Experience,Salary,Promotions,Resigned,Tenure,Department_HR,Department_Marketing,Department_Operations,Department_Sales,Department_Tech
0,3.472586,64,16,57025,1,0,11,False,False,False,True,False
1,1.420574,45,6,95251,0,1,27,False,False,False,True,False
2,3.752803,54,9,74503,0,0,18,True,False,False,False,False
3,1.23977,32,2,104050,0,0,6,True,False,False,False,False
4,2.072702,38,9,58023,0,1,17,True,False,False,False,False


## Step 3: Training the Model

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=100, batch_size=1000, validation_split=0.2)


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6350 - loss: 0.6542 - val_accuracy: 0.7501 - val_loss: 0.5112
Epoch 2/100
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7493 - loss: 0.5281 - val_accuracy: 0.7509 - val_loss: 0.5025
Epoch 3/100
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7486 - loss: 0.5202 - val_accuracy: 0.7501 - val_loss: 0.5024
Epoch 4/100
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7540 - loss: 0.5130 - val_accuracy: 0.7501 - val_loss: 0.5026
Epoch 5/100
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7524 - loss: 0.5114 - val_accuracy: 0.7501 - val_loss: 0.5019
Epoch 6/100
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7537 - loss: 0.5059 - val_accuracy: 0.7501 - val_loss: 0.5017
Epoch 7/100
[1m56/56[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x298f84d10>

## Testing the Model

In [23]:
probabilities = model.predict(X_test)
predicted_classes = np.where(probabilities > 0.5, 1, 0)

accuracy = accuracy_score(y_test, predicted_classes)
print(f'Accuracy: {accuracy}')
print(f'Predicted probabilities: {probabilities[:5]}')

[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229us/step
Accuracy: 0.7533333333333333
Predicted probabilities: [[0.27398664]
 [0.29331142]
 [0.36798567]
 [0.32876962]
 [0.07593747]]


## Saving the Model

In [24]:
model.save('AttritionAI.h5')
model.save('AttritionAI.keras')

