## Part 1: Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers

# Load the attrition dataset from its hosted CSV and preview the first rows
ATTRITION_CSV_URL = 'https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv'
attrition_df = pd.read_csv(ATTRITION_CSV_URL)
attrition_df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,Sales,1,2,Life Sciences,2,94,3,...,3,1,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,Research & Development,8,1,Life Sciences,3,61,2,...,4,4,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,Research & Development,2,2,Other,4,92,2,...,3,2,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,Research & Development,3,4,Life Sciences,4,56,3,...,3,3,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,Research & Development,2,1,Medical,1,40,3,...,3,4,1,6,3,3,2,2,2,2


In [None]:
# Count the distinct (non-null) values per column; low counts point at
# categorical columns that will need encoding.
attrition_df.nunique(axis=0, dropna=True)

Unnamed: 0,0
Age,43
Attrition,2
BusinessTravel,3
Department,3
DistanceFromHome,29
Education,5
EducationField,6
EnvironmentSatisfaction,4
HourlyRate,71
JobInvolvement,4


In [None]:
# Targets: the two columns the model will learn to predict
target_columns = ['Attrition', 'Department']
y_df = attrition_df[target_columns]


In [None]:
# Ten feature columns used as model inputs
selected_columns = [
    'Age',
    'BusinessTravel',
    'DistanceFromHome',
    'Education',
    'EnvironmentSatisfaction',
    'HourlyRate',
    'JobInvolvement',
    'JobSatisfaction',
    'NumCompaniesWorked',
    'OverTime',
]

# Feature frame restricted to the chosen columns
X_df = attrition_df[selected_columns]

# Inspect dtypes to see which features still need numeric encoding
X_df.dtypes



Unnamed: 0,0
Age,int64
BusinessTravel,object
DistanceFromHome,int64
Education,int64
EnvironmentSatisfaction,int64
HourlyRate,int64
JobInvolvement,int64
JobSatisfaction,int64
NumCompaniesWorked,int64
OverTime,object


In [None]:
# Split features (X_df) and targets (y_df) into training and testing sets
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
split_frames = train_test_split(X_df, y_df, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_frames

# Shapes of the four resulting frames, as a quick sanity check
tuple(frame.shape for frame in split_frames)


((1176, 10), (294, 10), (1176, 2), (294, 2))

In [None]:
# Convert the X data to numeric data types.
# Every encoder is fit on the training split only and then applied to the
# test split, so nothing about the test rows leaks into preprocessing.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Step 1: Encode 'OverTime' (binary column) as integers using LabelEncoder.
# .assign() returns new frames instead of writing into the frames handed back
# by train_test_split, which avoids pandas' SettingWithCopyWarning. The column
# keeps its original position because it already exists in the frame.
overtime_encoder = LabelEncoder()
X_train = X_train.assign(OverTime=overtime_encoder.fit_transform(X_train['OverTime']))
X_test = X_test.assign(OverTime=overtime_encoder.transform(X_test['OverTime']))

# Step 2: One-hot encode 'BusinessTravel' (multi-category column).
# drop='first' omits the first category's indicator; it is implied when the
# remaining indicators are all zero.
business_travel_encoder = OneHotEncoder(drop='first', sparse_output=False)
business_travel_encoded_train = business_travel_encoder.fit_transform(X_train[['BusinessTravel']])
business_travel_encoded_test = business_travel_encoder.transform(X_test[['BusinessTravel']])

# Wrap the encoded arrays in DataFrames that reuse each split's index so the
# join below aligns rows by label rather than by position.
business_travel_columns = business_travel_encoder.get_feature_names_out(['BusinessTravel'])
business_travel_encoded_train_df = pd.DataFrame(
    business_travel_encoded_train,
    columns=business_travel_columns,
    index=X_train.index
)
business_travel_encoded_test_df = pd.DataFrame(
    business_travel_encoded_test,
    columns=business_travel_columns,
    index=X_test.index
)

# Swap the original 'BusinessTravel' column for its one-hot encoded columns
X_train = X_train.drop(columns='BusinessTravel').join(business_travel_encoded_train_df)
X_test = X_test.drop(columns='BusinessTravel').join(business_travel_encoded_test_df)

# Ensure all columns are numeric now
X_train = X_train.astype(float)
X_test = X_test.astype(float)

In [None]:
# Standardize the features
from sklearn.preprocessing import StandardScaler

# Learn the per-column mean and standard deviation from the training split only
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both splits
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Preview the scaled training data with the feature names restored
scaled_train_preview = pd.DataFrame(X_train_scaled, columns=X_train.columns)
scaled_train_preview.head()


Unnamed: 0,Age,DistanceFromHome,Education,EnvironmentSatisfaction,HourlyRate,JobInvolvement,JobSatisfaction,NumCompaniesWorked,OverTime,BusinessTravel_Travel_Frequently,BusinessTravel_Travel_Rarely
0,-1.388559,1.440396,-0.863356,0.279706,-0.472832,-1.01234,-1.582336,-1.059168,-0.63641,-0.490414,0.639049
1,-2.040738,-0.522699,-0.863356,-0.639104,0.309374,0.389912,1.152834,-0.659431,-0.63641,-0.490414,-1.564826
2,-0.845077,1.317703,-0.863356,1.198515,-1.059487,0.389912,1.152834,-0.259693,-0.63641,-0.490414,0.639049
3,0.241886,0.336155,0.099933,1.198515,-0.032841,0.389912,-0.670613,0.539781,-0.63641,-0.490414,0.639049
4,-0.627685,1.317703,0.099933,-0.639104,1.09158,0.389912,0.241111,-0.659431,-0.63641,-0.490414,0.639049


In [None]:

from sklearn.preprocessing import OneHotEncoder

# One-hot encode the 'Department' target as a dense array.
# drop='first' omits the first category's indicator, so rows belonging to that
# category are encoded as all zeros in the remaining columns.
department_encoder = OneHotEncoder(drop='first', sparse_output=False)

# Fit on the training targets only, then transform both splits
department_encoder.fit(y_train[['Department']])
department_encoded_train = department_encoder.transform(y_train[['Department']])
department_encoded_test = department_encoder.transform(y_test[['Department']])

# Wrap the arrays in DataFrames that keep each split's original row index
department_feature_names = department_encoder.get_feature_names_out(['Department'])
department_encoded_train_df = pd.DataFrame(
    department_encoded_train, columns=department_feature_names, index=y_train.index
)
department_encoded_test_df = pd.DataFrame(
    department_encoded_test, columns=department_feature_names, index=y_test.index
)

# Preview both encoded frames
department_encoded_train_df.head(), department_encoded_test_df.head()


(      Department_Research & Development  Department_Sales
 1097                                1.0               0.0
 727                                 1.0               0.0
 254                                 0.0               1.0
 1175                                1.0               0.0
 1341                                1.0               0.0,
       Department_Research & Development  Department_Sales
 1041                                0.0               1.0
 184                                 1.0               0.0
 1222                                0.0               0.0
 67                                  1.0               0.0
 220                                 1.0               0.0)

In [None]:
# Replace the raw 'Department' column with its one-hot encoded columns.
# The join aligns on the row index shared by the targets and the encoded frames.
y_train = y_train.drop(columns=['Department']).join(department_encoded_train_df)
y_test = y_test.drop(columns=['Department']).join(department_encoded_test_df)

# Preview the updated targets
y_train.head(), y_test.head()

(     Attrition  Department_Research & Development  Department_Sales
 1097        No                                1.0               0.0
 727         No                                1.0               0.0
 254         No                                0.0               1.0
 1175        No                                1.0               0.0
 1341        No                                1.0               0.0,
      Attrition  Department_Research & Development  Department_Sales
 1041        No                                0.0               1.0
 184         No                                1.0               0.0
 1222       Yes                                0.0               0.0
 67          No                                1.0               0.0
 220         No                                1.0               0.0)

In [None]:


# One-hot encode the binary 'Attrition' target as a dense array.
# With drop='first' only one indicator column remains (Attrition_Yes).
attrition_encoder = OneHotEncoder(drop='first', sparse_output=False)

# Fit on the training targets only, then transform both splits
attrition_encoder.fit(y_train[['Attrition']])
attrition_encoded_train = attrition_encoder.transform(y_train[['Attrition']])
attrition_encoded_test = attrition_encoder.transform(y_test[['Attrition']])

# Wrap the arrays in DataFrames that keep each split's original row index
attrition_feature_names = attrition_encoder.get_feature_names_out(['Attrition'])
attrition_encoded_train_df = pd.DataFrame(
    attrition_encoded_train, columns=attrition_feature_names, index=y_train.index
)
attrition_encoded_test_df = pd.DataFrame(
    attrition_encoded_test, columns=attrition_feature_names, index=y_test.index
)

# Preview both encoded frames
attrition_encoded_train_df.head(), attrition_encoded_test_df.head()

(      Attrition_Yes
 1097            0.0
 727             0.0
 254             0.0
 1175            0.0
 1341            0.0,
       Attrition_Yes
 1041            0.0
 184             0.0
 1222            1.0
 67              0.0
 220             0.0)

In [None]:
# Replace the raw 'Attrition' column with its encoded indicator column.
# The join aligns on the row index shared by the targets and the encoded frames.
y_train = y_train.drop(columns=['Attrition']).join(attrition_encoded_train_df)
y_test = y_test.drop(columns=['Attrition']).join(attrition_encoded_test_df)

# Preview the updated targets
y_train.head(), y_test.head()


(      Department_Research & Development  Department_Sales  Attrition_Yes
 1097                                1.0               0.0            0.0
 727                                 1.0               0.0            0.0
 254                                 0.0               1.0            0.0
 1175                                1.0               0.0            0.0
 1341                                1.0               0.0            0.0,
       Department_Research & Development  Department_Sales  Attrition_Yes
 1041                                0.0               1.0            0.0
 184                                 1.0               0.0            0.0
 1222                                0.0               0.0            1.0
 67                                  1.0               0.0            0.0
 220                                 1.0               0.0            0.0)

## Create, Compile, and Train the Model

In [None]:

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense

# The input width equals the number of feature columns in the training data
num_features = len(X_train.columns)

# Input layer: one value per feature
input_shape = (num_features,)
input_layer = Input(shape=input_shape)

# Two shared hidden layers whose output feeds every task-specific branch
shared_layer_1 = Dense(units=64, activation='relu')(input_layer)
shared_layer_2 = Dense(units=32, activation='relu')(shared_layer_1)

# Show the symbolic tensors created so far
input_layer, shared_layer_1, shared_layer_2

(<KerasTensor shape=(None, 11), dtype=float32, sparse=None, name=keras_tensor_18>,
 <KerasTensor shape=(None, 64), dtype=float32, sparse=False, name=keras_tensor_19>,
 <KerasTensor shape=(None, 32), dtype=float32, sparse=False, name=keras_tensor_20>)

In [None]:

from tensorflow.keras.layers import Dense

# Department branch: a hidden layer on top of the shared representation
department_branch_hidden = Dense(units=16, activation='relu')(shared_layer_2)

# Softmax output with two units, one per indicator column of the drop-first
# encoded Department target
department_output = Dense(
    units=2,
    activation='softmax',
    name='department_output',
)(department_branch_hidden)

# Show the symbolic tensors of the Department branch
department_branch_hidden, department_output


(<KerasTensor shape=(None, 16), dtype=float32, sparse=False, name=keras_tensor_21>,
 <KerasTensor shape=(None, 2), dtype=float32, sparse=False, name=keras_tensor_22>)

In [None]:
# Create the branches

# Attrition branch: a single sigmoid unit for the binary Attrition_Yes target
attrition_branch_hidden = Dense(16, activation='relu')(shared_layer_2)
attrition_output = Dense(1, activation='sigmoid', name='attrition_output')(attrition_branch_hidden)

# Department branch.
# Department has three categories, but the target was one-hot encoded with
# drop='first', so y holds only two indicator columns and the dropped category
# shows up as an all-zero row. A 2-unit softmax can never predict that
# category, its rows contribute zero categorical cross-entropy, and argmax
# accuracy scores them as the first remaining column. The head therefore gets
# one unit per category, the dropped one included.
num_department_classes = len(department_encoder.categories_[0])
department_branch_hidden = Dense(16, activation='relu')(shared_layer_2)
department_output = Dense(
    num_department_classes, activation='softmax', name='department_output'
)(department_branch_hidden)


def _full_department_target(y_true, dtype):
    """Rebuild the full one-hot Department target from its drop-first encoding.

    The dropped category's indicator is 1 minus the sum of the remaining
    indicators. It is prepended as column 0 because drop='first' removes the
    first entry of the encoder's category list. Targets that already have one
    column per category are returned unchanged.
    """
    y_true = tf.cast(y_true, dtype)
    if y_true.shape[-1] == num_department_classes:
        return y_true
    dropped_indicator = 1.0 - tf.reduce_sum(y_true, axis=-1, keepdims=True)
    return tf.concat([dropped_indicator, y_true], axis=-1)


def department_loss(y_true, y_pred):
    """Categorical cross-entropy against the full one-hot Department target."""
    return tf.keras.losses.categorical_crossentropy(
        _full_department_target(y_true, y_pred.dtype), y_pred
    )


def department_accuracy(y_true, y_pred):
    """Per-sample argmax accuracy against the full one-hot Department target."""
    return tf.keras.metrics.categorical_accuracy(
        _full_department_target(y_true, y_pred.dtype), y_pred
    )


# Create the model with the two output branches
model = Model(inputs=input_layer, outputs=[attrition_output, department_output])

# Compile: binary cross-entropy for attrition, categorical cross-entropy on the
# expanded target for department. The department metric is named 'accuracy' so
# it is still reported as 'department_output_accuracy'.
model.compile(
    optimizer='adam',
    loss={
        'attrition_output': 'binary_crossentropy',
        'department_output': department_loss
    },
    metrics={
        'attrition_output': 'accuracy',
        'department_output': tf.keras.metrics.MeanMetricWrapper(department_accuracy, name='accuracy')
    }
)

# Summarize the model
model.summary()

In [None]:
# Targets are passed as dicts keyed by output-layer name so each head is
# trained against its own columns.
department_target_columns = ['Department_Research & Development', 'Department_Sales']

fit_train_targets = {
    'attrition_output': y_train['Attrition_Yes'],           # single binary column
    'department_output': y_train[department_target_columns],  # one-hot indicator columns
}
fit_validation_targets = {
    'attrition_output': y_test['Attrition_Yes'],
    'department_output': y_test[department_target_columns],
}

# Train for 30 epochs, validating on the held-out split after each epoch
history = model.fit(
    X_train_scaled,
    fit_train_targets,
    validation_data=(X_test_scaled, fit_validation_targets),
    epochs=30,
    batch_size=32,
)


Epoch 1/30
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - attrition_output_accuracy: 0.7798 - department_output_accuracy: 0.5986 - loss: 1.2185 - val_attrition_output_accuracy: 0.8673 - val_department_output_accuracy: 0.7109 - val_loss: 0.9933
Epoch 2/30
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - attrition_output_accuracy: 0.8210 - department_output_accuracy: 0.6788 - loss: 1.0453 - val_attrition_output_accuracy: 0.8707 - val_department_output_accuracy: 0.7109 - val_loss: 0.9637
Epoch 3/30
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - attrition_output_accuracy: 0.8419 - department_output_accuracy: 0.6686 - loss: 0.9817 - val_attrition_output_accuracy: 0.8707 - val_department_output_accuracy: 0.7109 - val_loss: 0.9542
Epoch 4/30
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - attrition_output_accuracy: 0.8372 - department_output_accuracy: 0.6963 - loss: 0.9673 - val_attri

In [None]:
# Evaluate the model on the held-out test split.
# Targets are keyed by output-layer name, as in training.
evaluation_targets = {
    'attrition_output': y_test['Attrition_Yes'],
    'department_output': y_test[['Department_Research & Development', 'Department_Sales']],
}
evaluation = model.evaluate(X_test_scaled, evaluation_targets)

# Show the raw list returned by evaluate()
print(evaluation)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - attrition_output_accuracy: 0.8411 - department_output_accuracy: 0.6917 - loss: 3.8642 
[3.515458583831787, 0.8673469424247742, 0.7108843326568604]


In [None]:
# Report the test accuracy of each output head.
# `evaluation` is the list returned by model.evaluate(): index 0 is the loss,
# index 1 the attrition accuracy and index 2 the department accuracy.
attrition_accuracy, department_accuracy = evaluation[1], evaluation[2]

print(f"Attrition Accuracy: {attrition_accuracy:.2f}")
print(f"Department Accuracy: {department_accuracy:.2f}")


Attrition Accuracy: 0.87
Department Accuracy: 0.71


# Summary

In the provided space below, briefly answer the following questions.

1. Is accuracy the best metric to use on this data? Why or why not?

2. What activation functions did you choose for your output layers, and why?

3. Can you name a few ways that this model might be improved?

YOUR ANSWERS HERE

1. Accuracy is not always the best metric, especially if the dataset is imbalanced. In a classification problem with imbalanced classes, accuracy may give a misleading sense of model performance by favoring the majority class. Instead, metrics like precision, recall, or F1-score may provide a better picture, particularly for the Attrition prediction, where class imbalance is common (e.g., more employees might stay than leave). These metrics can offer insights into the model's sensitivity to minority classes.


2. For the attrition_output layer, I used sigmoid activation, as it is suited for binary classification. The sigmoid function outputs probabilities between 0 and 1, which is ideal for a "Yes" or "No" prediction on attrition.

For the department_output layer, I used softmax activation, which is appropriate for multi-class classification. The softmax function provides a probability distribution across multiple classes, allowing the model to make a prediction on which department an employee is best suited for based on the highest probability.

3. Feature Engineering: Creating additional meaningful features or using domain-specific knowledge could enhance model performance. For instance, deriving tenure duration or calculating satisfaction scores could improve predictions.

Hyperparameter Tuning: Experimenting with hyperparameters such as the learning rate, batch size, and the number of layers or neurons could yield better performance. Techniques like Grid Search or Random Search can help find optimal values.

Using Ensemble Methods: Implementing ensemble techniques, such as bagging or stacking with other machine learning models, may enhance performance. These methods leverage the strengths of multiple models.

Regularization Techniques: Adding dropout layers or L2 regularization to avoid overfitting could improve the model's generalizability on unseen data.

Evaluate with Additional Metrics: Using precision, recall, or F1-score, especially for imbalanced data, could provide more insights and help adjust the model accordingly to handle class imbalances.