## Part 1: Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from sklearn.preprocessing import OneHotEncoder


# Load the employee attrition dataset from the hosted CSV into a DataFrame
attrition_df = pd.read_csv(
    filepath_or_buffer="https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv"
)
# Preview the first five rows
attrition_df.head(n=5)

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,Sales,1,2,Life Sciences,2,94,3,...,3,1,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,Research & Development,8,1,Life Sciences,3,61,2,...,4,4,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,Research & Development,2,2,Other,4,92,2,...,3,2,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,Research & Development,3,4,Life Sciences,4,56,3,...,3,3,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,Research & Development,2,1,Medical,1,40,3,...,3,4,1,6,3,3,2,2,2,2


In [2]:
# Count the distinct non-null values found in each column
attrition_df.nunique(axis=0, dropna=True)

Unnamed: 0,0
Age,43
Attrition,2
BusinessTravel,3
Department,3
DistanceFromHome,29
Education,5
EducationField,6
EnvironmentSatisfaction,4
HourlyRate,71
JobInvolvement,4


In [3]:
# Build the target frame y_df from the two label columns: Attrition and Department
y_df = attrition_df.loc[:, ["Attrition", "Department"]]
print(y_df.head(n=5))

  Attrition              Department
0       Yes                   Sales
1        No  Research & Development
2       Yes  Research & Development
3        No  Research & Development
4        No  Research & Development


In [4]:
# Print every column label in attrition_df so the X feature names
# can be picked (and spelled) exactly as they appear in the data
print(attrition_df.columns)

Index(['Age', 'Attrition', 'BusinessTravel', 'Department', 'DistanceFromHome',
       'Education', 'EducationField', 'EnvironmentSatisfaction', 'HourlyRate',
       'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction',
       'MaritalStatus', 'NumCompaniesWorked', 'OverTime', 'PercentSalaryHike',
       'PerformanceRating', 'RelationshipSatisfaction', 'StockOptionLevel',
       'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',
       'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')


In [None]:
# The ten feature columns used as X data (order is preserved in X_df)
selected_columns = [
    "Education", "Age", "DistanceFromHome", "JobSatisfaction", "OverTime",
    "StockOptionLevel", "WorkLifeBalance", "YearsAtCompany",
    "YearsSinceLastPromotion", "NumCompaniesWorked",
]

# Select those feature columns into X_df
X_df = attrition_df.loc[:, selected_columns]

# Inspect the dtype of each feature to spot columns that still need encoding
print(X_df.dtypes)

Education                   int64
Age                         int64
DistanceFromHome            int64
JobSatisfaction             int64
OverTime                   object
StockOptionLevel            int64
WorkLifeBalance             int64
YearsAtCompany              int64
YearsSinceLastPromotion     int64
NumCompaniesWorked          int64
dtype: object


In [6]:
# Preview the first five rows of the selected features
X_df.head(n=5)

Unnamed: 0,Education,Age,DistanceFromHome,JobSatisfaction,OverTime,StockOptionLevel,WorkLifeBalance,YearsAtCompany,YearsSinceLastPromotion,NumCompaniesWorked
0,2,41,1,4,Yes,0,1,6,0,8
1,1,49,8,2,No,1,3,10,1,1
2,2,37,2,3,Yes,0,3,0,0,6
3,4,33,3,3,Yes,0,3,8,3,1
4,1,27,2,2,No,1,3,2,2,9


### Split the data into training and testing sets.

In [7]:
# Hold out 25% of the rows for testing (the train_test_split default, written
# out here); the fixed random_state keeps the split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y_df,
    test_size=0.25,
    random_state=3,
)

### Convert categorical data to numeric in the X data set, then scale the X data set

In [None]:
# Show how the categorical OverTime column is distributed in X_train
print(
    "---- X_train OverTime Data -------",
    X_train["OverTime"].value_counts(),
    sep="\n",
)

---- X_train OverTime Data -------
OverTime
No     793
Yes    309
Name: count, dtype: int64


In [None]:
# Show how the categorical OverTime column is distributed in X_test
print(
    "---- X_test OverTime Data -------",
    X_test["OverTime"].value_counts(),
    sep="\n",
)

---- X_test OverTime Data -------
OverTime
No     261
Yes    107
Name: count, dtype: int64


In [None]:
# Convert the OverTime column of X_train from "Yes"/"No" strings to 1/0.
# The mapping also sends 1 -> 1 and 0 -> 0, so re-running this cell leaves
# already-converted values untouched instead of turning them all into NaN.
# .assign() rebinds X_train to a new frame rather than writing into the
# frame returned by the split.
X_train = X_train.assign(
    OverTime=X_train["OverTime"].map({"Yes": 1, "No": 0, 1: 1, 0: 0})
)
print(X_train["OverTime"].value_counts())

OverTime
0    793
1    309
Name: count, dtype: int64


In [None]:
# Convert the OverTime column of X_test from "Yes"/"No" strings to 1/0.
# The mapping also sends 1 -> 1 and 0 -> 0, so re-running this cell leaves
# already-converted values untouched instead of turning them all into NaN.
# .assign() rebinds X_test to a new frame rather than writing into the
# frame returned by the split.
X_test = X_test.assign(
    OverTime=X_test["OverTime"].map({"Yes": 1, "No": 0, 1: 1, 0: 0})
)
print(X_test["OverTime"].value_counts())

OverTime
0    261
1    107
Name: count, dtype: int64


In [12]:
# Build the StandardScaler and fit it on the training features only,
# so no statistics from the test split leak into the scaling
scaler = StandardScaler().fit(X_train)

# Apply the training-fitted scaling to both splits
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# One-hot encoder for the Department label, fitted on the training labels only
encoder_dept = OneHotEncoder(sparse_output=False).fit(y_train[["Department"]])

# Encode the Department column of the training and testing labels
# with the same fitted encoder
y_train_dept_encoded, y_test_dept_encoded = (
    encoder_dept.transform(labels[["Department"]]) for labels in (y_train, y_test)
)

# Show the first three encoded rows of each split
print(y_train_dept_encoded[:3])
print(y_test_dept_encoded[:3])

[[0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]]
[[0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]


In [14]:
# One-hot encoder for the Attrition label, fitted on the training labels only
encoder_attrition = OneHotEncoder(sparse_output=False).fit(y_train[["Attrition"]])

# Encode the Attrition column of the training and testing labels
# with the same fitted encoder
y_train_att_encoded, y_test_att_encoded = (
    encoder_attrition.transform(labels[["Attrition"]]) for labels in (y_train, y_test)
)

# Show the first three encoded rows of each split
print(y_train_att_encoded[:3])
print(y_test_att_encoded[:3])

[[1. 0.]
 [1. 0.]
 [1. 0.]]
[[0. 1.]
 [1. 0.]
 [1. 0.]]


## Part 2: Create, Compile, and Train the Model

In [15]:
# The number of input features is the width of the scaled training matrix
x_columns = X_train_scaled.shape[-1]

# Input layer sized to the feature count
input_layer = layers.Input(shape=(x_columns,), name="InputLayer")

# Two stacked dense layers whose output feeds both prediction branches
shared_layer_1 = layers.Dense(64, activation="relu")(input_layer)
shared_layer_2 = layers.Dense(128, activation="relu")(shared_layer_1)

In [None]:
# Department branch: one hidden layer followed by a softmax output layer

# Hidden layer fed by the last shared layer
dept_hid_layer = layers.Dense(32, activation="relu")(shared_layer_2)

# Output layer with one unit per one-hot encoded Department column
dept_output_layer = layers.Dense(
    units=y_train_dept_encoded.shape[1],
    activation="softmax",
    name="department_output",
)(dept_hid_layer)

In [None]:
# Attrition branch: one hidden layer followed by a softmax output layer

# Hidden layer fed by the last shared layer
attrition_hid_layer = layers.Dense(32, activation="relu")(shared_layer_2)

# Output layer with one unit per one-hot encoded Attrition column
attrition_output_layer = layers.Dense(
    units=y_train_att_encoded.shape[1],
    activation="softmax",
    name="attrition_output",
)(attrition_hid_layer)

In [None]:
# Assemble the two-headed model: one input, outputs ordered
# [department, attrition]
model = Model(
    inputs=input_layer,
    outputs=[dept_output_layer, attrition_output_layer],
)

# Compile with a categorical cross-entropy loss and an accuracy metric per
# head; the keys match the `name=` given to each output layer
model.compile(
    optimizer="adam",
    loss=dict(
        department_output="categorical_crossentropy",
        attrition_output="categorical_crossentropy",
    ),
    metrics=dict(department_output="accuracy", attrition_output="accuracy"),
)

# Print the layer-by-layer summary
model.summary()

In [None]:
# Train both heads together for 100 epochs in batches of 32, reporting the
# loss and accuracy on the test split after every epoch.
# Targets are keyed by the output layers' names.
fit_model = model.fit(
    x=X_train_scaled,
    y=dict(
        department_output=y_train_dept_encoded,
        attrition_output=y_train_att_encoded,
    ),
    validation_data=(
        X_test_scaled,
        dict(
            department_output=y_test_dept_encoded,
            attrition_output=y_test_att_encoded,
        ),
    ),
    batch_size=32,
    epochs=100,
    verbose=1,
)

Epoch 1/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - attrition_output_accuracy: 0.7579 - attrition_output_loss: 0.5647 - department_output_accuracy: 0.4952 - department_output_loss: 1.0066 - loss: 1.5715 - val_attrition_output_accuracy: 0.8424 - val_attrition_output_loss: 0.4319 - val_department_output_accuracy: 0.6957 - val_department_output_loss: 0.7523 - val_loss: 1.1877
Epoch 2/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - attrition_output_accuracy: 0.8478 - attrition_output_loss: 0.4010 - department_output_accuracy: 0.6225 - department_output_loss: 0.8100 - loss: 1.2112 - val_attrition_output_accuracy: 0.8370 - val_attrition_output_loss: 0.4016 - val_department_output_accuracy: 0.6957 - val_department_output_loss: 0.7467 - val_loss: 1.1523
Epoch 3/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - attrition_output_accuracy: 0.8513 - attrition_output_loss: 0.3731 - department_output_ac

In [None]:
# Score the trained model on the held-out test data; targets are keyed by
# the output layers' names
model_eval = model.evaluate(
    x=X_test_scaled,
    y=dict(
        department_output=y_test_dept_encoded,
        attrition_output=y_test_att_encoded,
    ),
    verbose=1,
)

# Show the raw list of losses and metrics returned by evaluate()
print(model_eval)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - attrition_output_accuracy: 0.7996 - attrition_output_loss: 2.2049 - department_output_accuracy: 0.5062 - department_output_loss: 3.0328 - loss: 5.2395 
[4.960219383239746, 2.958111524581909, 1.9898673295974731, 0.801630437374115, 0.5163043737411499]


In [None]:
# Print the accuracy for both department and attrition.
# Run a single forward pass over the test set and unpack both heads; the
# order follows Model(outputs=[dept_output_layer, attrition_output_layer]),
# i.e. department first, attrition second. (Previously predict() was called
# twice on the same data, doing the same work two times.)
department_pred, attrition_pred = model.predict(X_test_scaled)

# Accuracy = share of rows whose most probable predicted class matches the
# class marked in the one-hot encoded labels
attrition_accuracy = np.mean(
    np.argmax(attrition_pred, axis=1) == np.argmax(y_test_att_encoded, axis=1)
)
print(f"Attrition prediction accuracy: {attrition_accuracy:.2f}")

department_accuracy = np.mean(
    np.argmax(department_pred, axis=1) == np.argmax(y_test_dept_encoded, axis=1)
)
print(f"Department prediction accuracy: {department_accuracy:.2f}")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Attrition prediction accuracy: 0.80
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Department prediction accuracy: 0.52


# Summary

In the provided space below, briefly answer the following questions.

1. Is accuracy the best metric to use on this data? Why or why not?

2. What activation functions did you choose for your output layers, and why?

3. Can you name a few ways that this model might be improved?

**Answers**

1. **No.**
Accuracy is not the most reliable metric here because it does not account for class imbalance. For instance, if the number of employees leaving the company (attrition) is significantly lower than the number staying, a model that always predicts "stays" can still score a high accuracy, which presents a misleading picture. Metrics such as precision, recall, and F1-score offer a deeper understanding of the model's performance, particularly for minority classes. These metrics take into account true positives, false positives, and false negatives, providing a more thorough evaluation of the model's effectiveness.

2. **Softmax**, for both `department_output` and `attrition_output`.
Softmax suits the multi-class `department_output` because it makes the outputs sum to 1, so they can be read as a probability for each class. For `attrition_output`, although it is a binary classification, the target is one-hot encoded into two columns (Yes/No), so softmax likewise gives a probability for each class. This allows us to interpret the outputs as the likelihood of each class, facilitating better decision-making based on predicted probabilities.

3. A few ways the model can be improved are:
   - **More Data:** Increase the amount of training data to improve model generalization. Collecting more employee data over time or from other sources can help the model learn better.

   - **Feature Engineering:** Add or modify features to provide better input data. For example, create new features like the ratio of total working years to years at the company, or encode tenure in months rather than years.

   - **Hyperparameter Tuning:** Adjust learning rates, batch sizes, and the number of neurons/layers. Try different values for these parameters to find the best combination that improves model performance.

   - **Regularization:** Implement dropout or L2 regularization to prevent overfitting. Dropout randomly ignores some neurons during training to make the model more robust, while L2 regularization adds a penalty for large weights.

   - **Different Architectures:** Experiment with different neural network architectures or other machine learning models. For example, try using convolutional neural networks (CNNs) for feature extraction or ensemble methods like random forests and gradient boosting for improved accuracy.