## Part 1: Preprocessing

In [36]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers

#  Import and read the attrition data
attrition_df = pd.read_csv('https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv')
attrition_df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,HourlyRate,JobInvolvement,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,41,Yes,Travel_Rarely,Sales,1,2,Life Sciences,2,94,3,...,3,1,0,8,0,1,6,4,0,5
1,49,No,Travel_Frequently,Research & Development,8,1,Life Sciences,3,61,2,...,4,4,1,10,3,3,10,7,1,7
2,37,Yes,Travel_Rarely,Research & Development,2,2,Other,4,92,2,...,3,2,0,7,3,3,0,0,0,0
3,33,No,Travel_Frequently,Research & Development,3,4,Life Sciences,4,56,3,...,3,3,0,8,3,3,8,7,3,0
4,27,No,Travel_Rarely,Research & Development,2,1,Medical,1,40,3,...,3,4,1,6,3,3,2,2,2,2


In [37]:
# Determine the number of unique values in each column.
attrition_df.nunique()

Unnamed: 0,0
Age,43
Attrition,2
BusinessTravel,3
Department,3
DistanceFromHome,29
Education,5
EducationField,6
EnvironmentSatisfaction,4
HourlyRate,71
JobInvolvement,4


In [38]:
# Create y_df with the Attrition and Department columns
y_df = attrition_df[['Attrition', 'Department']]


In [39]:
# Create a list of at least 10 column names to use as X data
selected_columns = ['Age', 'DistanceFromHome', 'Education', 'JobLevel', 'HourlyRate',
                    'EnvironmentSatisfaction', 'JobInvolvement', 'JobSatisfaction',
                    'NumCompaniesWorked', 'TotalWorkingYears']

# Create X_df using your selected columns
X_df = attrition_df[selected_columns]

# Show the data types for X_df
print(X_df.dtypes)



Age                        int64
DistanceFromHome           int64
Education                  int64
JobLevel                   int64
HourlyRate                 int64
EnvironmentSatisfaction    int64
JobInvolvement             int64
JobSatisfaction            int64
NumCompaniesWorked         int64
TotalWorkingYears          int64
dtype: object


In [40]:
# Split the data into training and testing sets
from sklearn.model_selection import train_test_split



In [41]:
# Convert X data to numeric data types
X_df = X_df.apply(pd.to_numeric, errors='coerce')

# Check the data types after conversion
print(X_df.dtypes)


Age                        int64
DistanceFromHome           int64
Education                  int64
JobLevel                   int64
HourlyRate                 int64
EnvironmentSatisfaction    int64
JobInvolvement             int64
JobSatisfaction            int64
NumCompaniesWorked         int64
TotalWorkingYears          int64
dtype: object


In [42]:
# Create a StandardScaler
scaler = StandardScaler()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_df, y_df, test_size=0.2, random_state=42)

# Fit the StandardScaler to the training data
scaler.fit(X_train)

# Scale the training and testing data
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Print the scaled data
print(X_train_scaled)
print(X_test_scaled)



[[-1.38855944  1.44039645 -0.86335572 ... -1.58233648 -1.05916816
  -1.1673683 ]
 [-2.04073779 -0.52269928 -0.86335572 ...  1.15283407 -0.65943075
  -1.42339685]
 [-0.84507748  1.31770296 -0.86335572 ...  1.15283407 -0.25969335
  -0.14325407]
 ...
 [-1.60595222 -0.76808624  1.06322176 ...  1.15283407 -1.05916816
  -1.29538258]
 [-0.84507748  0.45884859 -0.86335572 ... -0.67061296  0.53978146
  -0.14325407]
 [ 1.43754676 -0.03192534  0.09993302 ...  0.24111056  0.53978146
   2.03298865]]
[[-0.95377387 -0.52269928  0.09993302 ... -1.58233648 -1.05916816
  -0.65531119]
 [ 1.76363593  0.45884859 -0.86335572 ... -1.58233648 -0.65943075
  -0.78332546]
 [-1.38855944  1.56308993 -1.82664446 ...  0.24111056 -0.65943075
  -1.29538258]
 ...
 [ 1.43754676  2.29925083  0.09993302 ... -1.58233648  0.53978146
   1.7769601 ]
 [ 0.02449366  0.09076814 -0.86335572 ...  1.15283407  0.14004406
  -0.91133974]
 [ 1.11145758 -0.89077972 -0.86335572 ...  1.15283407 -1.05916816
  -0.39928263]]


In [43]:
from sklearn.preprocessing import OneHotEncoder

# Create a OneHotEncoder for the Department column
encoder = OneHotEncoder(handle_unknown='ignore')

# Fit the encoder to the training data
encoder.fit(y_train[['Department']])

# Create two new variables by applying the encoder to the training and testing data
department_train_encoded = encoder.transform(y_train[['Department']])
department_test_encoded = encoder.transform(y_test[['Department']])

# Print the encoded data
print(department_train_encoded)
print(department_test_encoded)

  (0, 1)	1.0
  (1, 1)	1.0
  (2, 2)	1.0
  (3, 1)	1.0
  (4, 1)	1.0
  (5, 0)	1.0
  (6, 1)	1.0
  (7, 0)	1.0
  (8, 2)	1.0
  (9, 1)	1.0
  (10, 2)	1.0
  (11, 2)	1.0
  (12, 2)	1.0
  (13, 1)	1.0
  (14, 0)	1.0
  (15, 1)	1.0
  (16, 2)	1.0
  (17, 1)	1.0
  (18, 1)	1.0
  (19, 2)	1.0
  (20, 2)	1.0
  (21, 2)	1.0
  (22, 2)	1.0
  (23, 1)	1.0
  (24, 1)	1.0
  :	:
  (1151, 1)	1.0
  (1152, 1)	1.0
  (1153, 1)	1.0
  (1154, 1)	1.0
  (1155, 2)	1.0
  (1156, 1)	1.0
  (1157, 2)	1.0
  (1158, 1)	1.0
  (1159, 1)	1.0
  (1160, 1)	1.0
  (1161, 1)	1.0
  (1162, 1)	1.0
  (1163, 2)	1.0
  (1164, 1)	1.0
  (1165, 1)	1.0
  (1166, 1)	1.0
  (1167, 2)	1.0
  (1168, 2)	1.0
  (1169, 1)	1.0
  (1170, 1)	1.0
  (1171, 1)	1.0
  (1172, 1)	1.0
  (1173, 1)	1.0
  (1174, 1)	1.0
  (1175, 2)	1.0
  (0, 2)	1.0
  (1, 1)	1.0
  (2, 0)	1.0
  (3, 1)	1.0
  (4, 1)	1.0
  (5, 2)	1.0
  (6, 1)	1.0
  (7, 1)	1.0
  (8, 2)	1.0
  (9, 1)	1.0
  (10, 2)	1.0
  (11, 2)	1.0
  (12, 1)	1.0
  (13, 1)	1.0
  (14, 1)	1.0
  (15, 1)	1.0
  (16, 1)	1.0
  (17, 1)	1.0
  (18, 1)	1.

In [44]:
# Create a OneHotEncoder for the Attrition column
encoder_attrition = OneHotEncoder(handle_unknown='ignore')

# Fit the encoder to the training data
encoder_attrition.fit(y_train[['Attrition']])

# Create two new variables by applying the encoder to the training and testing data
attrition_train_encoded = encoder_attrition.transform(y_train[['Attrition']])
attrition_test_encoded = encoder_attrition.transform(y_test[['Attrition']])

# Print the encoded data
print(attrition_train_encoded)
print(attrition_test_encoded)



  (0, 0)	1.0
  (1, 0)	1.0
  (2, 0)	1.0
  (3, 0)	1.0
  (4, 0)	1.0
  (5, 0)	1.0
  (6, 0)	1.0
  (7, 0)	1.0
  (8, 0)	1.0
  (9, 0)	1.0
  (10, 0)	1.0
  (11, 0)	1.0
  (12, 0)	1.0
  (13, 0)	1.0
  (14, 0)	1.0
  (15, 1)	1.0
  (16, 1)	1.0
  (17, 0)	1.0
  (18, 0)	1.0
  (19, 0)	1.0
  (20, 0)	1.0
  (21, 0)	1.0
  (22, 0)	1.0
  (23, 0)	1.0
  (24, 0)	1.0
  :	:
  (1151, 0)	1.0
  (1152, 0)	1.0
  (1153, 0)	1.0
  (1154, 1)	1.0
  (1155, 0)	1.0
  (1156, 0)	1.0
  (1157, 0)	1.0
  (1158, 0)	1.0
  (1159, 1)	1.0
  (1160, 0)	1.0
  (1161, 1)	1.0
  (1162, 0)	1.0
  (1163, 1)	1.0
  (1164, 0)	1.0
  (1165, 0)	1.0
  (1166, 0)	1.0
  (1167, 0)	1.0
  (1168, 0)	1.0
  (1169, 0)	1.0
  (1170, 0)	1.0
  (1171, 0)	1.0
  (1172, 0)	1.0
  (1173, 1)	1.0
  (1174, 0)	1.0
  (1175, 0)	1.0
  (0, 0)	1.0
  (1, 0)	1.0
  (2, 1)	1.0
  (3, 0)	1.0
  (4, 0)	1.0
  (5, 0)	1.0
  (6, 0)	1.0
  (7, 0)	1.0
  (8, 0)	1.0
  (9, 0)	1.0
  (10, 0)	1.0
  (11, 0)	1.0
  (12, 0)	1.0
  (13, 0)	1.0
  (14, 0)	1.0
  (15, 0)	1.0
  (16, 0)	1.0
  (17, 1)	1.0
  (18, 1)	1.

## Create, Compile, and Train the Model

In [45]:
# Scale the training and testing data
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the number of columns
num_columns = X_train_scaled.shape[1]

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Define the input layer and shared layers
input_layer = Input(shape=(num_columns,))
shared_layer1 = Dense(units=64, activation='relu')(input_layer)
shared_layer2 = Dense(units=32, activation='relu')(shared_layer1)

# Define branches for Attrition and Department
attrition_hidden_layer = Dense(units=64, activation='relu')(shared_layer2)
attrition_output_layer = Dense(units=2, activation='softmax', name="attrition_output")(attrition_hidden_layer)

department_hidden_layer = Dense(units=64, activation='relu')(shared_layer2)
department_output_layer = Dense(units=3, activation='softmax', name="department_output")(department_hidden_layer)

# Define the model
model = Model(inputs=input_layer, outputs=[attrition_output_layer, department_output_layer])

# Compile the model
model.compile(
    optimizer='adam',
    loss={
        'attrition_output': 'categorical_crossentropy',
        'department_output': 'categorical_crossentropy'
    },
    metrics={
        'attrition_output': 'accuracy',
        'department_output': 'accuracy'
    }
)

# Display the model summary
model.summary()


In [46]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Define the input layer and shared layers
input_layer = Input(shape=(num_columns,))
shared_layer1 = Dense(units=64, activation='relu')(input_layer)
shared_layer2 = Dense(units=32, activation='relu')(shared_layer1)

# Define branches for Attrition and Department
attrition_hidden_layer = Dense(units=64, activation='relu')(shared_layer2)
attrition_output_layer = Dense(units=2, activation='softmax', name="attrition_output")(attrition_hidden_layer)

# Modified Department branch - Adding an extra hidden layer
department_hidden_layer1 = Dense(units=64, activation='relu')(shared_layer2) # This is the original department hidden layer
department_hidden_layer2 = Dense(units=64, activation='relu')(department_hidden_layer1) # This is the additional hidden layer you wanted to add
department_output_layer = Dense(units=3, activation='softmax', name="department_output")(department_hidden_layer2) # Connecting to the new hidden layer

# Define the model
model = Model(inputs=input_layer, outputs=[attrition_output_layer, department_output_layer])

# Compile the model (rest remains the same)
model.compile(
    optimizer='adam',
    loss={
        'attrition_output': 'categorical_crossentropy',
        'department_output': 'categorical_crossentropy'
    },
    metrics={
        'attrition_output': 'accuracy',
        'department_output': 'accuracy'
    }
)

# Display the model summary
model.summary()



In [47]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Assuming 'num_columns' is defined and represents the number of features in your input data

# Define the input layer
input_layer = Input(shape=(num_columns,))

# Existing shared layers (if any)
# ... your existing code for shared_layer1, shared_layer2 ...

# Define branches for Attrition and Department
# ... your existing code for department branch ...

# Define the Attrition branch layers
attrition_hidden_layer = Dense(units=64, activation='relu')(shared_layer2) # Connect to shared layer
attrition_output_layer = Dense(units=2, activation='softmax', name="attrition_output")(attrition_hidden_layer)

# Define the model with both branches as outputs
model = Model(inputs=input_layer, outputs=[attrition_output_layer, department_output_layer]) #department_output_layer is output of your existing department branch


# Compile the model
model.compile(
    optimizer='adam',
    loss={
        'attrition_output': 'categorical_crossentropy',
        'department_output': 'categorical_crossentropy'
    },
    metrics={
        'attrition_output': 'accuracy',
        'department_output': 'accuracy'
    }
)

# Display the model summary
model.summary()


In [48]:
# Create the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


# Summarize the model
model.summary()


In [49]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
print(Input) # Print the Input function/class to check if it was assigned over. If this shows the expected functionality, then the problem has been resolved by defining num_columns properly

# Assuming X_train_scaled has 10 columns
num_columns = 10  # Or however many columns your X_train_scaled data has

# Define the input layer
input_layer = Input(shape=(num_columns,))

# Existing shared layers (if any)
# ... your existing code for shared_layer1, shared_layer2 ...

# Define branches for Attrition and Department
# ... your existing code for department branch ...

# Define the Attrition branch layers
attrition_hidden_layer = Dense(units=64, activation='relu')(shared_layer2) # Connect to shared layer
attrition_output_layer = Dense(units=2, activation='softmax', name="attrition_output")(attrition_hidden_layer)

# Define the model with both branches as outputs
model = Model(inputs=input_layer, outputs=[attrition_output_layer, department_output_layer]) #department_output_layer is output of your existing department branch


# Compile the model
model.compile(
    optimizer='adam',
    loss={
        'attrition_output': 'categorical_crossentropy',
        'department_output': 'categorical_crossentropy'
    },
    metrics={
        'attrition_output': 'accuracy',
        'department_output': 'accuracy'
    }
)

# Display the model summary
model.summary()

<function Input at 0x78ecc45c5000>


In [50]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
print(Input) # Print the Input function/class to check if it was assigned over. If this shows the expected functionality, then the problem has been resolved by defining num_columns properly

# Assuming X_train_scaled has 10 columns
num_columns = 10  # Or however many columns your X_train_scaled data has

# Define the input layer
input_layer = Input(shape=(num_columns,))

# Define the shared layers (Make sure this is present)
shared_layer1 = Dense(units=64, activation='relu')(input_layer)
shared_layer2 = Dense(units=32, activation='relu')(shared_layer1)


# Define branches for Attrition and Department
# Define the Attrition branch layers
attrition_hidden_layer = Dense(units=64, activation='relu')(shared_layer2) # Connect to shared layer
attrition_output_layer = Dense(units=2, activation='softmax', name="attrition_output")(attrition_hidden_layer)

# Define the Department Branch
# Modified Department branch - Adding an extra hidden layer
department_hidden_layer1 = Dense(units=64, activation='relu')(shared_layer2) # This is the original department hidden layer
department_hidden_layer2 = Dense(units=64, activation='relu')(department_hidden_layer1) # This is the additional hidden layer you wanted to add
department_output_layer = Dense(units=3, activation='softmax', name="department_output")(department_hidden_layer2) # Connecting to the new hidden layer



# Define the model with both branches as outputs
model = Model(inputs=input_layer, outputs=[attrition_output_layer, department_output_layer]) #department_output_layer is output of your existing department branch


# Compile the model
model.compile(
    optimizer='adam',
    loss={
        'attrition_output': 'categorical_crossentropy',
        'department_output': 'categorical_crossentropy'
    },
    metrics={
        'attrition_output': 'accuracy',
        'department_output': 'accuracy'
    }
)

# Display the model summary
model.summary()

<function Input at 0x78ecc45c5000>


In [51]:
# Evaluate the model on the test data
try:
    losses, attrition_accuracy, department_accuracy = model.evaluate(
        X_test_scaled,
        [attrition_test_encoded, department_test_encoded],
        return_dict=True
    )

    # Print the accuracies
    print(f'Attrition Accuracy: {attrition_accuracy}')
    print(f'Department Accuracy: {department_accuracy}')
except Exception as e:
    print(f"An error occurred during evaluation: {e}")
    print("Model output shapes:")
    attrition_output, department_output = model(X_test_scaled)
    print(f"Attrition output shape: {attrition_output.shape}")
    print(f"Department output shape: {department_output.shape}")
    print("Target data shapes:")
    print(f"Attrition target shape: {attrition_test_encoded.shape}")
    print(f"Department target shape: {department_test_encoded.shape}")

An error occurred during evaluation: Failed to convert elements of SparseTensor(indices=Tensor("DeserializeSparse:0", shape=(None, 2), dtype=int64), values=Tensor("DeserializeSparse:1", shape=(None,), dtype=float32), dense_shape=Tensor("stack:0", shape=(2,), dtype=int64)) to Tensor. Consider casting elements to a supported type. See https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.
Model output shapes:
Attrition output shape: (294, 2)
Department output shape: (294, 3)
Target data shapes:
Attrition target shape: (294, 2)
Department target shape: (294, 3)


# Summary

In the provided space below, briefly answer the following questions.

1. Is accuracy the best metric to use on this data? Why or why not?

2. What activation functions did you choose for your output layers, and why?

3. Can you name a few ways that this model might be improved?

YOUR ANSWERS HERE

1. Accuracy may not always be the best metric for evaluating the performance of a model, especially for imbalanced datasets. In your case, the Attrition column has only two unique values, suggesting a binary classification task. If one class significantly outnumbers the other, accuracy can give a misleading impression of performance by favoring the majority class. For example, if 90% of employees do not attrite, a model predicting “No Attrition” for all cases will achieve 90% accuracy without actually being useful. Alternative metrics:
	•	Use Precision, Recall, or F1-Score to evaluate the model’s ability to identify minority classes effectively.
	•	Consider ROC-AUC for understanding how well the model separates the classes.

For the Department prediction (a multi-class classification), accuracy can be a reasonable starting metric. However, it’s also useful to evaluate metrics like categorical cross-entropy loss, macro-averaged precision, and recall to ensure balanced performance across all classes.
2. What activation functions did you choose for your output layers, and why?

You used the softmax activation function for both output layers:
	•	Attrition output layer (2 units): Softmax is appropriate because it outputs probabilities for each class (e.g., “Yes” or “No”), ensuring that the probabilities sum to 1. This makes it ideal for binary or multi-class classification tasks.
	•	Department output layer (3 units): Softmax is again suitable here, as it assigns probabilities to each department category, enabling the model to pick the most likely class.

Using softmax aligns well with the loss function (categorical cross-entropy) and is standard practice for classification tasks.
3. Here are a few potential improvements:

a. Feature Engineering:

	•	Include more relevant features, such as interaction terms or polynomial features, that may capture non-linear relationships.
	•	Encode categorical variables like BusinessTravel and MaritalStatus as numerical inputs (using one-hot encoding or embeddings).

b. Model Architecture:

	•	Add dropout layers to reduce the risk of overfitting.
	•	Experiment with different hidden layer configurations, such as varying the number of units or using a different activation function like LeakyReLU for better gradient flow.
	•	Use batch normalization to stabilize and accelerate training.

c. Optimization and Hyperparameter Tuning:

	•	Experiment with different optimizers, such as SGD with momentum, or adjust hyperparameters like learning rate and batch size.
	•	Use tools like Grid Search or Bayesian Optimization to tune hyperparameters systematically.

d. Data Handling:

	•	Address any class imbalance in the Attrition target variable using techniques like oversampling (e.g., SMOTE), undersampling, or class-weight adjustments in the loss function.

e. Model Evaluation:

	•	Implement cross-validation to ensure robust evaluation across multiple splits of the data.
	•	Use confusion matrices and classification reports for deeper insights into performance.

f. Ensemble Learning:

	•	Combine this neural network model with ensemble methods like Random Forest or Gradient Boosting to create a more robust hybrid model.

g. Advanced Techniques:

	•	Use pre-trained embeddings for categorical variables if the dataset size justifies it.
	•	Experiment with regularization techniques such as L2 regularization or early stopping to avoid overfitting.
