## Part 1: Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras import layers

# Load the employee attrition dataset from the hosted CSV
# and preview its first rows
attrition_df = pd.read_csv(
    filepath_or_buffer='https://static.bc-edx.com/ai/ail-v-1-0/m19/lms/datasets/attrition.csv'
)
attrition_df.head()

In [None]:
# Count the distinct values found in every column
attrition_df.nunique(axis=0)

In [None]:
# Build the target frame from the two label columns: Attrition and Department
y_df = attrition_df.loc[:, ['Attrition', 'Department']]

In [None]:
# Select the ten employee attributes used as model features.
# X and X_df are two names for the same selection.
X = X_df = attrition_df.loc[:, [
    'Education',
    'Age',
    'JobSatisfaction',
    'DistanceFromHome',
    'YearsAtCompany',
    'NumCompaniesWorked',
    'TotalWorkingYears',
    'YearsInCurrentRole',
    'WorkLifeBalance',
    'StockOptionLevel',
]]
print(X_df.dtypes)

In [None]:
# Pick the ten columns that make up the X data
X = attrition_df[
    [
        'Education', 'Age', 'JobSatisfaction', 'DistanceFromHome',
        'YearsAtCompany', 'NumCompaniesWorked', 'TotalWorkingYears',
        'YearsInCurrentRole', 'WorkLifeBalance', 'StockOptionLevel',
    ]
]

# X_df refers to the very same selection
X_df = X

# Display the data type of each selected column
X_df.dtypes.head(n=10)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y_df,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Convert your X data to numeric data types however you see fit
# Add new code cells as necessary
#
# Inspect the dtypes of both splits side by side. A notebook cell only
# displays its final expression, so two separate bare expressions would
# silently drop the first one; combining them keeps both visible.
pd.concat(
    [X_train.dtypes, X_test.dtypes],
    axis=1,
    keys=['X_train', 'X_test'],
).head(5)

In [None]:
# Standardize the features with statistics learned from the training split only
from sklearn.preprocessing import StandardScaler

# fit() hands back the fitted scaler, so creation and fitting are chained
scaler = StandardScaler().fit(X_train)

# Wrap the scaled arrays back into DataFrames so that the original
# column labels and row index are preserved
X_train_scaled = pd.DataFrame(
    data=scaler.transform(X_train),
    index=X_train.index,
    columns=X_train.columns,
)
X_test_scaled = pd.DataFrame(
    data=scaler.transform(X_test),
    index=X_test.index,
    columns=X_test.columns,
)

In [None]:
# Preview the first five rows of the scaled training features
print(X_train_scaled.head(n=5))

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the Department target. The encoder is fitted on the
# training labels only and returns dense arrays (sparse_output=False).
department_encoder = OneHotEncoder(sparse_output=False).fit(y_train[['Department']])

# Names of the generated one-hot columns
department_columns = department_encoder.get_feature_names_out(['Department'])

# Apply the fitted encoder to the training labels first, then the testing labels
y_train_dept_encoded, y_test_dept_encoded = (
    department_encoder.transform(labels[['Department']])
    for labels in (y_train, y_test)
)

y_train_dept_encoded

In [None]:
# Display the one-hot column names produced by the Department encoder
department_columns

In [None]:
# One-hot encode the Attrition target. The encoder is fitted on the
# training labels only and returns dense arrays (sparse_output=False).
attrition_encoder = OneHotEncoder(sparse_output=False).fit(y_train[['Attrition']])

# Names of the generated one-hot columns
attrition_columns = attrition_encoder.get_feature_names_out(['Attrition'])

# Apply the fitted encoder to the training labels first, then the testing labels
y_train_attr_encoded, y_test_attr_encoded = (
    attrition_encoder.transform(labels[['Attrition']])
    for labels in (y_train, y_test)
)

y_train_attr_encoded

In [None]:
# Display the one-hot column names produced by the Attrition encoder
attrition_columns

## Part 2: Create, Compile, and Train the Model

In [None]:
# Inspect the training matrix dimensions; the second entry is the column count
X_train_scaled.shape

# Input layer: one slot per feature column
input_layer = layers.Input(
    shape=(len(X_train_scaled.columns),),
    name='input_features',
)

# First hidden layer, fed directly by the inputs
hidden_layer_1 = layers.Dense(
    units=64, activation='relu', name='hidden_layer_1'
)(input_layer)

# Two layers shared by both prediction branches
shared_layer1 = layers.Dense(
    units=64, activation='relu', name='shared_layer_1'
)(hidden_layer_1)
shared_layer2 = layers.Dense(
    units=128, activation='relu', name='shared_layer_2'
)(shared_layer1)

In [None]:
# Create a branch for Department
# with a hidden layer and an output layer

# Create the hidden layer (fed by the last shared layer)
dept_hidden_layer = layers.Dense(32, activation='relu', name='dept_hidden_layer')(shared_layer2)

# Create the output layer.
# The number of units is taken from the one-hot encoded training target
# instead of being hard-coded, so the layer always matches the number of
# Department categories the encoder produced.
department_output = layers.Dense(
    y_train_dept_encoded.shape[1],
    activation='softmax',
    name='department_output',
)(dept_hidden_layer)

In [None]:
# Create a branch for Attrition
# with a hidden layer and an output layer

# Create the hidden layer (fed by the last shared layer)
attr_hidden_layer = layers.Dense(32, activation='relu', name='attr_hidden_layer')(shared_layer2)

# Create the output layer.
# The number of units is taken from the one-hot encoded training target
# instead of being hard-coded, so the layer always matches the number of
# Attrition categories the encoder produced.
# NOTE(review): sigmoid scores each one-hot column independently, so the
# outputs need not sum to 1; confirm this is intended rather than softmax.
attrition_output = layers.Dense(
    y_train_attr_encoded.shape[1],
    activation='sigmoid',
    name='attrition_output',
)(attr_hidden_layer)

In [None]:
# Assemble the two-headed model: one shared input, two named outputs
model = Model(
    inputs=input_layer,
    outputs=[department_output, attrition_output],
)

# Compile with a separate loss and an accuracy metric for each output,
# keyed by the output layer names
model.compile(
    optimizer='adam',
    loss={
        'department_output': 'categorical_crossentropy',
        'attrition_output': 'binary_crossentropy',
    },
    metrics={
        'department_output': 'accuracy',
        'attrition_output': 'accuracy',
    },
)

# Print the layer-by-layer summary
model.summary()

In [None]:
# Fit both heads for 100 epochs in batches of 32, using the test split
# as validation data; targets are keyed by output layer name
history = model.fit(
    x=X_train_scaled,
    y={
        'department_output': y_train_dept_encoded,
        'attrition_output': y_train_attr_encoded,
    },
    validation_data=(
        X_test_scaled,
        {
            'department_output': y_test_dept_encoded,
            'attrition_output': y_test_attr_encoded,
        },
    ),
    epochs=100,
    batch_size=32,
    verbose=1,
)

In [None]:
# Score the trained model on the held-out test split and show the raw results
test_results = model.evaluate(
    x=X_test_scaled,
    y={
        'department_output': y_test_dept_encoded,
        'attrition_output': y_test_attr_encoded,
    },
)
test_results

In [None]:
# Run the evaluation on the testing data; targets are keyed by output layer name
test_results = model.evaluate(
    X_test_scaled,
    {
        'department_output': y_test_dept_encoded,
        'attrition_output': y_test_attr_encoded,
    },
)
test_results

In [None]:
# Print the accuracy for both department and attrition.
# The metrics are looked up by name rather than by list position: the order
# of the list returned by model.evaluate() for a multi-output model may differ
# between Keras versions, so fixed indices can end up printing a loss value
# instead of an accuracy.
named_results = model.evaluate(
    X_test_scaled,
    {'department_output': y_test_dept_encoded, 'attrition_output': y_test_attr_encoded},
    verbose=0,  # the evaluation progress output is not needed here
    return_dict=True,
)
print(f"Department Accuracy: {named_results['department_output_accuracy']}")
print(f"Attrition Accuracy: {named_results['attrition_output_accuracy']}")

In [None]:
# Tally how many rows fall into each Attrition class
y_df.loc[:, 'Attrition'].value_counts()

In [None]:
# Tally how many rows share each DistanceFromHome value
X.loc[:, 'DistanceFromHome'].value_counts()