# HR Churn model creation and deployment

This notebook will create a model to predict if employees are about to leave a company, and then deploy that model to Watson Machine Learning as a web service.

You will need to create service credentials for your instance of Watson Machine Learning, which is listed on the [Cloud resources list](https://cloud.ibm.com/resources). Copy the credential values into the `WML_CREDENTIALS` cell below, replacing the placeholder values.

In [None]:
import os

# Service credentials for the Watson Machine Learning instance; this dict is
# passed unchanged to the WML API client later in the notebook.
#
# The secret / instance-specific fields can be supplied through environment
# variables so that real values never have to be saved inside the notebook:
#   WML_APIKEY       -> "apikey"
#   WML_INSTANCE_ID  -> "instance_id"
#   WML_URL          -> "url"
# When a variable is not set, the placeholder below is used, so the values can
# still be pasted in directly.
WML_CREDENTIALS = {
  "apikey": os.environ.get("WML_APIKEY", "xxxxxxxxxxxx"),
  "iam_apikey_description": "Auto-generated for key cdaxxxxx",
  "iam_apikey_name": "name",
  "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
  "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::",
  "instance_id": os.environ.get("WML_INSTANCE_ID", "xxxxxxxx"),
  "url": os.environ.get("WML_URL", "https://us-south.ml.cloud.ibm.com")
}

Next, import the "HR Gold v03.csv" file from your project using the "Find and add data" button at the top right of the menu, and insert it as a pandas DataFrame in a new cell at this point. The cells that follow expect the inserted DataFrame to be named `df_data_1`.

Drop the unnecessary columns from the imported data.

In [None]:
# Name and address/location columns that are removed before modelling.
columns_to_drop = [
    'FirstName', 'LastName',
    'Country', 'StreetAddress', 'City', 'Zipcode',
    'Longitude', 'Latitude',
]

# drop() returns a new frame, so the imported df_data_1 is left untouched.
df_data = df_data_1.drop(columns=columns_to_drop)
df_data.head()

In [None]:
# Show the dtype of every column that remains after the drop.
df_data.dtypes

In [None]:
# Names used when talking to Watson Machine Learning:
# MODEL_NAME is stored in the model's metadata, and DEPLOYMENT_NAME is used both
# to find an existing deployment to replace and to name the new deployment.
MODEL_NAME = "HR Churn"
DEPLOYMENT_NAME = "HR Churn - Production"

In [None]:
# Display the installed scikit-learn version.
# NOTE(review): presumably shown so it can be checked against the version the
# WML runtime supports — confirm.
import sklearn
sklearn.__version__

In [None]:
from watson_machine_learning_client import WatsonMachineLearningAPIClient

# Build the WML API client from the credentials dict defined at the top of the
# notebook; every later repository/deployment call goes through `client`.
client = WatsonMachineLearningAPIClient(WML_CREDENTIALS)

In [None]:
# List the models currently held in the WML repository (before any changes are made).
client.repository.list_models()

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Columns treated as discrete categories: missing values are replaced with the
# constant 'missing' and the column is then one-hot encoded. Categories that
# were not seen during fit are ignored at prediction time instead of raising.
#
# 'AgeWhenHired' is deliberately not listed here. It is scaled as a numeric
# column below, and listing it in both groups fed the same column to the model
# twice (once as one-hot indicator columns, once as a scaled value).
categorical_features = [
    'Gender', 'Education', 'EducationField', 'JobRole',
    'BusinessTravel', 'Department', 'EnvironmentSatisfaction', 'JobInvolvement',
    'JobLevel', 'JobSatisfaction', 'OverTime', 'PerformanceRating',
    'RelationshipSatisfaction', 'State', 'MaritalStatus',
    'Over18', 'StockOptionLevel',
]
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Numeric columns, rescaled with MinMaxScaler.
scaled_features = [
    'Age', 'YearsAtCompany', 'NumCompaniesWorked', 'TotalWorkingYears', 'TrainingTimesLastYear',
    'WorkLifeBalance', 'YearsInCurrentRole', 'YearsSinceLastPromotion', 'YearsWithCurrManager',
    'DistanceFromHQ', 'AgeWhenHired', 'PercentSalaryHike', 'MonthlyIncome', 'DailyRate', 'HourlyRate',
]
scale_transformer = Pipeline(steps=[('scale', MinMaxScaler())])

# Route each column group to its transformer. Columns that appear in neither
# list are not passed on to the classifier.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, categorical_features),
        ('scaler', scale_transformer, scaled_features),
    ]
)

# Full model: preprocessing followed by a random forest. The forest is seeded
# (same seed as the train/test split in this notebook) so that re-running the
# notebook reproduces the same model and scores.
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', RandomForestClassifier(random_state=4))])

In [None]:
# Separate the target column from the predictor columns.
y = df_data['Attrition']
X = df_data.drop(columns=['Attrition'])

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=4,
)

# fit() hands back the fitted pipeline, which is what gets stored and deployed later.
model = clf.fit(X_train, y_train)
res_predict = model.predict(X_test)

# Report accuracy on the held-out rows, then the per-class breakdown.
accuracy = model.score(X_test, y_test)
print("model score: %.3f" % accuracy)
print(classification_report(y_test, res_predict, target_names=["No", "Yes"]))

In [None]:
# Remove any existing deployment that carries this notebook's deployment name,
# together with the model it was created from, so the notebook can be re-run
# and publish a fresh model under the same name.
model_deployment_ids = client.deployments.get_uids()
for deployment_id in model_deployment_ids:
    deployment = client.deployments.get_details(deployment_id)
    if deployment['entity']['name'] != DEPLOYMENT_NAME:
        # Not ours: leave it alone. Its details are not inspected any further,
        # so an unrelated deployment without a deployable asset entry cannot
        # abort the cleanup with a KeyError.
        continue

    # Look up the backing model only for the deployment that is being replaced.
    model_id = deployment['entity']['deployable_asset']['guid']

    # The deployment is deleted before the model it was created from.
    print('Deleting deployment id', deployment_id)
    client.deployments.delete(deployment_id)
    print('Deleting model id', model_id)
    client.repository.delete(model_id)

# Show what is left in the repository after the cleanup.
client.repository.list_models()

In [None]:
# Metadata stored alongside the model in the WML repository.
# NOTE(review): the areaUnderROC value and threshold below are hard-coded
# literals; they are not computed from the model evaluated in this notebook.
metadata = {
    client.repository.ModelMetaNames.NAME: MODEL_NAME,
    client.repository.ModelMetaNames.EVALUATION_METHOD: "binary",
    client.repository.ModelMetaNames.EVALUATION_METRICS: [
        {
            "name": "areaUnderROC",
            "value": 0.9,
            "threshold": 0.9
        }
    ]
}

# Feature names come straight from the training frame, so they always match its
# columns one-to-one and in order. The previous hand-typed list contained
# 'AgeWhenHired' twice and was ordered by transformer group rather than by the
# frame's column order.
cols = list(X_train.columns)

# Store the fitted pipeline in the repository, together with the training data
# and target it was fitted on.
saved_model = client.repository.store_model(
    model=model,
    meta_props=metadata,
    training_data=X_train,
    training_target=y_train,
    feature_names=cols,
    label_column_names=["Attrition"],
)
saved_model

In [None]:
# Pull the stored model's GUID out of the details returned by store_model;
# it is passed as the artifact id when the deployment is created.
model_uid = saved_model['metadata']['guid']
model_uid

In [None]:
print("Deploying model...")

# Create a deployment of the stored model under DEPLOYMENT_NAME.
# NOTE(review): asynchronous=False presumably makes this call block until the
# deployment has finished — confirm against the client documentation.
deployment = client.deployments.create(artifact_uid=model_uid, name=DEPLOYMENT_NAME, asynchronous=False)

In [None]:
# Extract the deployment's id from the details returned by deployments.create.
deployment_uid = client.deployments.get_uid(deployment)

# Print both identifiers.
print("Model id: {}".format(model_uid))
print("Deployment id: {}".format(deployment_uid))