In [1]:
import pandas as pd
import numpy as np
import pickle

In [3]:
# Load the pickled model registry from disk.
# The context manager closes the file handle even if unpickling raises
# (the bare open() used before left the handle open for the kernel's lifetime).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open('pickled_models.pkl', 'rb') as pickle_in:
    models = pickle.load(pickle_in)

In [5]:
# Display the loaded registry: a dict whose numeric keys each map to a
# two-item list of [fitted estimator, estimator class name].
models

{1.0: [LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
            intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
            penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
            verbose=0, warm_start=False), 'LogisticRegression'],
 2.0: [DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, presort=False, random_state=None,
              splitter='best'),
  'DecisionTreeClassifier'],
 3.0: [RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
              max_depth=None, max_features='auto', max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_

In [6]:
# Read the churn test set from CSV into the working dataframe
df = pd.read_csv(filepath_or_buffer="churn_test.csv")

In [7]:
# Treat cells that consist solely of whitespace as missing values (NaN)
whitespace_only = r'^\s+$'
df = df.replace(to_replace=whitespace_only, value=np.nan, regex=True)

In [8]:
# Discard every row that contains at least one missing value
df = df.dropna(axis=0, how='any')

In [9]:
# Drop the customerID identifier column and the TotalCharges variable.
# drop(columns=...) returns a new frame instead of mutating df in place.
df = df.drop(columns=["customerID", "TotalCharges"])
# Recode SeniorCitizen from its numeric flags to string labels (0 -> 'No', 1 -> 'Yes')
# so that it is handled as a categorical variable.
df['SeniorCitizen'] = df['SeniorCitizen'].map({0: 'No', 1: 'Yes'})

In [10]:
# Every column other than the two numeric measures is treated as categorical
numerical_columns_list = ['tenure', 'MonthlyCharges']
all_columns_list = list(df.columns)
categorical_columns_list = [
    column for column in all_columns_list if column not in numerical_columns_list
]

# Cast each column to the dtype that matches its role
for column in categorical_columns_list:
    df[column] = pd.Categorical(df[column])
for column in numerical_columns_list:
    df[column] = pd.to_numeric(df[column])

In [11]:
# Partition the frame by dtype: float64/int64 columns on one side,
# every other column on the other
num = ['float64', 'int64']
num_df, obj_df = df.select_dtypes(include=num), df.select_dtypes(exclude=num)

In [12]:
# Bucket the numeric variables into low/medium/high bands for churn prediction.
# include_lowest=True closes the first interval on the left, so a value equal to
# the lowest edge (e.g. tenure == 0) is labelled 'low'. Without it pd.cut returns
# NaN for that row, and all of its dummy columns end up as 0 after encoding.
tenure_bins = pd.cut(
    num_df["tenure"],
    bins=[0, 20, 60, 80],
    labels=['low', 'medium', 'high'],
    include_lowest=True,
)
MonthlyCharges_bins = pd.cut(
    num_df["MonthlyCharges"],
    bins=[0, 35, 60, 130],
    labels=['low', 'medium', 'high'],
    include_lowest=True,
)

In [13]:
# Combine the two binned Series column-wise into a single dataframe.
# pd.concat(axis=1) keeps each column's categorical dtype; building the frame
# row-wise and transposing it would turn the columns into plain object dtype.
# Keeping the categories means the later one-hot encoding emits a column for
# every bin label, even one that does not occur in this data.
bins = pd.concat([tenure_bins, MonthlyCharges_bins], axis=1)

In [14]:
# Place the binned numeric columns side by side with the non-numeric columns
transformed_df = pd.concat(objs=[bins, obj_df], axis='columns')

In [15]:
# Every column except the Churn target will be one-hot encoded
dummy_columns = [column for column in transformed_df.columns if column != 'Churn']

In [16]:
# One-hot encode the columns listed in dummy_columns; columns not listed
# (here, Churn) are carried over unchanged
df_dummies = pd.get_dummies(transformed_df, columns=dummy_columns)

In [19]:
# Dummy-variable columns passed to the model as inputs, in this order
selected_features = [
    'tenure_low',
    'tenure_medium',
    'Contract_Two year',
    'Contract_Month-to-month',
    'TechSupport_No',
    'PaymentMethod_Electronic check',
    'OnlineSecurity_No',
    'InternetService_Fiber optic',
    'OnlineBackup_No',
]

In [20]:
# Build the model inputs (selected dummy columns) and the target from the encoded frame.
# reindex returns the columns in the order of selected_features and, unlike plain
# [] selection, does not raise KeyError when a dummy column is absent because its
# category level never occurs in this data; such a column is filled with 0, which
# is the one-hot encoding of "level not present".
X_selected = df_dummies.reindex(columns=selected_features, fill_value=0)
y_selected = df_dummies["Churn"]

In [22]:
# Entry 1 of the registry is a [estimator, name] pair; keep the estimator
model_entry = models[1]
lrmodel = model_entry[0]

In [24]:
# Score the selected features with the loaded model
predictions = lrmodel.predict(X=X_selected)

In [25]:
# Display the predicted labels returned by lrmodel.predict
predictions

array(['No', 'Yes', 'Yes', ..., 'No', 'No', 'Yes'], dtype=object)