# Model evaluation on 2017 data



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import pandas as pd
import pickle

from pyspark.sql.types import DoubleType
from pyspark.ml import Pipeline
from pyspark.ml.feature import VectorAssembler, Binarizer#, OneHotEncoderEstimator, StringIndexer
from pyspark.ml.classification import LogisticRegression, LogisticRegressionModel
from pyspark.sql import functions as F
from pyspark.sql import SparkSession

from pyspark.sql.functions import to_timestamp, datediff, unix_timestamp

import helper as h

In [2]:
# Load the merged data set for the 'validation' split, using 2017 for both
# `year` and `user_year`. The helper prints its own summary while loading
# (event threshold and percentage of users dropped).
churn_data = h.get_merged_data('validation', year='2017', user_year='2017')

Threshold events N: 600
% of users dropped 2.96


In [3]:
# Add the high/low flag via the helper; later cells read a `high_low_user`
# column, presumably created here.
# NOTE(review): the meaning of the second positional argument (False) is not
# visible in this notebook -- confirm against helper.add_high_low_flag.
churn_data = h.add_high_low_flag(churn_data, False)

In [4]:
# Apply the helper's feature scaling.
# NOTE(review): `churn_data` is reassigned, so re-running this cell on its own
# feeds the already-processed frame back into the helper; use
# Restart Kernel -> Run All to get a clean result.
churn_data = h.feature_scaling(churn_data)

In [5]:
# Report the share of users who churned in the second period (printed by the
# helper) before any rebalancing is applied.
h.print_user_churn(churn_data)

42.3% of users churned in second period


In [6]:
# Build `numeric_features`: the numeric columns kept as model inputs for this
# validation run.
model_name = 'validate_2017'

# Start from every int/double column of the frame, in schema order.
numeric_features = [name for name, dtype in churn_data.dtypes
                    if dtype in ('int', 'double')]

columns_to_drop = [
    # These are the predictors/labels
    'second_period_event_count',
    'frequency',
    # Segmentation column
    'high_low_user',
    # Not predictive of anything
    'time_between_first_last_event',
    # Very rare events that are removed outright ...
    'GollumEvent_count',
    'CommitCommentEvent_count',
    'MemberEvent_count',
    # ... and rare events that are replaced by the *_bin columns added below.
    'PublicEvent_count',
    'ReleaseEvent_count',
]

# Compare strings by value: `is not` tests object identity, which only works
# by accident of string interning and emits a SyntaxWarning on Python 3.8+.
if model_name != 'nofilters':
    columns_to_drop += [
        'company',
        # These two are correlated w/ each other and most other events,
        # therefore remove.
        'public_repos_count',
        'public_gists_count',
    ]

# list.remove raises ValueError when a column is absent, so an unexpected
# schema fails here rather than silently producing a different feature set.
for column in columns_to_drop:
    numeric_features.remove(column)

# Binarized stand-ins for the rare event counts dropped above.
# NOTE(review): presumably these columns are produced by the loaded pipeline
# (Binarizer is imported) -- confirm.
numeric_features += ['PublicEvent_count_bin', 'ReleaseEvent_count_bin']

In [7]:
# Start (or reuse) the Spark session, then restore the four saved logistic
# regression models and the saved feature pipeline from the working directory.
spark = SparkSession.builder.appName('App').getOrCreate()

# Saved model directories, listed in the same order as the names they are
# unpacked into below.
_LR_MODEL_PATHS = (
    "lrModel_nofilters",
    "lrModel_company_1",
    "lrModel_company_0high_low_0",
    "lrModel_company_0high_low_1",
)
LRmodelNoFilter, LRmodelCompany, LRmodelLow, LRmodelHigh = [
    LogisticRegressionModel.load(path) for path in _LR_MODEL_PATHS
]
print('-----Models loaded-----')

# This is an (unfitted) Pipeline definition; it is fitted later inside
# predict_and_evaluate.
pipeline = Pipeline.load('pipeline')
print('-----Pipeline loaded-----')

-----Models loaded-----
-----Pipeline loaded-----


### Rebalance data set

In [8]:
# Rebalance the evaluation set by down-sampling the larger class so that
# churned and active users are roughly equally represented.
# Users with no second-period events are treated as churned.
churners = churn_data.filter(churn_data.second_period_event_count < 1)
churn_count = churners.count()

active = churn_data.filter(churn_data.second_period_event_count > 0)
active_count = active.count()

if active_count == 0:
    # The original division would raise a bare ZeroDivisionError here.
    raise ValueError('Cannot rebalance: no active users in churn_data')

# Sampling without replacement needs a fraction in [0, 1], so always shrink
# the majority class. DataFrame.sample is approximate: the sampled class
# will not match the other class's size exactly.
if active_count >= churn_count:
    active = active.sample(False, churn_count / active_count, seed=0)
else:
    churners = churners.sample(False, active_count / churn_count, seed=0)
    churn_count = churners.count()

balanced_data = churners.union(active)

print('N churned users {0}'.format(churn_count))
print('N active users {0}'.format(active.count()))

N churned users 1947
N active users 1908


In [9]:
def predict_and_evaluate(data, model):
    """Score `data` with `model` and print evaluation metrics.

    The notebook-level `pipeline` is fitted on `data` and used to transform
    it, `model.transform` adds the predictions, and the prediction/label
    columns are handed to `h.eval_metrics`.

    Parameters
    ----------
    data : Spark DataFrame
        Must contain `second_period_event_count` and `login`; after the
        pipeline and model have run it must also expose `label`,
        `probability` and `prediction`.
    model : fitted model exposing `transform`
        For example one of the loaded `LogisticRegressionModel` objects.

    Returns
    -------
    None
        Any return value of `h.eval_metrics` is discarded.
    """
    # Cast second_period_event_count to double before the pipeline runs.
    casted = data.withColumn(
        "second_period_event_count",
        data["second_period_event_count"].cast(DoubleType()),
    )

    # NOTE(review): the pipeline is re-fitted on the evaluation data. That is
    # harmless only if every stage is stateless -- confirm against the saved
    # pipeline definition.
    features = pipeline.fit(casted).transform(casted)
    scored = model.transform(features)

    results = scored.select('login', 'probability', 'label',
                            'prediction').toPandas()

    # Keep only the second entry (index 1) of each probability vector.
    # NOTE(review): this column is not used by the metrics call below.
    results['probability'] = results['probability'].apply(lambda vec: vec[1])

    h.eval_metrics(results['prediction'], results['label'])

In [10]:
# Baseline: score the whole balanced set with the model loaded from
# "lrModel_nofilters".
predict_and_evaluate(balanced_data, LRmodelNoFilter)

TP, FN | FP, TN
1417, 480 | 565, 1361
---------------------------
Precision: 0.715
Recall:    0.747
Accuracy:  0.727
F1-score:  0.7306


In [11]:
# Split the balanced set into the three segments that have their own model.
# `old` / `new` are the high_low_user == 1 / == 0 groups among users with
# company == 0.
_is_company = balanced_data.company == 1
_no_company = balanced_data.company == 0
_flag_high = balanced_data.high_low_user == 1
_flag_low = balanced_data.high_low_user == 0

company_users = balanced_data.filter(_is_company)
old = balanced_data.filter(_flag_high & _no_company)
new = balanced_data.filter(_flag_low & _no_company)

In [12]:
# Company segment (company == 1), scored with the model loaded from
# "lrModel_company_1".
# NOTE(review): the rebalancing was done on the full data set, so classes
# within this segment are not necessarily balanced.
predict_and_evaluate(company_users, LRmodelCompany)

TP, FN | FP, TN
360, 21 | 77, 27
---------------------------
Precision: 0.824
Recall:    0.945
Accuracy:  0.798
F1-score:  0.8802


In [13]:
# `new` = users with company == 0 and high_low_user == 0, scored with the
# model loaded from "lrModel_company_0high_low_0".
# NOTE(review): the rebalancing was done on the full data set, so classes
# within this segment are not necessarily balanced -- read accuracy together
# with precision/recall.
predict_and_evaluate(new, LRmodelLow)

TP, FN | FP, TN
2, 52 | 4, 471
---------------------------
Precision: 0.333
Recall:    0.037
Accuracy:  0.894
F1-score:  0.0667


In [14]:
# `old` = users with company == 0 and high_low_user == 1, scored with the
# model loaded from "lrModel_company_0high_low_1".
predict_and_evaluate(old, LRmodelHigh)

TP, FN | FP, TN
1095, 367 | 512, 835
---------------------------
Precision: 0.681
Recall:    0.749
Accuracy:  0.687
F1-score:  0.7136
