In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lifelines
import torch
import pyro

In [2]:
from propensity_prediction.tasks.churn_prediction.main import churn_prediction

# Passing Data & Tasks Config

Data should contain only one object. 

The config includes:

- task name
- config of data
    - path to read
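    - Fixed columns (set under `History` in the config below):
        - cus_id (customer_id) — config key `user_id`
        - event (event_type: churn, view) — config key `event`
        - duration (timestamp, week) — config key `user_seniority`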
    - Features and their Types (category, ordering, numeric, text)
        - integer: category|ordering -> numeric
        - float: numeric
        - string: category|ordering -> text

In [3]:
# Task configuration: which task to run, where the data is read from, and how
# the columns of the dataset are declared.
global_config = {
    'tasks': 'churn_prediction',
    'data_config': {
        # Rossi dataset from the lifelines repository, pinned to one commit.
        'path': 'https://raw.githubusercontent.com/CamDavidsonPilon/lifelines/cae49555be8c66a09c95ba28f52189941aa89821/lifelines/datasets/rossi.csv',
        'History': {
            'user_id': None,
            'user_seniority': 'week',
            'event': 'arrest',
            # Feature columns and the type declared for each of them.
            'metadata': {
                'fin': 'numeric',
                'age': 'numeric',
                'race': 'numeric',
                'wexp': 'numeric',
                'mar': 'numeric',
                'paro': 'numeric',
                'prio': 'numeric',
            },
        },
    },
}
global_config


{'tasks': 'churn_prediction',
 'data_config': {'path': 'https://raw.githubusercontent.com/CamDavidsonPilon/lifelines/cae49555be8c66a09c95ba28f52189941aa89821/lifelines/datasets/rossi.csv',
  'History': {'user_id': None,
   'user_seniority': 'week',
   'event': 'arrest',
   'metadata': {'fin': 'numeric',
    'age': 'numeric',
    'race': 'numeric',
    'wexp': 'numeric',
    'mar': 'numeric',
    'paro': 'numeric',
    'prio': 'numeric'}}}}

In [4]:
# Alternative (disabled): read global_config from a JSON file instead of the inline dict.
# JSON_CONFIG_PATH = 'examples/ChurnPrediction.json'
# import json
# f = open(JSON_CONFIG_PATH, 'r')
# global_config = json.load(f)
# global_config

# Run

This cell corresponds to running the task from the command line with a JSON config file:

    propensity_prediction.churn_main.py JSON_CONFIG_PATH

In [5]:
# Run the churn-prediction task described by global_config.
# The call returns three values; `eva` and `ens_model` are inspected in later cells.
predict, eva, ens_model = churn_prediction(global_config)

Prepare contextual data
Prepare data for models
Train ensemble Ensemble
+ Prepare model LogisticRegression with label arrest
Running mode:  training
    Training with minibacth  0
	+ [iteration 0001] loss: 0.0023
	+ [iteration 0200] loss: 0.0018


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df[self.sur_prob_col] = cumulative_kmf_surprob
  allow_unreachable=True)  # allow_unreachable flag


	+ [iteration 0400] loss: 0.0017
	+ [iteration 0600] loss: 0.0016
	+ [iteration 0800] loss: 0.0016
	+ [iteration 1000] loss: 0.0016
----Running time:  0.7001714706420898
+ Prepare model Bayesian_LogisticRegression with label arrest
Running mode:  training
    Training with minibacth  0
	+ [iteration 0001] loss: 0.0007
	+ [iteration 0200] loss: 0.0006
	+ [iteration 0400] loss: 0.0006
	+ [iteration 0600] loss: 0.0006
	+ [iteration 0800] loss: 0.0006
	+ [iteration 1000] loss: 0.0005
----Running time:  0.7748441696166992
Running mode:  testing
Running mode:  testing
Running mode:  testing
Running mode:  testing
Running mode:  testing
Running mode:  testing
Running mode:  testing
Running mode:  testing


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df[self.sur_prob_col] = cumulative_kmf_surprob


In [6]:
# Display the evaluation report returned by churn_prediction.
eva

{'model_name': 'Ensemble',
 'evaluation': {'auc': 0.6360028860028859,
  'all_evaluating_methods': [{'method': 'threshold',
    'list_results': [{'threshold_method': 'constant',
      'results': {'accuracy': 0.7586206896551724,
       'precision': 0.0,
       'recall': nan,
       'trueneg_rate': 1.0,
       'f1_score': nan}},
     {'threshold_method': 'baseline',
      'results': {'accuracy': 0.7586206896551724,
       'precision': 0.0,
       'recall': nan,
       'trueneg_rate': 1.0,
       'f1_score': nan}},
     {'threshold_method': 'kmeans',
      'results': {'accuracy': 0.5632183908045977,
       'precision': 0.6190476190476191,
       'recall': 0.3023255813953488,
       'trueneg_rate': 0.5454545454545454,
       'f1_score': 0.40625}},
     {'threshold_method': 'otsu',
      'results': {'accuracy': 0.2413793103448276,
       'precision': 1.0,
       'recall': 0.2413793103448276,
       'trueneg_rate': 0.0,
       'f1_score': 0.3888888888888889}},
     {'threshold_method': 'yen',

In [7]:
from propensity_prediction.auto_preprocess.base import Base_Preprocess

from propensity_prediction.config.model_config import ChurnPrediction_ModelConfig
from propensity_prediction.config.data_config import ChurnPrediction_DataConfig
from propensity_prediction.config.base import Base_TaskConfig
from propensity_prediction.tasks.churn_prediction.churn_prediction import ChurnPrediction_Task


class ChurnPrediction_Config(Base_TaskConfig):
	"""Task configuration for churn prediction.

	Pairs a ``ChurnPrediction_DataConfig`` built from the ``'data_config'``
	entry of the global config with a ``ChurnPrediction_ModelConfig``
	constructed without arguments.
	"""

	def __init__(self, global_config):
		"""Create the data and model configs and hand them to ``Base_TaskConfig``."""
		super().__init__(
			ChurnPrediction_DataConfig(global_config['data_config']),
			ChurnPrediction_ModelConfig(),
		)

def prepare_task(global_config):
	"""Build the task config and the preprocessed data for churn prediction.

	Args:
		global_config: Dict with a ``'data_config'`` entry, as accepted by
			``ChurnPrediction_Config``.

	Returns:
		Tuple ``(training_data, task_config)``: the data returned by
		``Base_Preprocess.auto_preprocess`` and the task config after its
		metadata was updated with the preprocessed feature types.
	"""
	task_config = ChurnPrediction_Config(global_config)

	data_path = task_config.get_datapath()
	key_types, feature_types = task_config.parse_data_config()

	# Every column is read as a string (dtype=str); the preprocessor is given
	# the declared key/feature types.
	raw_df = pd.read_csv(data_path, dtype=str)
	preprocessor = Base_Preprocess(key_types, feature_types)
	training_data, preprocessed_feature_types = preprocessor.auto_preprocess(raw_df)

	# Record the feature types produced by preprocessing on the task config.
	task_config.update_metadata(preprocessed_feature_types)

	return training_data, task_config

# Test loading

In [8]:
# Export the trained model as a save package and display it.
package = ens_model.package_for_save()
package

{'model_name': 'Ensemble',
 'model_package': [{'model_name': 'LogisticRegression',
   'model_package': OrderedDict([('linear.weight',
                 tensor([[-0.4313, -0.3235, -0.4398, -0.3027, -0.6326, -0.0612,  0.4193,  0.0749]])),
                ('linear.bias', tensor([-0.1634]))])},
  {'model_name': 'Bayesian_LogisticRegression',
   'model_package': OrderedDict([('linear.weight',
                 tensor([[-0.1196, -0.6986, -0.1352, -0.4542, -0.2773, -0.2618,  0.4144, -0.2482]])),
                ('linear.bias', tensor([-0.0581]))])}]}

# Evaluate

In [9]:
# Rebuild the data and task config from the same global_config, then restore
# the model from the in-memory save package.
training_data, task_config = prepare_task(global_config)
load_model = ChurnPrediction_Task(task_config).load_from_savepackage(package)

In [10]:
# Evaluate the restored model on the preprocessed data.
load_model.evaluate(training_data)

Running mode:  testing
Running mode:  testing
Running mode:  testing
Running mode:  testing


{'model_name': 'Ensemble',
 'evaluation': {'auc': 0.6301307514068191,
  'all_evaluating_methods': [{'method': 'threshold',
    'list_results': [{'threshold_method': 'constant',
      'results': {'accuracy': 0.7361111111111112,
       'precision': 0.0,
       'recall': nan,
       'trueneg_rate': 1.0,
       'f1_score': nan}},
     {'threshold_method': 'baseline',
      'results': {'accuracy': 0.7361111111111112,
       'precision': 0.0,
       'recall': nan,
       'trueneg_rate': 1.0,
       'f1_score': nan}},
     {'threshold_method': 'kmeans',
      'results': {'accuracy': 0.5,
       'precision': 0.7543859649122807,
       'recall': 0.31386861313868614,
       'trueneg_rate': 0.4088050314465409,
       'f1_score': 0.44329896907216493}},
     {'threshold_method': 'otsu',
      'results': {'accuracy': 0.5694444444444444,
       'precision': 0.6754385964912281,
       'recall': 0.3407079646017699,
       'trueneg_rate': 0.5314465408805031,
       'f1_score': 0.4529411764705883}},
    

In [11]:
# Evaluate the first sub-model on its own (reported below as LogisticRegression).
load_model.list_models[0].evaluate(training_data)

Running mode:  testing


{'model_name': 'LogisticRegression',
 'evaluation': {'auc': 0.6278412225532384,
  'all_evaluating_methods': [{'method': 'threshold',
    'list_results': [{'threshold_method': 'constant',
      'results': {'accuracy': 0.7384259259259259,
       'precision': 0.008771929824561403,
       'recall': 1.0,
       'trueneg_rate': 1.0,
       'f1_score': 0.017391304347826087}},
     {'threshold_method': 'baseline',
      'results': {'accuracy': 0.7361111111111112,
       'precision': 0.0,
       'recall': nan,
       'trueneg_rate': 1.0,
       'f1_score': nan}},
     {'threshold_method': 'kmeans',
      'results': {'accuracy': 0.5208333333333334,
       'precision': 0.7543859649122807,
       'recall': 0.32452830188679244,
       'trueneg_rate': 0.4371069182389937,
       'f1_score': 0.4538258575197889}},
     {'threshold_method': 'otsu',
      'results': {'accuracy': 0.6481481481481481,
       'precision': 0.5,
       'recall': 0.375,
       'trueneg_rate': 0.7012578616352201,
       'f1_scor

In [12]:
# Evaluate the second sub-model on its own (reported below as Bayesian_LogisticRegression).
load_model.list_models[1].evaluate(training_data)

Running mode:  testing


{'model_name': 'Bayesian_LogisticRegression',
 'evaluation': {'auc': 0.6211381441023943,
  'all_evaluating_methods': [{'method': 'threshold',
    'list_results': [{'threshold_method': 'constant',
      'results': {'accuracy': 0.7361111111111112,
       'precision': 0.0,
       'recall': nan,
       'trueneg_rate': 1.0,
       'f1_score': nan}},
     {'threshold_method': 'baseline',
      'results': {'accuracy': 0.7361111111111112,
       'precision': 0.0,
       'recall': nan,
       'trueneg_rate': 1.0,
       'f1_score': nan}},
     {'threshold_method': 'kmeans',
      'results': {'accuracy': 0.5509259259259259,
       'precision': 0.7543859649122807,
       'recall': 0.3412698412698413,
       'trueneg_rate': 0.4779874213836478,
       'f1_score': 0.46994535519125685}},
     {'threshold_method': 'otsu',
      'results': {'accuracy': 0.5717592592592593,
       'precision': 0.6052631578947368,
       'recall': 0.33014354066985646,
       'trueneg_rate': 0.559748427672956,
       'f1_s

In [13]:
# Predict with the restored model on the same data.
# NOTE(review): the full prediction array is displayed — consider showing only a slice.
load_model.predict(training_data)

Running mode:  testing
Running mode:  testing
Running mode:  testing
Running mode:  testing


{'model_name': 'Ensemble',
 'predict': array([0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1.,
        1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1.,
        0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0.,
        1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 0., 1.,
        0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0.,
        0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1.,
        0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0.,
        1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.