# Dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lifelines
import torch
import pyro

# Import from projects

In [2]:
from propensity_prediction.tasks.churn_prediction.churn_prediction import ChurnPrediction_Task
from propensity_prediction.config.model_config import ChurnPrediction_ModelConfig
from propensity_prediction.config.data_config import ChurnPrediction_DataConfig
from propensity_prediction.config.base import Base_TaskConfig

# Experiments

In [3]:
from lifelines.datasets import load_rossi   
df = load_rossi()
df.head()

Unnamed: 0,week,arrest,fin,age,race,wexp,mar,paro,prio
0,20,1,0,27,1,0,0,1,3
1,17,1,0,18,1,0,0,1,8
2,25,1,0,19,0,1,0,1,13
3,52,0,1,23,1,1,1,1,1
4,52,0,0,19,0,1,0,1,3


# Config

In [4]:
# INPUT_CONFIG={'dur_col':'week', 'event_col':'arrest', 'feature_col': ['fin', 'age', 'race', 'wexp', 'mar', 'paro', 'prio']}
# FE_CONFIG = [{'method':'pca', 'output_dimension':5, 'args':{'rowvar':False}}]
# MODEL_CONFIG=[{'model_name':'Logistic_Regression', 'training_config':{'learning_rate':0.01, 'epochs':500}},\
#                 {'model_name':'Bayesian_Regression', 'training_config':{'learning_rate':0.005, 'epochs':500}}
#                 ]
# PREDICT_CONFIG={'method':'gettop','ntop':None}
# PIPELINE_CONFIG = {'task': 'churn_prediction', 'input_config':INPUT_CONFIG, 'fe_config':FE_CONFIG, 'model_config':MODEL_CONFIG, 'predict_config':PREDICT_CONFIG}

In [5]:
global_config = {'tasks':'churn_prediction', 'data_config':{'History':{'user_seniority':'week', 'event':'arrest', 'metadata':{'fin':'numeric', 'age':'numeric', 'race':'numeric', 'wexp':'numeric', 'mar':'numeric', 'paro':'numeric', 'prio':'numeric'}}}}
global_config

{'tasks': 'churn_prediction',
 'data_config': {'History': {'user_seniority': 'week',
   'event': 'arrest',
   'metadata': {'fin': 'numeric',
    'age': 'numeric',
    'race': 'numeric',
    'wexp': 'numeric',
    'mar': 'numeric',
    'paro': 'numeric',
    'prio': 'numeric'}}}}

## Applying models


In [6]:
global_config['data_config']

{'History': {'user_seniority': 'week',
  'event': 'arrest',
  'metadata': {'fin': 'numeric',
   'age': 'numeric',
   'race': 'numeric',
   'wexp': 'numeric',
   'mar': 'numeric',
   'paro': 'numeric',
   'prio': 'numeric'}}}

In [7]:
class ChurnPrediction_Config(Base_TaskConfig):
	def __init__(self, global_config):
		data_config = ChurnPrediction_DataConfig(global_config['data_config'])
		model_config = ChurnPrediction_ModelConfig()
		super().__init__(data_config, model_config)
        
task_config = ChurnPrediction_Config(global_config)

In [8]:
ens_model = ChurnPrediction_Task(task_config)
ens_model.train(df)

Train ensemble Ensemble
Training Logistic_Regression with label arrest
Training Logistic Regression!
Epoch 0: Loss=6.4380
Epoch 19: Loss=2.0087
Epoch 39: Loss=0.6761
Epoch 59: Loss=0.6205
Epoch 79: Loss=0.5851
Epoch 99: Loss=0.5696
Epoch 119: Loss=0.5624
Epoch 139: Loss=0.5584
Epoch 159: Loss=0.5557
Epoch 179: Loss=0.5537
Epoch 199: Loss=0.5519
Epoch 219: Loss=0.5501
Epoch 239: Loss=0.5484
Epoch 259: Loss=0.5467
Epoch 279: Loss=0.5450
Epoch 299: Loss=0.5434
Epoch 319: Loss=0.5418
Epoch 339: Loss=0.5402
Epoch 359: Loss=0.5387
Epoch 379: Loss=0.5372
Epoch 399: Loss=0.5357
Epoch 419: Loss=0.5343
Epoch 439: Loss=0.5329
Epoch 459: Loss=0.5315
Epoch 479: Loss=0.5301
Epoch 499: Loss=0.5288
Training Bayesian_Regression with label arrest


[<propensity_prediction.tasks.churn_prediction.churnprediction_base.ChurnPrediction_Base at 0x7f382bed9190>,
 <propensity_prediction.tasks.churn_prediction.churnprediction_base.ChurnPrediction_Base at 0x7f382bed5bd0>]

In [9]:
print ('Predicting labels: ', ens_model.get_probabilities(df))

Predicting labels:  {'model_name': 'Ensemble', 'probabilities': array([[0.78910169, 0.2108983 ],
       [0.70343313, 0.29656688],
       [0.71086311, 0.28913688],
       [0.8645685 , 0.13543152],
       [0.78252581, 0.21747418],
       [0.80535674, 0.19464324],
       [0.82806659, 0.17193343],
       [0.83543393, 0.16456607],
       [0.76055184, 0.23944818],
       [0.73831511, 0.26168488],
       [0.84010634, 0.15989365],
       [0.90245238, 0.09754763],
       [0.75448093, 0.24551908],
       [0.88866597, 0.11133403],
       [0.73447174, 0.26552827],
       [0.82504275, 0.17495726],
       [0.72326609, 0.2767339 ],
       [0.74272278, 0.25727721],
       [0.72277075, 0.27722926],
       [0.8082315 , 0.19176848],
       [0.77324978, 0.22675024],
       [0.9494198 , 0.05058019],
       [0.86639416, 0.13360585],
       [0.8706845 , 0.12931548],
       [0.78618261, 0.21381738],
       [0.80273864, 0.19726135],
       [0.86158296, 0.13841705],
       [0.73316294, 0.26683703],
       [0.78

  return torch.tensor(np_floatarr).float()


In [10]:
print ('Predicting labels: ', ens_model.predict(df))

Predicting labels:  {'model_name': 'Ensemble', 'predict': array([0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1.,
       1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1.,
       0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
       1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 1., 0., 0.,
       0., 0., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1.,
       1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0.,
       0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 1., 0., 0., 0.,

In [11]:
print ('Evaluating results: ', ens_model.evaluate(df))

AttributeError: 'numpy.ndarray' object has no attribute 'numpy'

In [None]:
# print (PIPELINE_CONFIG['model_config'])
# lr_model = Training_ChurnPrediction(PIPELINE_CONFIG['model_config'][0], PIPELINE_CONFIG['fe_config'], INPUT_CONFIG)
# lr_model.train(df)
# print ('Predicting labels: ', lr_model.predict(df, binarize_config={'method':'threshold','thres_type':'kmeans'}))

In [None]:
# print ('Evaluating results: ', lr_model.evaluate(df))

In [None]:
# print (PIPELINE_CONFIG['model_config'])
# bayes_model = Training_ChurnPrediction(PIPELINE_CONFIG['model_config'][1], PIPELINE_CONFIG['fe_config'], INPUT_CONFIG)
# bayes_model.train(df)
# print ('Predicting labels: ', bayes_model.predict(df, binarize_config={'method':'threshold','thres_type':'kmeans'}))

In [None]:
# print ('Evaluating results: ', bayes_model.evaluate(df))