In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Data Description
- `customer`: customer ID
- `tenure`: the customer's tenure, in months
- `Action`: whether a discount was provided
- `Retain`: whether the customer stays in the system in the next period

In [4]:
# Load the customer panel from the Excel workbook (path is relative to the notebook).
DATA_PATH = 'data/BLE_data.xlsx'
data = pd.read_excel(DATA_PATH)

In [5]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,customer,tenure,Action,C,Retain
0,1,0,1,1.400842,1
1,1,1,0,5.405517,1
2,1,2,1,12.714706,1
3,1,3,0,21.032069,0
4,2,0,1,1.509191,0


# Problem Definition
- Will the customer be retained?
- the decision variable is action
## Select DV and IV
- DV: retention (`Retain`)
- IV: `Action` and `C`
# Modeling
- $utility = b_0*Action+b_1*C+b_2$
- $P(Retain=1) = \frac{e^{utility}}{1+e^{utility}}$, $P(Retain=0) = \frac{1}{1+e^{utility}}$


In [36]:
def givefunction(df):
    """Build the negative log-likelihood objective of a binary logit model.

    The utility is ``U = b0*Action + b1*C + b2``; the modelled probabilities
    are ``P(Retain=1) = exp(U)/(1+exp(U))`` and ``P(Retain=0) = 1/(1+exp(U))``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Action``, ``C`` and ``Retain``. Rows with
        ``Retain == 1`` are scored with ``P(Retain=1)``, all other rows with
        ``P(Retain=0)``.

    Returns
    -------
    callable
        ``objfunc(args)`` where ``args`` holds ``(b0, b1, b2)``. It returns
        the negative base-10 log-likelihood of ``df`` under those
        coefficients, suitable for ``scipy.optimize.minimize``.
    """
    def objfunc(args):
        b0,b1,b2 = args[0],args[1],args[2]
        U = b0*df['Action']+b1*df['C']+b2
        # log P(Retain=1) = -log(1+exp(-U)) and log P(Retain=0) = -log(1+exp(U)).
        # np.logaddexp evaluates both without overflowing for large |U|, so no
        # additive epsilon is needed inside the log. An epsilon would cap the
        # penalty of confidently wrong predictions and flatten the objective.
        log_p1 = -np.logaddexp(0,-U)
        log_p0 = -np.logaddexp(0,U)
        # Convert natural logs to base 10; the base only rescales the
        # objective and does not move its minimiser.
        ll = -np.where(df.Retain==1,log_p1,log_p0).sum()/np.log(10)
        return ll
    return objfunc

In [41]:
from scipy.optimize import minimize

# Fit (b0, b1, b2) with BFGS, starting from all-zero coefficients.
initial_guess = np.array([0, 0, 0])
res = minimize(givefunction(data), x0=initial_guess, method='BFGS')

In [42]:
# Fitted coefficients, in the (b0, b1, b2) order that the objective unpacks.
res.x

array([ 2.57357216,  0.08819177, -1.24677445])

# Evaluation

In [65]:
# Re-evaluate the fitted model on the full sample.
df = data.copy()
b0,b1,b2 = res.x
U = b0*df['Action']+b1*df['C']+b2
# p0 = P(Retain=0) and p1 = P(Retain=1) under the fitted coefficients.
p0 = 1/(1+np.exp(U))
p1 = 1-p0
# Model log-likelihood in base 10, the same base as the null log-likelihood
# it is compared against. log P(Retain=1) = -log(1+exp(-U)) and
# log P(Retain=0) = -log(1+exp(U)) are evaluated exactly via np.logaddexp,
# so no epsilon is added inside the log and both sides of the likelihood
# ratio are computed on the same footing.
ll = np.where(df.Retain==1,-np.logaddexp(0,-U),-np.logaddexp(0,U)).sum()/np.log(10)

In [66]:
# Predict 0 only where not-retained is strictly the more likely outcome, else 1.
predict = 1 - np.asarray(p0 > p1, dtype=int)

In [67]:
# 1 where the predicted class equals the observed Retain value, 0 otherwise.
correct_predict = np.asarray(data.Retain == predict).astype(int)

In [68]:
# Share of rows whose predicted class matches the observed outcome.
hitrate = correct_predict.mean()
hitrate

0.8722466960352423

## Likelihood ratio index
- LRI = $1-\frac{\ln L}{\ln L_0}$, where $\ln L_0 = n[p\ln p+(1-p)\ln(1-p)]$, $n$ is the number of observations and $p$ is the observed retention rate
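- The denominator is the log-likelihood without the model (every customer gets the same retention probability $p$); the numerator is the log-likelihood of the fitted model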

In [69]:
# n: number of non-null Retain observations; p: observed retention rate.
n = data.Retain.count()
p = data.Retain.sum()/n

In [70]:
# Null-model log-likelihood (base 10): every row gets the same probability p.
retained_part = n*p*np.log10(p)
not_retained_part = n*(1-p)*np.log10(1-p)
logl_0 = retained_part+not_retained_part

In [71]:
# Likelihood ratio index: 1 minus (model log-likelihood / null log-likelihood).
loglik_ratio = ll/logl_0
lri = 1-loglik_ratio

In [72]:
# Display the likelihood ratio index.
lri

0.3045004297693279