# Decision Tree Markov Chain

Numerical example from:<br>
[DTMC: An Actionable e-Customer Lifetime Value Model Based on Markov Chains and Decision Trees](http://liacs.leidenuniv.nl/~puttenpwhvander/library/200706icec75-paauwe.pdf)

In [179]:
from itertools import product

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

In [91]:
# Hand-entered example data: nine observations spread over four customers.
# Each record is (customer, period, recency, frequency, monetary).
customers = pd.DataFrame(
    [
        (1, 1, 0, 1, 10),
        (2, 1, 0, 2, 40),
        (2, 6, 5, 2, 100),
        (2, 10, 4, 1, 50),
        (3, 6, 0, 3, 75),
        (4, 3, 0, 4, 125),
        (4, 11, 8, 4, 150),
        (4, 14, 3, 5, 250),
        (4, 16, 2, 2, 75),
    ],
    columns=['customer', 'period', 'recency', 'frequency', 'monetary'],
)
customers

Unnamed: 0,customer,period,recency,frequency,monetary
0,1,1,0,1,10
1,2,1,0,2,40
2,2,6,5,2,100
3,2,10,4,1,50
4,3,6,0,3,75
5,4,3,0,4,125
6,4,11,8,4,150
7,4,14,3,5,250
8,4,16,2,2,75


In [108]:
def tree(df):
    """Return the segment (1-4) for one observation.

    Hand-coded equivalent of the decision tree used in the paper referenced
    at the top of this notebook.

    Parameters
    ----------
    df : mapping-like
        Anything indexable by ``'frequency'`` and ``'recency'``; when used
        with ``DataFrame.apply(..., axis=1)`` this is a single row.

    Returns
    -------
    int
        1 if frequency <= 2 and recency == 0,
        2 if frequency <= 2 and recency != 0,
        3 if frequency > 2 and recency == 0,
        4 otherwise.
    """
    low_frequency = df['frequency'] <= 2
    bought_this_period = df['recency'] == 0

    if low_frequency:
        return 1 if bought_this_period else 2
    return 3 if bought_this_period else 4

In [109]:
# Label every observation with its segment by evaluating `tree` on each row.
customers['segment'] = customers.apply(tree, axis='columns')
customers

Unnamed: 0,customer,period,recency,frequency,monetary,segment
0,1,1,0,1,10,1
1,2,1,0,2,40,1
2,2,6,5,2,100,2
3,2,10,4,1,50,2
4,3,6,0,3,75,3
5,4,3,0,4,125,3
6,4,11,8,4,150,4
7,4,14,3,5,250,4
8,4,16,2,2,75,2


In [114]:
# Mean monetary value of the observations in each segment.
contribution = customers.groupby('segment')['monetary'].mean()
# add begin and dead states
# The two extra states (index 0 and 5; presumably 0 = begin and 5 = dead)
# carry a contribution of zero. Series.append was removed in pandas 2.0, so
# the rows are attached with pd.concat, which works on old and new versions.
contribution = pd.concat([contribution, pd.Series(index=[0, 5], data=[0, 0])])
# Put the states back in numeric order: 0, 1, 2, 3, 4, 5.
contribution = contribution.sort_index()
contribution

0      0.0
1     25.0
2     75.0
3    100.0
4    200.0
5      0.0
dtype: float64

In [211]:
# Build the full (period, customer) grid: periods 1-16 crossed with customers
# 1-4, with recency and frequency initialised to zero.
customers_fill = pd.DataFrame(list(product(range(1, 17),range(1, 5))), columns=['period', 'customer'])
customers_fill['recency'] = np.zeros(4*16)  # 4 customers x 16 periods = 64 rows
customers_fill['frequency'] = np.zeros(4*16)

# Attach `period_min` per customer and drop grid rows that lie before it.
# NOTE(review): `first_occured` is not defined in any visible cell. From its use
# here it must be indexed by customer and carry a `period` column — presumably
# each customer's first observed period, e.g.
# customers.groupby('customer')['period'].min(). Confirm it is defined before
# this cell runs, otherwise a fresh kernel raises NameError.
customers_fill = customers_fill.join(first_occured, rsuffix='_min', on='customer')
customers_fill = customers_fill[customers_fill['period'] >= customers_fill['period_min']]
customers_fill = customers_fill.drop('period_min', axis=1)

# Left-join the observed transactions on (customer, period) and keep only the
# grid rows without a match (their joined `recency_orig` is NaN), restricted to
# the grid's own columns.
temp = customers_fill.join(customers.set_index(['customer', 'period']), on=['customer', 'period'], rsuffix='_orig')
temp = temp[temp['recency_orig'].isna()][customers_fill.columns]

# Stack the zero-filled gap rows on top of the real observations, then order
# the result by customer and period.
customers_fill = pd.concat([temp, customers[customers_fill.columns]])
customers_fill = customers_fill.sort_values(['customer', 'period'])


class recency_filler(object):
    """Stateful row callback that fills in recency for zero-frequency rows.

    Intended for ``DataFrame.apply(recency_filler(), axis=1)``. The instance
    remembers the customer of the last row it saw and a running counter of
    consecutive rows with ``frequency == 0``. The counter restarts whenever
    the customer changes, so the result is only meaningful if rows are visited
    grouped by customer (assumes chronological order within each customer).
    """

    def __init__(self):
        # Customer id of the most recently processed row (None before any row).
        self.customer = None
        # Number of consecutive zero-frequency rows seen for that customer.
        self.recency = None

    def __call__(self, df):
        """Return the recency value for one row.

        Rows with ``frequency == 0`` get the incremented running counter;
        any other row resets the counter and keeps its own ``recency``.
        """
        row_customer = df['customer']
        if row_customer != self.customer:
            # First row of a new customer: start counting from scratch.
            self.customer, self.recency = row_customer, 0

        if df['frequency'] != 0:
            self.recency = 0
            return df['recency']

        self.recency = self.recency + 1
        return self.recency
        

# Recompute recency row by row with a fresh stateful filler; rows must already
# be sorted by customer and period for the running counter to make sense.
customers_fill['recency'] = customers_fill.apply(recency_filler(), axis='columns')
# Keep rows whose frequency is negative, or whose frequency is zero while the
# running recency equals exactly 6.
# NOTE(review): no row in the visible data has a negative frequency, so the
# first term selects nothing — confirm whether `> 0` was intended.
customers_fill = customers_fill[
    customers_fill['frequency'].lt(0)
    | (customers_fill['frequency'].eq(0) & customers_fill['recency'].eq(6))
]

In [212]:
# Display at most the first 64 rows (64 = the size of the 4 x 16 grid).
customers_fill.iloc[:64]

Unnamed: 0,period,customer,recency,frequency
24,7,1,6.0,0.0
61,16,2,6.0,0.0
46,12,3,6.0,0.0
35,9,4,6.0,0.0
