# 1. Importing

In [1]:
from propensity_prediction.utils import generate_features
# from propensity_prediction.utils import cale_bypercentile, cal_correlation, _get_groupdata
from propensity_prediction.utils import r2_score

In [2]:
from propensity_prediction.model import FeatureImpact, compute_clusters, visualize_clusters

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 2. Prepare data

In [4]:
rangelen = 30
def extract_daterange(date_list, rangelen = 30, recent_order = 'True'):
    date_max = date_list.max()
    days = None
    if type(date_max) == np.int64:
        days = date_list.apply(lambda d: date_max - d)
    elif type(date_max) == pd._libs.tslibs.timestamps.Timestamp:
        days = date_list.apply(lambda date: (date_max - date).days)
    if recent_order: 
        days = days.max() - days
    daterange = (days/rangelen).astype(int)
    return daterange 

In [5]:
# StatFeature_Define = [ {'key':'CustomerID', 'data':'InvoiceNo', 'agg_metric':'nunique', 'value_name':'NumInvoice'},\
# {'key':'CustomerID', 'data':'TotalPurchase', 'agg_metric':'sum', 'value_name':'TotalPurchase'}, \
# {'key':'CustomerID', 'data':'TotalPurchase', 'agg_metric':'min', 'value_name':'MinOrder'}, \
# {'key':'CustomerID', 'data':'TotalPurchase', 'agg_metric':'max', 'value_name':'MaxOrder'}, \
# # {'key':'CustomerID', 'data':'TotalPurchase', 'agg_metric':'std', 'value_name':'StdOrder'}, \
# {'key':'CustomerID', 'data':'Recency', 'agg_metric':'min', 'value_name':'LastPurchase'}, \
# {'key':'CustomerID', 'data':'Recency', 'agg_metric':'max', 'value_name':'ActiveDays'}, \
# ]
# HighLevelFeature_Define = [
# {'key1':'CustomerID', 'key2':'InvoiceNo', 'data':'TotalPurchase', 'agg_metric_1':'mean', 'agg_metric_2':'sum', 'value_name':'AvgPurchasePerInvoice'}, \
# {'key1':'CustomerID', 'key2':'InvoiceNo', 'data':'Quantity', 'agg_metric_1':'mean', 'agg_metric_2':'sum', 'value_name':'AvgQuantityPerInvoice'}, \
# {'key1':'CustomerID', 'key2':'InvoiceNo', 'data':'StockCode', 'agg_metric_1':'mean', 'agg_metric_2':'count', 'value_name':'AvgProductPerInvoice'}]

## 2.1 Read and preprocess data

In [6]:
def _cast_type(df):
    # Types
    df['Quantity'] = df['Quantity'].astype(int)
    df['UnitPrice'] = df['UnitPrice'].astype(float)
    df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
    return df

def _remove_invalid(df):
    # Preprocessing
    df2 = df.copy()
    df2 = df2.drop_duplicates()
    df2 = df2[df2['Quantity']>0]
    df2 = df2[df2['UnitPrice']>0]    
    return df2

def _add_orderfeatures(df):
    from datetime import timedelta
    df['InvoiceDate_Range'] = extract_daterange(df['InvoiceDate'])
    df['TotalPurchase'] = df['Quantity']*df['UnitPrice']
    date_max = df['InvoiceDate'].max()
    df['Recency'] = df['InvoiceDate'].apply(lambda date: (date_max - date).days)
    return df

def preprocess():
    df = pd.read_excel("http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx", dtype = "str")
    df = _cast_type(df)
    df2 = _remove_invalid(df)
    df2 = _add_orderfeatures(df2)
    return df2 

In [7]:
df = preprocess()
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,InvoiceDate_Range,TotalPurchase,Recency
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850,United Kingdom,0,15.3,373
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,0,20.34,373
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850,United Kingdom,0,22.0,373
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,0,20.34,373
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850,United Kingdom,0,20.34,373


## 2.2 Create training data

In [8]:
activedate_mapping = None
def _get_activedates(date_df, invoicedate_name = 'InvoiceDate'):
    active_col = 'Active_%s'%invoicedate_name
    global activedate_mapping
    if activedate_mapping is None: 
        userdate_df = date_df.groupby(['CustomerID'])[invoicedate_name].min().reset_index(name = active_col)
        activedate_mapping = dict(zip(userdate_df['CustomerID'], userdate_df[active_col]))    
    return activedate_mapping

def get_ages(date_df, invoicedate_name = 'InvoiceDate'): #Recency
    active_col = 'Active_%s'%invoicedate_name
    global activedate_mapping
    activedate_mapping = _get_activedates(date_df, invoicedate_name)
    activedates = date_df.apply(lambda r: activedate_mapping[r['CustomerID']], axis = 1)
    ages = date_df[invoicedate_name] - activedates
    return ages

# get age of customer when having a invoice
def cal_InvAge(data, rangelen = 30):
    date_df = data[['CustomerID', 'InvoiceDate']]
    date_df['InvoiceDate_Range'] = extract_daterange(date_df['InvoiceDate'], rangelen) + 1
    date_df['CustomerInv_Age'] = np.array(get_ages(date_df, 'InvoiceDate_Range')).astype(int) + 1
    return date_df

In [9]:
def _cal_LTV(data, group_name, purchase_name):
    ltv_df = data.groupby(['CustomerID', group_name])[purchase_name].sum().reset_index(name = 'LTV')
    return ltv_df
    
def _filter_Age(date_df, age):
    filter_df = date_df[date_df['CustomerInv_Age'] == age].drop_duplicates(['CustomerID', 'InvoiceDate'])[['CustomerID', 'InvoiceDate', 'CustomerInv_Age']]
    return filter_df

def get_interaction_byAge(data, date_df, age):
    idx_df = _filter_Age(date_df, age)
    df = _cal_LTV(data.merge(idx_df, on = ['CustomerID', 'InvoiceDate'], how = 'left'), group_name = 'CustomerInv_Age', purchase_name='TotalPurchase')
    df['CustomerInv_Age'] = np.array(df['CustomerInv_Age']).astype(int)
    return df

In [10]:
def create_trainingdata(df, active_dategrp = None):
    # Date Features
    data_0 = df.groupby(['CustomerID', 'InvoiceNo', 'InvoiceDate'])['TotalPurchase'].sum().reset_index()
    date_df = cal_InvAge(data_0, rangelen = 30)

    all_1age_df = []
    max_age = date_df['CustomerInv_Age'].max()
    for start_age in range(max_age+1):
        start_df = get_interaction_byAge(data_0, date_df, start_age).rename(columns = {'CustomerInv_Age': 'Customer_Age'})
        end_df = get_interaction_byAge(data_0, date_df, start_age + 1).rename(columns = {'CustomerInv_Age': 'Customer_NextAge', 'LTV': 'Next_LTV'})

        # create a training row: customerid, current_age, next_age, current ltv, next_ltv
        df_temp = pd.merge(start_df, end_df, how = 'inner', on = ['CustomerID'])
        # add InvoiceDate_Range
        df_temp = pd.merge(df_temp, date_df[['InvoiceDate_Range', 'CustomerID', 'CustomerInv_Age']].drop_duplicates(), \
                           left_on = ['CustomerID', 'Customer_Age'], right_on = ['CustomerID', 'CustomerInv_Age'])
        all_1age_df.append(df_temp[['InvoiceDate_Range', 'CustomerID', 'Customer_Age', 'Customer_NextAge', 'LTV', 'Next_LTV']])
    data_train = pd.concat(all_1age_df)

    return data_train

In [11]:
data_train = create_trainingdata(df)
data_train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,InvoiceDate_Range,CustomerID,Customer_Age,Customer_NextAge,LTV,Next_LTV
0,1,12347,1,2,711.79,475.39
1,1,12348,1,2,892.8,227.44
2,3,12352,1,2,296.5,1265.31
3,2,12359,1,2,547.5,1838.91
4,9,12364,1,2,623.92,79.8


## 2.3 Generate profile features

In [171]:
def _cal_slope(x, y):
    from scipy.stats import linregress
    slope, intercept, r_value, p_value, std_err = linregress(x, y)
    return slope

def _cal_momentum(dates):
    return _cal_slope(-dates, range(len(dates)))

def _cal_AvgBetweenOrders(dates):
    return _cal_slope(range(len(dates)), -dates)

def cal_OrderDateFeature(data, keylist = ['CustomerID'], Recency_name = 'Recency'):
    customer_orderdate = data.sort_values(by = Recency_name, ascending = False).groupby(keylist)[Recency_name].unique().reset_index(name = 'List_Recency')
    customer_orderdate['AvgTimeBetweenOrder'] = customer_orderdate['List_Recency'].apply(lambda r: _cal_AvgBetweenOrders(r))
    customer_orderdate['OrderMomentum'] = customer_orderdate['List_Recency'].apply(lambda r: _cal_momentum(r))
    del customer_orderdate['List_Recency']
    for key in ['AvgTimeBetweenOrder', 'OrderMomentum']:
        m = customer_orderdate[key].mean()
        if np.isnan(m):
            m = 1.0
        customer_orderdate[key] = customer_orderdate[key].fillna(m)
    return customer_orderdate

In [236]:
import pandas as pd

def _get_StatFeature(transaction_df, keylist, data_name, agg_metric='mean', value_name='value'):
	'''
	Getting summary of the information from transaction of items
	For example:
		Counting the number of invoices of users in a month
		Counting the number of purchases of items in a month
	keylist: ['CustomerID'] or ['InvoiceNo'] or ['CustomerID', 'InvoiceNo']
	data_name and sum_metric:
		data_name = 'InvoiceNo', sum_metric='count': Counting the number of invoice of all users/products
		data_name = 'Quantity', sum_metric='mean': average number of quantity that products are purchased
	'''
	return transaction_df.groupby(keylist).agg(agg_metric)[data_name].reset_index(name=value_name)

def _get_HighLevelFeature(transaction_df, key1, key2, data_name, agg_metric_key1='mean', agg_metric_key2='mean', value_name = 'value'):
	'''
	Getting summary of the information of key1 from list of values (data_name) of key2 in transaction
	For example: 
		key1='user_id' key2='item_id' data_name='invoicecount'
		u00                 iA          13
		u00                 iB          20
		u01                 iA          18
	Output
		key1='user_id'    colum_name=avg_byitem_invoicecount
		u00                 16.5
		u01                 18.0
	'''
	stat_key2 = _get_StatFeature(transaction_df, key2, data_name, agg_metric_key2, value_name=data_name)
	df_tmp = transaction_df[key1 + key2].drop_duplicates()    
	return df_tmp.merge(stat_key2, on = key2).groupby(key1).agg(agg_metric_key1)[data_name].reset_index(name=value_name)

def generate_features(data, keylist = ['CustomerID'], StatFeature_Define = {}, HighLevelFeature_Define = {}):
	df = data[keylist].drop_duplicates()
	for f in StatFeature_Define:
		feature_df = _get_StatFeature(data, keylist=f['key'], data_name=f['data'], agg_metric=f['agg_metric'], value_name=f['value_name'])
		df = df.merge(feature_df, on = keylist)
	for f in HighLevelFeature_Define:
		feature_df = _get_HighLevelFeature(data, f['key1'], f['key2'], f['data'], f['agg_metric_1'], f['agg_metric_2'], f['value_name'])
		df = df.merge(feature_df, on = keylist)        
	return df         

In [237]:
StatFeature_Define = [ \
{'key': ['CustomerID', 'Customer_Age'], 'data':'InvoiceDate_Range', 'agg_metric':'nunique', 'value_name':'NumInvoice'},\
{'key': ['CustomerID', 'Customer_Age'], 'data':'TotalPurchase', 'agg_metric':'sum', 'value_name':'TotalPurchase'}, \
{'key': ['CustomerID', 'Customer_Age'], 'data':'TotalPurchase', 'agg_metric':'min', 'value_name':'MinOrder'}, \
{'key': ['CustomerID', 'Customer_Age'], 'data':'TotalPurchase', 'agg_metric':'max', 'value_name':'MaxOrder'}, \
# {'key':'CustomerID', 'data':'TotalPurchase', 'agg_metric':'std', 'value_name':'StdOrder'}, \
{'key': ['CustomerID', 'Customer_Age'], 'data':'Recency', 'agg_metric':'min', 'value_name':'LastPurchase'}, \
# {'key':'key', 'data':'Recency', 'agg_metric':'max', 'value_name':'ActiveDays'}, \
]
HighLevelFeature_Define = [
{'key1': ['CustomerID', 'Customer_Age'], 'key2':['InvoiceDate_Range'], 'data':'TotalPurchase', 'agg_metric_1':'mean', 'agg_metric_2':'sum', 'value_name':'AvgPurchasePerInvoice'}, \
{'key1':['CustomerID', 'Customer_Age'], 'key2':['InvoiceDate_Range'], 'data':'Quantity', 'agg_metric_1':'mean', 'agg_metric_2':'sum', 'value_name':'AvgQuantityPerInvoice'}, \
{'key1': ['CustomerID', 'Customer_Age'], 'key2':['InvoiceDate_Range'], 'data':'StockCode', 'agg_metric_1':'mean', 'agg_metric_2':'count', 'value_name':'AvgProductPerInvoice'}]

In [281]:
def _generate_behavior_byage(df, keylist, StatFeature_Define, HighLevelFeature_Define):
    stat_features = generate_features(df, keylist = keylist, StatFeature_Define=StatFeature_Define, HighLevelFeature_Define=HighLevelFeature_Define)
    max_age = stat_features[keylist[1]].max()
    stat_features['LastPurchase'] = stat_features.apply(lambda r: r['LastPurchase'] - (max_age - r[keylist[1]]) * rangelen, axis = 1)
    
    order_features = cal_OrderDateFeature(df, keylist = keylist)
    agebehavior_features = pd.merge(stat_features, order_features, how = 'inner', on = keylist)
    return agebehavior_features

In [282]:
def create_profile_byage(df, data_train):
    keylist = ['CustomerID', 'Customer_Age']
    tmp = df.merge(data_train[keylist + ['InvoiceDate_Range']], on = ['CustomerID', 'InvoiceDate_Range'])
    profile_features = _generate_behavior_byage(tmp, keylist, StatFeature_Define = StatFeature_Define, HighLevelFeature_Define = HighLevelFeature_Define)
    
    feature_names = list(set(profile_features.columns) - set(['CustomerID', 'Customer_Age']))
    profile_features = profile_features[keylist + feature_names]
    profile_features['Customer_Age'] = np.array(profile_features['Customer_Age']).astype(int)
    
    return profile_features

In [283]:
profile_features = create_profile_byage(df, data_train)
profile_features.head()

Unnamed: 0,CustomerID,Customer_Age,AvgProductPerInvoice,TotalPurchase,AvgQuantityPerInvoice,AvgPurchasePerInvoice,OrderMomentum,MinOrder,MaxOrder,AvgTimeBetweenOrder,LastPurchase,NumInvoice
0,18097,1,9261,474.66,149440,260331.15,0.153529,20.16,87.0,10.465475,10,1
1,16656,1,9261,310.28,149440,260331.15,0.153529,19.8,118.8,10.465475,10,1
2,13094,1,9261,152.64,149440,260331.15,0.333333,76.32,76.32,3.0,7,1
3,17315,1,9261,521.37,149440,260331.15,0.0625,0.95,91.8,16.0,-6,1
4,16255,1,9261,299.6,149440,260331.15,0.153529,0.95,30.24,10.465475,10,1


## 2.4 Generate Groups of Keys

- Training: Key = (CustomerID, Customer_AgeRange)
            Groups = clustering based on ltv_rate

- Testing: Key = (CustomerID, current Age)
            Groups = predicting based on profile
            
- Data: (Keys, Groups, Profile)

In [266]:
def _get_clusters(arr, num_clusters = 3):
    clusters = compute_clusters(arr, num_clusters = num_clusters)
#     sizes, silhouette = visualize_clusters(arr, clusters, pair_plot = False, visualize_tsne = False, visualize_pca = False)    
#     print ('Silhouette score: ', silhouette)
    return clusters

In [267]:
ltv_lograte = np.log(data_train['Next_LTV'] / data_train['LTV'])
arr = np.array(ltv_lograte).reshape(-1, 1)
profile_features['group_ltvrate'] = _get_clusters(arr, num_clusters = 3)

arr = np.array(np.log(data_train['LTV'])).reshape(-1, 1)
profile_features['group_ltv'] = _get_clusters(arr, num_clusters = 3)

arr = np.hstack([np.array(np.log(data_train['LTV'])).reshape(-1, 1), np.array(ltv_lograte).reshape(-1, 1)])
profile_features['group_ltv+rate'] = _get_clusters(arr, num_clusters = 3)
profile_features.head()

Unnamed: 0,CustomerID,Customer_Age,AvgProductPerInvoice,TotalPurchase,AvgQuantityPerInvoice,AvgPurchasePerInvoice,OrderMomentum,MinOrder,MaxOrder,AvgTimeBetweenOrder,LastPurchase,NumInvoice,group_ltvrate,group_ltv,group_ltv+rate
0,18097,1,9261,474.66,149440,260331.15,0.153529,20.16,87.0,10.465475,10,1,0,0,1
1,16656,1,9261,310.28,149440,260331.15,0.153529,19.8,118.8,10.465475,10,1,1,0,0
2,13094,1,9261,152.64,149440,260331.15,0.333333,76.32,76.32,3.0,7,1,2,0,2
3,17315,1,9261,521.37,149440,260331.15,0.0625,0.95,91.8,16.0,-6,1,2,0,2
4,16255,1,9261,299.6,149440,260331.15,0.153529,0.95,30.24,10.465475,10,1,1,0,0


# 3. Feature impacts

In [268]:
def cal_featureimpact(target, data, features = ['InvoiceDate_Range', 'Customer_Age'], log_mode = True):
    fi_obj = FeatureImpact()
    tmp = data[features].copy()
    if log_mode:
        f = np.log(data[features]+1e-6)
        f = (f - np.min(f))/(np.max(f) - np.min(f))
        tmp[features] = f
    tmp['target'] = np.array(target)
    feature_impacts = fi_obj.cal_featureimpact(tmp, 'target', features)
    fi_df = fi_obj.parsing_featureimpact(feature_impacts).iloc[: , 1:]
    return fi_df[fi_df['Summary_Metric'].isin(['delta_cor', 'delta_cor_local'])].sort_values(by = ['Summary_Metric', 'Impact'], ascending = [1, 0])

In [269]:
cal_featureimpact(data_train['LTV'], data = data_train, log_mode = False)



Unnamed: 0,Feature,Summary_Metric,Impact
9,Customer_Age,delta_cor,0.105092
4,InvoiceDate_Range,delta_cor,0.01665
8,Customer_Age,delta_cor_local,0.078169
3,InvoiceDate_Range,delta_cor_local,0.012175


In [270]:
cal_featureimpact(data_train['Next_LTV']/data_train['LTV'], data = data_train, log_mode = False)



Unnamed: 0,Feature,Summary_Metric,Impact
9,Customer_Age,delta_cor,0.094092
4,InvoiceDate_Range,delta_cor,0.085294
3,InvoiceDate_Range,delta_cor_local,0.063192
8,Customer_Age,delta_cor_local,0.030747


In [344]:
profile_features.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4590 entries, 0 to 4589
Data columns (total 16 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   CustomerID             4590 non-null   object 
 1   Customer_AgeRange      4590 non-null   int64  
 2   AvgTimeBetweenOrder    4590 non-null   float64
 3   ActiveDays             4590 non-null   int64  
 4   AvgQuantityPerInvoice  4590 non-null   int64  
 5   MinOrder               4590 non-null   float64
 6   LastPurchase           4590 non-null   int64  
 7   AvgPurchasePerInvoice  4590 non-null   float64
 8   AvgProductPerInvoice   4590 non-null   int64  
 9   OrderMomentum          4590 non-null   float64
 10  NumInvoice             4590 non-null   int64  
 11  MaxOrder               4590 non-null   float64
 12  TotalPurchase          4590 non-null   float64
 13  group_ltvrate          4590 non-null   int32  
 14  group_ltv              4590 non-null   int32  
 15  grou

In [345]:
feature_col = list(set(profile_features.columns) - set(['CustomerID', 'Customer_Age', 'TotalPurchase', 'group_ltvrate', 'group_ltv', 'group_ltv+rate']))
cal_featureimpact(data_train['LTV'], data = profile_features, features = feature_col, log_mode = False)



Unnamed: 0,Feature,Summary_Metric,Impact
24,LastPurchase,delta_cor,0.111665
9,ActiveDays,delta_cor,0.105936
19,MinOrder,delta_cor,0.100072
34,AvgProductPerInvoice,delta_cor,0.092081
14,AvgQuantityPerInvoice,delta_cor,0.084178
29,AvgPurchasePerInvoice,delta_cor,0.083273
49,MaxOrder,delta_cor,0.047832
4,AvgTimeBetweenOrder,delta_cor,0.008458
39,OrderMomentum,delta_cor,0.007366
44,NumInvoice,delta_cor,


# 4. Training data

In [12]:
data_train[data_train.CustomerID == '13089'].reset_index().iloc[:, 1:]

Unnamed: 0,InvoiceDate_Range,CustomerID,Customer_Age,Customer_NextAge,LTV,Next_LTV
0,1,13089,1,2,5953.21,4938.83
1,2,13089,2,3,4938.83,4665.58
2,3,13089,3,4,4665.58,2786.65
3,4,13089,4,5,2786.65,5065.12
4,5,13089,5,6,5065.12,4466.8
5,6,13089,6,7,4466.8,3747.98
6,7,13089,7,8,3747.98,3470.38
7,8,13089,8,9,3470.38,3170.16
8,9,13089,9,10,3170.16,7320.7
9,10,13089,10,11,7320.7,2869.68


In [273]:
profile_features[profile_features.CustomerID == '13089'].reset_index().iloc[:, 1:]

Unnamed: 0,CustomerID,Customer_Age,AvgProductPerInvoice,TotalPurchase,AvgQuantityPerInvoice,AvgPurchasePerInvoice,OrderMomentum,MinOrder,MaxOrder,AvgTimeBetweenOrder,LastPurchase,NumInvoice,group_ltvrate,group_ltv,group_ltv+rate
0,13089,1,9261,4938.83,149440,260331.15,0.210464,4.56,419.4,4.535714,-15,1,2,1,0
1,13089,2,7974,4665.58,127943,217646.66,0.180575,5.04,788.4,5.028571,-10,1,0,2,2
2,13089,3,8858,2786.65,159718,267466.68,0.133136,5.04,122.4,7.5,-9,1,0,1,0
3,13089,4,8719,5065.12,152679,221875.01,0.295652,8.5,262.8,3.238095,-13,1,0,0,2
4,13089,5,11627,4466.8,192104,357079.7,0.248798,5.04,394.2,3.942857,-10,1,0,0,1
5,13089,6,11638,3747.98,198997,339359.38,0.144,5.04,106.2,6.3,-10,1,0,0,1
6,13089,7,11486,3470.38,175760,300562.86,0.18738,5.04,162.24,4.9,-9,1,0,1,0
7,13089,8,12816,3170.16,247089,406776.1,0.128096,5.04,232.0,7.5,-7,1,0,0,1
8,13089,9,13413,7320.7,245497,450870.44,0.103844,5.04,177.6,7.7,-15,1,0,1,0
9,13089,10,17814,2869.68,264865,498462.51,0.184922,5.04,207.36,5.2,-9,1,0,0,1


In [280]:
df[(df.CustomerID == '13089') & (df.Recency>0) & (df.Recency<30)]['InvoiceDate_Range'].unique()

array([11, 12])

In [258]:
data_train.to_csv('../data/LTV/data_train.csv', index=False)

In [259]:
profile_features.to_csv('../data/LTV/profile_features.csv', index=False)