In [1]:
import pandas as pd
import numpy as np
import sklearn
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly.express as px

  import pandas.util.testing as tm


In [2]:
# Load data
df = pd.read_csv('transactions.csv')

In [3]:
df.dtypes

trans_id        int64
trans_dt       object
cust_id         int64
prod_id         int64
item_qty        int64
item_price    float64
dtype: object

In [None]:
### RFM Model

In [4]:
#find min and max date
df['trans_dt'].min()
df['trans_dt'].max()

'2016-06-18 17:51:00'

In [5]:
import datetime as dt
NOW = dt.datetime(2016,6,19)
df['trans_dt'] = pd.to_datetime(df['trans_dt'])

In [6]:
df['revenue'] = df['item_qty'] * df['item_price']

In [7]:
rfmTable = df.groupby('cust_id').agg({'trans_dt': lambda x: (NOW - x.max()).days,
                                        'trans_id': lambda x: len(x),  
                                        'revenue': lambda x: x.sum()})

rfmTable['trans_dt'] = rfmTable['trans_dt'].astype(int)
rfmTable.rename(columns={'trans_dt': 'recency', 
                         'trans_id': 'frequency', 
                         'revenue': 'monetary_value'}, inplace=True)
rfmTable.head()

Unnamed: 0_level_0,recency,frequency,monetary_value
cust_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4402,168,1,24.99
11248,168,6,194.75
12064,168,2,93.98
15088,168,5,231.95
66706,72,3,75.78


In [8]:
#split quantiles
quantiles = rfmTable.quantile(q=[0.2,0.4,0.6,0.8])
quantiles = quantiles.to_dict()

In [18]:
segmented_rfm = rfmTable

In [19]:
def RScore(x,p,d):
    if x <= d[p][0.20]:
        return 5
    elif x <= d[p][0.40]:
        return 4
    elif x <= d[p][0.60]: 
        return 3
    elif x <= d[p][0.80]: 
        return 2
    else:
        return 1
    
def FMScore(x,p,d):
    if x <= d[p][0.20]:
        return 1
    elif x <= d[p][0.40]:
        return 2
    elif x <= d[p][0.60]: 
        return 3
    elif x <= d[p][0.80]: 
        return 4
    else:
        return 5

In [20]:
#segment the data
segmented_rfm['r_quartile'] = segmented_rfm['recency'].apply(RScore, args=('recency',quantiles,))
segmented_rfm['f_quartile'] = segmented_rfm['frequency'].apply(FMScore, args=('frequency',quantiles,))
segmented_rfm['m_quartile'] = segmented_rfm['monetary_value'].apply(FMScore, args=('monetary_value',quantiles,))
segmented_rfm.head()

Unnamed: 0_level_0,recency,frequency,monetary_value,r_quartile,f_quartile,m_quartile,RFMScore
cust_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4402,168,1,24.99,4,1,1,211
11248,168,6,194.75,4,5,5,255
12064,168,2,93.98,4,3,4,234
15088,168,5,231.95,4,5,5,255
66706,72,3,75.78,5,4,3,143


In [21]:
#add rfm
segmented_rfm['RFMScore'] = segmented_rfm.r_quartile.map(str) + segmented_rfm.f_quartile.map(str) + segmented_rfm.m_quartile.map(str)
segmented_rfm.head()

Unnamed: 0_level_0,recency,frequency,monetary_value,r_quartile,f_quartile,m_quartile,RFMScore
cust_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4402,168,1,24.99,4,1,1,411
11248,168,6,194.75,4,5,5,455
12064,168,2,93.98,4,3,4,434
15088,168,5,231.95,4,5,5,455
66706,72,3,75.78,5,4,3,543


In [22]:
#output
from datetime import date
from datetime import datetime
from datetime import timedelta 

current_time = datetime.now()
info = current_time.strftime('%m%d%Y')
output_filename = ("rfm_" + str(info[:4]) + str(info[6:]) + ".csv")
segmented_rfm.to_csv(output_filename, sep=',', header=True)