## Данные:
Датасет заказов онлайн-магазина

InvoiceNo — номер заказа\
CustomerCode — идентификатор пользователя\
InvoiceDate — дата заказа\
Amount — сумма заказа

## Задачи:
Провести RFM-анализ


In [1]:
import pandas as pd
import numpy as np

In [3]:
# Read the identifier columns as strings up front. The original load emitted a
# warning (presumably pandas' mixed-type DtypeWarning - confirm), and customer
# codes carry leading zeros (e.g. '02213019'): if pandas infers the type chunk by
# chunk, the same code can end up as '02213019' in one chunk and 2213019 in
# another, which would split one customer into two groups later on.
df = pd.read_csv('./RFM_ht_data.csv', dtype={'InvoiceNo': str, 'CustomerCode': str})

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Parse the order date column into pandas datetimes.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

In [5]:
# Treat the customer identifier as text, not as a number.
df['CustomerCode'] = df['CustomerCode'].astype(str)

In [6]:
# Check the column dtypes after the conversions above.
df.dtypes

InvoiceNo               object
CustomerCode            object
InvoiceDate     datetime64[ns]
Amount                 float64
dtype: object

In [7]:
# Preview the orders table (pandas truncates the display to the first and last rows).
df

Unnamed: 0,InvoiceNo,CustomerCode,InvoiceDate,Amount
0,C0011810010001,19067290,2020-09-01,1716.00
1,C0011810010017,13233933,2020-09-01,1489.74
2,C0011810010020,99057968,2020-09-01,151.47
3,C0011810010021,80007276,2020-09-01,146.72
4,C0011810010024,13164076,2020-09-01,104.00
...,...,...,...,...
332725,S0081810310459,14092500,2020-09-30,3801.87
332726,S0081810310461,99065678,2020-09-30,5769.88
332727,S0081810310462,19029918,2020-09-30,736.88
332728,S0081810310463,13020033,2020-09-30,1475.20


In [8]:
# Latest order date in the dataset; recency is measured relative to it.
last_date = df['InvoiceDate'].max()

In [9]:
# Show the reference date (the maximum InvoiceDate computed above).
last_date

Timestamp('2020-09-30 00:00:00')

In [10]:

# Build the per-customer RFM base table with pandas' built-in aggregations
# instead of per-group Python lambdas: the numbers are the same, but they are
# computed in one vectorised pass rather than one Python call per customer and
# column, and the result dtype no longer depends on how pandas coerces an
# int-returning lambda applied to a datetime column.
rfmTable = df.groupby('CustomerCode').agg({
    'InvoiceDate': 'max',  # date of the customer's most recent order
    'InvoiceNo': 'size',   # Frequency: number of order rows (equivalent to len(x))
    'Amount': 'sum',       # Monetary value: total amount over all orders
})

# Recency: whole days between the latest order date in the dataset and the
# customer's own latest order.
rfmTable['InvoiceDate'] = (last_date - rfmTable['InvoiceDate']).dt.days.astype(int)

# Rename by reassignment rather than inplace=True.
rfmTable = rfmTable.rename(columns={'InvoiceDate': 'recency',
                                    'InvoiceNo': 'frequency',
                                    'Amount': 'monetary_value'})

In [11]:
rfmTable #base RFM metrics (recency, frequency, monetary_value) prepared per customer

Unnamed: 0_level_0,recency,frequency,monetary_value
CustomerCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
02213019,19,1,1609.20
02213042,22,3,9685.48
02213071,29,1,415.00
02213088,23,1,305.00
02213092,25,1,1412.88
...,...,...,...
99099927,10,1,961.10
99099936,0,1,1521.78
99099959,8,2,1444.56
99099963,19,1,3018.91


In [12]:
# Functions that assign RFM scores to the metrics
def RClass(value, parameter_name, quantiles_table):
    """Return a 1-4 score where smaller values get the lower score.

    Args:
        value: the metric value to score.
        parameter_name: key/column of ``quantiles_table`` holding the thresholds.
        quantiles_table: mapping indexed as ``table[parameter_name][q]`` for
            q in 0.25, 0.50 and 0.75.

    Returns:
        1 if ``value`` is at or below the 0.25 threshold, 2 if at or below the
        0.50 threshold, 3 if at or below the 0.75 threshold, otherwise 4.
    """
    thresholds = quantiles_table[parameter_name]
    # Walk the thresholds in ascending order; the first one that bounds the
    # value from above decides the score.
    for score, level in enumerate((0.25, 0.50, 0.75), start=1):
        if value <= thresholds[level]:
            return score
    return 4


def FMClass(value, parameter_name, quantiles_table):
    """Return a 4-1 score where larger values get the lower score.

    Args:
        value: the metric value to score.
        parameter_name: key/column of ``quantiles_table`` holding the thresholds.
        quantiles_table: mapping indexed as ``table[parameter_name][q]`` for
            q in 0.25, 0.50 and 0.75.

    Returns:
        4 if ``value`` is at or below the 0.25 threshold, 3 if at or below the
        0.50 threshold, 2 if at or below the 0.75 threshold, otherwise 1.
    """
    thresholds = quantiles_table[parameter_name]
    # Thresholds are checked in ascending order while the score counts down.
    for score, level in zip((4, 3, 2), (0.25, 0.50, 0.75)):
        if value <= thresholds[level]:
            return score
    return 1


In [13]:
# Work on a copy: plain assignment would only alias rfmTable, so the score
# columns added to rfmSegmentation later would also show up in rfmTable and
# re-running the quantile cell would then pick them up.
rfmSegmentation = rfmTable.copy()

In [14]:
# Quartile boundaries for the three RFM metrics only. Selecting the columns
# explicitly keeps this cell re-runnable even if score/label columns have since
# been added to the table (non-numeric columns are not silently dropped by
# DataFrame.quantile in recent pandas versions).
quantiles = rfmTable[['recency', 'frequency', 'monetary_value']].quantile(q=[0.25, 0.5, 0.75])
quantiles  # quartile boundaries per metric

Unnamed: 0,recency,frequency,monetary_value
0.25,2.0,1.0,765.0
0.5,8.0,2.0,1834.48
0.75,16.0,3.0,4008.84


In [15]:

# Score each metric against its quartile boundaries.
rfmSegmentation['R_Quartile'] = rfmSegmentation['recency'].apply(
    lambda days: RClass(days, 'recency', quantiles)
)
rfmSegmentation['F_Quartile'] = rfmSegmentation['frequency'].apply(
    lambda orders: FMClass(orders, 'frequency', quantiles)
)
rfmSegmentation['M_Quartile'] = rfmSegmentation['monetary_value'].apply(
    lambda total: FMClass(total, 'monetary_value', quantiles)
)

# Concatenate the three scores into one label such as '111' or '443'.
rfmSegmentation['RFMClass'] = (
    rfmSegmentation['R_Quartile'].map(str)
    + rfmSegmentation['F_Quartile'].map(str)
    + rfmSegmentation['M_Quartile'].map(str)
)

In [16]:
rfmSegmentation.sort_values('frequency')
#Final dataframe with the RFM scores, shown in ascending order of frequency

Unnamed: 0_level_0,recency,frequency,monetary_value,R_Quartile,F_Quartile,M_Quartile,RFMClass
CustomerCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
02213019,19,1,1609.20,4,4,3,443
18053075,17,1,1565.80,4,4,3,443
18053072,11,1,530.64,3,4,4,344
18053055,26,1,3323.12,4,4,2,442
18053019,15,1,1239.23,3,4,3,343
...,...,...,...,...,...,...,...
99003061,0,90,398759.35,1,1,1,111
19080880,27,99,169930.72,4,1,1,411
13032521,0,106,389309.92,1,1,1,111
13215452,0,113,85334.40,1,1,1,111


In [27]:
# How customers are distributed across the RFM classes (ascending by count).
(
    rfmSegmentation
    .reset_index()
    .groupby('RFMClass', as_index=False)['CustomerCode']
    .count()
    .sort_values('CustomerCode')
)

Unnamed: 0,RFMClass,CustomerCode
51,414,2
35,314,33
3,114,60
19,214,60
55,424,63
...,...,...
16,211,5847
47,344,6593
62,443,6729
0,111,9705
