# In [9]:
import pandas as pd
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
class RFM:
    """Recency / Frequency / Monetary (RFM) table builder and scorer.

    The methods are meant to be called in this order on one instance:
    ``get_rfm_values`` -> ``calculate_rfm_score`` -> ``remove_outliers``.
    ``plot_boxplot`` can be called with either resulting frame.

    Attributes:
        RFM: Frame indexed by ``UserId`` with the columns ``Recency``,
            ``Frequency`` and ``Monetary`` (plus ``R``, ``F``, ``M`` and
            ``RFM`` once ``calculate_rfm_score`` has run).
        RFM_outlier_free: Rows of ``RFM`` that survive ``remove_outliers``.
    """

    # Snapshot date used for recency when the caller does not supply one.
    _DEFAULT_REFERENCE_DATE = datetime.datetime(2020, 6, 15)

    # The raw metric columns; outlier detection only ever looks at these.
    _METRICS = ('Recency', 'Frequency', 'Monetary')

    def __init__(self):
        self.RFM = pd.DataFrame()
        self.RFM_outlier_free = pd.DataFrame()

    def get_rfm_values(self, data: pd.DataFrame, reference_date=None) -> None:
        """Aggregate transaction rows into one RFM row per user.

        Args:
            data: Frame with the columns ``UserId``, ``TransactionTime``
                (datetime-like), ``TransactionId`` and ``TotalPrice``.
            reference_date: Date that recency is measured against. Anything
                ``pd.Timestamp`` accepts; defaults to 2020-06-15.

        Side effects:
            Stores the result in ``self.RFM`` and prints its first 5 rows.
        """
        if reference_date is None:
            reference_date = self._DEFAULT_REFERENCE_DATE
        reference_date = pd.Timestamp(reference_date)

        # Named aggregation labels the output columns directly instead of
        # relying on the positional order of a renamed column list.
        self.RFM = data.groupby('UserId').agg(
            # Whole days between the reference date and the latest purchase.
            Recency=(
                'TransactionTime',
                lambda dates: (reference_date - dates.max()).days,
            ),
            # Number of rows for the user (NaN ids are counted as well).
            Frequency=('TransactionId', 'size'),
            Monetary=('TotalPrice', 'sum'),
        )
        self.RFM['Recency'] = self.RFM['Recency'].astype(int)
        self.RFM['Frequency'] = self.RFM['Frequency'].astype(int)
        self.RFM['Monetary'] = self.RFM['Monetary'].astype(float)
        print(self.RFM.head(5))

    @staticmethod
    def _quartile_score(values, labels):
        """Return integer quartile labels for *values*.

        ``labels`` lists the score for the lowest quartile first. If the
        quartile edges of *values* are not unique (many tied values),
        ``pd.qcut`` raises ``ValueError``; in that case the values are ranked
        by first occurrence and the ranks are binned instead, so ties are
        split in row order rather than aborting the scoring.
        """
        try:
            binned = pd.qcut(values, 4, labels=labels)
        except ValueError:
            binned = pd.qcut(values.rank(method='first'), 4, labels=labels)
        return binned.astype(int)

    def calculate_rfm_score(self) -> None:
        """Add quartile scores ``R``, ``F``, ``M`` and the combined ``RFM``.

        Score 1 goes to the lowest-recency quartile and to the highest
        frequency / monetary quartiles; score 4 to the opposite ends.
        ``RFM`` is the three digits concatenated as a string, e.g. ``'144'``.

        Side effects:
            Adds four columns to ``self.RFM`` and prints its first 5 rows.
        """
        self.RFM['R'] = self._quartile_score(self.RFM['Recency'], [1, 2, 3, 4])
        self.RFM['F'] = self._quartile_score(self.RFM['Frequency'], [4, 3, 2, 1])
        self.RFM['M'] = self._quartile_score(self.RFM['Monetary'], [4, 3, 2, 1])
        self.RFM['RFM'] = (
            self.RFM['R'].astype(str)
            + self.RFM['F'].astype(str)
            + self.RFM['M'].astype(str)
        )
        print(self.RFM.head(5))

    def remove_outliers(self) -> None:
        """Keep only rows whose three metrics all lie within the IQR fences.

        For each of ``Recency``, ``Frequency`` and ``Monetary`` the fences
        are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, computed on the full
        ``self.RFM``. Values exactly on a fence are kept, so a metric whose
        IQR is 0 does not wipe out every row.

        Side effects:
            Stores the filtered frame in ``self.RFM_outlier_free`` and prints
            how many rows were dropped.
        """
        # Restrict to the numeric metric columns: once the string column
        # 'RFM' exists, DataFrame.quantile on the whole frame fails on
        # pandas versions that do not silently drop non-numeric columns.
        metrics = self.RFM[list(self._METRICS)]
        q1 = metrics.quantile(0.25)
        q3 = metrics.quantile(0.75)
        margin = 1.5 * (q3 - q1)
        lower = q1 - margin
        upper = q3 + margin

        within_fences = (
            metrics.ge(lower, axis='columns') & metrics.le(upper, axis='columns')
        ).all(axis=1)
        self.RFM_outlier_free = self.RFM[within_fences]
        print("Number of outliers removed  = ",
              len(self.RFM) - len(self.RFM_outlier_free))

    def plot_boxplot(self, data, out_free=True):
        """Show one box plot each for Recency, Frequency and Monetary.

        Args:
            data: Frame containing the three metric columns.
            out_free: When true, flier (outlier) markers are hidden; when
                false they are drawn.
        """
        show_fliers = not out_free
        palette = (('Recency', 'red'), ('Frequency', 'Blue'), ('Monetary', 'Green'))
        for column, colour in palette:
            # Pass the series as ``x=`` explicitly: newer seaborn releases
            # treat the first positional argument as ``data``, which changes
            # the orientation of the plot.
            sns.boxplot(x=data[column], color=colour, showfliers=show_fliers)
            plt.show()