In [6]:
import sys
import os
import datetime
#Loading utils for required funcs
%run {os.path.join('..', 'src', 'utils.py')}

## RFM
##### Recency, Frequency, Monetary value (RFM) is a model used in marketing analysis that segments a company's customer base by purchasing patterns or habits. In particular, it evaluates each customer's Recency (how long ago they made a purchase), Frequency (how often they make purchases), and Monetary value (how much money they spend).


In [None]:
# Load the raw dataset from ../data/potentials.csv
rfm_data = pd.read_csv(
    os.path.join("..", "data", "potentials.csv")
)

### Step 1 -> Recency

In [7]:
# Anchor for every "time since ..." feature: the local calendar date at run time
reference_date = datetime.date.today()

In [10]:
# Derive the "time since ..." feature columns relative to reference_date.
def _elapsed_whole_units(timestamps, seconds_per_unit):
    """Return the whole units elapsed between each timestamp and reference_date.

    timestamps       -- values accepted by pd.to_datetime (here: a column of rfm_data)
    seconds_per_unit -- length of one unit in seconds (3600 for hours, 86400 for days)

    Returns a float Series, rounded down to whole units, with NaN wherever
    the timestamp is missing.
    """
    elapsed = pd.to_datetime(reference_date) - pd.to_datetime(timestamps)
    # pandas >= 2.0 rejects ``.astype('timedelta64[h]')`` / ``'timedelta64[D]'``
    # (only s/ms/us/ns resolutions are supported), so convert through seconds.
    return elapsed.dt.total_seconds() // seconds_per_unit


rfm_data['hours_since_last_update'] = _elapsed_whole_units(rfm_data['last_update_at'], 3600)

# Calendar-month difference (year delta * 12 + month delta); the day of the
# month is ignored. The terms are integers, so no rounding is needed.
product_started_at = pd.to_datetime(rfm_data['product_started_at'])
rfm_data['months_since_product_started'] = (
    (reference_date.year - product_started_at.dt.year) * 12
    + (reference_date.month - product_started_at.dt.month)
)

rfm_data['days_since_last_call'] = _elapsed_whole_units(rfm_data['last_call'], 86400)
rfm_data['days_since_last_touch'] = _elapsed_whole_units(rfm_data['last_touch'], 86400)
rfm_data['days_since_last_seen'] = _elapsed_whole_units(rfm_data['last_seen_at'], 86400)

In [12]:
# The raw timestamp columns are no longer needed once the "time since ..."
# features exist. Re-binding (instead of ``inplace=True``) together with
# ``errors='ignore'`` keeps this cell safe to re-run after the columns are gone.
rfm_data = rfm_data.drop(
    columns=['last_update_at', 'product_started_at', 'last_call', 'last_touch', 'last_seen_at'],
    errors='ignore',
)

### Step 2 -> Frequency and Monetary
##### For **Frequency**, we can use the following columns: *lead_count*, *view_count*, *call_count* and *touch_count*. For **Monetary**, the necessary columns are *budget_value* and *current_products_price*.

In [16]:
# Keep only the identifier plus the frequency, monetary and recency inputs.
# ``.copy()`` makes the result an independent frame, so columns assigned to it
# later do not trigger pandas' SettingWithCopyWarning.
rfm_data = rfm_data[[
    'provider_id',
    'lead_count', 'view_count',
    'budget_value', 'current_products_price',
    'touch_count', 'call_count',
    'hours_since_last_update', 'months_since_product_started',
    'days_since_last_call', 'days_since_last_touch', 'days_since_last_seen',
]].copy()

In [17]:
# Preview the first two rows of the frame we will work on
rfm_data.head(n=2)

Unnamed: 0,provider_id,lead_count,view_count,budget_value,current_products_price,touch_count,call_count,hours_since_last_update,months_since_product_started,days_since_last_call,days_since_last_touch,days_since_last_seen
0,68,834,10998,2250.0,0.0,18,9,1211.0,10,51.0,51.0,42.0
1,88,196,1513,875.0,0.0,12,5,2746.0,4,43.0,43.0,42.0


In [None]:
def create_recency_val(data=None,
                       day_columns=('days_since_last_call', 'days_since_last_touch', 'days_since_last_seen'),
                       hour_columns=('hours_since_last_update',)):
    """
    Add a 'Recency' column built from the recency based columns.

    Recency is the number of days since the most recent recorded activity:
    the row-wise minimum over ``day_columns`` and over ``hour_columns``
    converted to days. Missing values are skipped; a row whose inputs are all
    missing gets NaN.

    NOTE(review): taking the minimum (the latest interaction) and leaving out
    'months_since_product_started' are assumptions -- confirm this is the
    intended recency definition.

    data         -- DataFrame to update in place; defaults to the notebook's rfm_data
    day_columns  -- columns already expressed in days
    hour_columns -- columns expressed in hours (divided by 24 before comparing)

    Returns None; the target frame is modified in place.
    """
    frame = rfm_data if data is None else data
    elapsed_days = frame[list(day_columns)].copy()
    for column in hour_columns:
        # bring hour based columns onto the same unit as the day based ones
        elapsed_days[column] = frame[column] / 24
    frame['Recency'] = elapsed_days.min(axis=1)
    return

In [None]:
def create_frequency_val(data=None,
                         columns=('lead_count', 'view_count', 'call_count', 'touch_count')):
    """
    Add a 'Frequency' column built from the frequency based columns.

    Frequency is the row-wise sum of ``columns``; missing values are skipped
    by the sum, i.e. they count as 0.

    NOTE(review): an unweighted sum lets the largest-valued column dominate
    the score -- confirm whether the columns should be weighted or scaled.

    data    -- DataFrame to update in place; defaults to the notebook's rfm_data
    columns -- count columns to add up

    Returns None; the target frame is modified in place.
    """
    frame = rfm_data if data is None else data
    frame['Frequency'] = frame[list(columns)].sum(axis=1)
    return

In [None]:
def create_monetary_val(data=None,
                        columns=('budget_value', 'current_products_price')):
    """
    Add a 'Monetary' column built from the monetary based columns.

    Monetary is the row-wise sum of ``columns``; missing values are skipped
    by the sum, i.e. they count as 0.

    NOTE(review): adding budget and product price assumes both are amounts in
    the same currency and period -- confirm.

    data    -- DataFrame to update in place; defaults to the notebook's rfm_data
    columns -- monetary columns to add up

    Returns None; the target frame is modified in place.
    """
    frame = rfm_data if data is None else data
    frame['Monetary'] = frame[list(columns)].sum(axis=1)
    return

In [None]:
# The score builders have to run first: they add the 'Recency', 'Frequency'
# and 'Monetary' columns that are selected below.
create_recency_val()
create_frequency_val()
create_monetary_val()

#now let's create our final RFM dataset to evaluate :
# ``.copy()`` gives an independent frame so later column assignments do not
# trigger pandas' SettingWithCopyWarning.
rfm_data = rfm_data[["provider_id","Recency","Monetary","Frequency"]].copy()

In [None]:
# Quartile cut points (25 %, 50 %, 75 %) for every column of rfm_data
quantiles = rfm_data.quantile([0.25, 0.5, 0.75])
quantiles

In [None]:
# Show the cut points as a nested {column: {quantile: value}} dict
quantiles.to_dict(orient="dict")

In [None]:
def RScore(x,p,d):
    """
    Map a recency value to a quartile score from 4 down to 1.

    x -- the value to score
    p -- key of the column whose cut points are used
    d -- cut points, indexed as d[p][0.25], d[p][0.50] and d[p][0.75]

    Smaller values score higher: 4 up to the 25 % cut point, 3 up to the
    50 % one, 2 up to the 75 % one, and 1 otherwise (this also covers a value
    that compares False against every cut point, such as NaN).
    """
    cut_points = d[p]
    for score, level in ((4, 0.25), (3, 0.50), (2, 0.75)):
        if x <= cut_points[level]:
            return score
    return 1
def FMScore(x,p,d):
    """
    Map a frequency or monetary value to a quartile score from 1 up to 4.

    x -- the value to score
    p -- key of the column whose cut points are used
    d -- cut points, indexed as d[p][0.25], d[p][0.50] and d[p][0.75]

    Larger values score higher: 1 up to the 25 % cut point, 2 up to the
    50 % one, 3 up to the 75 % one, and 4 otherwise (this also covers a value
    that compares False against every cut point, such as NaN).
    """
    cut_points = d[p]
    for score, level in ((1, 0.25), (2, 0.50), (3, 0.75)):
        if x <= cut_points[level]:
            return score
    return 4

In [None]:
# Work on a copy: a plain assignment would only alias rfm_data, so adding the
# quartile columns below would silently modify rfm_data as well.
rfm_segmentation = rfm_data.copy()
# Recency uses the inverted scale (RScore); frequency and monetary share FMScore.
rfm_segmentation['R_Quartile'] = rfm_segmentation['Recency'].apply(RScore, args=('Recency', quantiles))
rfm_segmentation['F_Quartile'] = rfm_segmentation['Frequency'].apply(FMScore, args=('Frequency', quantiles))
rfm_segmentation['M_Quartile'] = rfm_segmentation['Monetary'].apply(FMScore, args=('Monetary', quantiles))

In [None]:
# Preview the first five rows with the quartile columns added
rfm_segmentation.head(n=5)

In [None]:
# Concatenate the three quartile values (R, then F, then M) into one string label
rfm_segmentation['RFMScore'] = (
    rfm_segmentation['R_Quartile'].map(str)
    + rfm_segmentation['F_Quartile'].map(str)
    + rfm_segmentation['M_Quartile'].map(str)
)
rfm_segmentation.head()

In [None]:
# Numeric variant of the score: the plain sum of the three quartile values
rfm_segmentation['RFMScore_num'] = (
    rfm_segmentation['R_Quartile']
    + rfm_segmentation['F_Quartile']
    + rfm_segmentation['M_Quartile']
)
rfm_segmentation.head()

In [None]:
# Number of distinct RFM score labels present in the data
rfm_segmentation['RFMScore'].nunique()

In [None]:
#Saving the feature engineering results as CSV file
# Built with os.path.join, like the input path, because ``Path`` is not
# imported in this notebook's import cell.
rfm_segmentation.to_csv(os.path.join("..", "data", "rfm_segmentation.csv"), index=False)