In [59]:
import pandas as pd

data = pd.read_csv("rfm_xmas19.txt", parse_dates=["trans_date"])
#parse_dates allows us to immediately set the appropriate date type to the selected columns
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125000 entries, 0 to 124999
Data columns (total 3 columns):
customer_id    125000 non-null object
trans_date     125000 non-null datetime64[ns]
tran_amount    125000 non-null int64
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 2.9+ MB
None


In [60]:
data.head()

Unnamed: 0,customer_id,trans_date,tran_amount
0,FM5295,2017-11-11,35
1,FM4768,2019-12-15,39
2,FM2122,2017-11-26,52
3,FM1217,2016-08-16,99
4,FM1850,2018-08-20,78


#### Last Purchase by Custumer ID

In [61]:
group_by_customer=data.groupby(['customer_id'])
best_churn= data[['customer_id','trans_date']].groupby(['customer_id']).max()

In [62]:
best_churn.head()

Unnamed: 0_level_0,trans_date
customer_id,Unnamed: 1_level_1
FM1112,2019-10-14
FM1113,2019-11-09
FM1114,2019-11-12
FM1115,2019-12-05
FM1116,2019-05-25


#### Customers Who Churned 

In [63]:
import datetime
cutoff_day = datetime.datetime(2019, 10, 16) ###assigning it datetimee
print(type(cutoff_day))
best_churn.info()
best_churn["trans_date"]=pd.to_datetime(best_churn["trans_date"])  ##converting it datetimee
best_churn['churned']=best_churn["trans_date"].apply(lambda x: 1 if (x-cutoff_day).days<0 else 0)

<class 'datetime.datetime'>
<class 'pandas.core.frame.DataFrame'>
Index: 6889 entries, FM1112 to FM9000
Data columns (total 1 columns):
trans_date    6889 non-null datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 107.6+ KB


In [64]:
best_churn.head()

Unnamed: 0_level_0,trans_date,churned
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1
FM1112,2019-10-14,1
FM1113,2019-11-09,0
FM1114,2019-11-12,0
FM1115,2019-12-05,0
FM1116,2019-05-25,1


#### Ranking Mechanism to find the best customer 

1. Amount spent and 
2. number of purchases made, and 
The scores should be the same weight. This means that a customer who spent a lot is worth as much as someone who made many purchases.
3. Since the values are different in scale, it will have an impact on the ranking, so using the min-max scaling is the solution. 



In [65]:
best_churn["nr_of_transactions"] = group_by_customer.size()
best_churn["amount_spent"] = group_by_customer['tran_amount'].sum()
best_churn.drop(columns=['trans_date'],inplace=True)
best_churn.sample(5)

Unnamed: 0_level_0,churned,nr_of_transactions,amount_spent
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FM2877,0,24,1677
FM7207,0,12,604
FM1548,0,23,1755
FM4887,0,23,1516
FM2058,0,26,1776


In [67]:
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
def normalize(cols):
   mm_scaler = preprocessing.MinMaxScaler()
   for col in cols:
     best_churn['scaled_{}'.format(col)]= mm_scaler.fit_transform(best_churn[[col]])
   return best_churn

In [69]:
best_churn=normalize(['nr_of_transactions','amount_spent'])

In [70]:
best_churn.head()

Unnamed: 0_level_0,churned,nr_of_transactions,amount_spent,scaled_nr_of_transactions,scaled_amount_spent
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FM1112,1,15,1012,0.314286,0.309986
FM1113,0,20,1490,0.457143,0.481681
FM1114,0,19,1432,0.428571,0.460848
FM1115,0,22,1659,0.514286,0.542385
FM1116,1,13,857,0.257143,0.25431


#### Scaled weighted score for number of transactions and amount spent

In [71]:
best_churn['score']=(best_churn['scaled_nr_of_transactions']*0.5+best_churn['scaled_amount_spent']*0.5)*100

In [72]:
best_churn.head()

Unnamed: 0_level_0,churned,nr_of_transactions,amount_spent,scaled_nr_of_transactions,scaled_amount_spent,score
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FM1112,1,15,1012,0.314286,0.309986,31.213567
FM1113,0,20,1490,0.457143,0.481681,46.941195
FM1114,0,19,1432,0.428571,0.460848,44.470956
FM1115,0,22,1659,0.514286,0.542385,52.833539
FM1116,1,13,857,0.257143,0.25431,25.57266


In [73]:
best_churn.sort_values(by='score', ascending=False,inplace=True)

In [74]:
best_churn.head()

Unnamed: 0_level_0,churned,nr_of_transactions,amount_spent,scaled_nr_of_transactions,scaled_amount_spent,score
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FM4424,0,39,2933,1.0,1.0,100.0
FM4320,1,38,2647,0.971429,0.89727,93.434934
FM3799,1,36,2513,0.914286,0.849138,88.171182
FM5109,0,35,2506,0.885714,0.846624,86.616892
FM3805,1,35,2453,0.885714,0.827586,85.665025


Identify the following based on the business goal
1. number of coupouns and 
2. the value for each of the coupon and 
3. who should be getting it

In [76]:
coupon = data['tran_amount'].mean()*0.30
nr_of_customers = 1000//coupon

In [77]:
nr_of_customers

51.0

#### so final we have a $20 coupoun for 50 churned customers

In [79]:
top_50_churned=best_churn.loc[best_churn["churned"] == 1].head(50)

In [80]:
top_50_churned

Unnamed: 0_level_0,churned,nr_of_transactions,amount_spent,scaled_nr_of_transactions,scaled_amount_spent,score
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FM4320,1,38,2647,0.971429,0.89727,93.434934
FM3799,1,36,2513,0.914286,0.849138,88.171182
FM3805,1,35,2453,0.885714,0.827586,85.665025
FM5752,1,33,2612,0.828571,0.884698,85.663485
FM4074,1,34,2462,0.857143,0.830819,84.398091
FM1215,1,35,2362,0.885714,0.794899,84.030686
FM2620,1,35,2360,0.885714,0.794181,83.994766
FM1580,1,33,2329,0.828571,0.783046,80.58087
FM2951,1,32,2382,0.8,0.802083,80.104167
FM3163,1,31,2413,0.771429,0.813218,79.232348
