In [1]:
########Customer Segmentation with RFM Analysis######

In [2]:
#Business Problem

In [3]:
#FLO, an online shoe store, wants to segment its customers and determine marketing strategies according to these segments. 
#To this end, 
#the behaviors of the customers will be defined and groups will be formed according to the clustering in these behaviors.

In [4]:
#The data set consists of information derived from the past shopping behavior of customers
#who made their last purchases from FLO as OmniChannel customers (shopping both online and offline) in 2020-2021.

In [5]:
##The Story of Data Set
#master_id : Unique customer id,
# order_channel : Which channel of the shopping platform is used (Android, ios, Desktop, Mobile),
# last_order_channel : The channel where the most recent purchase was made,
# first_order_date : Date of the customer's first purchase,
# last_order_date :Date of the customer's last purchase,
# last_order_date_online : The date of the last purchase made by the customer on the online platform,
# last_order_date_offline : The date of the last purchase made by the customer on the offline platform,
# order_num_total_ever_online :The total number of purchases made by the customer on the online platform,
# order_num_total_ever_offline :The total number of purchases made by the customer on the offline platform
# customer_value_total_ever_offline : Total fee paid by the customer for offline purchases,
# customer_value_total_ever_online : Total fee paid by the customer for online purchases,
# interested_in_categories_12 : List of categories the customer has shopped in the last 12 months

In [6]:
#Task 1: Understanding and Preparing the Data

In [7]:
#Step1: Read the flo_data_20K.csv data. Make a copy of the dataframe.

In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import datetime as dt
# Display configuration: never truncate columns and print floats with three decimals.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", lambda value: "%.3f" % value)

# NOTE(review): absolute, machine-specific path - the notebook only runs where this file exists.
raw_csv_path = r'C:\Users\esran\Desktop\DATA SET\flo_data_20k.csv'
df_ = pd.read_csv(raw_csv_path)
# Work on a copy so the frame as loaded stays available in df_.
df = df_.copy()

In [9]:
#Step 2: In the dataset
# a. top 10 observations,

In [10]:
# Preview the first 10 rows of the working frame.
df.head(10)

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN]
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]"
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]"
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]"
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR]
5,e585280e-aae1-11e9-a2fc-000d3a38a36f,Desktop,Offline,2018-11-18,2021-03-13,2018-11-18,2021-03-13,1.0,2.0,150.87,49.99,[KADIN]
6,c445e4ee-6242-11ea-9d1a-000d3a38a36f,Android App,Android App,2020-03-04,2020-10-18,2020-10-18,2020-03-04,3.0,1.0,59.99,315.94,[AKTIFSPOR]
7,3f1b4dc8-8a7d-11ea-8ec0-000d3a38a36f,Mobile,Offline,2020-05-15,2020-08-12,2020-05-15,2020-08-12,1.0,1.0,49.99,113.64,[COCUK]
8,cfbda69e-5b4f-11ea-aca7-000d3a38a36f,Android App,Android App,2020-01-23,2021-03-07,2021-03-07,2020-01-25,3.0,2.0,120.48,934.21,"[ERKEK, COCUK, KADIN]"
9,1143f032-440d-11ea-8b43-000d3a38a36f,Mobile,Mobile,2019-07-30,2020-10-04,2020-10-04,2019-07-30,1.0,1.0,69.98,95.98,"[KADIN, AKTIFSPOR]"


In [11]:
# b. Variable names

In [12]:
# Column labels of the working frame.
df.columns

Index(['master_id', 'order_channel', 'last_order_channel', 'first_order_date',
       'last_order_date', 'last_order_date_online', 'last_order_date_offline',
       'order_num_total_ever_online', 'order_num_total_ever_offline',
       'customer_value_total_ever_offline', 'customer_value_total_ever_online',
       'interested_in_categories_12'],
      dtype='object')

In [13]:
# c. descriptive statistics

In [14]:
# Summary statistics, transposed so that each variable is shown as a row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
order_num_total_ever_online,19945.0,3.111,4.226,1.0,1.0,2.0,4.0,200.0
order_num_total_ever_offline,19945.0,1.914,2.063,1.0,1.0,1.0,2.0,109.0
customer_value_total_ever_offline,19945.0,253.923,301.533,10.0,99.99,179.98,319.97,18119.14
customer_value_total_ever_online,19945.0,497.322,832.602,12.99,149.98,286.46,578.44,45220.13


In [15]:
# d.Sum of Null values

In [16]:
# Number of missing values in each column.
df.isnull().sum()

master_id                            0
order_channel                        0
last_order_channel                   0
first_order_date                     0
last_order_date                      0
last_order_date_online               0
last_order_date_offline              0
order_num_total_ever_online          0
order_num_total_ever_offline         0
customer_value_total_ever_offline    0
customer_value_total_ever_online     0
interested_in_categories_12          0
dtype: int64

In [17]:
# e.Variable types, review.

In [18]:
# Data type of each column.
df.dtypes

master_id                             object
order_channel                         object
last_order_channel                    object
first_order_date                      object
last_order_date                       object
last_order_date_online                object
last_order_date_offline               object
order_num_total_ever_online          float64
order_num_total_ever_offline         float64
customer_value_total_ever_offline    float64
customer_value_total_ever_online     float64
interested_in_categories_12           object
dtype: object

In [19]:
# Step 3: Omnichannel means that customers shop from both online and offline platforms.
# Create new variables for the total number of purchases and spending of each customer.

In [20]:
# Omnichannel totals per customer: the online and offline figures added together.
df["total_count"] = df["order_num_total_ever_online"].add(df["order_num_total_ever_offline"])
df["total_price"] = df["customer_value_total_ever_offline"].add(df["customer_value_total_ever_online"])

In [21]:
# Step 4: Examine the variable types. Change the type of variables that express date to date.

In [23]:
# Every column whose name contains "date" is parsed with pd.to_datetime.
date_columns = [name for name in df.columns if "date" in name]
parsed_dates = df[date_columns].apply(pd.to_datetime)
df[date_columns] = parsed_dates
df.dtypes

master_id                                    object
order_channel                                object
last_order_channel                           object
first_order_date                     datetime64[ns]
last_order_date                      datetime64[ns]
last_order_date_online               datetime64[ns]
last_order_date_offline              datetime64[ns]
order_num_total_ever_online                 float64
order_num_total_ever_offline                float64
customer_value_total_ever_offline           float64
customer_value_total_ever_online            float64
interested_in_categories_12                  object
total_count                                 float64
total_price                                 float64
dtype: object

In [24]:
# Step 5: Look at the distribution of the number of customers in the shopping channels, 
#the total number of products purchased and the total expenditures.

In [25]:
# Per order channel: number of customer rows, total orders and total spend.
channel_summary = df.groupby("order_channel").agg(
    master_id=("master_id", "count"),
    total_count=("total_count", "sum"),
    total_price=("total_price", "sum"),
)
channel_summary

Unnamed: 0_level_0,master_id,total_count,total_price
order_channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Android App,9495,52269.0,7819062.76
Desktop,2735,10920.0,1610321.46
Ios App,2833,15351.0,2525999.93
Mobile,4882,21679.0,3028183.16


In [26]:
# Step6: List the top 10 customers with the highest earnings.

In [27]:
# Ten rows with the largest total spend, highest first.
df.sort_values(by="total_price", ascending=False).head(10)

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,total_count,total_price
11150,5d1c466a-9cfd-11e9-9897-000d3a38a36f,Android App,Desktop,2013-10-11,2021-04-30,2021-04-30,2020-12-24,200.0,2.0,684.97,45220.13,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",202.0,45905.1
4315,d5ef8058-a5c6-11e9-a2fc-000d3a38a36f,Android App,Android App,2018-08-06,2021-02-23,2021-02-23,2020-07-06,67.0,1.0,130.49,36687.8,"[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]",68.0,36818.29
7613,73fd19aa-9e37-11e9-9897-000d3a38a36f,Ios App,Offline,2014-01-14,2021-05-18,2021-01-30,2021-05-18,81.0,1.0,1263.76,32654.34,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",82.0,33918.1
13880,7137a5c0-7aad-11ea-8f20-000d3a38a36f,Ios App,Offline,2021-03-01,2021-04-13,2021-03-18,2021-04-13,10.0,1.0,538.94,30688.47,"[ERKEK, KADIN, AKTIFSPOR]",11.0,31227.41
9055,47a642fe-975b-11eb-8c2a-000d3a38a36f,Android App,Offline,2021-04-07,2021-04-27,2021-04-07,2021-04-27,1.0,3.0,18119.14,2587.2,[AKTIFSPOR],4.0,20706.34
7330,a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,Desktop,Desktop,2020-02-16,2021-04-30,2021-04-30,2020-12-18,66.0,4.0,843.68,17599.89,"[ERKEK, KADIN, AKTIFSPOR]",70.0,18443.57
8068,d696c654-2633-11ea-8e1c-000d3a38a36f,Ios App,Ios App,2017-05-10,2021-04-13,2021-04-13,2019-08-15,69.0,1.0,82.48,16836.09,"[AKTIFCOCUK, ERKEK, COCUK, KADIN, AKTIFSPOR]",70.0,16918.57
163,fef57ffa-aae6-11e9-a2fc-000d3a38a36f,Mobile,Desktop,2016-11-08,2021-05-12,2021-05-12,2020-07-09,36.0,1.0,180.73,12545.37,"[ERKEK, AKTIFSPOR]",37.0,12726.1
7223,cba59206-9dd1-11e9-9897-000d3a38a36f,Android App,Android App,2013-02-21,2021-05-09,2021-05-09,2020-01-25,130.0,1.0,49.99,12232.25,"[AKTIFCOCUK, ERKEK, KADIN, AKTIFSPOR]",131.0,12282.24
18767,fc0ce7a4-9d87-11e9-9897-000d3a38a36f,Desktop,Desktop,2018-11-24,2020-11-11,2020-11-11,2019-12-06,18.0,2.0,64.97,12038.18,"[ERKEK, KADIN]",20.0,12103.15


In [28]:
# Step 7: List the top 10 customers who ordered the most.

In [29]:
# Total orders per customer, highest first; keep the ten largest.
# Only the last expression of a cell is displayed, so a single expression is used here.
(
    df.groupby("master_id")
    .agg({"total_count": "sum"})
    .sort_values("total_count", ascending=False)
    .head(10)
)

Unnamed: 0_level_0,total_count
master_id,Unnamed: 1_level_1
5d1c466a-9cfd-11e9-9897-000d3a38a36f,202.0
cba59206-9dd1-11e9-9897-000d3a38a36f,131.0
a57f4302-b1a8-11e9-89fa-000d3a38a36f,111.0
fdbe8304-a7ab-11e9-a2fc-000d3a38a36f,88.0
329968c6-a0e2-11e9-a2fc-000d3a38a36f,83.0
73fd19aa-9e37-11e9-9897-000d3a38a36f,82.0
44d032ee-a0d4-11e9-a2fc-000d3a38a36f,77.0
b27e241a-a901-11e9-a2fc-000d3a38a36f,75.0
d696c654-2633-11ea-8e1c-000d3a38a36f,70.0
a4d534a2-5b1b-11eb-8dbd-000d3a38a36f,70.0


In [30]:
# Step 8 : Functionalize all the data pre-process.

In [31]:
def data_prep(dataframe):
    """Add omnichannel totals and parse the date columns of a customer frame.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``order_num_total_ever_online``, ``order_num_total_ever_offline``,
        ``customer_value_total_ever_offline`` and ``customer_value_total_ever_online``.

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` itself, with ``total_count`` and ``total_price`` added and every
        column whose name contains "date" converted with ``pd.to_datetime``.
    """
    orders_online = dataframe["order_num_total_ever_online"]
    orders_offline = dataframe["order_num_total_ever_offline"]
    dataframe["total_count"] = orders_online + orders_offline

    spend_offline = dataframe["customer_value_total_ever_offline"]
    spend_online = dataframe["customer_value_total_ever_online"]
    dataframe["total_price"] = spend_offline + spend_online

    # Pick the date columns by name, then convert them all in one assignment.
    is_date_column = dataframe.columns.str.contains("date")
    date_columns = dataframe.columns[is_date_column]
    dataframe[date_columns] = dataframe[date_columns].apply(pd.to_datetime)
    return dataframe

In [32]:
# data_prep modifies df in place and returns it, so this call also displays the frame.
data_prep(df)

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,total_count,total_price
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.000,1.000,139.990,799.380,[KADIN],5.000,939.370
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.000,2.000,159.970,1853.580,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.000,2013.550
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.000,2.000,189.970,395.350,"[ERKEK, KADIN]",5.000,585.320
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.000,1.000,39.990,81.980,"[AKTIFCOCUK, COCUK]",2.000,121.970
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.000,1.000,49.990,159.990,[AKTIFSPOR],2.000,209.980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19940,727e2b6e-ddd4-11e9-a848-000d3a38a36f,Android App,Offline,2019-09-21,2020-07-05,2020-06-05,2020-07-05,1.000,2.000,289.980,111.980,"[ERKEK, AKTIFSPOR]",3.000,401.960
19941,25cd53d4-61bf-11ea-8dd8-000d3a38a36f,Desktop,Desktop,2020-03-01,2020-12-22,2020-12-22,2020-03-01,1.000,1.000,150.480,239.990,[AKTIFSPOR],2.000,390.470
19942,8aea4c2a-d6fc-11e9-93bc-000d3a38a36f,Ios App,Ios App,2019-09-11,2021-05-24,2021-05-24,2019-09-11,2.000,1.000,139.980,492.960,[AKTIFSPOR],3.000,632.940
19943,e50bb46c-ff30-11e9-a5e8-000d3a38a36f,Android App,Android App,2019-03-27,2021-02-13,2021-02-13,2021-01-08,1.000,5.000,711.790,297.980,"[ERKEK, AKTIFSPOR]",6.000,1009.770


In [35]:
# Task 2: Calculating RFM Metrics

In [36]:
# Latest order date present in the data.
df["last_order_date"].max()

Timestamp('2021-05-30 00:00:00')

In [37]:
# Fixed analysis date that recency is measured against.
today_date = dt.datetime(year=2021, month=6, day=1)
type(today_date)

datetime.datetime

In [38]:
# Number of distinct total_count values.
df['total_count'].nunique()

63

In [39]:
#Step 1: Make the definitions of Recency, Frequency and Monetary.

In [None]:
# recency: number of days between the analysis date and the customer's last purchase
# frequency: total number of purchases made by the customer
# monetary: total amount spent by the customer

In [None]:
#Step 2: Calculate the Recency, Frequency and Monetary metrics for the customer.

In [40]:
# One row per customer with the three raw RFM inputs:
#   last_order_date -> days between today_date and the customer's latest order
#   total_count     -> total number of orders
#   total_price     -> total amount spent
# Each column is reduced to a scalar per group; an identity lambda would only
# work while every master_id has exactly one row.
rfm = df.groupby("master_id").agg({
    "last_order_date": lambda order_dates: (today_date - order_dates.max()).days,
    "total_count": "sum",
    "total_price": "sum",
})

In [41]:
# First rows of the per-customer aggregate.
rfm.head()

Unnamed: 0_level_0,last_order_date,total_count,total_price
master_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
00016786-2f5a-11ea-bb80-000d3a38a36f,10,5.0,776.07
00034aaa-a838-11e9-a2fc-000d3a38a36f,298,3.0,269.47
000be838-85df-11ea-a90b-000d3a38a36f,213,4.0,722.69
000c1fe2-a8b7-11ea-8479-000d3a38a36f,27,7.0,874.16
000f5e3e-9dde-11ea-80cd-000d3a38a36f,20,7.0,1620.33


In [43]:
#Step 3: Assign your calculated metrics to a variable named rfm.
#Step 4: Change the names of the metrics you created to recency, frequency, and monetary.

In [44]:
# Positional rename: the aggregate's columns are, in order,
# last_order_date -> recency, total_count -> frequency, total_price -> monetary.
rfm.columns = ['recency', 'frequency', 'monetary']

In [45]:
#Task 3: Calculation of RF Score

In [46]:
#Step 1: Convert the Recency, Frequency and Monetary metrics to scores between 1-5 with the help of qcut.

In [47]:
# Each metric is cut into five quantile bins and labelled 1-5.
ascending_labels = [1, 2, 3, 4, 5]

# Recency labels run 5..1, so the smallest recency values receive the highest score.
rfm["recency_score"] = pd.qcut(rfm["recency"], q=5, labels=ascending_labels[::-1])

# Frequency is ranked with method="first" before binning, which gives every row a
# distinct rank (presumably to avoid duplicate bin edges in qcut).
frequency_rank = rfm["frequency"].rank(method="first")
rfm["frequency_score"] = pd.qcut(frequency_rank, q=5, labels=ascending_labels)

rfm["monetary_score"] = pd.qcut(rfm["monetary"], q=5, labels=ascending_labels)

In [48]:
#Step 2: Record these scores as recency_score, frequency_score, and monetary_score. 
#Step 3: Express recency_score and frequency_score as a single variable and save as RF_SCORE.

In [49]:
# RF_SCORE is the recency score followed by the frequency score, joined as text.
recency_text = rfm["recency_score"].astype(str)
frequency_text = rfm["frequency_score"].astype(str)
rfm["RF_SCORE"] = recency_text + frequency_text

In [50]:
# Summary statistics of the RFM frame, one row per variable.
rfm.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
recency,19945.0,134.458,103.281,2.0,43.0,111.0,202.0,367.0
frequency,19945.0,5.025,4.743,2.0,3.0,4.0,6.0,202.0
monetary,19945.0,751.244,895.402,44.98,339.98,545.27,897.78,45905.1


In [52]:
#Task 4: Defining RF Score by Segment

In [53]:
# Maps an RF_SCORE string (recency score followed by frequency score) to a segment name.
# Keys are regular expressions: the first position matches the recency score,
# the second position matches the frequency score.
seg_map = {
    r'[1-2][1-2]': 'hibernating',
    r'[1-2][3-4]': 'at_Risk',
    r'[1-2]5': 'cant_loose',
    r'3[1-2]': 'about_to_sleep',
    r'33': 'need_attention',
    r'[3-4][4-5]': 'loyal_customers',
    r'41': 'promising',
    r'51': 'new_customers',
    r'[4-5][2-3]': 'potential_loyalists',
    r'5[4-5]': 'champions'
}

In [54]:
# Replace each RF_SCORE with its segment name; regex=True makes the seg_map keys regex patterns.
rfm['segment'] = rfm['RF_SCORE'].replace(seg_map, regex=True)

In [55]:
#Task 5: Time for Action!

In [None]:
#Step1: Examine the recency, frequency and monetary averages of the segments.

In [56]:
# Mean and row count of each RFM metric within every segment.
metric_columns = ["recency", "frequency", "monetary"]
rfm.groupby("segment")[metric_columns].agg(["mean", "count"])

Unnamed: 0_level_0,recency,recency,frequency,frequency,monetary,monetary
Unnamed: 0_level_1,mean,count,mean,count,mean,count
segment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
about_to_sleep,114.032,1643,2.407,1643,361.649,1643
at_Risk,242.329,3152,4.47,3152,648.325,3152
cant_loose,235.159,1194,10.717,1194,1481.652,1194
champions,17.142,1920,8.965,1920,1410.709,1920
hibernating,247.426,3589,2.391,3589,362.583,3589
loyal_customers,82.558,3375,8.356,3375,1216.257,3375
need_attention,113.037,806,3.739,806,553.437,806
new_customers,17.976,673,2.0,673,344.049,673
potential_loyalists,36.87,2925,3.311,2925,533.741,2925
promising,58.695,668,2.0,668,334.153,668


In [None]:
#Step2: With the help of RFM analysis, 
#find the customers in the relevant profile for the 2 cases given below and save the customer ids as csv.

In [None]:
#a. FLO is adding a new women's shoe brand to its portfolio.
#The prices of this brand's products are above what customers generally prefer to pay.
#Therefore, FLO wants to contact the customers whose profile suggests they will be interested in the brand's promotion and sales.
#The customers to be contacted individually are loyal customers (champions, loyal_customers)
#who also shop in the women's category.
#Save the id numbers of these customers to a csv file.

In [69]:
# Attach the RFM metrics, scores and segment to each customer row, matching on master_id.
rfm_ = df.merge(rfm, on="master_id")

In [70]:
# First rows of the merged customer + RFM frame.
rfm_.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,total_count,total_price,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RF_SCORE,segment
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37,95,5.0,939.37,3,4,4,34,loyal_customers
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55,105,21.0,2013.55,3,5,5,35,loyal_customers
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32,186,5.0,585.32,2,4,3,24,at_Risk
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97,135,2.0,121.97,3,1,1,31,about_to_sleep
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98,86,2.0,209.98,3,1,1,31,about_to_sleep


In [75]:
# Target audience for the new women's brand: champions and loyal customers
# whose category list contains "KADIN".
is_loyal_or_champion = rfm_["segment"].isin(["loyal_customers", "champions"])
shops_women_category = rfm_["interested_in_categories_12"].str.contains("KADIN")
new = rfm_.loc[is_loyal_or_champion & shops_women_category, "master_id"]
# NOTE(review): this column is aligned on the index, so rows outside the selection are NaN;
# its name is easy to confuse with the "new_customers" segment - confirm it is needed.
rfm_["new customers"] = new

In [76]:
# Write the selected customer ids to CSV; index=False leaves the row index out of the file.
new.to_csv("yeni_marka_hedef_müşteri_id.csv", index=False)

In [77]:
# First rows of rfm_ again, now including the "new customers" column added by the selection cell.
rfm_.head()

Unnamed: 0,master_id,order_channel,last_order_channel,first_order_date,last_order_date,last_order_date_online,last_order_date_offline,order_num_total_ever_online,order_num_total_ever_offline,customer_value_total_ever_offline,customer_value_total_ever_online,interested_in_categories_12,total_count,total_price,recency,frequency,monetary,recency_score,frequency_score,monetary_score,RF_SCORE,segment,new customers
0,cc294636-19f0-11eb-8d74-000d3a38a36f,Android App,Offline,2020-10-30,2021-02-26,2021-02-21,2021-02-26,4.0,1.0,139.99,799.38,[KADIN],5.0,939.37,95,5.0,939.37,3,4,4,34,loyal_customers,cc294636-19f0-11eb-8d74-000d3a38a36f
1,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f,Android App,Mobile,2017-02-08,2021-02-16,2021-02-16,2020-01-10,19.0,2.0,159.97,1853.58,"[ERKEK, COCUK, KADIN, AKTIFSPOR]",21.0,2013.55,105,21.0,2013.55,3,5,5,35,loyal_customers,f431bd5a-ab7b-11e9-a2fc-000d3a38a36f
2,69b69676-1a40-11ea-941b-000d3a38a36f,Android App,Android App,2019-11-27,2020-11-27,2020-11-27,2019-12-01,3.0,2.0,189.97,395.35,"[ERKEK, KADIN]",5.0,585.32,186,5.0,585.32,2,4,3,24,at_Risk,
3,1854e56c-491f-11eb-806e-000d3a38a36f,Android App,Android App,2021-01-06,2021-01-17,2021-01-17,2021-01-06,1.0,1.0,39.99,81.98,"[AKTIFCOCUK, COCUK]",2.0,121.97,135,2.0,121.97,3,1,1,31,about_to_sleep,
4,d6ea1074-f1f5-11e9-9346-000d3a38a36f,Desktop,Desktop,2019-08-03,2021-03-07,2021-03-07,2019-08-03,1.0,1.0,49.99,159.99,[AKTIFSPOR],2.0,209.98,86,2.0,209.98,3,1,1,31,about_to_sleep,


In [78]:
# Action 2

# A 40% discount is planned for men's and children's products.
# Customers interested in these categories who were good customers in the past
# but have not shopped for a long time (customers we should not lose), dormant customers,
# and new customers are to be targeted specifically.
# Save the id numbers of these customers to the csv file.

In [80]:
# Target audience for the discount on men's and children's products: customers in the
# cant_loose, hibernating or new_customers segments who are ALSO interested in the
# ERKEK or COCUK categories.
# `&` binds tighter than `|`, so the segment test is built as a single mask first;
# otherwise the category filter would apply to the last segment only.
in_target_segment = rfm_["segment"].isin(["cant_loose", "hibernating", "new_customers"])
# The category mask is taken from rfm_ (not df) so both masks share the same rows and index.
categories = rfm_["interested_in_categories_12"]
# NOTE(review): "COCUK" is a substring match, so it also matches "AKTIFCOCUK" - confirm intended.
in_target_category = categories.str.contains("ERKEK") | categories.str.contains("COCUK")
new2 = rfm_.loc[in_target_segment & in_target_category, "master_id"]

In [81]:
# Write the selected customer ids to CSV; index=False leaves the row index out of the file.
new2.to_csv("yeni_hedef_müşteri_id.csv", index=False)