In [1]:
#import necessary library
import numpy as np # linear algebra
import pandas as pd # data manipulation and analysis
import matplotlib.pyplot as plt # data visualization
import seaborn as sns # data visualization
sns.set_style('whitegrid') # set style for visualization
import warnings # ignore warnings
warnings.filterwarnings('ignore')

from initial_report import *

In [2]:
#import customer data
df_customer=pd.read_csv("cleaned_customer_data.csv")
df_customer.head()

Unnamed: 0,customer_id,became_member_on,gender,age,income
0,0610b486422d4921ae7d2bf64640c50b,2017-07-15,F,55,112000.0
1,78afa995795e4d85b5d9ceeca43f5fef,2017-05-09,F,75,100000.0
2,e2127556f4f64592b11af22de27a7932,2018-04-26,M,68,70000.0
3,389bc3fa690240e798340f5a15918d5c,2018-02-09,M,65,53000.0
4,2eeac8d8feae4a8cad5a6af0499a211d,2017-11-11,M,58,51000.0


In [3]:
#get initial report
initial_report(df_customer)

 *** DATA CLEANING CHECKLIST ***
----------------------------------------
*** Structure:
- Total Rows: 14825
- Total Columns: 5
- Column Names: ['customer_id', 'became_member_on', 'gender', 'age', 'income']

*** Data Types:
  customer_id: object
  became_member_on: object
  gender: object
  age: int64
  income: float64

*** Mixed Data Types:

*** Distinct Values per Column:
  customer_id: 14825
  became_member_on: 1707
  gender: 3
  age: 84
  income: 91

*** Null Values and Percentages:


*** Duplicates: 0

*** Negative or Zero Values:

*** Basic Statistics:
                age         income
count  14825.000000   14825.000000
mean      54.393524   65404.991568
std       17.383705   21598.299410
min       18.000000   30000.000000
25%       42.000000   49000.000000
50%       55.000000   64000.000000
75%       66.000000   80000.000000
max      101.000000  120000.000000

*** Category Description:
                             customer_id became_member_on gender
count                       

Problems:
1. became member on column is object type- [change to datetype]

In [4]:
# Convert 'became_member_on' to datetime
df_customer['became_member_on'] = pd.to_datetime(df_customer['became_member_on'])
print(df_customer['became_member_on'].dtype)

datetime64[ns]


In [5]:
# Create age group column
df_customer['age_group'] = np.select(
    [
        (df_customer['age'] >= 18) & (df_customer['age'] <= 34),
        (df_customer['age'] >= 35) & (df_customer['age'] <= 49),
        (df_customer['age'] >= 50) & (df_customer['age'] <= 64),
        (df_customer['age'] >= 65) & (df_customer['age'] <= 79),
        (df_customer['age'] >= 80) & (df_customer['age'] <= 110)
    ],
    [
        'Young Adult',
        'Middle Age Adult',
        'Older Adult',
        'Senior',
        'Elderly'
    ],
    default='Unknown'
)

#print
df_customer.head()

Unnamed: 0,customer_id,became_member_on,gender,age,income,age_group
0,0610b486422d4921ae7d2bf64640c50b,2017-07-15,F,55,112000.0,Older Adult
1,78afa995795e4d85b5d9ceeca43f5fef,2017-05-09,F,75,100000.0,Senior
2,e2127556f4f64592b11af22de27a7932,2018-04-26,M,68,70000.0,Senior
3,389bc3fa690240e798340f5a15918d5c,2018-02-09,M,65,53000.0,Senior
4,2eeac8d8feae4a8cad5a6af0499a211d,2017-11-11,M,58,51000.0,Older Adult


In [6]:
#get value counts for age group
df_customer.age_group.value_counts()

age_group
Older Adult         5150
Senior              3164
Middle Age Adult    3153
Young Adult         2256
Elderly             1102
Name: count, dtype: int64

In [7]:
#create income group

bins = [0, 44000, 84000, float('inf')]
labels = ['Low Income', 'Middle Income', 'High Income']

df_customer['income_group'] = pd.cut(df_customer['income'], bins=bins, labels=labels, right=True)

#check dataframe
df_customer.head()

Unnamed: 0,customer_id,became_member_on,gender,age,income,age_group,income_group
0,0610b486422d4921ae7d2bf64640c50b,2017-07-15,F,55,112000.0,Older Adult,High Income
1,78afa995795e4d85b5d9ceeca43f5fef,2017-05-09,F,75,100000.0,Senior,High Income
2,e2127556f4f64592b11af22de27a7932,2018-04-26,M,68,70000.0,Senior,Middle Income
3,389bc3fa690240e798340f5a15918d5c,2018-02-09,M,65,53000.0,Senior,Middle Income
4,2eeac8d8feae4a8cad5a6af0499a211d,2017-11-11,M,58,51000.0,Older Adult,Middle Income


In [8]:
#get value counts for income group
df_customer.income_group.value_counts()

income_group
Middle Income    8941
High Income      3015
Low Income       2869
Name: count, dtype: int64

In [9]:
#get gender percentage
gender_percent = (
    df_customer.groupby('gender').size() / len(df_customer) * 100
)
gender_percent=round(gender_percent,2)
print(gender_percent)


gender
F    41.34
M    57.23
O     1.43
dtype: float64


In [10]:
#create group for membership year
df_customer['membership_year'] = df_customer['became_member_on'].astype(str).str[:4].astype(int)
df_customer.head()

Unnamed: 0,customer_id,became_member_on,gender,age,income,age_group,income_group,membership_year
0,0610b486422d4921ae7d2bf64640c50b,2017-07-15,F,55,112000.0,Older Adult,High Income,2017
1,78afa995795e4d85b5d9ceeca43f5fef,2017-05-09,F,75,100000.0,Senior,High Income,2017
2,e2127556f4f64592b11af22de27a7932,2018-04-26,M,68,70000.0,Senior,Middle Income,2018
3,389bc3fa690240e798340f5a15918d5c,2018-02-09,M,65,53000.0,Senior,Middle Income,2018
4,2eeac8d8feae4a8cad5a6af0499a211d,2017-11-11,M,58,51000.0,Older Adult,Middle Income,2017


In [11]:
#get value counts for membership year
df_customer.membership_year.value_counts()

membership_year
2017    5599
2018    3669
2016    3024
2015    1597
2014     662
2013     274
Name: count, dtype: int64

In [12]:
#drop unnecessary columns
df_customer = df_customer.drop(columns=['became_member_on', 'age','income'])
df_customer.head()

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017
1,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017
2,e2127556f4f64592b11af22de27a7932,M,Senior,Middle Income,2018
3,389bc3fa690240e798340f5a15918d5c,M,Senior,Middle Income,2018
4,2eeac8d8feae4a8cad5a6af0499a211d,M,Older Adult,Middle Income,2017


In [13]:
#import customer data
df_event=pd.read_csv("cleaned_events.csv")
df_event.head()

Unnamed: 0,customer_id,event,time,offer_id,amount,reward
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,
1,a03223e636434f42ac4c3df47e8bac43,offer received,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,,
2,e2127556f4f64592b11af22de27a7932,offer received,0,2906b810c7d4411798c6938adc9daaa5,,
3,8ec6ce2a7e7949b1bf142def7d0e0586,offer received,0,fafdcd668e3743c1bb461111dcafc2a4,,
4,68617ca6246f4fbc85e91a2a49552598,offer received,0,4d5c57ea9a6940dd891ad53e9dbe8da0,,


In [14]:
#merge df_customer and df_event
df_customer_events = pd.merge(df_customer, df_event, on='customer_id', how='left')
df_customer_events.head()

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,18,,21.51,
1,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,144,,32.28,
2,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer received,408,9b98b8c7a33c4b65b9aebfe6a799e6d9,,
3,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer received,504,3f207df678b143eea3cee63160fa8bed,,
4,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,528,,23.22,


In [15]:
df_customer_events.event.value_counts()

event
transaction        123957
offer received      66501
offer viewed        49860
offer completed     32070
Name: count, dtype: int64

In [16]:
#total_transaction_amount
total_transaction_amount = df_customer_events['amount'].sum()
print(total_transaction_amount)

1734942.4


In [17]:
# Average transaction_amount
avg_transaction_amount = df_customer_events['amount'].mean()

print(avg_transaction_amount)


13.996324531894123


In [18]:
#get average spend per customer
distinct_customers_count = df_customer_events['customer_id'].nunique()
#Calculate the total transaction amount per customer
df_transactions=df_customer_events[df_customer_events.event=="transaction"]
total_transaction_amount = df_transactions['amount'].sum()
#get average
avg_spend_per_customer=total_transaction_amount/distinct_customers_count
print(avg_spend_per_customer)


117.02815514333896


In [19]:
#total spend based on age group
total_spend_age_group=df_customer_events.groupby("age_group")["amount"].sum()
print(total_spend_age_group)

age_group
Elderly             147026.37
Middle Age Adult    343178.68
Older Adult         652728.13
Senior              401065.92
Young Adult         190943.30
Name: amount, dtype: float64


In [20]:
#average spend based on age group
avg_spend_age_group=df_customer_events.groupby("age_group")["amount"].mean()
print(avg_spend_age_group)

age_group
Elderly             17.090128
Middle Age Adult    12.425906
Older Adult         16.323100
Senior              16.326722
Young Adult          8.236350
Name: amount, dtype: float64


In [21]:
#no. of transaction based on age_group
number_of_trans_age_group=df_transactions.groupby("age_group")["event"].value_counts()
print(number_of_trans_age_group)

age_group         event      
Elderly           transaction     8603
Middle Age Adult  transaction    27618
Older Adult       transaction    39988
Senior            transaction    24565
Young Adult       transaction    23183
Name: count, dtype: int64


In [22]:
#get transaction quantity by each customer
transactions_per_customer = df_transactions.groupby('customer_id').size().sort_values(ascending=False)
print(transactions_per_customer )

customer_id
79d9d4f86aca4bed9290350fb43817c2    36
8dbfa485249f409aa223a2130f40634a    36
94de646f7b6041228ca7dec82adb97d2    35
5e60c6aa3b834e44b822ea43a3efea26    32
b1f4ece7d49342628a9ed77aee2cde58    32
                                    ..
4f4777c7c22648f58098e89a34426e07     1
f8eff6816be34191b76e9b4614ce5224     1
4eeaab922a754d58a14733cfd2952279     1
0f628df0d3fb4e1192f156de98ec4a0b     1
f904dad31185443c909f7722c28e9840     1
Length: 14492, dtype: int64


In [23]:
#total spend based on membership year
total_spend_mem_year=df_customer_events.groupby("membership_year")["amount"].sum()
print(total_spend_mem_year)

membership_year
2013     23375.98
2014     55311.60
2015    245725.55
2016    519551.67
2017    658287.36
2018    232690.24
Name: amount, dtype: float64


In [24]:
#total spending by gender
df_customer_events.groupby("gender")["amount"].sum()

gender
F    863695.00
M    844890.86
O     26356.54
Name: amount, dtype: float64

In [25]:
df_customer_events.head()

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,18,,21.51,
1,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,144,,32.28,
2,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer received,408,9b98b8c7a33c4b65b9aebfe6a799e6d9,,
3,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer received,504,3f207df678b143eea3cee63160fa8bed,,
4,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,528,,23.22,


In [26]:
df_customer_events.event.value_counts()

event
transaction        123957
offer received      66501
offer viewed        49860
offer completed     32070
Name: count, dtype: int64

In [27]:
df_completed=df_customer_events[df_customer_events.event=="offer completed"]
df_completed.head()

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward
5,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer completed,528,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0
9,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0
20,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,offer completed,510,ae264e3637204a6fb9bb56bc8210ddfd,,10.0
21,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,offer completed,510,f19421c1d4aa40978ebb69ca19b0e20d,,5.0
34,e2127556f4f64592b11af22de27a7932,M,Senior,Middle Income,2018,offer completed,522,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0


In [28]:
#get rows where transaction time and completed time is same
matching_pairs = pd.merge(
    df_transactions[['customer_id', 'time']].drop_duplicates(),
    df_completed[['customer_id', 'time']].drop_duplicates(),
    on=['customer_id', 'time'],
    how='inner'
)
matching_pairs

Unnamed: 0,customer_id,time
0,0610b486422d4921ae7d2bf64640c50b,528
1,78afa995795e4d85b5d9ceeca43f5fef,132
2,78afa995795e4d85b5d9ceeca43f5fef,510
3,e2127556f4f64592b11af22de27a7932,522
4,389bc3fa690240e798340f5a15918d5c,60
...,...,...
29576,9dc1421481194dcd9400aec7c9ae6366,360
29577,9dc1421481194dcd9400aec7c9ae6366,414
29578,9dc1421481194dcd9400aec7c9ae6366,594
29579,e4052622e5ba45a8b96b59aba68cf068,54


In [29]:
#filter df_customer_events
df_matching_rows = pd.merge(df_customer_events, matching_pairs, on=['customer_id', 'time'], how='inner')
df_matching_rows

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,528,,23.22,
1,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer completed,528,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0
2,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,132,,19.89,
3,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0
4,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,510,,21.72,
...,...,...,...,...,...,...,...,...,...,...
66770,9dc1421481194dcd9400aec7c9ae6366,F,Elderly,Middle Income,2016,offer completed,594,ae264e3637204a6fb9bb56bc8210ddfd,,10.0
66771,e4052622e5ba45a8b96b59aba68cf068,F,Older Adult,Middle Income,2017,transaction,54,,21.55,
66772,e4052622e5ba45a8b96b59aba68cf068,F,Older Adult,Middle Income,2017,offer completed,54,2298d6c36e964ae4a3e7e9706d1fb8c2,,3.0
66773,e4052622e5ba45a8b96b59aba68cf068,F,Older Adult,Middle Income,2017,transaction,480,,30.57,


In [30]:
#get only the transaction and completed event
df_final=df_matching_rows[df_matching_rows.event.isin(["transaction","offer completed"])]
df_final

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,528,,23.22,
1,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer completed,528,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0
2,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,132,,19.89,
3,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0
4,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,510,,21.72,
...,...,...,...,...,...,...,...,...,...,...
66770,9dc1421481194dcd9400aec7c9ae6366,F,Elderly,Middle Income,2016,offer completed,594,ae264e3637204a6fb9bb56bc8210ddfd,,10.0
66771,e4052622e5ba45a8b96b59aba68cf068,F,Older Adult,Middle Income,2017,transaction,54,,21.55,
66772,e4052622e5ba45a8b96b59aba68cf068,F,Older Adult,Middle Income,2017,offer completed,54,2298d6c36e964ae4a3e7e9706d1fb8c2,,3.0
66773,e4052622e5ba45a8b96b59aba68cf068,F,Older Adult,Middle Income,2017,transaction,480,,30.57,


In [31]:
#how many transaction from offers
df_final.event.value_counts()

event
offer completed    32070
transaction        29581
Name: count, dtype: int64

In [32]:
#total spend for transaction
df_final["amount"].sum()

np.float64(606355.75)

In [33]:
#total reward 
df_final["reward"].sum()

np.float64(158630.0)

In [34]:
#import offer data
df_offer=pd.read_csv("cleaned_offers.csv")
df_offer.head()

Unnamed: 0,offer_id,offer_type,difficulty,reward,duration,channels
0,ae264e3637204a6fb9bb56bc8210ddfd,bogo,10,10,7,"['email', 'mobile', 'social']"
1,4d5c57ea9a6940dd891ad53e9dbe8da0,bogo,10,10,5,"['web', 'email', 'mobile', 'social']"
2,3f207df678b143eea3cee63160fa8bed,informational,0,0,4,"['web', 'email', 'mobile']"
3,9b98b8c7a33c4b65b9aebfe6a799e6d9,bogo,5,5,7,"['web', 'email', 'mobile']"
4,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,20,5,10,"['web', 'email']"


In [35]:
# Convert the numerical columns to strings before concatenating
df_offer['difficulty'] = df_offer['difficulty'].astype(str)
df_offer['reward'] = df_offer['reward'].astype(str)
df_offer['duration'] = df_offer['duration'].astype(str)

# Create the new column by concatenating the strings with hyphens
df_offer['offer_key'] = (
    df_offer['offer_type'] + '-' + 
    df_offer['difficulty'] + '-' + 
    df_offer['reward'] + '-' + 
    df_offer['duration']
)

# Display the DataFrame with the new column
print("DataFrame with the new 'offer_key' column:")
print(df_offer[['offer_id', 'offer_key']].head())

DataFrame with the new 'offer_key' column:
                           offer_id            offer_key
0  ae264e3637204a6fb9bb56bc8210ddfd         bogo-10-10-7
1  4d5c57ea9a6940dd891ad53e9dbe8da0         bogo-10-10-5
2  3f207df678b143eea3cee63160fa8bed  informational-0-0-4
3  9b98b8c7a33c4b65b9aebfe6a799e6d9           bogo-5-5-7
4  0b1e1539f2cc45b7b9fa7c272da2e1d7     discount-20-5-10


In [36]:
#load df_offer
df_offer.head()

Unnamed: 0,offer_id,offer_type,difficulty,reward,duration,channels,offer_key
0,ae264e3637204a6fb9bb56bc8210ddfd,bogo,10,10,7,"['email', 'mobile', 'social']",bogo-10-10-7
1,4d5c57ea9a6940dd891ad53e9dbe8da0,bogo,10,10,5,"['web', 'email', 'mobile', 'social']",bogo-10-10-5
2,3f207df678b143eea3cee63160fa8bed,informational,0,0,4,"['web', 'email', 'mobile']",informational-0-0-4
3,9b98b8c7a33c4b65b9aebfe6a799e6d9,bogo,5,5,7,"['web', 'email', 'mobile']",bogo-5-5-7
4,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,20,5,10,"['web', 'email']",discount-20-5-10


In [37]:
#merge df_final and df_offer
#merge df_customer and df_event
df_final_offers = pd.merge(df_final, df_offer, on='offer_id', how='left')
df_final_offers.head()

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward_x,offer_type,difficulty,reward_y,duration,channels,offer_key
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,528,,23.22,,,,,,,
1,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer completed,528,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0,bogo,5.0,5.0,7.0,"['web', 'email', 'mobile']",bogo-5-5-7
2,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,132,,19.89,,,,,,,
3,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0,bogo,5.0,5.0,7.0,"['web', 'email', 'mobile']",bogo-5-5-7
4,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,510,,21.72,,,,,,,


In [38]:
#initial_report of df_final_offers
initial_report(df_final_offers)

 *** DATA CLEANING CHECKLIST ***
----------------------------------------
*** Structure:
- Total Rows: 61651
- Total Columns: 16
- Column Names: ['customer_id', 'gender', 'age_group', 'income_group', 'membership_year', 'event', 'time', 'offer_id', 'amount', 'reward_x', 'offer_type', 'difficulty', 'reward_y', 'duration', 'channels', 'offer_key']

*** Data Types:
  customer_id: object
  gender: object
  age_group: object
  income_group: category
  membership_year: int64
  event: object
  time: int64
  offer_id: object
  amount: float64
  reward_x: float64
  offer_type: object
  difficulty: object
  reward_y: object
  duration: object
  channels: object
  offer_key: object

*** Mixed Data Types:
  offer_id:
    - str: 32070
    - float: 29581
  offer_type:
    - str: 32070
    - float: 29581
  difficulty:
    - str: 32070
    - float: 29581
  reward_y:
    - str: 32070
    - float: 29581
  duration:
    - str: 32070
    - float: 29581
  channels:
    - str: 32070
    - float: 29581
  offe

In [39]:
#check whether reward_x and reward_y is same
df_final_offers['reward_y'] = pd.to_numeric(df_final_offers['reward_y'], errors='coerce')
df_not_same = df_final_offers[df_final_offers['reward_x'] != df_final_offers['reward_y']]

df_not_same

Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward_x,offer_type,difficulty,reward_y,duration,channels,offer_key
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,528,,23.22,,,,,,,
2,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,132,,19.89,,,,,,,
4,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,510,,21.72,,,,,,,
7,e2127556f4f64592b11af22de27a7932,M,Senior,Middle Income,2018,transaction,522,,18.42,,,,,,,
10,389bc3fa690240e798340f5a15918d5c,M,Senior,Middle Income,2018,transaction,60,,9.54,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61641,9dc1421481194dcd9400aec7c9ae6366,F,Elderly,Middle Income,2016,transaction,360,,10.75,,,,,,,
61643,9dc1421481194dcd9400aec7c9ae6366,F,Elderly,Middle Income,2016,transaction,414,,11.03,,,,,,,
61645,9dc1421481194dcd9400aec7c9ae6366,F,Elderly,Middle Income,2016,transaction,594,,12.57,,,,,,,
61647,e4052622e5ba45a8b96b59aba68cf068,F,Older Adult,Middle Income,2017,transaction,54,,21.55,,,,,,,


In [40]:
#drop reward_y and rename reward_x to reward
df_final_offers = df_final_offers.drop(columns=['reward_y'])
df_final_offers.rename(columns={'reward_x': 'reward'}, inplace=True)
df_final_offers.head()


Unnamed: 0,customer_id,gender,age_group,income_group,membership_year,event,time,offer_id,amount,reward,offer_type,difficulty,duration,channels,offer_key
0,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,transaction,528,,23.22,,,,,,
1,0610b486422d4921ae7d2bf64640c50b,F,Older Adult,High Income,2017,offer completed,528,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0,bogo,5.0,7.0,"['web', 'email', 'mobile']",bogo-5-5-7
2,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,132,,19.89,,,,,,
3,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,,5.0,bogo,5.0,7.0,"['web', 'email', 'mobile']",bogo-5-5-7
4,78afa995795e4d85b5d9ceeca43f5fef,F,Senior,High Income,2017,transaction,510,,21.72,,,,,,


In [43]:
#which offer type was more succesful
df_final_offers.offer_type.value_counts()

offer_type
discount    16970
bogo        15100
Name: count, dtype: int64

In [44]:
#which offer key was more succesful
df_final_offers.offer_key.value_counts()

offer_key
discount-10-2-10    4957
discount-7-3-7      4847
bogo-5-5-7          4141
bogo-5-5-5          4074
discount-10-2-7     3860
bogo-10-10-7        3604
discount-20-5-10    3306
bogo-10-10-5        3281
Name: count, dtype: int64