# Predict Customer Personality to Boost Marketing Campaign by Using Machine Learning

## Task 1 : Conversion Rate Analysis Based On Income, Spending And Age
Goals : Find a pattern of consumer behavior.<br>
Objective : 
- Feature engineering 
- Analyze Conversion Rate with other variables such as age, income, expenses, etc 

### Import Library

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Load Data

In [None]:
# Read the raw marketing-campaign CSV into `df`.
df = pd.read_csv('./data/marketing_campaign_data.csv')
# Lift the column display limit so wide frames are shown without truncation.
pd.set_option('display.max_columns', None)
# Preview ten randomly chosen rows.
df.sample(n=10)

In [None]:
# Draw a larger random sample (20 rows) of the raw frame for visual inspection.
df.sample(20)

### Feature Engineering
New Features :
- Age                = age for each customer
- AgeGroup           = age group for better interpretation in analysis ahead
- Parent (HasKid)    = whether the customer has at least one kid or teen at home ('yes'/'no')
- TotalAcceptedCmp   = Total number of campaigns (campaigns 1 to 5) that the customer accepted
- Conversions        = What actions do they take while visiting our platform? 
- ConversionRate     = Records the percentage of customers who have completed a desired action

In [None]:
# Work on a copy so the raw frame `df` stays untouched by feature engineering.
dfe = df.copy()
# Drop the 'Unnamed: 0' column (presumably a leftover CSV index — verify against the file).
dfe.drop(columns='Unnamed: 0', inplace=True)

# Age: customer age relative to the fixed reference year 2024.
dfe['Age'] = 2024 - dfe['Year_Birth']

# AgeGroup: bucket Age into labelled bands. np.select evaluates the conditions
# in order and uses the label of the first one that matches.
age_grouping = [
    (dfe['Age'] >= 60),
    (dfe['Age'] >= 40) & (dfe['Age'] < 60),
    (dfe['Age'] >= 28) & (dfe['Age'] < 40)
]
age_category = ['Old Adults', 'Middled-aged Adults', 'Young Adults']
# An explicit string default is required: without it np.select falls back to
# the integer 0, which labels unmatched rows (Age < 28 or missing) as '0' on
# older NumPy and fails with a dtype TypeError on newer NumPy because the
# string choices and the integer default have no common dtype.
dfe['AgeGroup'] = np.select(age_grouping, age_category, default='Unknown')

# Row-level helper that classifies a customer as a parent or not.
def has_kid(row):
    """Return 'yes' when the row has at least one kid or teen at home, else 'no'.

    `row` is any mapping-like object exposing 'Kidhome' and 'Teenhome'
    (e.g. a DataFrame row passed by ``DataFrame.apply(..., axis=1)``).
    """
    return 'yes' if (row['Kidhome'] > 0 or row['Teenhome'] > 0) else 'no'
# Parent: 'yes'/'no' flag produced by running has_kid over every row.
dfe['Parent'] = dfe.apply(has_kid, axis='columns')

# NumChild: kids plus teens living at home.
dfe['NumChild'] = dfe['Kidhome'].add(dfe['Teenhome'])

# TotalAcceptedCmp: sum of the five per-campaign acceptance columns.
campaign_flags = ['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5']
dfe['TotalAcceptedCmp'] = sum(dfe[flag] for flag in campaign_flags)

# TotalSpending: sum of the amounts spent across all product categories.
spending_columns = [
    'MntCoke', 'MntFruits', 'MntMeatProducts',
    'MntFishProducts', 'MntSweetProducts', 'MntGoldProds',
]
dfe['TotalSpending'] = sum(dfe[amount] for amount in spending_columns)

# TotalTrx: sum of the purchase counters of every channel.
# NOTE(review): if deal purchases are already counted inside the web/catalog/store
# counters, this double counts them — confirm against the data dictionary.
purchase_columns = ['NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases']
dfe['TotalTrx'] = sum(dfe[channel] for channel in purchase_columns)

# OnlineTrx: deal purchases plus web purchases.
dfe['OnlineTrx'] = dfe['NumDealsPurchases'].add(dfe['NumWebPurchases'])

# Repair rows that report zero web visits although the customer both bought
# online and accepted at least one campaign.
def webvisit(data):
    """Return the corrected monthly web-visit count for one row.

    When 'NumWebVisitsMonth' is 0 while both 'OnlineTrx' and
    'TotalAcceptedCmp' are greater than 0, the count is set to 1;
    otherwise the recorded value is returned unchanged.
    """
    visits = data['NumWebVisitsMonth']
    inconsistent = (
        visits == 0
        and data['OnlineTrx'] > 0
        and data['TotalAcceptedCmp'] > 0
    )
    return 1 if inconsistent else visits
# Apply the zero-visit correction row by row.
dfe['NumWebVisitsMonth'] = dfe.apply(webvisit, axis=1)

# ConversionRate column
# webvisit only repairs rows that have BOTH online transactions and accepted
# campaigns, so some rows can still report zero visits. Dividing by those
# zeros yields inf (or NaN for 0/0), which then distorts describe() and the
# correlation matrix. Mask zero visits to NaN so the rate is simply
# undefined for those customers.
web_visits = dfe['NumWebVisitsMonth'].where(dfe['NumWebVisitsMonth'] != 0)
# NOTE(review): by operator precedence this is (OnlineTrx / visits) + TotalAcceptedCmp,
# i.e. the accepted-campaign count is added after the division — confirm that
# this is the intended definition of the rate.
dfe['ConversionRate'] = (dfe['OnlineTrx'] / web_visits) + dfe['TotalAcceptedCmp']

In [None]:
# Preview ten random rows, limited to the descriptive columns and the
# engineered features.
preview_columns = [
    'Education', 'Marital_Status', 'Income', 'Recency', 'NumWebVisitsMonth',
    'Complain', 'Z_CostContact', 'Z_Revenue', 'Response',
    'Age', 'AgeGroup', 'Parent', 'NumChild', 'TotalAcceptedCmp',
    'TotalSpending', 'TotalTrx', 'OnlineTrx', 'ConversionRate',
]
dfe[preview_columns].sample(n=10)

<!-- What do we want the customer to do, i.e. which customer actions count as a conversion?
- Spending on our platform
- Generate Online Transaction
- Accept our campaign
- 

What effort do we make so that the customer converts?
-  -->

In [None]:
# Summary statistics of the engineered frame, used as a quick sanity check.
dfe.describe()

### EDA

In [None]:
# Scatter plot of ConversionRate against Income on a light-blue figure.
plt.figure(facecolor='lightblue', figsize=(12, 8))
sns.scatterplot(data=dfe, x='Income', y='ConversionRate', color='green')
sns.despine()
# Restrict both axes to a fixed window; points outside it are not shown.
plt.ylim(0, 10)
plt.xlim(0, 200_000_000)
plt.title(
    'Correlation Between Conversion Rate and Income',
    fontsize=19,
    fontweight='bold',
    y=1.05,
)

In [None]:
# NOTE(review): this cell is a verbatim repeat of the preceding
# Income vs ConversionRate scatter cell and draws the same figure again.
plt.figure(figsize=(12,8), facecolor='lightblue')
sns.scatterplot(x='Income', y='ConversionRate', data=dfe, color='green')
sns.despine()
# Fixed axis window; points outside it are not shown.
plt.xlim(0, 200000000)
plt.ylim(0, 10)
plt.title('Correlation Between Conversion Rate and Income', fontsize=19, fontweight='bold', y=1.05)

In [None]:
# Scatter plot of ConversionRate against Age (axes left at their automatic limits).
plt.figure(figsize=(10, 8))
sns.scatterplot(data=dfe, x='Age', y='ConversionRate')
plt.title(
    'Correlation Between Conversion Rate and Age',
    fontweight='bold',
    fontsize=18,
)

In [None]:
# Scatter plot of ConversionRate against TotalSpending.
plt.figure(figsize=(10,8))
sns.scatterplot(x='TotalSpending', y='ConversionRate', data=dfe)
# Disabled: optional y-axis clipping, left off so the full range is shown.
# plt.ylim(0, 10)
plt.title('Correlation Between Conversion Rate and Total Spending', fontsize=18, fontweight='bold')
# Disabled: in-plot text annotation.
# plt.text(50, 365, 'Correlated+', color='green', fontsize=13, fontstyle='oblique')

In [None]:
# Bar chart of ConversionRate per AgeGroup, drawn without error bars.
# NOTE(review): recent seaborn releases may warn when `palette` is given
# without `hue` — confirm against the installed version.
plt.figure(figsize=(10, 8))
sns.barplot(
    data=dfe,
    x='AgeGroup',
    y='ConversionRate',
    errorbar=None,
    palette='Set1',
)

In [None]:
# Bar chart of ConversionRate per Education level, drawn without error bars.
plt.figure(figsize=(10, 8))
sns.barplot(
    data=dfe,
    x='Education',
    y='ConversionRate',
    errorbar=None,
    palette='Set1',
)

In [None]:
# Annotated heatmap of the pairwise correlations between the int64/float64
# columns of the engineered frame.
num = dfe.select_dtypes(include=['int64', 'float64'])
correlation_matrix = num.corr()
plt.figure(figsize=(20, 8))
sns.heatmap(correlation_matrix, cmap='coolwarm', annot=True)
plt.title('Correlation Heatmap')
plt.show()