# Marketing Case Study

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

import warnings
warnings.filterwarnings("ignore")

## Customer Segmentation

A key objective is to create a predictive model which allows the company to maximize the profits of the next marketing campaign.

In [None]:
# read the raw marketing CSV into a DataFrame
with open("./data/marketing_data.csv", "r") as csv_file:
    data = pd.read_csv(csv_file)

# discard the ID column, then remove rows that are exact duplicates
data = data.drop(columns=['ID']).drop_duplicates()
data.head()

In [None]:
def clean_income(df):
    '''Convert the currency-formatted income column to floats, in place.

    The column headed " Income " (padded with spaces) is renamed to
    "Income"; "$" signs, thousands separators and surrounding whitespace
    are removed from its values, and the result is cast to float.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the income strings under " Income " or "Income".

    Returns
    -------
    pandas.DataFrame
        The same frame object that was passed in (it is modified in place).
    '''
    df.rename(columns={" Income ": "Income"}, inplace=True)
    # regex=False: "$" is a regex anchor, and the default of `regex` differs
    # between pandas versions, so request literal matching explicitly.
    cleaned = (
        df['Income']
        .str.replace("$", "", regex=False)
        .str.replace(",", "", regex=False)
        .str.strip()  # padding would otherwise reach the float cast
    )
    df['Income'] = cleaned.astype(float)
    return df

# parse the income strings, then fill missing incomes with the column median
data = clean_income(data)
income_median = data['Income'].median()
data['Income'] = data['Income'].fillna(income_median)

In [None]:
def group_mstatus(df):
    '''Collapse the marital-status labels that amount to "single" into 'Single'.

    The Marital_Status column of the frame passed in is overwritten, and
    that same frame is handed back.
    '''
    relabel = dict.fromkeys(('YOLO', 'Alone', 'Absurd'), 'Single')
    statuses = df['Marital_Status']
    df['Marital_Status'] = statuses.replace(relabel)
    return df

# fold the solo-equivalent labels into 'Single' on the working frame (modifies `data`)
group_mstatus(data)

In [None]:
# calculate age of customer, in years, from the birth year alone
# (assumes Year_Birth holds calendar years as numbers -- TODO confirm)
# A date object cannot have a numeric Series subtracted from it, so the
# subtraction is done against the current *year*.
current_year = dt.date.today().year
data['Age'] = current_year - data['Year_Birth']
data.drop(columns=['Year_Birth'], inplace=True)

In [None]:
# dependents per row = Kidhome count plus Teenhome count
kids, teens = data['Kidhome'], data['Teenhome']
data['Dependents'] = kids.add(teens)

In [1]:
# parse the Dt_Customer strings (month/day/two-digit year) into datetimes
signup_format = '%m/%d/%y'
data['Dt_Customer'] = pd.to_datetime(data['Dt_Customer'], format=signup_format)

# the current date rendered as an MM-DD-YYYY string
now = pd.to_datetime("today")
today = now.strftime('%m-%d-%Y')

def customer_loyalty(df, as_of=None):
    '''Calculate how long a customer has been part of a loyalty program (in years).

    Adds a `Length_customer_yrs` column (whole days elapsed since
    `Dt_Customer`, divided by 365 and rounded to 2 decimals) and drops
    `Dt_Customer`. The frame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime `Dt_Customer` column.
    as_of : date-like, optional
        Date the tenure is measured up to; anything `pd.to_datetime`
        accepts. Defaults to today at midnight.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was passed in.
    '''
    if as_of is None:
        reference = pd.Timestamp.today().normalize()
    else:
        reference = pd.to_datetime(as_of)

    elapsed = reference - df['Dt_Customer']
    # Read the day count from the timedelta itself; taking a fixed-width
    # slice of its string form breaks for 5-digit or negative day counts.
    df['Length_customer_yrs'] = (elapsed.dt.days / 365).round(2)
    df.drop(columns='Dt_Customer', inplace=True)
    return df

# swap Dt_Customer for the Length_customer_yrs tenure column on the working frame
customer_loyalty(data)

NameError: name 'pd' is not defined

In [None]:
# calculate total amount spent per customer
# The 'Mnt' columns are captured before Total_Spent is added, so the loop
# below only ever touches the per-category amounts.
mnt_cols = data.filter(regex='Mnt', axis=1).columns
data['Total_Spent'] = data[mnt_cols].sum(axis=1)

# calculate percentage spent on each different category
# (rows whose Total_Spent is 0 divide by zero and come out as NaN)
for cat in mnt_cols:
    data[cat] = round(data[cat] / data['Total_Spent'] * 100, 2)

In [None]:
# total number of purchases made by customer

In [None]:
# total number of past accepted campaigns:
# row-wise sum over every column whose name matches "Cmp"
cmp_flags = data.filter(regex="Cmp")
data['Total_Accepted_Camp'] = cmp_flags.sum(axis=1)

def get_cmp_cols(df):
    '''Return, in column order, the names of df's columns that contain "Cmp".'''
    return list(filter(lambda name: 'Cmp' in name, df.columns))

# keep the names of the columns containing 'Cmp' for later use
cmp_cols = get_cmp_cols(data)

In [None]:
# data.drop(columns=cmp_cols, inplace=True)

## Pre-Processing Data

Performing customer segmentation using machine learning requires some preprocessing of the data.
- First, if the data are skewed, they need to be transformed.
- Next, there are a few categorical features that need to be encoded.
- Finally, the data are centered and scaled using scikit-learn's `StandardScaler`.

In [None]:
# encode the education column
# data['Education'] = pd.get_dummies(data['Education'], prefix = 'edu')
# data['Marital_Status'] = pd.get_dummies(data['Marital_Status'], prefix = 'mar')

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.cluster import KMeans


In [None]:
# Encoders for the categorical features, bundled into one ColumnTransformer
# ahead of the unsupervised-learning pipeline.

# nominal feature -> one-hot columns; handle_unknown="ignore" means categories
# not seen during fit do not raise at transform time
nom_cat = ['Marital_Status']
one_hot_encoder = OneHotEncoder(handle_unknown="ignore")

# ordinal feature -> ordinal codes
# NOTE(review): no explicit `categories` is given, so the code order is
# whatever the encoder infers -- confirm it matches the intended ranking.
ord_cat = ['Education']
ord_encoder = OrdinalEncoder()

# NOTE(review): no `remainder` is passed, so ColumnTransformer's default
# applies to every column not listed here -- confirm that is intended.
encoding_steps = [
    ("nom", one_hot_encoder, nom_cat),
    ("ord", ord_encoder, ord_cat),
]
preprocessor = ColumnTransformer(transformers=encoding_steps)

In [None]:
# Full pipeline: encode the categorical columns, then standardise the result.
# NOTE(review): StandardScaler's default centering cannot be applied to a
# sparse matrix; if the preprocessor ends up emitting sparse output, either
# make it dense or pass with_mean=False -- confirm when fitting.
clf = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        # each step must be a (name, estimator) pair
        ("scaler", StandardScaler()),
    ]
)

In [None]:
# data.drop(columns=['Education', 'Marital_Status'], inplace=True)