# Customer Propensity Modelling

## Data Visualization and Analysis

In [102]:
import pandas as pd
import numpy as np
import seaborn as sns
import scipy.stats as stats

import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
from dash import Dash, dcc, html
from dash.dependencies import Input, Output

In [62]:
marketing_df = pd.read_csv('./Data/cleaned_marketing_engineered.csv')

In [4]:
marketing_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240 entries, 0 to 2239
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Education            2240 non-null   object 
 1   Marital_Status       2240 non-null   object 
 2   Income               2240 non-null   float64
 3   Dt_Customer          2240 non-null   object 
 4   Recency              2240 non-null   int64  
 5   MntWines             2240 non-null   float64
 6   MntFruits            2240 non-null   float64
 7   MntMeatProducts      2240 non-null   float64
 8   MntFishProducts      2240 non-null   float64
 9   MntSweetProducts     2240 non-null   float64
 10  MntGoldProds         2240 non-null   float64
 11  NumDealsPurchases    2240 non-null   int64  
 12  NumWebPurchases      2240 non-null   int64  
 13  NumCatalogPurchases  2240 non-null   int64  
 14  NumStorePurchases    2240 non-null   int64  
 15  NumWebVisitsMonth    2240 non-null   i

In [5]:
products_list = ['MntWines', 'MntFruits', 'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts', 'MntGoldProds'] 
products_list_name = ['Wine', 'Fruits', 'Meat', 'Fish', 'Sweet', 'Gold']

percentage_of_product_sales_list = [round((marketing_df[i].sum(axis=0) / marketing_df.TotalMnt.sum(axis=0))*100, 2) for i in products_list]

In [6]:
app = JupyterDash(__name__)

fig = px.bar(x = products_list_name, 
             y = percentage_of_product_sales_list, 
             color = percentage_of_product_sales_list,
             labels = {'x' : 'Products', 'y' : 'Percentage of Product Sales'},
             title = 'Percentage of Product Sales vs Products',
             text_auto = True)

fig.update_layout(xaxis={'categoryorder':'total descending'})

app.layout = html.Div([
    dcc.Graph(figure=fig)
])

app.run_server(mode='inline')

In [7]:
purchase_type_list = ['NumWebPurchases', 'NumStorePurchases', 'NumCatalogPurchases']
purchase_type_list_name = ['Web Purchases', 'Store Purchases', 'Catalog Purchases']

percentage_of_purchase_sales_list = [round((marketing_df[i].sum(axis=0) / marketing_df.TotalPurchases.sum(axis=0))*100, 2) for i in purchase_type_list]

In [8]:
app = JupyterDash(__name__)

fig = px.bar(x = purchase_type_list_name, 
             y = percentage_of_purchase_sales_list , 
             color = percentage_of_purchase_sales_list ,
             labels = {'x' : 'Purchase Funnels', 'y' : 'Total Number of Purchases'},
             title = 'Percentage of Total Number of Purchases vs Purchase Funnel',
             text_auto = True)

fig.update_layout(xaxis={'categoryorder':'total descending'})

app.layout = html.Div([
    dcc.Graph(figure=fig)
])

app.run_server(mode='inline')

In [9]:
correlation_check_columns_list = []
for column in marketing_df.columns.tolist():
    if marketing_df[column].dtypes in ['float32', 'float64', 'int32', 'int64']:
        if marketing_df[column].nunique() >= 3:
            correlation_check_columns_list.append(column)

In [10]:
correlation_df = marketing_df.copy()
correlation_df = correlation_df.loc[:, correlation_check_columns_list]

In [11]:
def correlation_threshold_background(cell_value):

    greater = 'background-color: green;'
    lower = 'background-color: red;'
    default = ''

    if type(cell_value) in [float, int]:
        if cell_value >= 0.7:
            return greater
        elif cell_value <= -0.6:
            return lower
    return default

correlation_df.corr().style.applymap(correlation_threshold_background)

Unnamed: 0,Income,Recency,MntWines,MntFruits,MntMeatProducts,MntFishProducts,MntSweetProducts,MntGoldProds,NumDealsPurchases,NumWebPurchases,NumCatalogPurchases,NumStorePurchases,NumWebVisitsMonth,Minorshome,TotalMnt,TotalPurchases,TotalCampPar,Age
Income,1.0,0.005502,0.719414,0.530146,0.68168,0.544097,0.543625,0.406891,-0.122392,0.482107,0.685688,0.667524,-0.647464,-0.347911,0.814439,0.765232,0.382722,0.209738
Recency,0.005502,1.0,0.016064,-0.004306,0.023056,0.001079,0.02267,0.016693,-0.001098,-0.010726,0.02511,0.000799,-0.021445,0.018053,0.020433,0.00641,-0.01257,0.018347
MntWines,0.719414,0.016064,1.0,0.389637,0.562667,0.399753,0.386581,0.387516,0.01094,0.542265,0.635226,0.6421,-0.320653,-0.351909,0.891839,0.75649,0.512358,0.152842
MntFruits,0.530146,-0.004306,0.389637,1.0,0.543105,0.594804,0.567164,0.392995,-0.132114,0.296735,0.487917,0.461758,-0.418383,-0.394853,0.614229,0.520686,0.161002,0.011817
MntMeatProducts,0.68168,0.023056,0.562667,0.543105,1.0,0.568402,0.523846,0.350609,-0.122415,0.293761,0.723827,0.479659,-0.53947,-0.502208,0.842965,0.623315,0.309313,0.026214
MntFishProducts,0.544097,0.001079,0.399753,0.594804,0.568402,1.0,0.57987,0.422875,-0.139361,0.293681,0.534478,0.459855,-0.446003,-0.425503,0.642818,0.537538,0.17783,0.038442
MntSweetProducts,0.543625,0.02267,0.386581,0.567164,0.523846,0.57987,1.0,0.369724,-0.1201,0.348544,0.490924,0.448756,-0.423294,-0.383137,0.603016,0.536019,0.200237,0.014975
MntGoldProds,0.406891,0.016693,0.387516,0.392995,0.350609,0.422875,0.369724,1.0,0.049085,0.421836,0.437697,0.381678,-0.250719,-0.266095,0.524262,0.512427,0.193741,0.050598
NumDealsPurchases,-0.122392,-0.001098,0.01094,-0.132114,-0.122415,-0.139361,-0.1201,0.049085,1.0,0.234185,-0.008617,0.068879,0.347633,0.439684,-0.065112,0.117887,-0.123949,0.055258
NumWebPurchases,0.482107,-0.010726,0.542265,0.296735,0.293761,0.293681,0.348544,0.421836,0.234185,1.0,0.378376,0.502713,-0.055846,-0.146361,0.519837,0.765923,0.191544,0.142002


In [188]:
app = JupyterDash(__name__)

app.layout = html.Div([
    html.Div([

        html.Div([
            dcc.Dropdown(
                correlation_df.columns.tolist(),
                'Income',
                id='xaxis-column'
            ),
        ], style={'width': '48%', 'display': 'inline-block'}),

        html.Div([
            dcc.Dropdown(
                correlation_df.columns.tolist(),
                'Income',
                id='yaxis-column'
            ),
        ], style={'width': '48%', 'float': 'right', 'display': 'inline-block'})
    ]),
    
    dcc.Graph(id = 'correlation-scatter-plot', figure=fig)
])

@app.callback(
    Output('correlation-scatter-plot', 'figure'),
    Input('xaxis-column', 'value'),
    Input('yaxis-column', 'value'))

def update_graph(xaxis_column_name, yaxis_column_name):

    fig = px.scatter(x = marketing_df[xaxis_column_name],
                     y = marketing_df[yaxis_column_name], 
                     trendline = 'ols', 
                     labels = {'x' :  xaxis_column_name, 'y': yaxis_column_name}, 
                     title = yaxis_column_name + ' vs ' + xaxis_column_name
                     )

    fig.update_layout(margin={'l': 40, 'b': 40, 't': 60, 'r': 0}, hovermode='closest')

    return fig

app.run_server(mode='inline')

In [24]:
number_of_complaints = marketing_df.Complain.sum(axis = 0)

complaints_percentage = round((number_of_complaints / len(marketing_df))*100, 2)
non_complaints_percentage = 100 - complaints_percentage

In [43]:
app = JupyterDash(__name__)

labels = ['No Complaints', 'Complaints']
values = [non_complaints_percentage, complaints_percentage]

fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.5, pull=[0, 0.1], name="Percentage of Complaints")])

fig.update_traces(hoverinfo="label+percent+name")

fig.update_layout(
    title_text="Percentage of Complaints in the last two years",
    
    annotations=[dict(text='Complaints', x=0.5, y=0.5, font_size=20, showarrow=False)]
)

app.layout = html.Div([
    dcc.Graph(figure=fig)
])

app.run_server(mode='inline')

In [63]:
app = JupyterDash(__name__)

app.layout = html.Div([
    html.Div([

        html.Div([
            dcc.Dropdown(
                correlation_df.columns.tolist() + ['Response'],
                'Income',
                id='xaxis-column'
            ),
        ], style={'width': '48%', 'display': 'inline-block'}),

        html.Div([
            dcc.Dropdown(
                correlation_df.columns.tolist() + ['Response'],
                'Income',
                id='yaxis-column'
            ),
        ], style={'width': '48%', 'float': 'right', 'display': 'inline-block'})
    ]),
    
    dcc.Graph(id = 'box-plot', figure=fig)
])

@app.callback(
    Output('box-plot', 'figure'),
    Input('xaxis-column', 'value'),
    Input('yaxis-column', 'value'))

def update_graph(xaxis_column_name, yaxis_column_name):

    fig = px.box(x = marketing_df[xaxis_column_name],
                 y = marketing_df[yaxis_column_name], 
                 labels = {'x' :  xaxis_column_name, 'y': yaxis_column_name}, 
                 title = yaxis_column_name + ' vs ' + xaxis_column_name + '  - Box Plot'
                )

    fig.update_layout(margin={'l': 40, 'b': 40, 't': 60, 'r': 0}, hovermode='closest')

    return fig

app.run_server(mode='inline')

In [190]:
def relation_categorical_testing(df, row, col):
    col_levels = df[col].unique().tolist()
    row_levels = df[row].unique().tolist()

    count_list = []
    count_level_list = []
    cross_tab_data_list = []
    for i in row_levels:
        for j in col_levels:
            count_level_list.append((i,j))
            count_list.append(len(df.loc[(df[row] == i) & (df[col] == j)]))

    for i, j in zip(count_level_list, count_list):
        for k in range(j):
            cross_tab_data_list.append([i[0], i[1]])

    df = pd.DataFrame(cross_tab_data_list, columns = [row, col]) 

    data_crosstab = pd.crosstab(df[row],
                                df[col],
                               margins=True, margins_name="Total")

    # significance level
    alpha = 0.05

    # Calcualtion of Chisquare
    chi_square = 0
    rows = df[row].unique()
    columns = df[col].unique()
    for i in col_levels:
        for j in row_levels:
            O = data_crosstab[i][j]
            E = data_crosstab[i]['Total'] * data_crosstab['Total'][j] / data_crosstab['Total']['Total']
            chi_square += (O-E)**2/E

    # The p-value approach
    print("Approach : The p-value approach to hypothesis testing in the decision rule")
    p_value = 1 - stats.chi2.cdf(chi_square, (len(row_levels)-1)*(len(col_levels)-1))
    conclusion = "Failed to reject the null hypothesis."
    if p_value <= alpha:
        
        data_crosstab_without_margins = pd.crosstab(df[row],
                                df[col],
                               margins=False)
    
    
        expected_crosstab = pd.DataFrame(
                                    columns = data_crosstab_without_margins.columns.tolist(), 
                                    index = data_crosstab_without_margins.index.tolist(),
                                    data = stats.chi2_contingency(observed = data_crosstab_without_margins)[3] 
                                    )
        chi_square_table = (data_crosstab_without_margins - expected_crosstab)**2 / expected_crosstab
        fig = px.imshow(chi_square_table, text_auto=True, aspect="auto")
        fig.show()
        
        conclusion = "Null Hypothesis is rejected."

    print("chisquare-score is:", chi_square, "and p value is:", p_value)
    print(conclusion)

> Null Hypothesis - No Relationship between the given categorical columns

In [189]:
relation_categorical_testing(marketing_df, 'TotalCampPar', 'Response')

Approach : The p-value approach to hypothesis testing in the decision rule


chisquare-score is: 408.01229650215237  and p value is: 0.0
Null Hypothesis is rejected.


In [193]:
campaigns_list = ['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'Response'] 
campaigns_list_name = ['1st Campaign', '2nd Campaign', '3rd Campaign', '4th Campaign', '5th Campaign', 'Current Campaign']

percentage_of_accepted_campaigns_list = [round((marketing_df[i].sum(axis=0) / len(marketing_df))*100, 2) for i in campaigns_list]

In [195]:
app = JupyterDash(__name__)

fig = px.bar(x = campaigns_list_name, 
             y = percentage_of_accepted_campaigns_list, 
             color = percentage_of_accepted_campaigns_list,
             labels = {'x' : 'Campaign Number', 'y' : 'Percentage of Accepted Campaigns'},
             title = 'Accepted Campaigns vs Campaign Number',
             text_auto = True)

fig.update_layout(xaxis={'categoryorder':'total descending'})

app.layout = html.Div([
    dcc.Graph(figure=fig)
])

app.run_server(mode='inline')