In [1]:
import numpy as np
import pandas as pd 
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
import os
import sys
import ipywidgets as widgets
from IPython.display import clear_output
import pickle as pk
from IPython.display import HTML
import warnings
warnings.filterwarnings(action="ignore")

In [2]:
# LOAD DATA
# The pickle holds a 2-tuple that is unpacked into `kmean` and `scale`; later
# cells call `kmean.predict(...)` and `scale.transform(...)` on them.
# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# only load 'cusSegData.pk' from a trusted source.
with open('cusSegData.pk','rb') as f:
    kmean,scale = pk.load(f)
# No explicit f.close(): the `with` block already closes the file on exit.

In [3]:
def run(inp,kmean,scale):
    """Predict and print the cluster for a single customer record.

    Parameters
    ----------
    inp : numpy array
        Flat array holding one value per name in ``feature_names`` below
        (18 values, ``CUST_ID`` first). It is reshaped to a single row.
    kmean : object exposing ``predict``
        Called on the scaled feature row; presumably the fitted clustering
        model loaded from the pickle (confirm against the training notebook).
    scale : object exposing ``transform``
        Called on the binned feature row before prediction.

    Returns
    -------
    None
        The cluster label is printed, not returned.
    """
    feature_names = [
        'CUST_ID', 'BALANCE', 'BALANCE_FREQUENCY', 'PURCHASES',
        'ONEOFF_PURCHASES', 'INSTALLMENTS_PURCHASES', 'CASH_ADVANCE',
        'PURCHASES_FREQUENCY', 'ONEOFF_PURCHASES_FREQUENCY',
        'PURCHASES_INSTALLMENTS_FREQUENCY', 'CASH_ADVANCE_FREQUENCY',
        'CASH_ADVANCE_TRX', 'PURCHASES_TRX', 'CREDIT_LIMIT', 'PAYMENTS',
        'MINIMUM_PAYMENTS', 'PRC_FULL_PAYMENT', 'TENURE',
    ]

    # One-row frame so rangeConv can address the features by column name.
    single_row = inp.reshape(1,-1)
    customer = pd.DataFrame(single_row,columns=feature_names,dtype='float')

    # Bin the raw values, scale them, then ask the model for the cluster.
    binned = rangeConv(customer)
    scaled = scale.transform(binned)
    cluster = kmean.predict(scaled)

    print("Customer belongs to Cluster: ", cluster.item())

In [4]:
def _bin_right_closed(values, edges, open_ended):
    """Return the 1-based index of the right-closed bin each value falls in.

    Bin ``i`` (for ``i >= 1``) is the interval ``(edges[i-1], edges[i]]``.
    Values at or below ``edges[0]`` and NaN values get code 0. Values above
    ``edges[-1]`` get ``len(edges)`` when ``open_ended`` is true and 0
    otherwise.
    """
    values = np.asarray(values, dtype=float)
    # side='left' counts the edges strictly below each value, which is exactly
    # the 1-based index of the (lo, hi] interval containing it.
    codes = np.searchsorted(edges, values, side='left').astype('int64')
    if not open_ended:
        codes[codes >= len(edges)] = 0
    # searchsorted places NaN past the last edge; treat it as "no bin".
    codes[np.isnan(values)] = 0
    return codes


def rangeConv(data):
    """Convert raw credit-card features into ordinal range codes.

    For every amount, frequency and transaction-count column a new
    ``<column>_RANGE`` integer column is derived, then ``CUST_ID`` and the
    raw columns are dropped. Columns not listed here (e.g. ``TENURE``) are
    kept unchanged and stay ahead of the new ``_RANGE`` columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``CUST_ID`` and all columns named below; a missing
        column raises ``KeyError``. The frame is NOT modified: the work is
        done on a copy.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per input row; columns are the remaining
        original columns followed by the ``_RANGE`` columns in the order
        amounts, frequencies, counts.
    """
    amount_cols = ['BALANCE', 'PURCHASES', 'ONEOFF_PURCHASES',
                   'INSTALLMENTS_PURCHASES', 'CASH_ADVANCE', 'CREDIT_LIMIT',
                   'PAYMENTS', 'MINIMUM_PAYMENTS']
    frequency_cols = ['BALANCE_FREQUENCY', 'PURCHASES_FREQUENCY',
                      'ONEOFF_PURCHASES_FREQUENCY',
                      'PURCHASES_INSTALLMENTS_FREQUENCY',
                      'CASH_ADVANCE_FREQUENCY', 'PRC_FULL_PAYMENT']
    count_cols = ['PURCHASES_TRX', 'CASH_ADVANCE_TRX']

    # (columns, bin edges, whether values above the last edge get their own bin)
    binning_plan = [
        # amounts: codes 1..5 for the listed intervals, 6 for > 10000
        (amount_cols, [0, 500, 1000, 3000, 5000, 10000], True),
        # frequencies: codes 1..10 in steps of 0.1; anything above 1.0 is 0.
        # NOTE(review): this matches the existing behaviour, but a value such
        # as CASH_ADVANCE_FREQUENCY = 1.5 lands in code 0 - confirm that this
        # is what the training pipeline did before changing it.
        (frequency_cols,
         [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], False),
        # transaction counts: codes 1..7 for the listed intervals, 8 for > 100
        (count_cols, [0, 5, 10, 15, 20, 30, 50, 100], True),
    ]

    # Work on a copy so the caller's frame keeps its original columns.
    binned = data.copy()
    for columns, edges, open_ended in binning_plan:
        for c in columns:
            binned[c + '_RANGE'] = _bin_right_closed(binned[c], edges, open_ended)

    raw_columns = ['CUST_ID'] + amount_cols + frequency_cols + count_cols
    binned = binned.drop(columns=raw_columns)

    return np.asarray(binned)

<h2 align="center"><b><font size="10">CUSTOMER SEGMENTATION USING CREDIT CARD BEHAVIOUR</font></b></h2>

In [32]:
# Introductory paragraph describing the tool and the clustering behind it.
# Only traits that widgets.HTML defines are passed (value, placeholder,
# description); the <font> tag is closed so it cannot leak into later output.
label = widgets.HTML(
    value="<font size='3'> This is a customer segmentation tool to cluster customers based on their credit card behaviour. This tool is based on a financial dataset and provides marketing strategies to banks to cater to the needs of their customer. Unsupervised K-means algorithm is used to segment about 9000 customers into 4 clusters based on their credit card data. Using these clusters, banks can create different programs catering to a specific customer group. As shown in the below figure we can see clear clusters forming, suggesting different credit card behaviours.</font>",
    placeholder='',
    description='',
)

display(label)

HTML(value="<font size='3'> This is a customer segmentation tool to cluster customers based on their credit ca…

In [13]:
# Embed the cluster figure from the image file that sits next to the notebook.
html1 = (
    '<img src="cusSegGraph.jpg" '
    'width="900" height="900" '
    'align="middle"/>'
)
HTML(html1)

In [33]:
# Short instruction shown above the input sliders.
# Only traits that widgets.HTML defines are passed (value, placeholder,
# description); the <font> tag is closed so it cannot leak into later output.
label = widgets.HTML(
    value="<font size='3'> Based on a new input, a prediction for the cluster can be made using the deployed clustering model. Enter the feature values of the customer and hit predict to get the group allocated to the customer.</font>",
    placeholder='',
    description='',
)

display(label)

HTML(value="<font size='3'> Based on a new input, a prediction for the cluster can be made using the deployed …

In [34]:
# Feature names in the order the sliders are created and read back.
cols = [
    'BALANCE',
    'BALANCE_FREQUENCY',
    'PURCHASES',
    'ONEOFF_PURCHASES',
    'INSTALLMENTS_PURCHASES',
    'CASH_ADVANCE',
    'PURCHASES_FREQUENCY',
    'ONEOFF_PURCHASES_FREQUENCY',
    'PURCHASES_INSTALLMENTS_FREQUENCY',
    'CASH_ADVANCE_FREQUENCY',
    'CASH_ADVANCE_TRX',
    'PURCHASES_TRX',
    'CREDIT_LIMIT',
    'PAYMENTS',
    'MINIMUM_PAYMENTS',
    'PRC_FULL_PAYMENT',
    'TENURE',
]

# Feature name -> slider widget; filled in by a later cell.
dictInps = dict.fromkeys(cols)

# Upper slider bound per feature, positionally aligned with `cols`.
maxinps = [
    19043.13856, 1.0, 49039.57, 40761.25, 22500.0, 47137.211760000006,
    1.0, 1.0, 1.0, 1.5,
    123, 358,
    30000.0, 50721.483360000006, 76406.20752000001, 1.0,
    12,
]

# Lower slider bound per feature, positionally aligned with `cols`.
mininps = [
    0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 0.0,
    0, 0,
    50.0, 0.0, 0.019163, 0.0,
    6,
]

In [7]:
# Create one slider per feature, bounded by the matching entries of
# mininps / maxinps. zip() walks the three parallel lists together instead of
# re-searching `cols` with cols.index(c) twice per feature.
for c, lo, hi in zip(cols, mininps, maxinps):
    dictInps[c] = widgets.FloatSlider(
        # 0.1 lies below the lower bound of some features (CREDIT_LIMIT,
        # TENURE); start those at their bound explicitly rather than relying
        # on the widget to clamp the value.
        value=max(0.1, lo),
        min=lo,
        max=hi,
        step=0.02,
        description=c+":",
        disabled=False,
        continuous_update=False,  # only report the value once the drag ends
        orientation='horizontal',
        readout=True,
        readout_format='.2f',
    )

In [31]:
# Help text explaining every input slider. Feature names are spelled exactly
# as on the slider labels so the reader can match them up. Only traits that
# widgets.HTML defines are passed (value, placeholder, description).
label = widgets.HTML(
    value=(
        "<font size='3'> The following input parameters can be customised to predict cluster:<ul> "
        "<li> BALANCE : Balance amount left in their account to make purchases </li> "
        "<li> BALANCE_FREQUENCY : How frequently the Balance is updated, score between 0 and 1 (1 = frequently updated, 0 = not frequently updated) </li> "
        "<li> PURCHASES : Amount of purchases made from account </li> "
        "<li> ONEOFF_PURCHASES : Maximum purchase amount done in one-go </li> "
        "<li> INSTALLMENTS_PURCHASES : Amount of purchase done in installment </li> "
        "<li> CASH_ADVANCE : Cash in advance given by the user </li> "
        "<li> PURCHASES_FREQUENCY : How frequently the Purchases are being made, score between 0 and 1 (1 = frequently purchased, 0 = not frequently purchased) </li> "
        "<li> ONEOFF_PURCHASES_FREQUENCY : How frequently Purchases are happening in one-go (1 = frequently purchased, 0 = not frequently purchased) </li> "
        "<li> PURCHASES_INSTALLMENTS_FREQUENCY : How frequently purchases in installments are being done (1 = frequently done, 0 = not frequently done) </li> "
        "<li> CASH_ADVANCE_FREQUENCY : How frequently the cash in advance being paid </li> "
        "<li> CASH_ADVANCE_TRX : Number of Transactions made with 'Cash in Advanced' </li> "
        "<li> PURCHASES_TRX : Number of purchase transactions made </li> "
        "<li> CREDIT_LIMIT : Limit of Credit Card for user </li> "
        "<li> PAYMENTS : Amount of Payment done by user </li> "
        "<li> MINIMUM_PAYMENTS : Minimum amount of payments made by user </li> "
        "<li> PRC_FULL_PAYMENT : Percent of full payment paid by user </li> "
        "<li> TENURE : Tenure of credit card service for user </li> </ul></font>"
    ),
    placeholder='',
    description='',
)

display(label)

HTML(value="<font size='3'> The following input parameters can be customised to predict cluster:<ul>     <li> …

<h2><b><font size="4">Change the following parameters to predict customer group:</font></b></h2>

In [8]:
# Render every feature slider, in the order given by `cols`.
display(*(dictInps[c] for c in cols))

FloatSlider(value=0.1, continuous_update=False, description='BALANCE:', max=19043.13856, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='BALANCE_FREQUENCY:', max=1.0, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='PURCHASES:', max=49039.57, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='ONEOFF_PURCHASES:', max=40761.25, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='INSTALLMENTS_PURCHASES:', max=22500.0, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='CASH_ADVANCE:', max=47137.211760000006, step=0.02…

FloatSlider(value=0.1, continuous_update=False, description='PURCHASES_FREQUENCY:', max=1.0, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='ONEOFF_PURCHASES_FREQUENCY:', max=1.0, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='PURCHASES_INSTALLMENTS_FREQUENCY:', max=1.0, step…

FloatSlider(value=0.1, continuous_update=False, description='CASH_ADVANCE_FREQUENCY:', max=1.5, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='CASH_ADVANCE_TRX:', max=123.0, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='PURCHASES_TRX:', max=358.0, step=0.02)

FloatSlider(value=50.0, continuous_update=False, description='CREDIT_LIMIT:', max=30000.0, min=50.0, step=0.02…

FloatSlider(value=0.1, continuous_update=False, description='PAYMENTS:', max=50721.483360000006, step=0.02)

FloatSlider(value=0.1, continuous_update=False, description='MINIMUM_PAYMENTS:', max=76406.20752000001, min=0.…

FloatSlider(value=0.1, continuous_update=False, description='PRC_FULL_PAYMENT:', max=1.0, step=0.02)

FloatSlider(value=6.0, continuous_update=False, description='TENURE:', max=12.0, min=6.0, step=0.02)

In [9]:
def get_vals(dictInps,cols):
    """Collect the current widget values into a flat numpy array.

    Parameters
    ----------
    dictInps : mapping
        Maps each name in ``cols`` to an object with a ``.value`` attribute.
    cols : iterable of str
        Names to read, in output order.

    Returns
    -------
    numpy.ndarray
        A leading constant ``1`` followed by one value per entry of ``cols``.
        The constant fills the first slot of the record ``run`` expects
        (its first column is ``CUST_ID``, which ``rangeConv`` drops).
    """
    readings = [dictInps[c].value for c in cols]
    return np.array([1] + readings)

In [10]:
# Output area that receives whatever the click handler prints.
go_output = widgets.Output()

# The PREDICT button itself.
go_button = widgets.Button(
    description='PREDICT',
    tooltip='Click to predict the customer cluster',
    button_style='success',  # one of 'success', 'info', 'warning', 'danger' or ''
    icon='',
    disabled=False,
)

def go_button_clicked(b):
    """Click handler: read the sliders and print the predicted cluster.

    ``b`` is the argument passed in by ``on_click``; it is not used. Everything
    printed while handling the click is captured by ``go_output``, which is
    cleared first so only the latest prediction is shown.
    """
    with go_output:
        clear_output()
        run(get_vals(dictInps,cols),kmean,scale)


# Register the handler, then show the button with its output area below it.
go_button.on_click(go_button_clicked)
display(go_button,go_output)

Button(button_style='success', description='PREDICT', style=ButtonStyle(), tooltip='Click to predict the custo…

Output()