In [4]:
### DATA Preparation:

### DATA DICTIONARY:

***CUST_ID:*** Credit card holder ID
***BALANCE:*** Monthly average balance (based on daily balance averages)
***BALANCE_FREQUENCY:*** Ratio of last 12 months with balance
***PURCHASES:*** Total purchase amount spent during last 12 months
***ONEOFF_PURCHASES:*** Total amount of one-off purchases
***INSTALLMENTS_PURCHASES:*** Total amount of installment purchases
***CASH_ADVANCE:*** Total cash-advance amount
***PURCHASES_FREQUENCY:*** Frequency of purchases (Percent of months with at least one purchase)
***ONEOFF_PURCHASES_FREQUENCY:*** Frequency of one-off purchases
***PURCHASES_INSTALLMENTS_FREQUENCY:*** Frequency of installment purchases
***CASH_ADVANCE_FREQUENCY:*** Cash-advance frequency
***AVERAGE_PURCHASE_TRX:*** Average amount per purchase transaction (listed in the source dictionary; no column with this name appears in the loaded data)
***CASH_ADVANCE_TRX:*** Number of cash-advance transactions
***PURCHASES_TRX:*** Number of purchase transactions
***CREDIT_LIMIT:*** Credit limit
***PAYMENTS:*** Total payments (due amount paid by the customer to decrease their statement balance) in the period
***MINIMUM_PAYMENTS:*** Total minimum payments due in the period.
***PRC_FULL_PAYMENT:*** Percentage of months with full payment of the due statement balance
***TENURE:*** Number of months as a customer

In [None]:
### Import Library

import pandas as pd
import numpy as np
import glob as gl

import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

from math import sqrt

# Ignore warnings
# NOTE(review): called with only the 'ignore' action, this filter matches every
# warning category for the rest of the session, so library deprecation or
# chained-assignment warnings raised by later cells will not be shown.
import warnings
warnings.filterwarnings('ignore')

In [None]:
### Read the credit-card CSV into `credit` and preview its first rows
credit = pd.read_csv(filepath_or_buffer="Credit_Cards.csv")
credit.head(n=5)

In [None]:
### Inspect the frame's structure: column names, non-null counts and dtypes
credit.info()

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
credit.describe()

Unnamed: 0,BALANCE,BALANCE_FREQUENCY,PURCHASES,ONEOFF_PURCHASES,INSTALLMENTS_PURCHASES,CASH_ADVANCE,PURCHASES_FREQUENCY,ONEOFF_PURCHASES_FREQUENCY,PURCHASES_INSTALLMENTS_FREQUENCY,CASH_ADVANCE_FREQUENCY,CASH_ADVANCE_TRX,PURCHASES_TRX,CREDIT_LIMIT,PAYMENTS,MINIMUM_PAYMENTS,PRC_FULL_PAYMENT,TENURE
count,8950.0,8950.0,8950.0,8950.0,8950.0,8950.0,8950.0,8950.0,8950.0,8950.0,8950.0,8950.0,8949.0,8950.0,8637.0,8950.0,8950.0
mean,1564.474828,0.877271,1003.204834,592.437371,411.067645,978.871112,0.490351,0.202458,0.364437,0.135144,3.248827,14.709832,4494.44945,1733.143852,864.206542,0.153715,11.517318
std,2081.531879,0.236904,2136.634782,1659.887917,904.338115,2097.163877,0.401371,0.298336,0.397448,0.200121,6.824647,24.857649,3638.815725,2895.063757,2372.446607,0.292499,1.338331
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,0.0,0.019163,0.0,6.0
25%,128.281915,0.888889,39.635,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,1.0,1600.0,383.276166,169.123707,0.0,12.0
50%,873.385231,1.0,361.28,38.0,89.0,0.0,0.5,0.083333,0.166667,0.0,0.0,7.0,3000.0,856.901546,312.343947,0.0,12.0
75%,2054.140036,1.0,1110.13,577.405,468.6375,1113.821139,0.916667,0.3,0.75,0.222222,4.0,17.0,6500.0,1901.134317,825.485459,0.142857,12.0
max,19043.13856,1.0,49039.57,40761.25,22500.0,47137.21176,1.0,1.0,1.0,1.5,123.0,358.0,30000.0,50721.48336,76406.20752,1.0,12.0


In [None]:
# (rows, columns) of the loaded frame
credit.shape

In [15]:
# Count every null cell in the dataframe (all columns combined) and report it
print("\nNumber of Missing values :  ", credit.isna().to_numpy().sum())


Number of Missing values :   314


In [16]:
# Report, column by column, how many distinct (non-null) values are present
print("\nNumber of Unique values : \n", credit.nunique(axis=0, dropna=True))


Number of Unique values : 
 CUST_ID                             8950
BALANCE                             8871
BALANCE_FREQUENCY                     43
PURCHASES                           6203
ONEOFF_PURCHASES                    4014
INSTALLMENTS_PURCHASES              4452
CASH_ADVANCE                        4323
PURCHASES_FREQUENCY                   47
ONEOFF_PURCHASES_FREQUENCY            47
PURCHASES_INSTALLMENTS_FREQUENCY      47
CASH_ADVANCE_FREQUENCY                54
CASH_ADVANCE_TRX                      65
PURCHASES_TRX                        173
CREDIT_LIMIT                         205
PAYMENTS                            8711
MINIMUM_PAYMENTS                    8636
PRC_FULL_PAYMENT                      47
TENURE                                 7
dtype: int64


In [17]:
### 1. Missing-value problem
### Solution: impute the gaps with the column median.

# One boolean per column: True where the column holds at least one null
credit.isna().any(axis=0)

CUST_ID                             False
BALANCE                             False
BALANCE_FREQUENCY                   False
PURCHASES                           False
ONEOFF_PURCHASES                    False
INSTALLMENTS_PURCHASES              False
CASH_ADVANCE                        False
PURCHASES_FREQUENCY                 False
ONEOFF_PURCHASES_FREQUENCY          False
PURCHASES_INSTALLMENTS_FREQUENCY    False
CASH_ADVANCE_FREQUENCY              False
CASH_ADVANCE_TRX                    False
PURCHASES_TRX                       False
CREDIT_LIMIT                         True
PAYMENTS                            False
MINIMUM_PAYMENTS                     True
PRC_FULL_PAYMENT                    False
TENURE                              False
dtype: bool

In [20]:
### Replace missing values with each column's median.
#
# The filled Series is assigned back to the column rather than calling
# `credit[col].fillna(..., inplace=True)`. That form runs in place on a column
# selection: recent pandas versions warn about it (the warning is hidden here
# because warnings are globally ignored) and, with Copy-on-Write enabled,
# it leaves `credit` itself unchanged.

# CREDIT_LIMIT
credit['CREDIT_LIMIT'] = credit['CREDIT_LIMIT'].fillna(credit['CREDIT_LIMIT'].median())

# MINIMUM_PAYMENTS
credit['MINIMUM_PAYMENTS'] = credit['MINIMUM_PAYMENTS'].fillna(credit['MINIMUM_PAYMENTS'].median())

# Fail fast if the imputation did not take effect on the frame
assert credit[['CREDIT_LIMIT', 'MINIMUM_PAYMENTS']].notna().all().all(), \
    "median imputation left nulls in CREDIT_LIMIT / MINIMUM_PAYMENTS"

In [21]:
# Confirm the imputation: every column should now report False
credit.isna().any(axis=0)

CUST_ID                             False
BALANCE                             False
BALANCE_FREQUENCY                   False
PURCHASES                           False
ONEOFF_PURCHASES                    False
INSTALLMENTS_PURCHASES              False
CASH_ADVANCE                        False
PURCHASES_FREQUENCY                 False
ONEOFF_PURCHASES_FREQUENCY          False
PURCHASES_INSTALLMENTS_FREQUENCY    False
CASH_ADVANCE_FREQUENCY              False
CASH_ADVANCE_TRX                    False
PURCHASES_TRX                       False
CREDIT_LIMIT                        False
PAYMENTS                            False
MINIMUM_PAYMENTS                    False
PRC_FULL_PAYMENT                    False
TENURE                              False
dtype: bool

In [27]:
# Discard rows that are exact duplicates (keeping the first occurrence),
# then recount the distinct values per column
credit = credit.drop_duplicates(keep="first")
print("\nNumber of Unique values : \n", credit.nunique(axis=0, dropna=True))


Number of Unique values : 
 CUST_ID                             8950
BALANCE                             8871
BALANCE_FREQUENCY                     43
PURCHASES                           6203
ONEOFF_PURCHASES                    4014
INSTALLMENTS_PURCHASES              4452
CASH_ADVANCE                        4323
PURCHASES_FREQUENCY                   47
ONEOFF_PURCHASES_FREQUENCY            47
PURCHASES_INSTALLMENTS_FREQUENCY      47
CASH_ADVANCE_FREQUENCY                54
CASH_ADVANCE_TRX                      65
PURCHASES_TRX                        173
CREDIT_LIMIT                         205
PAYMENTS                            8711
MINIMUM_PAYMENTS                    8636
PRC_FULL_PAYMENT                      47
TENURE                                 7
dtype: int64
