In [1]:
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.io as pio

# Cufflinks wrapper
import cufflinks

# Data science imports
import pandas as pd
import numpy as np

from IPython.core.interactiveshell import InteractiveShell
# Notebook-wide display setting: with 'all', IPython shows the value of every
# top-level expression statement in a cell rather than only the last one.
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import os

from plotly.offline import iplot

# Switch cufflinks to offline mode before any .iplot() call is made.
cufflinks.go_offline()

# Set global theme
cufflinks.set_config_file(world_readable=True, theme='pearl')

# Reading Home Credit Group Dataset
# The CSV location can be overridden with the HOME_CREDIT_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original local path is used unchanged.
DATA_PATH = os.environ.get(
    'HOME_CREDIT_CSV',
    'C:\\Users\\vkrithikaa\\Desktop\\home credit\\home_credit_group.csv',
)
data = pd.read_csv(DATA_PATH)

In [16]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# 1 Histogram - distribution of the AMT_GOODS_PRICE column (50 bins)
goods_price = data['AMT_GOODS_PRICE']
hist_options = dict(
    kind='hist',
    bins=50,
    xTitle='<- Total amount in EUROS-> ',
    yTitle='count',
    title=' Amount spent in EUROs by Home Owner',
    linecolor='black',
    colors='green',
)
goods_price.iplot(**hist_options)

# Static image export is left disabled; note that `hist1` is not defined in
# any cell visible here, so this line cannot simply be uncommented as-is.
#pio.write_image(hist1, 'fig1.png')

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for AMT_GOODS_PRICE.
data.loc[:, 'AMT_GOODS_PRICE'].describe(include='all')

count    3.072330e+05
mean     5.383962e+05
std      3.694465e+05
min      4.050000e+04
25%      2.385000e+05
50%      4.500000e+05
75%      6.795000e+05
max      4.050000e+06
Name: AMT_GOODS_PRICE, dtype: float64

In [4]:
# 2 Pie chart - split of loans by contract type.
# count() tallies the non-null AMT_GOODS_PRICE entries per NAME_CONTRACT_TYPE,
# so each slice is a row count for that contract type, not a sum of prices.
contract_counts = data.groupby('NAME_CONTRACT_TYPE', as_index=False)['AMT_GOODS_PRICE'].count()
contract_counts.iplot(
    kind='pie', labels='NAME_CONTRACT_TYPE', values='AMT_GOODS_PRICE',
    title='Pie Split between Cash Vs Revolving Loans')

In [5]:
# 3 Pie chart - share of rows per NAME_TYPE_SUITE value.
# Slice sizes are the per-group counts of non-null AMT_GOODS_PRICE entries.
suite_groups = data.groupby('NAME_TYPE_SUITE', as_index=False)
suite_counts = suite_groups['AMT_GOODS_PRICE'].count()
suite_counts.iplot(
    kind='pie',
    labels='NAME_TYPE_SUITE',
    values='AMT_GOODS_PRICE',
    title='Percentage of types of groups involved in Home Loans',
)

In [6]:
# 4 Line chart on a filtered subset of the data.
# Rows are kept when income is above 300000, credit is below 400000 and the
# goods price lies strictly between 300000 and 400000.
income_above = data['AMT_INCOME_TOTAL'] > 300000
credit_below = data['AMT_CREDIT'] < 400000
goods_above = data['AMT_GOODS_PRICE'] > 300000
goods_below = data['AMT_GOODS_PRICE'] < 400000
data_amt = data[income_above & credit_below & goods_above & goods_below]

# Plot both amount columns as lines against the row index.
credit_vs_goods = data_amt[["AMT_CREDIT", "AMT_GOODS_PRICE"]]
credit_vs_goods.iplot(title='Difference in amount credited and goods purchased to build home')

In [7]:
# 5 Bar plot - AMT_INCOME_TOTAL against income type and housing type
bar_options = {
    'kind': 'bar',
    'x': ['NAME_INCOME_TYPE', 'NAME_HOUSING_TYPE'],
    'y': 'AMT_INCOME_TOTAL',
    'yTitle': 'TOTAL INCOME AMT IN EUROS',
}
data_amt.iplot(**bar_options)

In [8]:
# 6 Scatter plot - application start hour plotted against the start weekday
data_amt.iplot(
    kind='scatter',
    mode='markers',
    x='WEEKDAY_APPR_PROCESS_START',
    y='HOUR_APPR_PROCESS_START',
)

In [9]:
# 7 Box plots - one box per amount column of the filtered subset
box_columns = ['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE']
data_amt[box_columns].iplot(kind='box')

In [10]:
# 8 Annotated heatmap of the pairwise correlations between the amount columns
import plotly.figure_factory as ff

# Candidate colorscale names; this cell itself only uses 'Earth'.
colorscales = [
    'Greys', 'YlGnBu', 'Greens', 'YlOrRd', 'Bluered', 'RdBu', 'Reds', 'Blues',
    'Picnic', 'Rainbow', 'Portland', 'Jet', 'Hot', 'Blackbody', 'Earth',
    'Electric', 'Viridis', 'Cividis'
]

# Correlation matrix of the four amount columns in the filtered subset.
heatmap_columns = ['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_ANNUITY', 'AMT_GOODS_PRICE']
corrs = data_amt[heatmap_columns].corr()

# Cells are annotated with the coefficients rounded to two decimals, while the
# colours are driven by the unrounded values.
cell_labels = corrs.round(2).values
figure = ff.create_annotated_heatmap(
    z=corrs.values,
    x=list(corrs.columns),
    y=list(corrs.index),
    annotation_text=cell_labels,
    colorscale='Earth',
    reversescale=True,
    showscale=True,
)

# Fixed canvas size with extra left/top margin.
figure.layout.width = 1000
figure.layout.height = 800
figure.layout.margin = dict(l=200, t=200)

iplot(figure)


In [11]:
# 9 Spread plot of DAYS_EMPLOYED and DAYS_REGISTRATION
spread_columns = ['DAYS_EMPLOYED', 'DAYS_REGISTRATION']
data_amt[spread_columns].iplot(kind='spread')

In [14]:
# Maximum DAYS_BIRTH in the filtered subset. Read it from data_amt (the frame
# data2 is sliced from further down) so this cell works when the notebook is
# run top-to-bottom on a fresh kernel.
data_amt['DAYS_BIRTH'].max()

-8413

In [15]:
# Mean DAYS_BIRTH in the filtered subset. The recorded output of this cell is
# a non-integer value lying between the min and max, i.e. a mean, so the call
# is .mean() rather than a repeat of .max(). Read from data_amt so the cell
# works in top-to-bottom execution order.
data_amt['DAYS_BIRTH'].mean()

-15378.504184100419

In [16]:
# Minimum DAYS_BIRTH in the filtered subset. Read it from data_amt (the frame
# data2 is sliced from further down) so this cell works when the notebook is
# run top-to-bottom on a fresh kernel.
data_amt['DAYS_BIRTH'].min()

-24017

In [12]:
# 10 3D interactive surface plot over the three DAYS_* columns
surface_columns = ["DAYS_BIRTH", "DAYS_EMPLOYED", "DAYS_REGISTRATION"]
data2 = data_amt[surface_columns]
# Whatever iplot returns is kept in `d`, as before.
d = data2.iplot(colorscale='rdylbu', kind='surface')