In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

import plotly
import plotly.graph_objs as go
import plotly.io as pio
# Switch plotly into offline notebook mode so iplot() figures render inline.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
plotly.offline.init_notebook_mode(connected=True)

In [3]:
# Read the "Sheet1" worksheet of the accepted-loans workbook into a DataFrame.
accepted_loans_df = pd.read_excel(
    "../data/2007_2011_loan_data/LoanStats_2007_2011_BP_edits.xls",
    sheet_name="Sheet1",
)

In [4]:
# Row and column counts of the loaded loans frame.
accepted_loans_df.shape

(42538, 10)

In [5]:
# Preview the first rows to eyeball the column names and value formats.
accepted_loans_df.head()

Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,zip_code,addr_state
0,5000.0,5000.0,4975.0,36 months,0.1065,162.87,B,B2,860xx,AZ
1,2500.0,2500.0,2500.0,60 months,0.1527,59.83,C,C4,309xx,GA
2,2400.0,2400.0,2400.0,36 months,0.1596,84.33,C,C5,606xx,IL
3,10000.0,10000.0,10000.0,36 months,0.1349,339.31,C,C1,917xx,CA
4,3000.0,3000.0,3000.0,60 months,0.1269,67.79,B,B5,972xx,OR


In [6]:
# Median `loan_amnt` for each `addr_state`, one row per state (as_index=False
# keeps `addr_state` as a regular column rather than the index).
# The aggregation is named by string instead of passing np.median: pandas
# already dispatches np.median to its own group-wise median, and newer releases
# emit a FutureWarning for the callable form.
state_loan_amnt_df = (
    accepted_loans_df
    .groupby('addr_state', as_index=False)
    .agg({'loan_amnt': 'median'})
)

In [7]:
# Spot-check five aggregated rows. random_state pins the draw so the same rows
# are displayed on every Restart & Run All.
state_loan_amnt_df.sample(n=5, random_state=193)

Unnamed: 0,addr_state,loan_amnt
13,ID,6150.0
44,VA,10000.0
38,RI,8000.0
33,NY,10000.0
41,TN,5550.0


In [8]:
# my adaptation of code borrowed from Stack Exchange
# https://datascience.stackexchange.com/questions/9616/how-to-create-us-state-heatmap
# Draws a US choropleth of the per-state median loan amount.

# Nothing in this cell draws random numbers; the seed is kept only so the global
# NumPy RNG state left behind for any later cells is unchanged.
np.random.seed(193)

# Build the plotting inputs as local series instead of casting every column of
# state_loan_amnt_df to str in place (and then casting loan_amnt back to float):
# the aggregated frame shown in earlier cells keeps its original dtypes.
state_codes = state_loan_amnt_df['addr_state'].astype(str)
median_amounts = state_loan_amnt_df['loan_amnt'].astype(float)

# Six-stop light-to-dark purple scale; each entry is [position in 0..1, colour].
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

data = [
    dict(
        type='choropleth',
        colorscale=scl,
        autocolorscale=False,
        locations=state_codes,
        z=median_amounts,
        locationmode='USA-states',
        # White borders between states.
        marker=dict(
            line=dict(
                color='rgb(255,255,255)',
                width=2,
            )
        ),
        # Label the colour bar so the figure can be read on its own.
        colorbar=dict(title='Median loan amount'),
    )
]

layout = dict(
    # Year range follows the 2007_2011 workbook this notebook loads.
    title='Median Loan Amount by State in 2007-2011 Lending Club Accepted Loan Data',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

# `fig` stays a plain dict; it is displayed inline here and reused by the
# image-export cell.
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig)

In [9]:
# Export the choropleth as a static PNG. The output folder is created first so
# the export does not fail when images/ is missing (e.g. on a fresh checkout).
image_path = 'images/median_loan_amnt_by_state.png'
os.makedirs(os.path.dirname(image_path), exist_ok=True)
pio.write_image(fig, image_path)