In [None]:
################################################################################
# Market Analysis
#
# A market-entry hypothesis depends on both external and internal factors.
# HMDA data are only one source for analyzing the external factors. Further
# information about the company's capabilities, financial statements, and other
# internal data will be needed to analyze the potential revenue and the cost of
# market entry.
#
# External market analysis
# 1.The market size by state and growth rate
# 2.Main competitors / top lenders
# 3.How competitive the market is, HHI
################################################################################

In [None]:
# Subset to conventional loans; .copy() detaches the result from df_hmda1 so
# later column edits on the subset do not touch (or warn about) the parent frame.
df_hmda_conventional = df_hmda1.loc[
    df_hmda1['Loan_Type_Description'].eq('Conventional')
].copy()
df_hmda_conventional.info()

In [None]:
# Share of each Conventional_Conforming_Flag value among conventional loans.
# normalize=True reports proportions; dropna=False keeps missing flags as their own row.
(
    df_hmda_conventional['Conventional_Conforming_Flag']
    .value_counts(dropna=False, normalize=True)
)

In [None]:
# Conventional AND conforming loans: rows whose flag is 'Y', taken as an
# independent copy of the conventional subset.
df_hmda_conv_conf = df_hmda_conventional.loc[
    df_hmda_conventional['Conventional_Conforming_Flag'].eq('Y')
].copy()
df_hmda_conv_conf.info()
# Row count recorded by the author: 879453 entries

In [None]:
##############################################
# The market size by state and growth rate
##############################################
# Total loan amount per (year, state). The groupby result is a Series indexed
# by (As_of_Year, State); reset_index() turns both keys back into ordinary
# columns, which is the long format seaborn expects.
loan_amount_byyear = df_hmda_conv_conf.groupby(by=['As_of_Year','State'])['Loan_Amount_000'].sum()
df_loan_amount_byyear = loan_amount_byyear.reset_index()

# Grouped bar chart: one cluster per year, one bar per state.
# NOTE: despite its name, `fig` holds the object returned by sns.barplot (the
# axes the bars are drawn on); the name is kept for other cells that may use it.
fig = sns.barplot(x='As_of_Year', y='Loan_Amount_000', data=df_loan_amount_byyear,
                  hue='State', hue_order=['VA','MD','DC','WV','DE'])
fig.set_title('Total Loan Amount in $000s')
fig.set_xlabel('Year')
fig.set_ylabel('Total Loan Amount in $000s')
# Turn the grid off. The flag is passed positionally as a real boolean: the old
# `b=` keyword was renamed in newer matplotlib, and the string 'off' is truthy.
fig.grid(False)

# The total loan amount decreased from 2012 to 2014 in all five states.

In [None]:
################################################################################
# Interest Rate
################################################################################
# Annual average 30-year fixed mortgage rate, typed in by hand from
# 'http://www.freddiemac.com/pmms/pmms30.htm'. Each entry is [year, rate].
rate = [
    ['2011', 4.45],
    ['2012', 3.66],
    ['2013', 3.98],
    ['2014', 4.17],
    ['2015', 3.85],
    ['2016', 3.65],
]

# The first column is labelled 'YYYYMM' although the values above are plain years.
df_rate = pd.DataFrame(rate, columns=['YYYYMM', 'rate'])

# Line chart of the rate per year, one marker per data point, no legend.
df_rate.plot(
    x='YYYYMM',
    y='rate',
    figsize=(6,3),
    legend=None,
    marker='o',
)
plt.title('30-Year Fixed-Rate Mortgages')
plt.ylabel('Average Rate')
plt.xlabel('Year')

In [None]:
####################################################################
# Market segment
####################################################################
# loan purpose by year

In [None]:
# Total loan amount per (year, loan purpose).
Loan_Purpose_amount = df_hmda_conv_conf.groupby(by=['As_of_Year','Loan_Purpose_Description'])['Loan_Amount_000'].sum()

# unstack() pivots the loan purpose out of the index into columns, so each
# year becomes one stacked bar with one segment per purpose.
# (The former mid-cell `.unstack(level=-1).head(10)` preview was dropped: only
# the last expression of a cell is echoed by default, so its result was discarded.)
Loan_Purpose_amount.unstack().plot(kind='bar',
                                   stacked=True,
                                   figsize=(6,3),
                                   title='Trend of Loan Purpose by Total Loan Amount in $1000',
                                   subplots=False)

In [None]:
# Loan purpose by state: total loan amount per (year, state, loan purpose),
# flattened so the three group keys are ordinary columns next to 'Loan_Amount_000'.
Loan_Purpose_amount = (
    df_hmda_conv_conf
    .groupby(by=['As_of_Year','State','Loan_Purpose_Description'])['Loan_Amount_000']
    .sum()
    .reset_index()
)

# One frame per state, used by the per-state plots.
Loan_Purpose_amount_VA = Loan_Purpose_amount.loc[Loan_Purpose_amount['State'].eq('VA')]
Loan_Purpose_amount_MD = Loan_Purpose_amount.loc[Loan_Purpose_amount['State'].eq('MD')]
Loan_Purpose_amount_DC = Loan_Purpose_amount.loc[Loan_Purpose_amount['State'].eq('DC')]
Loan_Purpose_amount_WV = Loan_Purpose_amount.loc[Loan_Purpose_amount['State'].eq('WV')]
Loan_Purpose_amount_DE = Loan_Purpose_amount.loc[Loan_Purpose_amount['State'].eq('DE')]

In [None]:
# VA: purchase vs. refinance loan amount per year, drawn as stacked bars.
# Both selections come from the same frame, whose rows follow the groupby's
# sorted year order, so position i in each series refers to the same year.
sr1 = Loan_Purpose_amount_VA.loc[
    Loan_Purpose_amount_VA['Loan_Purpose_Description'] == 'Purchase', 'Loan_Amount_000']
sr2 = Loan_Purpose_amount_VA.loc[
    Loan_Purpose_amount_VA['Loan_Purpose_Description'] == 'Refinance', 'Loan_Amount_000']

ind = np.arange(3)   # the x locations for the groups (one per year)
width = 0.4          # the width of the bars: can also be len(x) sequence

plt.figure(figsize=(5,4))
# align='center' is spelled out so the bars are centred on `ind` regardless of
# the matplotlib version's default; .values passes plain arrays so the bars are
# stacked by position rather than by the two series' differing row labels.
p1 = plt.bar(ind, sr1.values, width, color='darkblue', alpha=0.7, align='center')
p2 = plt.bar(ind, sr2.values, width, color='darkgreen', bottom=sr1.values, alpha=0.7,
             align='center')

plt.ylabel('Loan Amount in $000s')
plt.title('Loan Amount by Purpose in VA')
# Ticks sit on the bar centres. The former `ind+width/2` offset only centred
# the labels under edge-aligned bars and otherwise put them on the bars' right edge.
plt.xticks(ind, ('2012', '2013', '2014'))
plt.yticks(np.arange(0, 60000000, 10000000))
plt.legend((p1[0], p2[0]), ('Purchase','Refinance'))

plt.show()

In [None]:
####################################################################
# How competitive the market is, Herfindahl-Hirschman Index HHI
####################################################################
        
def hhi(vector):
    """
    Herfindahl-Hirschman index of ``vector``: the sum of its squared entries.

    param
    -------
    vector: array-like of numeric values. The values are squared as given;
            no normalisation is applied here, so callers presumably pass
            market shares (verify against the call sites).

    return: sum of the squared entries (a scalar for one-dimensional input)
    """
    squared = np.square(vector)
    return squared.sum()