In [7]:
import numpy as np
import pandas as pd
import os

import itertools

from wrangle import wrangle_hud

# Acquire
All functions are called in `wrangle.py` file.
## Functions
- `acquire_fha_data.py` - This function takes fha data from hud website (url, sheet_name) turns it into pandas df.
    - url = https://www.hud.gov/sites/dfiles/Housing/documents/Initi_Endores_Firm%20Comm_DB_FY06_FY20_Q2.xlsx
    - sheet_name = "Firm Cmtmts, Iss'd and Reiss'd"
    - The dataset can be physically found at https://www.hud.gov/program_offices/housing/mfh/mfdata/mfproduction

# Prepare
All functions are called in `wrangle.py` file

## Functions

In [56]:
# there is something wrong with the clean_date.csv

# df = wrangle_hud()

df = pd.read_csv("clean_data.csv")

In [62]:
def set_date_column(df): 
    """function drops all 2020 data and changes feature column"""
    # take this out of the function when I have an internet connection:
    df.date_of_firm_commitment_activity = pd.to_datetime(df.date_of_firm_commitment_activity)
    #drop 2020 mortgage data
    df = df[df.date_of_firm_commitment_activity < '2020-01-01']
    pd.to_datetime(df.date_of_firm_commitment_activity)
    # change date_of_firm_commitment_activity to Y only
    df.date_of_firm_commitment_activity = df.date_of_firm_commitment_activity.apply(lambda x: x.year)
    
    return df

In [68]:
def make_refinance_bool(df):
    """makes a boolean column indicating whether the mortgage was for a refinance"""
    
    df['is_refinance'] = df.activity_description == 'Refinance'
    
    return df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [93]:
unique_fha_list = df.fha_number.value_counts()[df.fha_number.value_counts() == 1]
def in_unique_list(x):
    return x in unique_fha_list
df_unique = df[df.fha_number.apply(in_unique_list)]

In [92]:
repeat_fha_numbers = df.fha_number.value_counts()[df.fha_number.value_counts() != 1].index
def in_repeat_list(x):
    return x in repeat_fha_numbers
df_repeat = df[df.fha_number.apply(in_repeat_list)]

In [105]:
repeat_filter = pd.DataFrame(df_repeat.groupby('fha_number').date_of_firm_issue.max()).reset_index()

In [107]:
repeat_filter.head(2)

Unnamed: 0,fha_number,date_of_firm_issue
0,35493,2012-07-31
1,35512,2013-12-04


In [111]:
if df.fha_number == repeat_filter.loc[0][0]:
    print(df.date_of_firm_issue)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [110]:
repeat_filter.loc[0][0]

35493

## Data Summary

In [35]:
df.fiscal_year_of_firm_commitment_activity.value_counts()

2013    2708
2012    2424
2011    1904
2014    1695
2017    1481
2010    1471
2015    1319
2016    1298
2018    1290
2006    1220
2019    1083
2007     895
2009     844
2008     669
2020     322
Name: fiscal_year_of_firm_commitment_activity, dtype: int64

In [39]:
df.groupby('fha_number').date_of_firm_issue.max()

fha_number
10010      2007-12-21
10011      2008-02-28
10012      2016-04-20
11142      2014-08-08
11183      2008-11-25
              ...    
17622002   2009-05-14
17622003   2013-07-09
17622004   2015-04-01
17635048   2011-01-21
17635049   2011-09-30
Name: date_of_firm_issue, Length: 20108, dtype: datetime64[ns]

In [38]:
df.columns

Index(['fha_number', 'project_name', 'project_city', 'project_state',
       'basic_fha_risk_share_or_other', 'program_category',
       'activity_description', 'activity_group', 'facility_type',
       'program_designation', 'firm_commitment_activity',
       'lender_at_firm_commitment_activity',
       'mortgage_at_time_of_firm_commitment_issuance_amendment_or_reissuance',
       'unit_or_bed_count', 'date_of_firm_commitment_activity',
       'fiscal_year_of_firm_commitment_activity',
       'mortgage_at_firm_commitment_issuance', 'date_of_firm_issue',
       'fiscal_year_of_firm_commitment', 'map_or_tap', 'lihtc_designation',
       'tax_exempt_bond_designation', 'home_designation', 'cdbg_designation',
       'section_202_refi_designation', 'irp_decoupling_designation',
       'hope_vi_designation', 'current_status', 'final_mortgage_amount'],
      dtype='object')

In [3]:
print(f'This dataframe is {df.shape[0]} rows and {df.shape[1]} columns')

This dataframe is 20918 rows and 29 columns


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20918 entries, 0 to 20917
Data columns (total 29 columns):
fha_number                                                              20918 non-null object
project_name                                                            20918 non-null object
project_city                                                            20918 non-null object
project_state                                                           20918 non-null object
basic_fha_risk_share_or_other                                           20918 non-null object
program_category                                                        20918 non-null object
activity_description                                                    20918 non-null object
activity_group                                                          20918 non-null object
facility_type                                                           20918 non-null object
program_designation                          

In [5]:
df

Unnamed: 0,fha_number,project_name,project_city,project_state,basic_fha_risk_share_or_other,program_category,activity_description,activity_group,facility_type,program_designation,...,map_or_tap,lihtc_designation,tax_exempt_bond_designation,home_designation,cdbg_designation,section_202_refi_designation,irp_decoupling_designation,hope_vi_designation,current_status,final_mortgage_amount
0,11345004,Western Heights Apartments,Olney,TX,Basic FHA,223a7 Refi Apts,Refinance,Refi/ Pchse,Apts,a7/236,...,TAP,False,False,False,False,False,False,False,Firm Comit then Owner Withdrew,271700
1,11345005,Woodlands Village Apts,Breckenridge,TX,Basic FHA,223a7 Refi Apts,Refinance,Refi/ Pchse,Apts,a7/236,...,TAP,False,False,False,False,False,False,False,Finally Endorsed,168300
2,4235583,Harbor Court Apartments,Rocky River,OH,Basic FHA,223a7 Refi Apts,Refinance,Refi/ Pchse,Apts,a7/221d4,...,TAP,False,False,False,False,False,False,False,Finally Endorsed,3886800
3,3297260,Compton Towne Apartments,Wilmington,DE,Risk Sharing,QPE Risk Sharing,Refinance,Refi/ Pchse,Coop/Apts/Mobile Home Pk/ALF,QPE Risk Shg Existing,...,TAP,True,False,False,False,False,False,False,Finally Endorsed,4000000
4,7135779,Evergreen Terrace I,Joliet,IL,Basic FHA,223a7 Refi Apts,Refinance,Refi/ Pchse,Apts,a7/221d4,...,TAP,False,False,False,False,False,False,False,Firm Expired,5247700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20913,12711269,Quail Run Apartments,Vancouver,WA,Basic FHA,223f Refi/Purchase Apts,Refinance,Refi/ Pchse,Apts/Coops,223f Apts,...,MAP,True,False,False,False,False,False,False,Firm Issued,17051600
20914,2210003,Legacy Memory Care,Falmouth,ME,Basic FHA,232 Health Care,Additions,Impv/Adds,Asst'd Livg,ALF 241a,...,TAP,False,False,False,False,False,False,False,Firm Issued,1189200
20915,6722256,Consulate Health Care of Kissimmee,Kissimmee,FL,Basic FHA,232 Health Care,Refinance,Refi/ Pchse,Nursing/ICF,Nursing/ ICF 223f,...,TAP,False,False,False,False,False,False,False,Firm Issued,17920000
20916,11322284,The Villages of Lake Highlands,Dallas,TX,Basic FHA,232 Health Care,Refinance,Refi/ Pchse,Nursing/ICF,Nursing/ ICF 223f,...,TAP,False,False,False,False,False,False,False,Firm Issued,20756500


In [6]:
df.columns

Index(['fha_number', 'project_name', 'project_city', 'project_state',
       'basic_fha_risk_share_or_other', 'program_category',
       'activity_description', 'activity_group', 'facility_type',
       'program_designation', 'firm_commitment_activity',
       'lender_at_firm_commitment_activity',
       'mortgage_at_time_of_firm_commitment_issuance_amendment_or_reissuance',
       'unit_or_bed_count', 'date_of_firm_commitment_activity',
       'fiscal_year_of_firm_commitment_activity',
       'mortgage_at_firm_commitment_issuance', 'date_of_firm_issue',
       'fiscal_year_of_firm_commitment', 'map_or_tap', 'lihtc_designation',
       'tax_exempt_bond_designation', 'home_designation', 'cdbg_designation',
       'section_202_refi_designation', 'irp_decoupling_designation',
       'hope_vi_designation', 'current_status', 'final_mortgage_amount'],
      dtype='object')