In [1]:
#Load dependencies
import os
import pandas as pd
import datetime as dt

# Data Source
## URL
https://www.fhfa.gov/DataTools/Downloads/Pages/House-Price-Index.aspx
## Description
The FHFA House Price Index (HPI) is a broad measure of the movement of single-family house prices.  The FHFA HPIs are built on tens of millions of home sales and offer insights about house price fluctuations at the national, census division, state, metro area, county, ZIP code, and census tract levels.  The FHFA HPIs use a fully transparent methodology based upon a weighted, repeat-sales statistical technique to analyze transaction data from Fannie Mae and Freddie Mac.  The FHFA HPIs also provide housing economists with an analytical tool that is useful for estimating changes in the rates of mortgage defaults, prepayments and housing affordability in specific geographic areas.

### Importing the file into a DataFrame

In [12]:
# Path to the housing index CSV (relative, so it resolves against the working directory)
housing_data = 'raw_data/houston_housing.csv'

# Load the CSV into a pandas DataFrame, parsing the DATE column as datetimes on read
housing_raw = pd.read_csv(
    housing_data,
    parse_dates=['DATE'],
)

# Preview the first five rows
housing_raw.head(5)

Unnamed: 0,DATE,ATNHPIUS26420Q
0,1976-01-01,51.4
1,1976-04-01,58.34
2,1976-07-01,57.97
3,1976-10-01,57.8
4,1977-01-01,59.86


### Averaging the quarterly index by year and renaming the columns

In [16]:
# Collapse the quarterly series to one row per calendar year:
#   1. group rows by the year component of DATE and average the index value,
#   2. reset the index so the grouping key becomes a regular "DATE" column that can be renamed,
#   3. rename both columns to lower-case, descriptive names,
#   4. use the renamed "year" column as the index again.
housing_df = (
    housing_raw
    .groupby(housing_raw['DATE'].dt.year)['ATNHPIUS26420Q']
    .mean()
    .reset_index()
    .rename(columns={"DATE": "year", "ATNHPIUS26420Q": "housing_price_index"})
    .set_index('year')
)

# Preview the first five yearly averages
housing_df.head(5)

Unnamed: 0_level_0,housing_price_index
year,Unnamed: 1_level_1
1976,56.3775
1977,64.4225
1978,73.5875
1979,83.0475
1980,90.9075
