Data Sources:

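- RHPI: Redfin Home Price Index, month-over-month change (`data/rhpi.csv`)
- City Market Tracker: Redfin housing market metrics by city (`data/city_market_tracker.tsv000.gz`)
- MORTGAGE30US: weekly 30-year mortgage rate (`data/MORTGAGE30US.csv`)
- LA unemployment: Los Angeles unemployment rate, series LOSA106UR (`data/LOSA106UR.csv`)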

In [4]:
import os
import pandas as pd

In [7]:
# Base directory for the relative '../data/...' paths used by the loaders below.
# NOTE: the relative name 'dev.ipynb' is resolved against the current working
# directory, so this only points at the notebook's folder when the kernel is
# started there (presumably 'dev.ipynb' is this notebook's file name).
script_dir = os.path.dirname(os.path.realpath('dev.ipynb'))

In [21]:
def process_rhpi():
    """Load the Redfin HPI file and build the Los Angeles month-over-month series.

    Reads ``rhpi.csv`` (UTF-16, tab-separated) from ``{script_dir}/../data``,
    keeps the rows whose ``Region Name`` is ``"Los Angeles, CA"`` and converts
    the ``Redfin HPI MoM`` values (strings that may carry a ``%`` sign) to floats.

    Returns
    -------
    tuple
        ``(la_rhpi, original_la_rhpi, last_date)`` where ``la_rhpi`` is a float
        Series named ``'Redfin HPI MoM'`` indexed by ``Date`` (sorted ascending,
        missing values dropped), ``original_la_rhpi`` is an independent copy of
        that Series, and ``last_date`` is the final index value formatted as
        ``'%Y-%m-%d'``.

    Raises
    ------
    IndexError
        If no non-missing Los Angeles observations are found.
    """
    # read in rhpi data
    rhpi = pd.read_csv(f'{script_dir}/../data/rhpi.csv', encoding="utf-16", sep='\t')

    # subset data for los angeles; a single .loc plus .copy() yields an
    # independent frame, so the column assignment below never writes into a
    # slice of `rhpi` (avoids SettingWithCopyWarning / lost updates)
    is_la = rhpi['Region Name'] == "Los Angeles, CA"
    la_rhpi = rhpi.loc[is_la, ['Month, Year of Date', 'Redfin HPI MoM']].copy()

    # rename, convert types, and sort index
    la_rhpi = la_rhpi.rename(columns={'Month, Year of Date': 'Date'})
    la_rhpi['Date'] = pd.to_datetime(la_rhpi['Date'])
    la_rhpi = la_rhpi.sort_values('Date').set_index('Date')

    # make time series just the RHPI month over month: strip the '%' sign,
    # convert to float and drop missing observations
    la_rhpi = (
        la_rhpi['Redfin HPI MoM']
        .astype(str)
        .str.replace('%', '', regex=False)
        .astype(float)
        .dropna()
    )

    if la_rhpi.empty:
        # same exception type index[-1] would raise, but with a usable message
        raise IndexError(
            "No non-missing 'Redfin HPI MoM' observations found for Los Angeles, CA"
        )

    original_la_rhpi = la_rhpi.copy()

    # get last date in time series
    last_date = la_rhpi.index[-1].strftime('%Y-%m-%d')

    return la_rhpi, original_la_rhpi, last_date

In [33]:
# a: LA RHPI month-over-month series, b: a copy of it, last_date: final date as
# a 'YYYY-MM-DD' string, used as the upper date bound by the other process_* functions.
a,b,last_date = process_rhpi()

In [31]:
def process_redfin_city_data(last_date):
    """Build monthly Los Angeles housing-market features from the city market tracker.

    Reads the gzipped, tab-separated ``city_market_tracker.tsv000.gz`` file from
    ``{script_dir}/../data``, keeps single-family-residential rows whose
    ``parent_metro_region`` is ``'Los Angeles, CA'`` and averages each
    month-over-month metric across those rows for every ``period_begin``.

    Parameters
    ----------
    last_date : str or datetime-like
        Inclusive upper bound for the returned index.

    Returns
    -------
    pandas.DataFrame
        One row per ``period_begin`` (datetime index) from 2012-04-01 through
        ``last_date``, one column per month-over-month metric.
    """
    feature_columns = [
        'median_sale_price_mom',
        'median_list_price_mom',
        'median_ppsf_mom',
        'median_list_ppsf_mom',
        'homes_sold_mom',
        'new_listings_mom',
        'inventory_mom',
        'sold_above_list_mom',
        'price_drops_mom',
        'median_dom_mom',
        'months_of_supply_mom',
        'avg_sale_to_list_mom',
    ]

    # import housing related information by city
    info = pd.read_csv(
        f'{script_dir}/../data/city_market_tracker.tsv000.gz',
        sep='\t',
        compression='gzip',
    )

    # take subset for los angeles single family homes
    is_la_single_family = (
        (info['parent_metro_region'] == 'Los Angeles, CA')
        & (info['property_type'] == 'Single Family Residential')
    )
    la_info = info[is_la_single_family]

    # Select the feature columns BEFORE aggregating: calling .mean() on the
    # whole frame also tries to average the text columns, which raises
    # TypeError on pandas >= 2.0 (numeric_only defaults to False) and is
    # wasted work on older versions.
    features = la_info.groupby('period_begin')[feature_columns].mean()

    # make index datetime type and set date range to match rhpi
    features.index = pd.to_datetime(features.index)
    in_range = (features.index >= '2012-04-01') & (features.index <= last_date)

    return features[in_range]

In [37]:
def process_mortage_data(last_date):
    """Build a monthly mortgage-rate series from the weekly ``MORTGAGE30US`` file.

    Reads ``MORTGAGE30US.csv`` from ``{script_dir}/../data``, forward-fills
    observations recorded as ``'.'``, averages the weekly values within each
    calendar month and labels every month by its first day.

    Parameters
    ----------
    last_date : str or datetime-like
        Inclusive upper bound for the returned index.

    Returns
    -------
    pandas.Series
        Float Series named ``'MORTGAGE30US'`` indexed by month-start ``DATE``
        from 2012-04-01 through ``last_date``.
    """
    mortgage = pd.read_csv(f'{script_dir}/../data/MORTGAGE30US.csv')

    # set date in datetime format as index
    mortgage['DATE'] = pd.to_datetime(mortgage['DATE'])
    mortgage = mortgage.set_index('DATE')

    # '.' marks a missing value: turn it into NaN, convert to float, then fill
    # with the previous value. Converting before filling means a leading '.'
    # (nothing to fill from) stays NaN instead of crashing float().
    rates = (
        mortgage['MORTGAGE30US']
        .replace('.', float('nan'))
        .astype(float)
        .ffill()
    )

    # convert weekly data to monthly data; 'MS' bins by calendar month and
    # labels each bin with the first day of the month, so no period round-trip
    # is needed and the deprecated 'M' alias is avoided
    monthly = rates.resample('MS').mean()

    # select dates to match rhpi data
    in_range = (monthly.index >= '2012-04-01') & (monthly.index <= last_date)

    return monthly[in_range]

In [60]:
def process_unemployment(last_date):
    """Return the unemployment-rate series restricted to the RHPI date window.

    Loads ``LOSA106UR.csv`` from ``{script_dir}/../data``, renames the
    ``LOSA106UR`` column to ``'unemployment rate'`` and indexes the data by the
    parsed ``DATE`` column.

    Parameters
    ----------
    last_date : str or datetime-like
        Inclusive upper bound for the returned index.

    Returns
    -------
    pandas.Series
        The ``'unemployment rate'`` column for dates from 2012-04-01 through
        ``last_date``, indexed by ``DATE``.
    """
    # load the raw file and give the rate column a readable name
    csv_path = f'{script_dir}/../data/LOSA106UR.csv'
    table = pd.read_csv(csv_path).rename(columns={'LOSA106UR': 'unemployment rate'})

    # parse the dates and use them as the index
    table['DATE'] = pd.to_datetime(table['DATE'])
    table = table.set_index('DATE')

    # keep only the window that matches the la rhpi data
    in_window = (table.index >= '2012-04-01') & (table.index <= last_date)

    return table.loc[in_window, 'unemployment rate']

In [39]:
# Display the monthly mortgage-rate series, bounded above by last_date.
process_mortage_data(last_date)

DATE
2012-04-01    3.9100
2012-05-01    3.7980
2012-06-01    3.6750
2012-07-01    3.5500
2012-08-01    3.6020
               ...  
2024-03-01    6.8200
2024-04-01    6.9925
2024-05-01    7.0600
2024-06-01    6.9175
2024-07-01    6.8475
Freq: MS, Name: MORTGAGE30US, Length: 148, dtype: float64

In [63]:
# Display the RHPI series for dates up to and including 2024-05-01.
a[a.index <= '2024-05-01']

Date
2012-04-01    1.73
2012-05-01    1.56
2012-06-01    1.93
2012-07-01    0.88
2012-08-01    1.47
              ... 
2024-01-01    1.20
2024-02-01    1.63
2024-03-01    1.48
2024-04-01    0.69
2024-05-01    0.14
Name: Redfin HPI MoM, Length: 146, dtype: float64

In [22]:
# Same call as the earlier process_rhpi() cell, but the third return value
# (the last-date string) is bound to `c` here instead of `last_date`.
a,b,c = process_rhpi()