# Housing

In [1]:
import sys
sys.path.append('../src')

import uschartbook.config

from uschartbook.config import *
from uschartbook.utils import *

### Homeownership rate

In [None]:
# Seasonally adjusted homeownership rate
url = ('https://api.census.gov/data/timeseries/eits/hv?'+
       f'get=cell_value,time_slot_id,data_type_code,'+
       f'category_code&key={census_key}&time=from+1989&'+
       'for=us&seasonally_adj=yes')
r = requests.get(url).json()
date = lambda x: pd.to_datetime(x.time)
df = (pd.DataFrame(r[1:], columns=r[0]).assign(date = date)
        .set_index('date')['cell_value'].astype('float')
        .sort_index().rename('VALUE'))
df.to_csv(data_dir / 'homeown.csv', index_label='date', 
          header=True)

In [None]:
df = pd.read_csv(data_dir / 'homeown.csv', 
                 index_col='date', parse_dates=True)['VALUE']
col = 'violet!60!magenta'
node = end_node(df, col, percent=True, 
                date='q', full_year=True)
write_txt(text_dir / 'homeown_node.txt', node)
dtlt = dtxt(df.index[-1])['qtr1']
latest = df.iloc[-1]
ch3yr = value_text(df.diff(12).iloc[-1], ptype='pp', 
                   adj='total')
url = 'https://www.census.gov/housing/hvs/index.html'
cl = c_line(col)
text = (f'As of {dtlt}, the Census Bureau \href{{{url}}}'+
        f'{{report}} a homeownership rate of {latest:.1f} '+
        f'percent {cl}. Over the past three years, the '+
        f'overall US homeownership rate {ch3yr}.')
write_txt(text_dir / 'homeown.txt', text)
print(text)

### By Race/Ethnicity

In [None]:
url = 'https://www.census.gov/housing/hvs/data/histtab16.xlsx'

data = pd.read_excel(url)
df = data.iloc[7:-9, :-6].reset_index().drop('index', axis=1)
df.columns = ['Date', 'Total', 'White', 'Black', 'Other', 'Other2', 'Hispanic']
df = df.replace('2002r', 2002).replace('20033', 2003).dropna(how='all')

q = {1: '01', 2: '04', 3: '07', 4: '10'}

d = {}
for i, row in df.iterrows():
    if i not in d.keys():
        d[i] = {}
    if type(row['Date']) == int:
        for y in list(range(i, i+5)):
            if y in df.index:
                d[y] = {'quarter': ''}
                d[y]['year'] = row['Date']
    if (type(row['Date']) != int) & ('.' in str(row['Date'])):
        d[i]['quarter'] = q[int(row['Date'][0])]
        
    if 'year' not in d[i].keys():
        d[i] = {'date': '', 'year': '', 'quarter': ''}
    else:
        d[i]['date'] = f'{d[i]["year"]}-{d[i]["quarter"]}-01'
        
dates = {k: v['date'] for k, v in d.items()}
df['date'] = df.index.map(dates)
df = df.dropna()
df = df.set_index(pd.to_datetime(df.date))
result = df[['White', 'Black', 'Hispanic', 'Other', 'Total']]
result.to_csv(data_dir / 'homeown2.csv', index_label='date')

In [None]:
result = pd.read_csv(data_dir / 'homeown2.csv', index_col='date', 
                     parse_dates=True)

color_list = [('Black', 'red!20!orange!95!black'), 
              ('White', 'blue!80!cyan!80!black'),
              ('Hispanic', 'green!80!blue!85!black'),
              ('Other', 'red!80!black')]
cl = {}
for group, color in color_list:
    write_txt(text_dir / f'homeown_{group.lower()}_node.txt', 
              end_node(result[group], color, percent=True))
    cl[group] = c_line(color)

wtlt = result.White.iloc[-1]
blmax = result['Black'].max()
blidxmax = dtxt(result['Black'].idxmax())['qtr2']
blmin = result.loc['2006':, 'Black'].min()
blidxmin = dtxt(result.loc['2006':, 'Black'].idxmin())['qtr1']
bllt = result.Black.iloc[-1]
ltdt = dtxt(result.index[-1])['qtr1']
hsmax = result['Hispanic'].max()
hsidxmax = dtxt(result['Hispanic'].idxmax())['qtr2']
hslt = result['Hispanic'].iloc[-1]
compare = compare_text(hslt, hsmax, [0.3, 1.0, 3.0])
if hslt == hsmax:
    cmp_txt = 'the highest on record'
else:
    cmp_txt = f'{compare} the {hsmax:.1f} percent peak rate in {hsidxmax}'

text = (f'Around three-quarters ({wtlt:.1f} percent in {ltdt}) of '+
        f'non-Hispanic white households own their home {cl["White"]}, '+
        'compared to fewer than half of black and Hispanic households. ')
write_txt(text_dir / 'homeown2.txt', text)
print(text, '\n')

txt2 = ('During the housing bubble, the homeownership rate for black '+
        'households increased by nearly ten percentage points, peaked  '+
        f'at {blmax:.1f} percentin {blidxmax}, and fell to {blmin:.1f} '+
        f'percent in {blidxmin}. The current homeownership rate for black '+
        f'households is {bllt:.1f} percent, as of {ltdt} {cl["Black"]}. '+
        f'The rate for Hispanic households of any race is {hslt:.1f} '+
        f'percent in {ltdt}, {cmp_txt} {cl["Hispanic"]}. ')
write_txt(text_dir / 'homeown3.txt', txt2)
print(txt2)

### FHFA Housing Price Index

In [None]:
url = ('https://www.fhfa.gov/DataTools/Downloads/'+
       'Documents/HPI/HPI_PO_monthly_hist.xls')
df = pd.read_excel(url, header=3, index_col=0, 
                   parse_dates=True).iloc[1:]
sa = df[[col for col in df.columns if 'NSA' not in col]]
sa.columns = [col.replace('\n(SA)', '').replace('\n', '') 
              for col in df.columns if 'NSA' not in col]
sa.to_csv(data_dir / 'hpi_raw.csv', index_label='date')

In [None]:
dr = pd.read_csv(data_dir / 'hpi_raw.csv', index_col='date', 
                 parse_dates=True)
data = (dr.pct_change(12) * 100)
data['USA3M'] = m3rate(dr.USA)
df = data.loc['1991':, ['USA', 'USA3M']]
df.to_csv(data_dir / 'hpi.csv', index_label='date', float_format='%g')

adj = node_adj(df)
smax = df.iloc[-1].idxmax()
adj[smax] = adj[smax] + 0.35

colors = {'USA': 'blue!35!cyan', 'USA3M': 'purple'}

node_file = open(text_dir / 'hpi_nodes.txt', 'w')
for series, color in colors.items():
    date = 'm' if series == smax else None
    node_file.write(end_node(df[series], color, date=date,
                             percent=True, full_year=True,
                             size=1.1, offset=adj[series]))
node_file.close()

ch = data['USA']
chtxt = value_text(df.USA.iloc[-1])
prtxt = value_text(df.USA.iloc[-2], 'increase_of')
ltdt = dtxt(df.USA.index[-1])['mon1']
prdt = dtxt(df.USA.index[-2])['mon1']
pr = df.USA.iloc[-2]
pr3 = df.USA3M.iloc[-2]
regions = data.drop(['USA', 'USA3M'], axis=1).iloc[-1]
div = regions.idxmax()
maxval = regions.max()
divs = pd.read_csv('raw/census_regions.csv')
div_list = divs[divs['Division'] == div].State.to_list()
states = f"{', '.join(div_list[:-1])}, and {div_list[-1]}"
ch3m = df.USA3M.iloc[-1]
mreg = value_text(maxval)
text = (f'{chtxt} over the year ending {ltdt} {c_line(colors["USA"])}. '+
        'The average of the latest three months of data compared '+
        f'to the previous three months shows an annualized growth '+
        f'rate of {ch3m:.1f} percent {c_line(colors["USA3M"])}. '+
        f'In {prdt}, the one-year growth rate was {pr:.1f} percent '+
        f'and the three-month growth rate was {pr3:.1f} percent. '+
        f'Home prices in the {div} region, which includes {states}, '+
        f'{mreg} in {ltdt}, the highest one-year growth rate.')
write_txt(text_dir / 'fhfa.txt', text)
print(text)

In [None]:
t = pd.DataFrame()
for i in [-1, -2, -3, -4, -13, -25, -37]:
    t[dtxt(data.index[i])['mon6']] = data.iloc[i]
    
t['`03--`05 Average'] = data.loc['2003':'2005'].mean()
t['`09--`12 Average'] = data.loc['2009':'2012'].mean()

final = (t.rename({'USA': '\\textbf{United States}'})
          .drop('USA3M')
          .sort_values(dtxt(data.index[-1])['mon6'], ascending=False)
          .round(1))

final.to_csv(data_dir / 'hpi.tex', sep='&', line_terminator='\\\ ', 
             quotechar=' ')
final

### Ratio of price to rent

In [None]:
hpi = pd.read_csv(data_dir / 'hpi_raw.csv', index_col='date', 
                  parse_dates=True)['USA']
rent = (pd.read_csv(data_dir / 'cpi_raw.csv', index_col='date', 
                    parse_dates=True)['Rent of primary residence'])
ratio = (hpi / rent).dropna()
ratio = ratio / ratio.iloc[0]
ratio.name = 'VALUE'
ratio.to_csv(data_dir / 'hpi_rent_ratio.csv', index_label='date', 
             header=True)

ltdate = dtxt(ratio.index[-1])['mon1']
ltval = (ratio.iloc[-1] - 1) * 100
cl = c_line('red')
val = value_text(ltval, 'above_below')
text = (f'As of {ltdate}, housing prices are {val} '+
        f'the rental equivalent {cl}.')
write_txt(text_dir / 'hp_rent.txt', text)
print(text)

### Owner's equity share in real estate

In [None]:
data = fred_df('HOEREPHRE')['VALUE'].astype('float')
data.to_csv(data_dir / 'homeeq.csv', index_label='date', header=True)

datelt = dtxt(data.index[-1])['qtr1']
latest = data.iloc[-1]
ch3, ch1 = [value_text(data.diff(i).iloc[-1], style='increase_by', 
                   adj='total', ptype='pp') for i in [12, 4]]
avg89 = data.loc['1989'].mean()
compare = compare_text(latest, avg89, [0.05, 1.0, 5.0])
node_color = 'blue!80!violet'
node = end_node(data, node_color, percent=True, date='q', full_year=True, offset=-0.35)
write_txt(text_dir / 'homeeq_node.txt', node)
cl = c_line(node_color)
url = 'https://www.federalreserve.gov/releases/z1/20191212/html/b101.htm'
text = (f'As of {datelt}, the Federal Reserve \href{{{url}}}{{report}} '+
        f"\\textbf{{owners\' equity}} is {latest:.1f} percent of residential "+
        f"real estate {cl}. Over the past three years, the owners\' "+
        f"equity share {ch3}. Over the past year, the share {ch1}. "+
        f'The current share is {compare} the 1989 average of {avg89:.1f} '+
        f'percent.')
write_txt(text_dir / 'homeeq.txt', text)
print(text)

### Residential construction

In [None]:
key = census_key
base = 'https://api.census.gov/data/timeseries/eits/resconst'
param = 'cell_value,time_slot_id,category_code,data_type_code'
t = '&time=from+1989'
oth = '&for=us&seasonally_adj=yes'

url = f'{base}?get={param}&key={key}{t}{oth}'

r = requests.get(url).json()

In [None]:
for t in ['total', 'single', 'multi']:
    df = pd.DataFrame()
    for series in ['APERMITS', 'ASTARTS', 'ACOMPLETIONS']:
        df[series] = pd.Series(
            {pd.to_datetime(i[4]): 
             float(i[0]) for i in r[1:] if i[2] == series 
             and i[3] == t.upper()}
        ).sort_index()
    df.to_csv(data_dir / f'permits_{t}.csv', index_label='date')

In [None]:
df = pd.read_csv(data_dir / 'permits_total.csv', 
                 parse_dates=True, index_col='date')
p_col = 'blue!70!green'
node = end_node(df.APERMITS, p_col, date='m', full_year=True, 
                digits='comma', offset=0.35)
write_txt(text_dir / 'permits_node.txt', node)
c_col = 'blue!30!cyan'
node = end_node(df.ACOMPLETIONS, c_col, date='m', full_year=True, 
                digits='comma', offset=0.35)
write_txt(text_dir / 'completions_node.txt', node)

s = series_info(df['APERMITS'])
s['val_5yr_ago'] = df['APERMITS'].iloc[-61]

if s['days_since_match'] > 100:
    hlt = f", {s['last_matched']}"
else:
    hlt = ''
month = s['date_latest'].strftime('%B')

d = {}
for i in ['val_prev', 'val_year_ago', 'val_5yr_ago']:
    mo_ch = s["val_latest"] - s[i]
    mo_pch = (s["val_latest"] / s[i] - 1) * 100
    if mo_ch >= 0.1:
        txt = f'increased by {abs(mo_ch)*1000:,.0f} ({mo_pch:.1f} percent)'
    elif mo_ch <= -0.1:
        txt = f'decreased by {abs(mo_ch)*1000:,.0f} ({mo_pch:.1f} percent)'
    else:
        txt = 'were virtually unchanged' 
    d[i] = txt

date = s["date_latest_ft"]    
    
text = (f'In {date}, a seasonally-adjusted annual rate of '+
        f'{s["val_latest"]*1000:,.0f} new residential housing '+
        f'units were authorized by building permits{hlt} '+
        f'{c_line(p_col)}. Permits issued {d["val_prev"]} '+
        f'over the previous month, {d["val_year_ago"]} '+
        f'over last {month}, and {d["val_5yr_ago"]} total '+
        f'over the past five years.')
write_txt(text_dir / 'permits.txt', text)
print(text, '\n')

prdate = dtxt(df.index[-2])['mon3']
pryrdate = dtxt(df.index[-13])['mon1']
ltval = df['ACOMPLETIONS'].iloc[-1]
prval = df['ACOMPLETIONS'].iloc[-2]
pryrval = df['ACOMPLETIONS'].iloc[-3]
text = (f'In {date}, a seasonally-adjusted annual rate of '+
        f'{ltval*1000:,.0f} new residential units were completed '+
        f'{c_line(c_col)}, compared to {prval*1000:,.0f} in '+
        f'{prdate} and {pryrval*1000:,.0f} in {pryrdate}.')
write_txt(text_dir / 'completions.txt', text)
print(text)

In [None]:
for t in ['single', 'multi']:
    df = pd.read_csv(data_dir / f'permits_{t}.csv', 
                     parse_dates=True, index_col='date')
    date = dtxt(df.index[-1])['mon1']
    pval = df['APERMITS'].iloc[-1]
    cval = df['ACOMPLETIONS'].iloc[-1]
    text = (f'In {date}, a seasonally-adjusted annual rate of '+
            f'{pval*1000:,.0f} new {t}-family residential units were '+
            f'permitted and {cval*1000:,.0f} were completed.')
    write_txt(text_dir / f'permits_{t}.txt', text)
    print(text)

### Household formation estimates

In [3]:
# Retrieve historical data and revised data
files = {'hist': 'histtab8.xlsx',
         'rev': 'hist_tab_8a_v2020.xlsx'}
url = 'https://www.census.gov/housing/hvs/data/'
for name, file in files.items():
    data = pd.read_excel(url + file).iloc[4:, :5]
    data.columns = ['category', 'Q1', 'Q2', 'Q3', 'Q4']
    years = []
    for h in data[data['Q1'] == '1st Qtr'].index - 1:
        year_raw = data.loc[h, 'Q1']
        if type(year_raw) == int:
            year = year_raw
        elif type(year_raw) == str:
            year = int(year_raw[:4])
        elif type(year) == float:
            year = year + 1
        years.append(year)
    data.loc[data['Q1'] == '1st Qtr', 'category'] = years
    data = data.dropna(subset=['category'])
    
    levels = pd.DataFrame()
    for series in ['Owner', 'Renter', 'Vacant']:
        srs = data.loc[data['category'].astype('str')
                       .str.contains(series)]
        srs.index = years
        srs = srs[['Q1', 'Q2', 'Q3', 'Q4']].unstack().swaplevel()
        srs.index = pd.to_datetime([f'{i[0]}-{i[1]}' 
                                    for i in srs.index])
        srs = srs.sort_index()
        levels[series] = srs
    res = levels[~levels.index.duplicated()]
    res.to_csv(data_dir / f'hhform_raw_{name}.csv', 
               index_label='date')

In [5]:
hist = pd.read_csv(data_dir / 'hhform_raw_hist.csv', index_col='date', 
                    parse_dates=True)
rev = pd.read_csv(data_dir / 'hhform_raw_rev.csv', index_col='date', 
                    parse_dates=True)
level = (hist.loc[:'2000'].append(rev.loc['2001':]))
level['Occupied'] = level[['Owner', 'Renter']].sum(axis=1)
cats = ['Owner', 'Renter', 'Vacant']
lvl = level[cats + ['Occupied']].rolling(4).mean() / 1_000
lvl.loc['1989':].to_csv(data_dir / 'hhform_lvl.csv', 
                        index_label='date')
cols = {'Owner': 'teal!68!green!95!black', 'Renter': 'cyan!65!white', 
        'Vacant': 'orange!80!yellow'}
offsets = node_adj(lvl)
nodes = '\n'.join([end_node(lvl[cname], color, offset=offsets[cname]) 
                   for cname, color in cols.items()])
write_txt(text_dir / 'hhform_lvl_nodes.txt', nodes)
level['Total'] = level.sum(axis=1)
level['pop'] = nipa_df(retrieve_table('T70100')['Data'], ['B230RC'])
res = (level.divide(level['pop'], axis=0)[cats]
            .rolling(4).mean().dropna()) * 100
res = res.loc['1989':]
res = res - res.iloc[0]
res.to_csv(data_dir / 'hhform_idx.csv', index_label='date')
offsets = node_adj(res)
nodes = '\n'.join([end_node(res[cname], color, offset=offsets[cname]) 
                   for cname, color in cols.items()])
write_txt(text_dir / 'hhform_idx_nodes.txt', nodes)

ltdt = dtxt(level.index[-1])['qtr1']
lvllt = lvl['Occupied'].iloc[-1]
lvlltr = lvl['Renter'].iloc[-1] 
lvllto = lvl['Owner'].iloc[-1]
lvl90 = lvl.loc['1990-10-01', 'Occupied']
shltr = (lvl['Renter'] / lvl['Occupied']).iloc[-1] * 100
shlto = (lvl['Owner'] / lvl['Occupied']).iloc[-1] * 100
htxt = (f'Over the year ending {ltdt}, there were an average of {lvllt:.1f} '+
        f'million households, compared to {lvl90:.1f} million in 1990.')
write_txt(text_dir / 'hh_tot.txt', htxt)

text = (f'Over the year ending {ltdt}, there were an agerage of '+
        f'{lvllt:.1f} million total occupied '+
        f'housing units in the US, of which {lvlltr:.1f} million '+
        f'({shltr:.1f} percent) were rented, and {lvllto:.1f} '+
        f'million ({shlto:.1f} percent) were owner-occupied. ')
write_txt(text_dir / 'hhform_lvl.txt', text)
print(text)

Over the year ending 2021 Q4, there were an agerage of 126.6 million total occupied housing units in the US, of which 43.7 million (34.5 percent) were rented, and 82.9 million (65.5 percent) were owner-occupied. 


In [6]:
cats = ['Owner', 'Renter', 'Vacant']
occ = 'Occupied'

grps = {}
for name, dft in {'hist': hist, 'rev': rev}.items():
    dft['Occupied'] = dft[['Owner', 'Renter']].sum(axis=1)
    grps[name] = growth_contrib_ann(dft[cats + [occ]], 'Occupied')[cats].dropna()
    
ch = (grps['hist'].loc[:'2001'].append(grps['rev'].loc['2002':]))
ch = ch.loc['1987':].rolling(4).mean().dropna()
ch['pop'] = (nipa_df(retrieve_table('T70100')['Data'], ['B230RC'])
                .pct_change(4).dropna() * 100).round(2)
ch.dropna().to_csv(data_dir / 'hhform.csv', index_label='date')

ch['Occupied'] = ch['Owner'] + ch['Renter']
ch19 = ch.loc['2019-10-01':, 'Occupied'].mean()
chpop = ch.loc['2019-10-01':, 'pop'].mean()
ch19oo = value_text(ch.loc['2019-10-01':, 'Owner'].mean(), 
           'contribution', 'pp', adj='average')
ch19r = value_text(ch.loc['2019-10-01':, 'Renter'].mean(), 
           'contribution', 'pp')
chlt = ch['Occupied'].iloc[-1]
chltoo = value_text(ch['Owner'].iloc[-1], 'contribution', 'pp')
chltr = value_text(ch['Renter'].iloc[-1], 'contribution', 'pp')
text = (f'From 2019 Q4 to {ltdt}, the average annual \\textbf{{household '+
        f'formation rate}} was {ch19:.1f} percent, while annual population '+
        f'growth averaged {chpop:.1f} percent. Changes in the number '+
        f'of owner-occupied households {ch19oo} {c_box(cols["Owner"])}, and '+
        f'changes in rented households {ch19r} {c_box(cols["Renter"])}. Over '+
        f'the year ending {ltdt}, the household formation rate averaged '+
        f'{chlt:.1f} percent, of which owner-occupied households {chltoo}, '+
        f'and rented households {chltr}. ')
write_txt(text_dir / 'hhform.txt', text)
print(text)

From 2019 Q4 to 2021 Q4, the average annual \textbf{household formation rate} was 1.6 percent, while annual population growth averaged 0.3 percent. Changes in the number of owner-occupied households contributed 1.7 percentage points on an average basis (see\cbox{teal!68!green!95!black}), and changes in rented households subtracted 0.1 percentage point (see\cbox{cyan!65!white}). Over the year ending 2021 Q4, the household formation rate averaged 0.6 percent, of which owner-occupied households subtracted 0.8 percentage point, and rented households contributed 1.4 percentage points. 


### New Home Sales

In [2]:
# New home sales data from Census API
url = ('https://api.census.gov/data/timeseries/eits/ressales?'+
       f'get=cell_value,time_slot_id,data_type_code,'+
       f'category_code&key={census_key}&time=from+1989&'+
       'for=us&seasonally_adj=yes&data_type_code=TOTAL'+
       '&category_code=ASOLD')
r = requests.get(url).json()
date = lambda x: pd.to_datetime(x.time)
df = (pd.DataFrame(r[1:], columns=r[0]).assign(date = date)
        .set_index('date')['cell_value'].astype('float')
        .sort_index().rename('VALUE'))
df.to_csv(data_dir / 'nhs.csv', index_label='date', 
          header=True)

In [3]:
data = pd.read_csv(data_dir / 'nhs.csv', index_col='date',
                   parse_dates=True)['VALUE']
node_color = 'green!80!blue'
datelt = dtxt(data.index[-1])['mon1']
latest = data.iloc[-1] * 1000
ch1yr = data.pct_change(12).iloc[-1]

txt = value_text(ch1yr)
compdt = '2020-02-01'
cdt = dtxt(pd.to_datetime(compdt))['mon1']
pc = data.loc[compdt]
pcch = ((data.iloc[-1] / pc) - 1) * 100
pcvt = value_text(pcch)
url = 'https://www.census.gov/construction/nrs/pdf/newressales.pdf'
cl = c_line(node_color)
text = (f'In {datelt}, the Census Bureau \href{{{url}}}{{report}}'+
        ' seasonally-adjusted single family new homes sales totaling '+
        f'{latest:,.0f} {cl}. Over the past year, new homes sales '+
        f'{txt}. Pre-COVID, in {cdt}, the annualized rate of '+
        f'single family new home sales was {pc * 1000:,.0f}. '+
        f'Since {cdt}, new home sales have {pcvt}.')
write_txt(text_dir / 'nhs.txt', text)
print(text)

node = end_node(data, node_color, date='m', digits=0, full_year=True)
write_txt(text_dir / 'nhs_node.txt', node)

In February 2022, the Census Bureau \href{https://www.census.gov/construction/nrs/pdf/newressales.pdf}{report} seasonally-adjusted single family new homes sales totaling 772,000 (see {\color{green!80!blue}\textbf{---}}). Over the past year, new homes sales decreased 0.1 percent. Pre-COVID, in February 2020, the annualized rate of single family new home sales was 730,000. Since February 2020, new home sales have increased 5.8 percent.
