In [2]:
import pandas as pd
import numpy as np
import altair as alt
import eco_style
alt.themes.enable('light')

ThemeRegistry.enable('light')

# OECD House Prices

In [47]:
# Load the OECD house-price extract, keep the total vintage and tidy the column names.
df = pd.read_csv('oecd_data.csv')
df = df[df.VINTAGE == 'VINTAGE_TOTAL']
df = df[['REG_ID', 'Region', 'TIME', 'Value']]
df.columns = ['iso3', 'country', 'date', 'value']

# Keep three-character region codes only.
df = df[df.iso3.str.len() == 3]
# drop everything with brackets in its country (these are subnational regions)
# Literal match (regex=False) avoids the invalid '\(' escape in a plain string.
df = df[~df.country.str.contains('(', regex=False)]

# Only keep countries that already have an observation for 2005-Q1.
iso3s = df.query("date == '2005-Q1'").iso3.unique()
df = df[df.iso3.isin(iso3s)]

exclude = [ # We just want developed OECD countries
    "CHL", "BRA"
]

df = df[~df.iso3.isin(exclude)]

# Rebase every series so that its annual BASE_YEAR observation equals 100.
# NOTE(review): the earlier comment and column suffix said 2005, but the code
# has been rebasing on the annual '2010' row; that behaviour is kept here and
# only the naming is corrected. Confirm which base year is intended.
BASE_YEAR = '2010'
# Inner merge: countries without an annual BASE_YEAR row drop out.
df = df.merge(df.query("date == @BASE_YEAR"), on='iso3', suffixes=('', '_base'))
df['value'] = df.value / df.value_base * 100

# Quarterly labels contain a dash ("2005-Q1"); annual ones ("2005") do not.
df = df[df.date.str.contains("-")]

def quarters_to_date(date_str):
    """Convert a quarter label such as "2005-Q1" to the ISO date of the quarter's first day.

    The text before the first dash is used as the year and the final character
    of the second dash-separated piece as the quarter number, so "2023-Q3"
    becomes "2023-07-01".
    """
    pieces = date_str.split("-")
    year_part = pieces[0]
    quarter_number = int(pieces[1][-1])
    # Q1 -> month 1, Q2 -> month 4, Q3 -> month 7, Q4 -> month 10
    first_month = 3 * quarter_number - 2
    return f"{year_part}-{first_month:02d}-01"

# Swap the quarter labels for ISO dates and keep only the tidy columns.
df['date'] = df['date'].map(quarters_to_date)
df = df.loc[:, ['iso3', 'country', 'date', 'value']]

# Last row per country (in current row order), ranked by index level.
df[~df.duplicated(subset=['iso3'], keep='last')].sort_values(by='value', ascending=False)

Unnamed: 0,iso3,country,date,value
1698,ISL,Iceland,2023-07-01,320.023605
871,EST,Estonia,2023-07-01,310.264315
1582,HUN,Hungary,2023-04-01,289.992564
2032,MEX,Mexico,2023-07-01,246.547708
2344,NZL,New Zealand,2023-07-01,233.197986
1939,ISR,Israel,2023-07-01,212.82874
674,AUT,Austria,2023-07-01,212.297128
2437,NOR,Norway,2023-07-01,194.350954
2530,SWE,Sweden,2023-07-01,179.287501
302,AUS,Australia,2021-10-01,178.024655


# Britain vs France Housing Starts

In [172]:
# France: semicolon-separated, latin1-encoded export with three header rows to skip.
# The last column is discarded, leaving the date and value columns.
df = pd.read_csv("fra_starts.csv", sep=";", skiprows=3, encoding="latin1").iloc[:, :-1]
df = df.set_axis(['date', 'value'], axis=1)
df = df.assign(
    date=lambda frame: pd.to_datetime(frame['date']).dt.strftime("%Y-%m-%d"),
    country='France',
)
fra_df = df.copy()

# UK: "Started - All Dwellings" from sheet "1a" of the house-building workbook.
df = pd.read_excel("ukhousebuilding-2.xlsx", sheet_name="1a", skiprows=5)
df = df[['Period', 'Started - All Dwellings']].copy()
df.columns = ['date', 'value']

# The quarter is not parsed from the Period text: rows are assumed to be
# consecutive quarters starting in Q1 (TODO confirm against the sheet).
# Cycling by position also copes with a row count that is not a multiple of 4,
# which repeating the list int(len/4) times could not.
quarter_months = ['01', '04', '07', '10']
df['month'] = [quarter_months[i % 4] for i in range(len(df))]
df['year'] = df.date.str[-4:]  # Period is expected to end with the 4-digit year
df['date'] = pd.to_datetime(df.year + '-' + df.month + '-01')

# drop everything between Apr 2020 and Oct 2022
df = df[~((df.date >= '2020-04-01') & (df.date <= '2022-10-01'))].copy()

df['value'] = pd.to_numeric(df.value, errors='coerce')
df = df.dropna(subset=['value']).copy()

# Trailing four-quarter total. Rows were removed above, so a 4-row window is
# not always four consecutive quarters: keep a total only when the window
# spans exactly 9 months (e.g. Jan -> Oct). Windows that straddle a gap would
# otherwise add up quarters from different years.
df['value'] = df.value.rolling(4).sum()
window_start = df.date.shift(3)
months_spanned = (
    (df.date.dt.year - window_start.dt.year) * 12
    + (df.date.dt.month - window_start.dt.month)
)
df = df[months_spanned == 9].copy()

df['date'] = df.date.dt.strftime("%Y-%m-%d")
df['country'] = 'UK'
df = df[['country', 'date', 'value']]
uk_df = df.copy()

# Canada: national "Housing starts" / "Total units" rows, summed to calendar years.
df = pd.read_csv("canada.csv")
is_national_total = (
    (df['Housing estimates'] == 'Housing starts')
    & (df['GEO'] == 'Canada')
    & (df['SCALAR_FACTOR'] == 'units')
    & (df['Type of unit'] == 'Total units')
)
# .copy() so the column assignments below act on an independent frame, not a slice.
df = df[is_national_total].copy()
df['year'] = pd.to_datetime(df['REF_DATE']).dt.year
df['value'] = pd.to_numeric(df.VALUE, errors='coerce')

# Sum only the numeric column; a frame-wide sum would also aggregate the
# string date column, which is discarded anyway.
df = df.groupby('year', as_index=False)['value'].sum()

# Years from 2024 onwards are left out (presumably incomplete - confirm).
df = df.query("year < 2024").copy()
df['country'] = 'Canada'
df['date'] = df.year.astype(str) + '-01-01'
df = df[['country', 'date', 'value']]
canada_df = df.copy()

# Stack the three country frames (row labels are kept as they are) and keep an
# untouched snapshot for later cells, since `df` is rebound further down.
full_housing_panel = pd.concat([canada_df, uk_df, fra_df])
df = full_housing_panel.copy()

In [157]:

# Order by date and show the most recent row available for each country.
df = df.sort_values(by='date')
df[~df.duplicated(subset=['country'], keep='last')]

Unnamed: 0,country,date,value
75,Canada,2023-01-01,240267.0
183,UK,2023-10-01,176460.0
0,France,2024-04-01,282400.0


In [169]:

# Label each series at its final plotted date.

# Trim to 2023-10-01 so the series share one end date for the labels;
# .copy() so the new column is added to an independent frame, not a slice.
df = df.query("date <= '2023-10-01'").copy()
df['label'] = np.where(df.date == df.date.max(), df.country, '')

# Encodings shared by the line layer and the text layer.
base = alt.Chart(df.query("country != 'Canada' and date >= '2000-01-01'")).encode(
    x=alt.X('date:T', title=None),
    y=alt.Y('value:Q', title=None),
    color=alt.Color('country:N',
                    legend=None,
                    title=None),
)

# strokeDash belongs to the line layer only: on the shared base it was also
# handed to the text mark, which cannot use it and triggered
# 'strokeDash dropped as it is incompatible with "text"' warnings.
line = base.mark_line(point={
    "size": 0,
}).encode(
    strokeDash=alt.condition(
        alt.datum.country == 'France',
        alt.value([3, 3]),
        alt.value([0, 0])
    ),
)

# Country names drawn just to the right of each series' last point.
label = base.mark_text(
    align='left',
    size=12,
    dx=5,
    dy=0).encode(
    text='label'
)

chart = line + label

chart = chart.properties(
        title={
        "text": ["Housebuilding in the UK and France"],
        "dx": 50,
        "anchor": "start",
        "subtitle": ["Housing Starts, not seasonally adjusted", "Source: ONS, INSEE", ""],
        "subtitleColor": "#676A86",
    }
)

chart.save("uk_vs_france.svg", scale_factor=2.0)
chart.save("uk_vs_france.png", scale_factor=2.0)
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
WARN strokeDash dropped as it is incompatible with "text".
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
WARN strokeDash dropped as it is incompatible with "text".
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# 10-year comparison

In [186]:
# Compare France and the UK on their October readings from 2011 onwards.
df = full_housing_panel.copy()
df = df.query("country != 'Canada' and date >= '2011-10-01'")
# Match the "-10-01" suffix rather than the substring "10-01": the substring
# also appears in e.g. "2010-01-01", so it was only safe because of the 2011
# lower bound above.
df = df[df.date.astype(str).str.endswith("-10-01")]
# One row per date, one column per country; dates missing either country are dropped.
df = df.pivot(index='date', columns='country', values='value').reset_index().dropna()
# UK values are trailing four-quarter sums from the loader cell.
# NOTE(review): assumes the French monthly figures are annualised too - confirm.
df['shortfall'] = df['France'] - df['UK']
df.shortfall.sum()

2037360.0

In [177]:

# Scratch cell: coerce `value` to numbers, drop incomplete rows and tag every row as UK.
# NOTE(review): in top-to-bottom order `df` is the pivoted frame from the
# previous cell, which has no `value` column, so this relies on kernel state
# from another run. It also overwrites `country` for all rows, whatever their origin.
df['value'] = pd.to_numeric(df['value'], errors='coerce')
df = df.dropna().assign(country='UK')
df


Unnamed: 0,country,date,value
143,UK,2013-10-01,150120.0
144,UK,2014-01-01,162210.0
145,UK,2014-04-01,167330.0
146,UK,2014-07-01,170200.0
147,UK,2014-10-01,170120.0
...,...,...,...
122,UK,2014-02-01,350000.0
123,UK,2014-01-01,356500.0
124,UK,2013-12-01,357900.0
125,UK,2013-11-01,369600.0


In [None]:


# Quick line chart of the French and UK housing-start series from 2001 onwards.
df = pd.concat([fra_df, uk_df])
df = df.query("date >= '2001-01-01'")

base = alt.Chart(df).encode(
    x=alt.X('date:T', axis=alt.Axis(title='')),
    # The values are housing-start counts, not an index; the former
    # 'Index (2005=100)' title was carried over from the house-price section.
    y=alt.Y('value:Q', axis=alt.Axis(title='Housing starts')),
    color=alt.Color('country:N')
)

lines = base.mark_line()

chart = lines

chart

In [73]:
# Inspect the French frame produced by the housing-starts loader cell.
fra_df

Unnamed: 0,date,value,country
0,2024-04-01,282400,France
1,2024-03-01,282600,France
2,2024-02-01,286400,France
3,2024-01-01,289800,France
4,2023-12-01,298100,France
...,...,...,...
276,2001-04-01,336900,France
277,2001-03-01,333500,France
278,2001-02-01,337700,France
279,2001-01-01,340100,France


In [67]:
# Scratch check: first `date` entry of whichever frame `df` currently holds.
df['date'].iloc[0]

1980

In [56]:
# Scratch check: column names of whichever frame `df` currently holds.
df.columns

Index(['Revised', 'Period', 'Started - All Dwellings',
       'Started - Private Enterprise', 'Started - Housing Associations',
       'Started - Local Authorities', 'Completed - All Dwellings',
       'Completed - Private Enterprise', 'Completed - Housing Associations',
       'Completed - Local Authorities'],
      dtype='object')

In [39]:
# Scratch check: number of rows per country code.
# NOTE(review): needs `df` to be the OECD frame (it has the `iso3` column);
# the housing-starts cells rebind `df` to frames without it.
df['iso3'].value_counts()

iso3
GBR    276
HUN    250
NZL    219
AUT    186
ISR    148
CHE    118
DEU    118
FRA    118
ISL    116
DNK    104
BEL     93
SWE     93
NOR     93
IRL     93
NLD     93
MEX     93
FIN     93
EST     93
POL     93
AUS     92
Name: count, dtype: int64

In [34]:
# Scratch check: all rows for the United Kingdom (iso3 code GBR).
df[df['iso3'] == 'GBR']

Unnamed: 0,iso3,country,date,value,country_2005,date_2005,value_2005
2095,GBR,United Kingdom,1969,2.453420,United Kingdom,2005,78.951
2096,GBR,United Kingdom,1970,2.596547,United Kingdom,2005,78.951
2097,GBR,United Kingdom,1971,2.905600,United Kingdom,2005,78.951
2098,GBR,United Kingdom,1972,3.883421,United Kingdom,2005,78.951
2099,GBR,United Kingdom,1973,5.342554,United Kingdom,2005,78.951
...,...,...,...,...,...,...,...
2366,GBR,United Kingdom,2022-Q3,185.698725,United Kingdom,2005,78.951
2367,GBR,United Kingdom,2022-Q4,186.145837,United Kingdom,2005,78.951
2368,GBR,United Kingdom,2023-Q1,181.793771,United Kingdom,2005,78.951
2369,GBR,United Kingdom,2023-Q2,181.712708,United Kingdom,2005,78.951


In [33]:
# Scratch check: distinct period labels present in `df`.
df['date'].unique()

array(['2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012',
       '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', '2022', '2005-Q1', '2005-Q2', '2005-Q3', '2005-Q4',
       '2006-Q1', '2006-Q2', '2006-Q3', '2006-Q4', '2007-Q1', '2007-Q2',
       '2007-Q3', '2007-Q4', '2008-Q1', '2008-Q2', '2008-Q3', '2008-Q4',
       '2009-Q1', '2009-Q2', '2009-Q3', '2009-Q4', '2010-Q1', '2010-Q2',
       '2010-Q3', '2010-Q4', '2011-Q1', '2011-Q2', '2011-Q3', '2011-Q4',
       '2012-Q1', '2012-Q2', '2012-Q3', '2012-Q4', '2013-Q1', '2013-Q2',
       '2013-Q3', '2013-Q4', '2014-Q1', '2014-Q2', '2014-Q3', '2014-Q4',
       '2015-Q1', '2015-Q2', '2015-Q3', '2015-Q4', '2016-Q1', '2016-Q2',
       '2016-Q3', '2016-Q4', '2017-Q1', '2017-Q2', '2017-Q3', '2017-Q4',
       '2018-Q1', '2018-Q2', '2018-Q3', '2018-Q4', '2019-Q1', '2019-Q2',
       '2019-Q3', '2019-Q4', '2020-Q1', '2020-Q2', '2020-Q3', '2020-Q4',
       '2021-Q1', '2021-Q2', '2021-Q3', '2021-Q4', '2022-Q1',

In [26]:
# Country codes with a 2005-Q1 observation, as computed in the OECD cell.
iso3s

array(['BEL', 'FRA', 'AUS', 'AUT', 'CHL', 'DNK', 'EST', 'FIN', 'DEU',
       'HUN', 'ISL', 'IRL', 'ISR', 'MEX', 'NLD', 'NZL', 'NOR', 'SWE',
       'GBR', 'CHE', 'POL', 'BRA'], dtype=object)

In [18]:
# Scratch check: how many rows share each period label.
df['date'].value_counts()

date
2017-Q1    39
2018-Q1    39
2014-Q3    39
2014-Q4    39
2015-Q1    39
           ..
1971-Q1     1
1971-Q2     1
1971-Q3     1
1971-Q4     1
1979-Q3     1
Name: count, Length: 276, dtype: int64

In [13]:
# Scratch check: is there any row for the United States? (The saved output is empty.)
df[df['country'] == 'United States']

Unnamed: 0,iso3,country,date,value


In [15]:
# Scratch check: distinct region codes in `df`.
df['iso3'].unique()

array(['BEL', 'FRA', 'AUS', 'AU1', 'AU2', 'AU3', 'AU4', 'AU5', 'AU6',
       'AU7', 'AU8', 'AUT', 'CHL', 'CZE', 'DNK', 'EST', 'FIN', 'DEU',
       'HUN', 'ISL', 'IRL', 'ISR', 'ITA', 'ITC', 'ITH', 'ITI', 'JPN',
       'JPA', 'JPB', 'JPC', 'JPE', 'JPG', 'JPH', 'JPI', 'JPJ', 'LVA',
       'LTU', 'LUX', 'MEX', 'NLD', 'NZL', 'NOR', 'PRT', 'SVK', 'SVN',
       'ESP', 'SWE', 'TUR', 'GBR', 'UKC', 'UKD', 'UKE', 'UKF', 'UKG',
       'UKH', 'UKI', 'UKJ', 'UKK', 'UKL', 'UKM', 'UKN', 'GRC', 'CHE',
       'CHN', 'SAU', 'IND', 'POL', 'BRA'], dtype=object)