In [88]:
#Import packages
import numpy as np # linear algebra
from numpy import arange,array,ones
from scipy import stats
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from datetime import datetime
import math
import os
import warnings
warnings.filterwarnings("ignore")
from chart_studio import plotly as py
import plotly.graph_objs as go
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot


**Data Loading**

In [89]:
# Yearly CSV extracts to load, listed newest first
csv_file_list = ["pp-2020.csv", "pp-2019.csv", "pp-2018.csv"]

# Column labels handed to read_csv via `names=` for every file
column_names = [
    'ID', 'Price', 'Date of Transfer', 'Post Code',
    'Property Type', 'Old/New', 'Duration', 'Address 1',
    'Address 2', 'Address 3', 'Town/City', 'District',
    'Borough', 'Region', 'PPDCategoryType', 'RecordStatus',
]

# One DataFrame per yearly file, in the same order as csv_file_list
list_of_dataframes = [
    pd.read_csv(csv_path, names=column_names)
    for csv_path in csv_file_list
]

# Stack the yearly frames into a single data frame
merged_df = pd.concat(list_of_dataframes)

**Preprocessing**

In [90]:
# Price bounds (inclusive) used to discard outlier transactions
MIN_PRICE = 10000
MAX_PRICE = 3000000

# Remove duplicated entries, keeping the first row seen for each ID
merged_df = merged_df.drop_duplicates(subset=['ID'], keep="first")

# Remove columns that are not needed for the analysis
# (reassignment instead of inplace=True keeps the step explicit)
merged_df = merged_df.drop(columns=['ID', 'Address 1', 'Address 2', 'Address 3', 'Borough',
                                    'Region', 'PPDCategoryType', 'RecordStatus'])

# Remove outliers: keep rows with MIN_PRICE <= Price <= MAX_PRICE.
# .copy() makes clean_df an independent frame, so the column assignments
# below write to clean_df itself and not to a slice of merged_df.
price_in_range = merged_df['Price'].between(MIN_PRICE, MAX_PRICE)
clean_df = merged_df.loc[price_in_range].copy()

# Convert the 'Date of Transfer' column from object to datetime
clean_df['Date of Transfer'] = pd.to_datetime(clean_df['Date of Transfer'])

# Transaction year and month columns for use in further analysis
clean_df['Year'] = clean_df['Date of Transfer'].dt.year
clean_df['Month'] = clean_df['Date of Transfer'].dt.month

**National Median Price Calculation**

In [91]:
# Month-end dates from January 2018 to December 2020 as 'dd-mm-YYYY' strings.
# `daterange` is reused by the per-city cells further down.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of 'ME' — confirm
# against the installed version before changing it.
daterange = pd.date_range('2018-01-01','2020-12-31', freq='1M')
daterange = [d.strftime('%d-%m-%Y') for d in daterange]

# Median price for every (year, month) of transfer.
# Selecting 'Price' before aggregating keeps the median away from the
# non-numeric columns of clean_df.
national = (
    clean_df
    .groupby([clean_df["Date of Transfer"].dt.year,
              clean_df["Date of Transfer"].dt.month])['Price']
    .median()
    .to_frame()
)

# Attach one month-end date per group (assigned by position).
# This requires exactly one group per entry in `daterange`; pandas raises a
# ValueError if any month has no transactions.
national['Dates'] = pd.to_datetime(daterange, format='%d-%m-%Y')

**National: Treatment Group differences**

In [92]:
# Treatment "before" window: month-end dates from Feb 2019 up to (not including) Oct 2019,
# re-indexed by calendar month so it lines up with the "after" window.
# .assign returns a new frame, so nothing is written into a slice of `national`.
treatment_before = (
    national[(national["Dates"] < '2019-10-01') & (national["Dates"] >= '2019-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# Treatment "after" window: the same calendar months one year later (Feb-Sep 2020)
treatment_after = (
    national[(national["Dates"] < '2020-10-01') & (national["Dates"] >= '2020-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# Relative (fractional) change between before and after, aligned on the month index.
# Built as a new frame so treatment_after keeps its original median prices;
# 'Dates' is carried over from the "after" window for plotting.
treatment_diff = treatment_after.assign(
    Price=(treatment_after["Price"] - treatment_before["Price"]) / treatment_before["Price"]
)

**National: Control Group differences**

In [94]:
# Control "before" window: month-end dates from Oct 2018 up to (not including) Feb 2019,
# indexed by calendar month with only the Price column kept.
# The month is derived from the window itself, matching the other window cells.
control_before = (
    national[(national["Dates"] < '2019-02-01') & (national["Dates"] >= '2018-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# Control "after" window: the same calendar months one year later (Oct 2019 - Jan 2020)
control_after = (
    national[(national["Dates"] < '2020-02-01') & (national["Dates"] >= '2019-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# Relative (fractional) change between control before and after, aligned on month
control_diff = (control_after - control_before) / control_before

**National: Difference in Treatment and Control Groups**

In [95]:
# Difference-in-differences: each month's treatment change minus the mean control change.
# A new frame is created instead of aliasing treatment_diff, so re-running this
# cell does not subtract the control mean a second time.
diff_diff = treatment_diff.assign(
    Price=treatment_diff['Price'] - control_diff['Price'].mean()
)

**National: Plotting of Difference-in-Differences (DiD)**

In [96]:
# Create the figure
fig = go.Figure()

# Line for the monthly difference-in-differences price effect
fig.add_trace(go.Scatter(
    x=diff_diff['Dates'],
    y=diff_diff['Price'],
    name = "Price Effect",
    line = dict(dash='dash')))

# Key date annotations, anchored at the March and June 2020 month-end dates.
# NOTE(review): the y values are hard-coded — presumably read off the plotted
# line; re-check them whenever the underlying data changes.
fig.add_annotation(x = datetime.strptime('31-03-2020', '%d-%m-%Y'), y = 0.0412,
        xref = 'x', yref = 'y', text = 'Lockdown Announced', showarrow = True,
        arrowhead = 7, ax = 0, ay = -35)
fig.add_annotation(x = datetime.strptime('30-06-2020', '%d-%m-%Y'), y = -0.0255,
        xref = 'x', yref = 'y', text = 'Stamp Duty Holiday', showarrow = True,
        arrowhead = 7, ax = 0, ay = -65)

# Horizontal reference line at 0 (no effect)
fig.add_hline(y = 0,line_width=1, line_color="red")

# Title and axis labels
fig.update_layout(
    title="Nationwide: Effect of COVID-19 on House Prices",
    xaxis_title="Date", yaxis_title="Price Effect")

# One tick per month, labelled with abbreviated month and year
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

fig.show()

**London Median Price Calculation**

In [63]:
# Monthly median prices for rows whose 'District' value is 'LONDON'
london_sales = clean_df[clean_df["District"] == 'LONDON']

# Median of 'Price' per (year, month) of transfer; selecting the column first
# keeps the aggregation away from the non-numeric columns.
DD_LDN = (
    london_sales
    .groupby([london_sales["Date of Transfer"].dt.year,
              london_sales["Date of Transfer"].dt.month])['Price']
    .median()
    .to_frame()
)

# Attach one month-end date per group (assigned by position). Requires a group
# for every entry in `daterange`, otherwise pandas raises a ValueError.
DD_LDN['Dates'] = pd.to_datetime(daterange, format='%d-%m-%Y')


**London: Treatment Group Differences**

In [76]:
# Calculate the difference of the treatment.
# "Before" window (TB): month-end dates from Feb 2019 up to (not including) Oct 2019,
# indexed by calendar month. .assign returns a new frame, so nothing is written
# into a slice of DD_LDN.
DD_LDN_TB = (
    DD_LDN[(DD_LDN["Dates"] < '2019-10-01') & (DD_LDN["Dates"] >= '2019-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# "After" window (TA): the same calendar months one year later (Feb-Sep 2020)
DD_LDN_TA = (
    DD_LDN[(DD_LDN["Dates"] < '2020-10-01') & (DD_LDN["Dates"] >= '2020-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# Relative (fractional) change, aligned on the month index. Built as a new
# frame so DD_LDN_TA keeps its original median prices.
DD_LDN_T = DD_LDN_TA.assign(
    Price=(DD_LDN_TA["Price"] - DD_LDN_TB["Price"]) / DD_LDN_TB["Price"]
)

**London: Control Group Differences**

In [77]:
# Calculate the difference of the control.
# "After" window (CA): month-end dates from Oct 2019 up to (not including) Feb 2020,
# indexed by calendar month with only the Price column kept.
DD_LDN_CA = (
    DD_LDN[(DD_LDN["Dates"] < '2020-02-01') & (DD_LDN["Dates"] >= '2019-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# "Before" window (CB): the same calendar months one year earlier (Oct 2018 - Jan 2019)
DD_LDN_CB = (
    DD_LDN[(DD_LDN["Dates"] < '2019-02-01') & (DD_LDN["Dates"] >= '2018-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# Relative (fractional) change between control before and after, aligned on month
DD_LDN_C = (DD_LDN_CA - DD_LDN_CB) / DD_LDN_CB

**London: Differences in Treatment and Control Groups**

In [78]:
# Difference in the differences: each month's treatment change minus the mean control change.
# A new frame is created instead of aliasing DD_LDN_T, so re-running this cell
# does not subtract the control mean a second time.
DD_LDN_X = DD_LDN_T.assign(
    Price=DD_LDN_T['Price'] - DD_LDN_C['Price'].mean()
)

**London: Plotting of Difference-in-Differences (DiD)**

In [82]:
# Create the figure
fig = go.Figure()

# Line for the monthly difference-in-differences price effect
fig.add_trace(go.Scatter(
    x=DD_LDN_X['Dates'],
    y=DD_LDN_X['Price'],
    name = "Price Effect",
    line = dict(dash='dash')))

# Key date annotations, anchored at the March and June 2020 month-end dates.
# NOTE(review): the y values are hard-coded — presumably read off the plotted
# line; re-check them whenever the underlying data changes.
fig.add_annotation(x = datetime.strptime('31-03-2020', '%d-%m-%Y'), y = -0.0085,
        xref = 'x', yref = 'y', text = 'Lockdown Announced', showarrow = True,
        arrowhead = 7, ax = 0, ay = 65)
fig.add_annotation(x = datetime.strptime('30-06-2020', '%d-%m-%Y'), y = -0.0389,
        xref = 'x', yref = 'y', text = 'Stamp Duty Holiday', showarrow = True,
        arrowhead = 7, ax = 0, ay = -90)

# Horizontal reference line at 0 (no effect)
fig.add_hline(y = 0,line_width=1, line_color="red")

# Title and axis labels
fig.update_layout(
    title="London: Effect of COVID-19 on House Prices",
    xaxis_title="Date", yaxis_title="Price Effect")

# One tick per month, labelled with abbreviated month and year
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

fig.show()

In [103]:
# Monthly median prices for rows whose 'District' value is 'BRISTOL'
bristol_sales = clean_df[clean_df["District"] == 'BRISTOL']

# Median of 'Price' per (year, month) of transfer; selecting the column first
# keeps the aggregation away from the non-numeric columns.
DD_BRI = (
    bristol_sales
    .groupby([bristol_sales["Date of Transfer"].dt.year,
              bristol_sales["Date of Transfer"].dt.month])['Price']
    .median()
    .to_frame()
)

# Attach one month-end date per group (assigned by position). Requires a group
# for every entry in `daterange`, otherwise pandas raises a ValueError.
DD_BRI['Dates'] = pd.to_datetime(daterange, format='%d-%m-%Y')

In [104]:
# Calculate the difference of the treatment.
# "Before" window (TB): month-end dates from Feb 2019 up to (not including) Oct 2019,
# indexed by calendar month. .assign returns a new frame, so nothing is written
# into a slice of DD_BRI.
DD_BRI_TB = (
    DD_BRI[(DD_BRI["Dates"] < '2019-10-01') & (DD_BRI["Dates"] >= '2019-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# "After" window (TA): the same calendar months one year later (Feb-Sep 2020)
DD_BRI_TA = (
    DD_BRI[(DD_BRI["Dates"] < '2020-10-01') & (DD_BRI["Dates"] >= '2020-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# Relative (fractional) change, aligned on the month index. Built as a new
# frame so DD_BRI_TA keeps its original median prices.
DD_BRI_T = DD_BRI_TA.assign(
    Price=(DD_BRI_TA["Price"] - DD_BRI_TB["Price"]) / DD_BRI_TB["Price"]
)

In [105]:
# Calculate the difference of the control.
# "After" window (CA): month-end dates from Oct 2019 up to (not including) Feb 2020,
# indexed by calendar month with only the Price column kept.
DD_BRI_CA = (
    DD_BRI[(DD_BRI["Dates"] < '2020-02-01') & (DD_BRI["Dates"] >= '2019-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# "Before" window (CB): the same calendar months one year earlier (Oct 2018 - Jan 2019)
DD_BRI_CB = (
    DD_BRI[(DD_BRI["Dates"] < '2019-02-01') & (DD_BRI["Dates"] >= '2018-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# Relative (fractional) change between control before and after, aligned on month
DD_BRI_C = (DD_BRI_CA - DD_BRI_CB) / DD_BRI_CB

In [106]:
# Difference in the differences: each month's treatment change minus the mean control change.
# A new frame is created instead of aliasing DD_BRI_T, so re-running this cell
# does not subtract the control mean a second time.
DD_BRI_X = DD_BRI_T.assign(
    Price=DD_BRI_T['Price'] - DD_BRI_C['Price'].mean()
)

In [107]:
# Create the figure
fig = go.Figure()

# Line for the monthly difference-in-differences price effect
fig.add_trace(go.Scatter(
    x=DD_BRI_X['Dates'],
    y=DD_BRI_X['Price'],
    name = "Price Effect",
    line = dict(dash='dash')))

# Key date annotations, anchored at the March and June 2020 month-end dates.
# NOTE(review): the y values are hard-coded — presumably read off the plotted
# line; re-check them whenever the underlying data changes.
fig.add_annotation(x = datetime.strptime('31-03-2020', '%d-%m-%Y'), y = -0.026,
        xref = 'x', yref = 'y', text = 'Lockdown Announced', showarrow = True,
        arrowhead = 7, ax = 0, ay = 70)
fig.add_annotation(x = datetime.strptime('30-06-2020', '%d-%m-%Y'), y = -0.015,
        xref = 'x', yref = 'y', text = 'Stamp Duty Holiday', showarrow = True,
        arrowhead = 7, ax = 0, ay = 70)

# Horizontal reference line at 0 (no effect)
fig.add_hline(y = 0,line_width=1, line_color="red")

# Title and axis labels
fig.update_layout(
    title="Bristol: Effect of COVID-19 on House Prices",
    xaxis_title="Date", yaxis_title="Price Effect")

# One tick per month, labelled with abbreviated month and year
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

fig.show()

In [97]:
# Monthly median prices for rows whose 'District' value is 'BIRMINGHAM'.
# NOTE: despite the DD_CAM prefix, this frame holds Birmingham data; the name is
# kept because the following cells refer to it.
birmingham_sales = clean_df[clean_df["District"] == 'BIRMINGHAM']

# Median of 'Price' per (year, month) of transfer; selecting the column first
# keeps the aggregation away from the non-numeric columns.
DD_CAM = (
    birmingham_sales
    .groupby([birmingham_sales["Date of Transfer"].dt.year,
              birmingham_sales["Date of Transfer"].dt.month])['Price']
    .median()
    .to_frame()
)

# Attach one month-end date per group (assigned by position). Requires a group
# for every entry in `daterange`, otherwise pandas raises a ValueError.
DD_CAM['Dates'] = pd.to_datetime(daterange, format='%d-%m-%Y')

In [98]:
# Calculate the difference of the treatment.
# "Before" window (TB): month-end dates from Feb 2019 up to (not including) Oct 2019,
# indexed by calendar month. .assign returns a new frame, so nothing is written
# into a slice of DD_CAM.
DD_CAM_TB = (
    DD_CAM[(DD_CAM["Dates"] < '2019-10-01') & (DD_CAM["Dates"] >= '2019-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# "After" window (TA): the same calendar months one year later (Feb-Sep 2020)
DD_CAM_TA = (
    DD_CAM[(DD_CAM["Dates"] < '2020-10-01') & (DD_CAM["Dates"] >= '2020-02-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
)

# Relative (fractional) change, aligned on the month index. Built as a new
# frame so DD_CAM_TA keeps its original median prices.
DD_CAM_T = DD_CAM_TA.assign(
    Price=(DD_CAM_TA["Price"] - DD_CAM_TB["Price"]) / DD_CAM_TB["Price"]
)

In [99]:
# Calculate the difference of the control.
# "After" window (CA): month-end dates from Oct 2019 up to (not including) Feb 2020,
# indexed by calendar month with only the Price column kept.
DD_CAM_CA = (
    DD_CAM[(DD_CAM["Dates"] < '2020-02-01') & (DD_CAM["Dates"] >= '2019-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# "Before" window (CB): the same calendar months one year earlier (Oct 2018 - Jan 2019)
DD_CAM_CB = (
    DD_CAM[(DD_CAM["Dates"] < '2019-02-01') & (DD_CAM["Dates"] >= '2018-10-01')]
    .assign(month=lambda window: window['Dates'].dt.month)
    .set_index('month')
    .drop(columns=['Dates'])
)

# Relative (fractional) change between control before and after, aligned on month
DD_CAM_C = (DD_CAM_CA - DD_CAM_CB) / DD_CAM_CB

In [100]:
# Difference in the differences: each month's treatment change minus the mean control change.
# A new frame is created instead of aliasing DD_CAM_T, so re-running this cell
# does not subtract the control mean a second time.
DD_CAM_X = DD_CAM_T.assign(
    Price=DD_CAM_T['Price'] - DD_CAM_C['Price'].mean()
)

In [102]:
# Create the figure
fig = go.Figure()

# Line for the monthly difference-in-differences price effect
fig.add_trace(go.Scatter(
    x=DD_CAM_X['Dates'],
    y=DD_CAM_X['Price'],
    name = "Price Effect",
    line = dict(dash='dash')))

# Key date annotations, anchored at the March and June 2020 month-end dates.
# NOTE(review): the y values are hard-coded — presumably read off the plotted
# line; re-check them whenever the underlying data changes.
fig.add_annotation(x = datetime.strptime('31-03-2020', '%d-%m-%Y'), y = -0.0086,
        xref = 'x', yref = 'y', text = 'Lockdown Announced', showarrow = True,
        arrowhead = 7, ax = 0, ay = 45)
fig.add_annotation(x = datetime.strptime('30-06-2020', '%d-%m-%Y'), y = -0.045,
        xref = 'x', yref = 'y', text = 'Stamp Duty Holiday', showarrow = True,
        arrowhead = 7, ax = 0, ay = 35)

# Horizontal reference line at 0 (no effect)
fig.add_hline(y = 0,line_width=1, line_color="red")

# Title and axis labels
fig.update_layout(
    title="Birmingham: Effect of COVID-19 on House Prices",
    xaxis_title="Date", yaxis_title="Price Effect")

# One tick per month, labelled with abbreviated month and year
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

fig.show()