In [2]:
import os
import warnings
warnings.filterwarnings("ignore")

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra
from datetime import datetime
from numpy import arange,array,ones
from scipy import stats

from chart_studio import plotly as py
import plotly.graph_objs as go
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from datetime import datetime

import math

**Loading Data**

In [3]:
#names of the csv files to be read
csv_file_list = ["pp-2021.csv", "pp-2020.csv", "pp-2019.csv", "pp-2018.csv"]

#new list to store read csv files into
list_of_dataframes = []

#reading of each file in csv_file_list and storage in list_of_dataframes with column headers
for filename in csv_file_list:
    list_of_dataframes.append(pd.read_csv(filename, names = ['ID','Price', 
                                                             'Date of Transfer', 'Post Code',
                                                             'Property Type', 'Old/New','Duration','Address 1',
                                                             'Address 2', 'Address 3', 'Town/City', 'District',
                                                             'Borough', 'Region', 'PPDCategoryType', 'RecordStatus']))

#merging of each yearly csv into one data frame
merged_df = pd.concat(list_of_dataframes)

#rows and columns of the merged dataframe
merged_df.shape

(3095100, 16)

**Checking Data Integrity**

In [4]:
#preview of inital rows of merged dataframe
merged_df.head()

Unnamed: 0,ID,Price,Date of Transfer,Post Code,Property Type,Old/New,Duration,Address 1,Address 2,Address 3,Town/City,District,Borough,Region,PPDCategoryType,RecordStatus
0,{C3C3F9B5-F41B-362B-E053-6B04A8C03ACC},420000,2021-01-22 00:00,PE11 3PE,D,N,F,13,,MARKET WAY,PINCHBECK,SPALDING,SOUTH HOLLAND,LINCOLNSHIRE,A,A
1,{C3C3F9B5-F41C-362B-E053-6B04A8C03ACC},295000,2021-02-26 00:00,LN1 2XS,T,N,F,1,,HAYES YARD,INGHAM,LINCOLN,WEST LINDSEY,LINCOLNSHIRE,A,A
2,{C3C3F9B5-F41D-362B-E053-6B04A8C03ACC},96000,2021-02-11 00:00,PE12 7DS,T,N,F,8,,CHANCERY LANE,HOLBEACH,SPALDING,SOUTH HOLLAND,LINCOLNSHIRE,A,A
3,{C3C3F9B5-F41F-362B-E053-6B04A8C03ACC},225000,2021-02-18 00:00,LN12 2JJ,D,N,F,5,,YOULGRAVE AVENUE,SUTTON ON SEA,MABLETHORPE,EAST LINDSEY,LINCOLNSHIRE,A,A
4,{C3C3F9B5-F420-362B-E053-6B04A8C03ACC},110000,2021-03-12 00:00,NG33 4BY,S,N,F,17,,WOOD END,ROPSLEY,GRANTHAM,SOUTH KESTEVEN,LINCOLNSHIRE,A,A


**Duplicate Value Analysis**

In [5]:
#count of unique IDs
merged_df['ID'].nunique()

3095093

In [6]:
#finding duplicated ID number
merged_df['ID'][merged_df['ID'].duplicated()]

125840    {BEF7EBBE-E07B-7A76-E053-6B04A8C092F7}
409068    {BEF7EBBF-45B5-7A76-E053-6B04A8C092F7}
423841    {BEF7EBBF-5CCB-7A76-E053-6B04A8C092F7}
564921    {BEF7EBBE-F420-7A76-E053-6B04A8C092F7}
739849    {BEF7EBBF-A849-7A76-E053-6B04A8C092F7}
741827    {BEF7EBBF-B5BB-7A76-E053-6B04A8C092F7}
754226    {BEF7EBBF-8BE3-7A76-E053-6B04A8C092F7}
Name: ID, dtype: object

In [7]:
#locating duplicated rows for review
merged_df[merged_df["ID"]=="{BEF7EBBE-D091-7A76-E053-6B04A8C092F7}"]

Unnamed: 0,ID,Price,Date of Transfer,Post Code,Property Type,Old/New,Duration,Address 1,Address 2,Address 3,Town/City,District,Borough,Region,PPDCategoryType,RecordStatus
401859,{BEF7EBBE-D091-7A76-E053-6B04A8C092F7},710000,2020-12-17 00:00,IG9 5RF,F,N,L,ROEBUCK HEIGHTS,FLAT 11,NORTH END,,BUCKHURST HILL,EPPING FOREST,ESSEX,A,A


In [8]:
#removal of second duplicated entry
merged_df = merged_df.drop_duplicates(subset=['ID'], keep="first")

merged_df.shape

(3095093, 16)

**Data Type Analysis**

In [9]:
#review of column data types
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3095093 entries, 0 to 1032727
Data columns (total 16 columns):
 #   Column            Dtype 
---  ------            ----- 
 0   ID                object
 1   Price             int64 
 2   Date of Transfer  object
 3   Post Code         object
 4   Property Type     object
 5   Old/New           object
 6   Duration          object
 7   Address 1         object
 8   Address 2         object
 9   Address 3         object
 10  Town/City         object
 11  District          object
 12  Borough           object
 13  Region            object
 14  PPDCategoryType   object
 15  RecordStatus      object
dtypes: int64(1), object(15)
memory usage: 401.4+ MB


In [10]:
#converstion of Date of Transfer column from object to datettime
merged_df['Date of Transfer'] = pd.to_datetime(merged_df['Date of Transfer'])

#creation of columns containing transaction year & month for use in further analysis
merged_df['Year'] = merged_df['Date of Transfer'].dt.year
merged_df['Month'] = merged_df['Date of Transfer'].dt.month

**Price Distribution Analysis**

In [11]:
#summary statistics for Price
merged_df['Price'].describe()

count    3.095093e+06
mean     3.572154e+05
std      1.755787e+06
min      1.000000e+00
25%      1.500000e+05
50%      2.389500e+05
75%      3.700000e+05
max      6.300000e+08
Name: Price, dtype: float64

In [12]:
#0.5th quantile (99.5% of records are greater than this value)
print(merged_df.Price.quantile(0.005))

#99.5th quantile (99.5% of records are lesser than this value)
print(merged_df.Price.quantile(0.995))

10000.0
2950000.0


In [87]:
#threshold implementation for min & max price
clean_df = merged_df.loc[(merged_df['Price'] <= (3000000)) & (merged_df['Price'] >= (10000)),]

count                 3066033
unique                   1270
top       2019-06-28 00:00:00
freq                    20374
first     2018-01-01 00:00:00
last      2021-06-28 00:00:00
Name: Date of Transfer, dtype: object

**Transaction Count**

In [89]:
#Calculate the total number of property transactions each month
transactions = clean_df.groupby([(clean_df["Date of Transfer"].dt.year),
                                 (clean_df["Date of Transfer"].dt.month)]).count()
transactions = pd.DataFrame(transactions['ID'])

# Create a list of each month from January 2018 to June 2021
daterange = pd.date_range('2018-01-01','2021-06-30', 
              freq='1M')
daterange = [d.strftime('%d-%m-%Y') for d in daterange]

#Insert a date column to transactions data frame using the daterange created
transactions['Dates'] = daterange
transactions['Dates'] = pd.to_datetime(transactions['Dates'], format = '%d-%m-%Y')
transactions.columns = ['Count', 'Dates']

In [96]:
#Creating Figure for plotting
fig = go.Figure()

#Adding line for transaction count
fig.add_trace(go.Scatter(
    x=transactions['Dates'],
    y=transactions['Count'],
    name = "No of Transactions",
    line = dict(dash='dash')))

#Inserting graphic to denote pandemic area
fig.add_vrect(x0="2020-03-31", x1="2021-06-30", annotation_text="Pandemic Area", 
              annotation_position="bottom left", fillcolor="red", opacity=0.1, line_width=0)

#Inserting key date annotations
fig.add_annotation(x = datetime.strptime('31-03-2020', '%d-%m-%Y'), y = 70330,
        xref = 'x', yref = 'y', text = 'Lockdown Announced', showarrow = True,
        arrowhead = 7, ax = 0, ay = -80)
fig.add_annotation(x = datetime.strptime('30-06-2020', '%d-%m-%Y'), y = 52880,
        xref = 'x', yref = 'y', text = 'Stamp Duty Holiday', showarrow = True,
        arrowhead = 7, ax = 0, ay = -70)

#Setting title and updating axis labels
fig.update_layout(
    title="Monthly Count of Property Transactions Completed - Jan 2018 to June 2021",
    xaxis_title="Date", yaxis_title="Number of Transactions")

fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

fig.show()

**Range of Data Adjustment**

In [16]:
#Dataframe for all data
df_complete = clean_df[clean_df['Date of Transfer']< '2021-04-01']
#Dataframes for select years
df_21 = df_complete[df_complete['Year']== 2021]
df_20 = df_complete[df_complete['Year']== 2020]
df_19 = df_complete[df_complete['Year']== 2019]
df_18 = df_complete[df_complete['Year']== 2018]

**Median Price Analysis**

In [97]:
# Create a list of a day from each month from January 2018 to March 2021
daterange = pd.date_range('2018-01-01','2021-03-31', 
              freq='1M')

daterange = [d.strftime('%d-%m-%Y') for d in daterange]
daterange


# Use group by to calculate the median price of each month
median = df_complete.groupby([(df_complete["Date of Transfer"].dt.year),
                              (df_complete["Date of Transfer"].dt.month)]).median()
median = pd.DataFrame(median['Price'])
median['Dates'] = daterange
median['Dates'] = pd.to_datetime(median['Dates'], format = '%d-%m-%Y')
median.columns = ['Price', 'Dates']

In [103]:
fig = go.Figure()

#Adding line for Median Price
fig.add_trace(go.Scatter(
    x=median['Dates'],
    y=median['Price'],
    name = "Median House Price GBP",
    line = dict(color='red')))

#Inserting graphic to denote pandemic area
fig.add_vrect(x0="2020-03-31", x1="2021-03-30", annotation_text="Pandemic Area", 
              annotation_position="top left", fillcolor="red", opacity=0.1, line_width=0)

#Inserting key date annotations
fig.add_annotation(x = datetime.strptime('31-03-2020', '%d-%m-%Y'), y = 240000,
        xref = 'x', yref = 'y', text = 'Lockdown Announced', showarrow = True,
        arrowhead = 7, ax = 0, ay = -80)
fig.add_annotation(x = datetime.strptime('30-06-2020', '%d-%m-%Y'), y = 235000,
        xref = 'x', yref = 'y', text = 'Stamp Duty Holiday', showarrow = True,
        arrowhead = 7, ax = 0, ay = -70)

#Setting title and updating axis labels
fig.update_layout(
    title="Median Property Price (Jan 2018 - Mar 2021)",
    xaxis_title="Date", yaxis_title="Median Price (GBP)")

fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

fig.show()

In [69]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(
    x=transactions['Dates'],
    y=transactions['Count'],
    name = "No of Transactions",
    line = dict(dash='dash')),
    secondary_y=True,
)

fig.add_trace(
    go.Scatter(
    x=median['Dates'],
    y=median['Price'],
    name = "Median House Price GBP"),
    secondary_y=False,
)

#Inserting shaded area to denote first lockdown
fig.add_vrect(x0="2020-03-26", x1="2020-07-04",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote second lockdown
fig.add_vrect(x0="2020-10-31", x1="2020-12-02",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote third lockdon
fig.add_vrect(x0="2021-01-06", x1="2021-03-31",fillcolor="red", opacity=0.1, line_width=0)

#Inserting line for First Lockdown
fig.add_vline(x = datetime.strptime('26-03-2020', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="red")
#Inserting line for Stampt duty holiday start
fig.add_vline(x = datetime.strptime('30-06-2020','%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")
#Inserting line for Stampt duty holiday end
fig.add_vline(x = datetime.strptime('31-03-2021', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")
        
fig.update_layout(
    title="Monthly Median Price and Count of Transactions Completed (Jan 2018 - March 2020)",
    xaxis_title="Date")

fig.update_yaxes(title_text="Median Price", secondary_y=False)
fig.update_yaxes(title_text="Number of Transactions", secondary_y=True)

fig.update_xaxes(
    dtick="M1",
    tickformat="%b\n%Y",
    ticklabelmode="period")

fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=-0.33,
    xanchor="center",
    x=0.5
))

fig.show()

**Property Types**

Detached House Median Price Calculation

In [20]:
#Taking a specific house type from the full dataset and creating individual data frame
detached = df_complete[df_complete['Property Type']=='D']

#Grouping by year & month and calculating median price
detached = detached.groupby([(detached["Date of Transfer"].dt.year),
                          (detached["Date of Transfer"].dt.month)]).median()

#Removing unnecessary columns
detached = pd.DataFrame(detached['Price'])

#Inserting date-range-index for plotting
detached['Dates'] = daterange
detached['Dates'] = pd.to_datetime(detached['Dates'], format = '%d-%m-%Y')

#Rename coulumns and set index
detached.columns = ['Median Price', 'Dates']
detached = detached.set_index("Dates")

Semi-Detached Median Price Calculation

In [21]:
#Taking a specific house type from the full dataset and creating individual data frame
semi_detached = df_complete[df_complete['Property Type']=='S']

#Grouping by year & month and calculating median price
semi_detached = semi_detached.groupby([(semi_detached["Date of Transfer"].dt.year),
                          (semi_detached["Date of Transfer"].dt.month)]).median()

#Reducing columns and inserting date-range-index
semi_detached = pd.DataFrame(semi_detached['Price'])

#Inserting date-range-index for plotting
semi_detached['Dates'] = daterange
semi_detached['Dates'] = pd.to_datetime(semi_detached['Dates'], format = '%d-%m-%Y')

#Rename coulumns and reset index
semi_detached.columns = ['Median Price', 'Dates']
semi_detached = semi_detached.set_index("Dates")

Terraced Median Price Calculation

In [22]:
#Taking a specific house type from the full dataset and creating individual data frame
Terraced = df_complete[df_complete['Property Type']=='T']

#Grouping by year & month and calculating median price
Terraced = Terraced.groupby([(Terraced["Date of Transfer"].dt.year),
                          (Terraced["Date of Transfer"].dt.month)]).median()

#Reducing columns and inserting date-range-index
Terraced = pd.DataFrame(Terraced['Price'])

#Inserting date-range-index for plotting
Terraced['Dates'] = daterange
Terraced['Dates'] = pd.to_datetime(Terraced['Dates'], format = '%d-%m-%Y')

#Rename coulumns and reset index
Terraced.columns = ['Median Price', 'Dates']
Terraced = Terraced.set_index("Dates")

Flat Median Price Calculation

In [23]:
#Taking a specific house type from the full dataset and creating individual data frame
Flat = df_complete[df_complete['Property Type']=='F']

#Grouping by year & month and calculating median price
Flat = Flat.groupby([(Flat["Date of Transfer"].dt.year),
                          (Flat["Date of Transfer"].dt.month)]).median()

#Reducing columns and inserting date-range-index
Flat = pd.DataFrame(Flat['Price'])

#Inserting date-range-index for plotting
Flat['Dates'] = daterange
Flat['Dates'] = pd.to_datetime(Flat['Dates'], format = '%d-%m-%Y')

#Rename coulumns and reset index
Flat.columns = ['Median Price', 'Dates']
Flat = Flat.set_index("Dates")

Plotting of Property Types - Median Price

In [118]:
#Creating a new figure
fig = go.Figure()

#Adding line for detached houses
fig.add_trace(go.Scatter(
    x=detached.index,
    y=detached['Median Price'],
    name = "Detached",
    line = dict(color = 'blue'),
    opacity = 0.8))

#Adding line for Semi-detached houses
fig.add_trace(go.Scatter(
    x=semi_detached.index,
    y=semi_detached['Median Price'],
    name = "Semi Detached",
    line = dict(color = 'red'),
    opacity = 0.8))

#Adding line for terraced houses
fig.add_trace(go.Scatter(
    x=Terraced.index,
    y=Terraced['Median Price'],
    name = "Terraced",
    line = dict(color = 'Green'),
    opacity = 0.8))

#Adding line for flats
fig.add_trace(go.Scatter(
    x=Flat.index,
    y=Flat['Median Price'],
    name = "Flats/Maisonettes",
    line = dict(color = 'black'),
    opacity = 0.8))

#Inserting shaded area to denote first lockdown
fig.add_vrect(x0="2020-03-26", x1="2020-07-04",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote second lockdown
fig.add_vrect(x0="2020-10-31", x1="2020-12-02",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote third lockdon
fig.add_vrect(x0="2021-01-06", x1="2021-03-31",fillcolor="red", opacity=0.1, line_width=0)

#Inserting line for First Lockdown
fig.add_vline(x = datetime.strptime('26-03-2020', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="red")
#Inserting line for Stampt duty holiday start
fig.add_vline(x = datetime.strptime('30-06-2020','%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")
#Inserting line for Stampt duty holiday end
fig.add_vline(x = datetime.strptime('31-03-2021', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")


#Adding figure and axis titles
fig.update_layout(
    title="Median House Price by Property Type (Jan 2018 - Mar 2021)",
    xaxis_title="Date",
    yaxis_title="Median Price (GBP)")

#Updating x-axis ticks to be months
fig.update_xaxes(
    dtick="M1",
    tickformat="%b\n%Y",
    ticklabelmode="period")

#Rearranging legend to bottom of figure
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=-0.33,
    xanchor="center",
    x=0.5
))

#Displaying figure
fig.show()

Detached Share of Transactions Calculation

In [75]:
#Taking specific house type from full data frame
DetachedShare = df_complete[df_complete['Property Type']=='D']

#Grouping by year & month and counting number of transactions per month
DetachedShare = DetachedShare.groupby([(DetachedShare["Date of Transfer"].dt.year),
                          (DetachedShare["Date of Transfer"].dt.month)]).count()

#Reducing columns and inserting total transactions per month from previous 'transactions' dataframe
DetachedShare = pd.DataFrame(DetachedShare['ID'])
DetachedShare['Total'] = transactions['Count']

#Calculating share of transactions by dividing property type count by total
DetachedShare['Share'] = DetachedShare['ID'] / DetachedShare['Total']

#Inserting date-range-index
DetachedShare['Dates'] = daterange
DetachedShare['Dates'] = pd.to_datetime(DetachedShare['Dates'], format = '%d-%m-%Y')

#Rename coulumns and reset index
DetachedShare.columns = ['Count', 'All', 'Share', 'Dates']
DetachedShare = DetachedShare.set_index("Dates")

Semi-detached Share of Transactions Calculation

In [76]:
#Taking specific house type from full data frame
Semi_detachedShare = df_complete[df_complete['Property Type']=='S']

#Grouping by year & month and counting number of transactions per month
Semi_detachedShare = Semi_detachedShare.groupby([(Semi_detachedShare["Date of Transfer"].dt.year),
                          (Semi_detachedShare["Date of Transfer"].dt.month)]).count()

#Reducing columns and inserting total transactions per month from previous 'transactions' dataframe
Semi_detachedShare = pd.DataFrame(Semi_detachedShare['ID'])
Semi_detachedShare['Total'] = transactions['Count']

#Calculating share of transactions by dividing property type count by total
Semi_detachedShare['Share'] = Semi_detachedShare['ID'] / Semi_detachedShare['Total']

#Inserting date-range-index
Semi_detachedShare['Dates'] = daterange
Semi_detachedShare['Dates'] = pd.to_datetime(Semi_detachedShare['Dates'], format = '%d-%m-%Y')

#Renaming coulumns and reset index
Semi_detachedShare.columns = ['Count', 'All', 'Share', 'Dates']
Semi_detachedShare = Semi_detachedShare.set_index("Dates")

Terraced Share of Transactions Calculation

In [77]:
#Taking specific house type from full data frame
TerracedShare = df_complete[df_complete['Property Type']=='T']

#Grouping by year & month and counting number of transactions per month
TerracedShare = TerracedShare.groupby([(TerracedShare["Date of Transfer"].dt.year),
                          (TerracedShare["Date of Transfer"].dt.month)]).count()

#Reducing columns and inserting total transactions per month from previous 'transactions' dataframe
TerracedShare = pd.DataFrame(TerracedShare['ID'])
TerracedShare['Total'] = transactions['Count']

#Calculating share of transactions by dividing property type count by total count
TerracedShare['Share'] = TerracedShare['ID'] / TerracedShare['Total']

#Inserting date-range-index
TerracedShare['Dates'] = daterange
TerracedShare['Dates'] = pd.to_datetime(TerracedShare['Dates'], format = '%d-%m-%Y')

#Renaming coulumns and resetting index
TerracedShare.columns = ['Count', 'All', 'Share', 'Dates']
TerracedShare = TerracedShare.set_index("Dates")

Flats Share of Transactions Calculation

In [78]:
#Taking specific house type from full data frame
FlatShare = df_complete[df_complete['Property Type']=='F']

#Grouping by year & month and counting number of transactions per month
FlatShare = FlatShare.groupby([(FlatShare["Date of Transfer"].dt.year),
                          (FlatShare["Date of Transfer"].dt.month)]).count()

#Reducing columns and inserting total transactions per month from previous 'transactions' dataframe
FlatShare = pd.DataFrame(FlatShare['ID'])
FlatShare['Total'] = transactions['Count']

#Calculating share of transactions by dividing property type count by total count
FlatShare['Share'] = FlatShare['ID'] / FlatShare['Total']

#Inserting date-range-index
FlatShare['Dates'] = daterange
FlatShare['Dates'] = pd.to_datetime(FlatShare['Dates'], format = '%d-%m-%Y')

#Renaming coulumns and resetting index
FlatShare.columns = ['Count', 'All', 'Share', 'Dates']
FlatShare = FlatShare.set_index("Dates")

Plotting of Property Types - Share of Transactions

In [79]:
#Creating a new figure
fig = go.Figure()

#Adding line for detached houses
fig.add_trace(go.Scatter(
    x=DetachedShare.index,
    y=DetachedShare['Share'],
    name = "Detached",
    line = dict(color = 'blue'),
    opacity = 0.8))

#Adding line for semi-detached houses
fig.add_trace(go.Scatter(
    x=Semi_detachedShare.index,
    y=Semi_detachedShare['Share'],
    name = "Semi Detached",
    line = dict(color = 'red'),
    opacity = 0.8))

#Adding line for terraced houses
fig.add_trace(go.Scatter(
    x=TerracedShare.index,
    y=TerracedShare['Share'],
    name = "Terraced",
    line = dict(color = 'Green'),
    opacity = 0.8))

#Adding line for flats
fig.add_trace(go.Scatter(
    x=FlatShare.index,
    y=FlatShare['Share'],
    name = "Flats/Maisonettes",
    line = dict(color = 'black'),
    opacity = 0.8))

#Inserting shaded area to denote first lockdown
fig.add_vrect(x0="2020-03-26", x1="2020-07-04",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote second lockdown
fig.add_vrect(x0="2020-10-31", x1="2020-12-02",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote third lockdon
fig.add_vrect(x0="2021-01-06", x1="2021-03-31",fillcolor="red", opacity=0.1, line_width=0)

#Inserting line for First Lockdown
fig.add_vline(x = datetime.strptime('26-03-2020', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="red")
#Inserting line for Stampt duty holiday start
fig.add_vline(x = datetime.strptime('30-06-2020','%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")
#Inserting line for Stampt duty holiday end
fig.add_vline(x = datetime.strptime('31-03-2021', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")

#Adding figure and axis titles
fig.update_layout(
    title="Property Type: Share of Total Transactions (Jan 2018 - Mar 2021)",
    xaxis_title="Date", yaxis_title="Share of Transactions")

#Updating x-axis ticks to be months
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")

#Rearranging legend to bottom of figure
fig.update_layout(legend=dict(
    orientation="h", yanchor="bottom", y=-0.33, xanchor="center", x=0.5))

#Displaying figure
fig.show()

Trends in house price are reflected in transaction count. Demadnf for Flats did not recover to pre-covid levels after restrictions were lifted. Detached houses can be seen to make the biggest increase following the inital covid dip in transaction count indicating there was especially more demand for them.

**Timeline of House Sales: New vs Old**

In [84]:
#Filter complete dataset to only new build properties
new_transactions = df_complete.loc[(df_complete["Old/New"] == "Y")]

#Calculate the total number of new build property transactions each month
Newcount = new_transactions.groupby([(new_transactions["Date of Transfer"].dt.year),
                                     (new_transactions["Date of Transfer"].dt.month)]).count()
Newcount = pd.DataFrame(Newcount['ID'])

#Add total monthly transactions as column in Newcount dataframe
Newcount['Total'] = transactions['Count']

#Calculate share of new build transactions
Newcount['Share'] = Newcount['ID'] / Newcount['Total']

#Format Newcount dataframe for plotting
Newcount['Dates'] = daterange
Newcount['Dates'] = pd.to_datetime(Newcount['Dates'], format = '%d-%m-%Y')
Newcount.columns = ['Count', 'All', 'Share', 'Dates']
Newcount = Newcount.iloc[0:276,:]

In [86]:
#Creating a new figure
fig = go.Figure()

#Adding line for new property share
fig.add_trace(go.Scatter(
    x=Newcount['Dates'],
    y=Newcount['Share'],
    name = "Share of New Build Transactions",
    line = dict(color = '#7F7F7F'),
    opacity = 0.8))
              
#Inserting shaded area to denote first lockdown
fig.add_vrect(x0="2020-03-26", x1="2020-07-04",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote second lockdown
fig.add_vrect(x0="2020-10-31", x1="2020-12-02",fillcolor="red", opacity=0.1, line_width=0)
#Inserting shaded area to denote third lockdon
fig.add_vrect(x0="2021-01-06", x1="2021-03-31",fillcolor="red", opacity=0.1, line_width=0)

#Inserting line for First Lockdown
fig.add_vline(x = datetime.strptime('26-03-2020', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="red")
#Inserting line for Stampt duty holiday start
fig.add_vline(x = datetime.strptime('30-06-2020','%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")
#Inserting line for Stampt duty holiday end
fig.add_vline(x = datetime.strptime('31-03-2021', '%d-%m-%Y'),line_width=1.5, line_dash="dash", line_color="blue")

#Adding figure and axis titles
fig.update_layout(
    title="New Build: Share of Total Transactions (Jan 2018 - Mar 2021)",
    xaxis_title="Date", yaxis_title="Share of Transactions")

#Updating x-axis ticks to be months
fig.update_xaxes(dtick="M1",tickformat="%b\n%Y",ticklabelmode="period")

#Rearranging legend to bottom of figure
fig.update_layout(legend=dict(orientation="h",yanchor="bottom",y=-0.33,xanchor="center",x=0.5))

#Displaying figure
fig.show()