# 1: Gathering and Processing Raw Data

## Pulling data from Tilastokeskus, Statistics Finland’s free-of-charge statistical databases

In [38]:
# Suppress the warnings
import warnings
warnings.filterwarnings('ignore')

In [7]:
import requests
import json
import pandas as pd
import os
import glob
from datetime import datetime
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta

### Gathering the Finnish costs of building data

In [8]:
# API's url
url='https://pxdata.stat.fi:443/PxWeb/api/v1/en/StatFin/rki/statfin_rki_pxt_11na.px'

In [9]:
# query taken from the Tilastokeskus's data page
query = {
  "query": [
    {
      "code": "Perusvuosi",
      "selection": {
        "filter": "item",
        "values": [
          "1990_100"
        ]
      }
    },
    {
      "code": "Indeksi",
      "selection": {
        "filter": "item",
        "values": [
          "Kokonaisindeksi",
          "Työpanokset",
          "Tarvikepanokset",
          "Palvelut"
        ]
      }
    },
    {
      "code": "Tiedot",
      "selection": {
        "filter": "item",
        "values": [
          "pisteluku",
          "vuosimuutos"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
}

In [10]:
session = requests.Session()

response = session.post(url, json=query)
response_json = json.loads(response.content.decode('utf-8-sig'))

response_json

{'class': 'dataset',
 'label': 'Building cost index by type of cost, annual data by Year, Base year, Index and Information',
 'source': 'Statistics Finland, building cost index',
 'updated': '2023-01-13T06:00:00Z',
 'id': ['Vuosi', 'Perusvuosi', 'Indeksi', 'Tiedot'],
 'size': [33, 1, 4, 2],
 'dimension': {'Vuosi': {'extension': {'show': 'value'},
   'label': 'Year',
   'category': {'index': {'1990': 0,
     '1991': 1,
     '1992': 2,
     '1993': 3,
     '1994': 4,
     '1995': 5,
     '1996': 6,
     '1997': 7,
     '1998': 8,
     '1999': 9,
     '2000': 10,
     '2001': 11,
     '2002': 12,
     '2003': 13,
     '2004': 14,
     '2005': 15,
     '2006': 16,
     '2007': 17,
     '2008': 18,
     '2009': 19,
     '2010': 20,
     '2011': 21,
     '2012': 22,
     '2013': 23,
     '2014': 24,
     '2015': 25,
     '2016': 26,
     '2017': 27,
     '2018': 28,
     '2019': 29,
     '2020': 30,
     '2021': 31,
     '2022': 32},
    'label': {'1990': '1990',
     '1991': '1991',
     '1

In [11]:
# define the column names for the dataframe
colnames = ["Total", "Labour", "Materials", "Services"]

In [12]:
# a for loop to extract the data required from the JSON response
iter = len(response_json['dimension']['Vuosi']['category']['index'].items())
year = [] # empty list
total = [] # empty list
labour = [] # empty list
materials = [] # empty list
services = [] # empty list
for x in range(0, iter):
    try:
        total.append(response_json['value'][x*8])
        labour.append(response_json['value'][x*8+2])
        materials.append(response_json['value'][x*8+4])
        services.append(response_json['value'][x*8+6])
        year.append([k for k, v in response_json['dimension']['Vuosi']['category']['index'].items() if v == x][0])
    except:
        print(f"There is a missing value at index: {x}")

In [13]:
#make a dataframe out of a tuple made out of the five lists
finnish_costs = pd.DataFrame(list(zip(total,labour,materials,services)), index = year, columns = [colnames])

In [46]:
finnish_costs

Unnamed: 0,Total,Labour,Materials,Services
1990,100.0,100.0,100.0,100.0
1991,102.2,108.4,98.3,102.5
1992,100.4,105.8,96.3,102.8
1993,100.7,105.0,98.6,99.0
1994,102.2,103.3,103.0,97.8
1995,103.5,103.0,105.3,99.1
1996,102.7,103.5,104.3,96.3
1997,105.2,106.9,107.2,96.8
1998,107.6,109.4,109.8,98.5
1999,109.1,113.5,110.2,99.0


### Gathering the Finnish domestic building completion data

In [47]:
# API's url
url='https://pxdata.stat.fi:443/PxWeb/api/v1/en/StatFin/ras/statfin_ras_pxt_12fy.px'

In [48]:
# query taken from the Tilastokeskus's data page
query = {
  "query": [
    {
      "code": "Käyttötarkoitus",
      "selection": {
        "filter": "item",
        "values": [
          "01"
        ]
      }
    },
    {
      "code": "Rakennusvaihe",
      "selection": {
        "filter": "item",
        "values": [
          "3"
        ]
      }
    },
    {
      "code": "Tiedot",
      "selection": {
        "filter": "item",
        "values": [
          "asuntolkm"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
}

In [49]:
session = requests.Session()

response = session.post(url, json=query)
response_json = json.loads(response.content.decode('utf-8-sig'))

response_json

{'class': 'dataset',
 'label': 'Building and dwelling production by Month, Building type, Construction stage and Information',
 'source': 'Statistics Finland, building and dwelling production',
 'updated': '2023-04-25T05:00:00Z',
 'id': ['Kuukausi', 'Käyttötarkoitus', 'Rakennusvaihe', 'Tiedot'],
 'size': [338, 1, 1, 1],
 'dimension': {'Kuukausi': {'extension': {'show': 'value'},
   'label': 'Month',
   'category': {'index': {'1995M01': 0,
     '1995M02': 1,
     '1995M03': 2,
     '1995M04': 3,
     '1995M05': 4,
     '1995M06': 5,
     '1995M07': 6,
     '1995M08': 7,
     '1995M09': 8,
     '1995M10': 9,
     '1995M11': 10,
     '1995M12': 11,
     '1996M01': 12,
     '1996M02': 13,
     '1996M03': 14,
     '1996M04': 15,
     '1996M05': 16,
     '1996M06': 17,
     '1996M07': 18,
     '1996M08': 19,
     '1996M09': 20,
     '1996M10': 21,
     '1996M11': 22,
     '1996M12': 23,
     '1997M01': 24,
     '1997M02': 25,
     '1997M03': 26,
     '1997M04': 27,
     '1997M05': 28,
     '

In [50]:
# define the column names for the dataframe
colnames = ["Month", "Residential buildings completion (No.)"]

In [51]:
# A loop to extract the JSON data required
month = []
completed_buildings = []
for x in range(0, len(response_json['value'])):
    try:
        month.append([k for k, v in response_json['dimension']['Kuukausi']['category']['index'].items() if v == x][0])
        completed_buildings.append(response_json['value'][x])
    except:
        print(f"There is a missing value at index: {x}")

In [52]:
#make a dataframe out of a tuple made out of the month and completed_buildings lists
finnish_completed_buildings = pd.DataFrame(list(zip(month,completed_buildings)), columns = colnames)

In [53]:
finnish_completed_buildings

Unnamed: 0,Month,Residential buildings completion (No.)
0,1995M01,2253
1,1995M02,2198
2,1995M03,2328
3,1995M04,1525
4,1995M05,1968
...,...,...
333,2022M10,4466
334,2022M11,3809
335,2022M12,5031
336,2023M01,3780


In [54]:
#Create a Year variable in same format as Finnish costs data
finnish_completed_buildings["Year"] = " "
for i in range(0,len(finnish_completed_buildings)):
    try:
        year=finnish_completed_buildings.Month[i][0:4]
        finnish_completed_buildings["Year"][i] = int(year)
    except:
        # An exception will occur if the year string is not numeric, for example.
        print(year)

In [55]:
finnish_completed_buildings

Unnamed: 0,Month,Residential buildings completion (No.),Year
0,1995M01,2253,1995
1,1995M02,2198,1995
2,1995M03,2328,1995
3,1995M04,1525,1995
4,1995M05,1968,1995
...,...,...,...
333,2022M10,4466,2022
334,2022M11,3809,2022
335,2022M12,5031,2022
336,2023M01,3780,2023


In [56]:
# group the data by year (and sum) as the monthly granularity is not needed
finnish_completed_buildings = finnish_completed_buildings.groupby(["Year"], as_index=False).sum()

In [57]:
finnish_completed_buildings

Unnamed: 0,Year,Residential buildings completion (No.)
0,1995,24526
1,1996,21372
2,1997,26955
3,1998,30529
4,1999,30537
5,2000,34960
6,2001,32306
7,2002,29354
8,2003,30082
9,2004,32972


### Save the raw Finnish data to csv file

In [58]:
finnish_costs.to_csv('finnish_costs.csv')
finnish_completed_buildings.to_csv('finnish_completed_buildings.csv')

## Reading in the Irish data

### Reading in the social housing completion data

In [73]:
# use glob to get all the csv files in the social housing data folder
path = os.getcwd() + "/RawData/Irish Datasets/social_housing_construction/"
csv_files = glob.glob(os.path.join(path, "*.csv"))

In [74]:
# create a dataframe of the first .csv file
social_housing_ireland = pd.read_csv(csv_files[0], encoding = 'cp1252', usecols=[2,4])
social_housing_ireland.columns = ["Local Authority", "No. of Units"]
social_housing_ireland["Quarter"] = csv_files[0][-11:-4]

# loop over th erest of the list of csv files
for f in csv_files[1:]:
    try:
    # read the csv file
        df = pd.read_csv(f, encoding = 'cp1252', usecols=[2,4])
        df.columns = ["Local Authority", "No. of Units"]
        df["Quarter"] = f[-11:-4]
    except:
        # An exception will occcur if a file has unmatched encoding, or is not .csv format, for example.
        print(f)
    
    #concatenate the temporary df to the existing social housing dataframe
    social_housing_ireland = pd.concat([social_housing_ireland, df])

In [51]:
social_housing_ireland.head()

Unnamed: 0,Local Authority,No. of Units,Quarter
0,Carlow,24.0,q1_2018
1,Carlow,16.0,q1_2018
2,Carlow,23.0,q1_2018
3,Carlow,5.0,q1_2018
4,Carlow,16.0,q1_2018


In [52]:
social_housing_ireland.tail()

Unnamed: 0,Local Authority,No. of Units,Quarter
2671,Wicklow,45.0,q3_2021
2672,Wicklow,4.0,q3_2021
2673,Wicklow,4.0,q3_2021
2674,Wicklow,23.0,q3_2021
2675,Wicklow,28.0,q3_2021


In [53]:
# reindex for the new concatenated dataframe
social_housing_ireland = social_housing_ireland.reset_index(drop=True)

In [54]:
social_housing_ireland.tail()

Unnamed: 0,Local Authority,No. of Units,Quarter
25732,Wicklow,45.0,q3_2021
25733,Wicklow,4.0,q3_2021
25734,Wicklow,4.0,q3_2021
25735,Wicklow,23.0,q3_2021
25736,Wicklow,28.0,q3_2021


In [55]:
# check for missing values
social_housing_ireland.isnull().sum()

Local Authority    6
No. of Units       6
Quarter            0
dtype: int64

In [62]:
# view the missing values
social_housing_ireland[social_housing_ireland.isnull().any(axis=1)]

Unnamed: 0,Local Authority,No. of Units,Quarter
13839,,,q4_2018
13840,,,q4_2018
13841,,,q4_2018
13842,,,q4_2018
13843,,,q4_2018
13844,,,q4_2018


In [67]:
# view the missing rows and two either side
social_housing_ireland.iloc[social_housing_ireland[social_housing_ireland.isnull().any(axis=1)].index[0]-2:max(social_housing_ireland[social_housing_ireland.isnull().any(axis=1)].index)+2,]

Unnamed: 0,Local Authority,No. of Units,Quarter
13837,Wicklow,24.0,q4_2018
13838,Wicklow,51.0,q4_2018
13839,,,q4_2018
13840,,,q4_2018
13841,,,q4_2018
13842,,,q4_2018
13843,,,q4_2018
13844,,,q4_2018
13845,Carlow,6.0,q1_2021


In [21]:
# drop missing values
social_housing_ireland = social_housing_ireland.dropna(axis=0)

In [24]:
social_housing_ireland[social_housing_ireland.isnull().any(axis=1)]

Unnamed: 0,Local Authority,No. of Units,Quarter


In [22]:
#Confirm that all the data was successfully concatenated
social_housing_ireland.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25731 entries, 0 to 2675
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Local Authority  25731 non-null  object 
 1   No. of Units     25731 non-null  float64
 2   Quarter          25731 non-null  object 
dtypes: float64(1), object(2)
memory usage: 804.1+ KB


In [23]:
#Confirm that all 14 files were appended successfully (there were 14 files, i.e. 14 quarters)
social_housing_ireland.Quarter.nunique()

14

In [25]:
social_housing_ireland_grouped_by_quarter = social_housing_ireland.groupby(['Quarter', 'Local Authority'],as_index=False).sum()

In [26]:
social_housing_ireland_grouped_by_quarter.head()

Unnamed: 0,Quarter,Local Authority,No. of Units
0,q1_2018,Carlow,374.0
1,q1_2018,Cavan,55.0
2,q1_2018,Clare,212.0
3,q1_2018,Cork City,873.0
4,q1_2018,Cork County,765.0


In [27]:
social_housing_ireland_grouped_by_quarter.Quarter.unique()

array(['q1_2018', 'q1_2019', 'q1_2021', 'q2_2018', 'q2_2019', 'q2_2020',
       'q3_2018', 'q3_2019', 'q3_2020', 'q3_2021', 'q4-2021', 'q4_2018',
       'q4_2019', 'q4_2020'], dtype=object)

In [28]:
#Create a Quarter variable in same format as completion data dataframe
social_housing_ireland_grouped_by_quarter["Quarter_int"] = " "
social_housing_ireland_grouped_by_quarter["Year"] = " "
for i in range(0,len(social_housing_ireland_grouped_by_quarter)):
    try:
        year=social_housing_ireland_grouped_by_quarter.Quarter[i][3:7]
        quarter_int=social_housing_ireland_grouped_by_quarter.Quarter[i][1]
        social_housing_ireland_grouped_by_quarter["Year"][i] = int(year)
        social_housing_ireland_grouped_by_quarter["Quarter_int"][i] = int(quarter_int)
    except:
        #determine where the issue is if the try fails
        print(year)
        print(quarter)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  social_housing_ireland_grouped_by_quarter["Year"][i] = int(year)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  social_housing_ireland_grouped_by_quarter["Quarter_int"][i] = int(quarter_int)


In [29]:
social_housing_ireland_grouped_by_quarter

Unnamed: 0,Quarter,Local Authority,No. of Units,Quarter_int,Year
0,q1_2018,Carlow,374.0,1,2018
1,q1_2018,Cavan,55.0,1,2018
2,q1_2018,Clare,212.0,1,2018
3,q1_2018,Cork City,873.0,1,2018
4,q1_2018,Cork County,765.0,1,2018
...,...,...,...,...,...
432,q4_2020,Tipperary,654.0,4,2020
433,q4_2020,Waterford,845.0,4,2020
434,q4_2020,Westmeath,677.0,4,2020
435,q4_2020,Wexford,1070.0,4,2020


In [30]:
#sort by year, then quarter
social_housing_ireland_grouped_by_quarter = social_housing_ireland_grouped_by_quarter.sort_values(by=["Year", "Quarter_int"])

In [31]:
social_housing_ireland_grouped_by_quarter

Unnamed: 0,Quarter,Local Authority,No. of Units,Quarter_int,Year
0,q1_2018,Carlow,374.0,1,2018
1,q1_2018,Cavan,55.0,1,2018
2,q1_2018,Clare,212.0,1,2018
3,q1_2018,Cork City,873.0,1,2018
4,q1_2018,Cork County,765.0,1,2018
...,...,...,...,...,...
339,q4-2021,Tipperary,811.0,4,2021
340,q4-2021,Waterford,947.0,4,2021
341,q4-2021,Westmeath,981.0,4,2021
342,q4-2021,Wexford,1234.0,4,2021


In [32]:
# Once sorted the Quarter_int is no longer needed
social_housing_ireland_grouped_by_quarter.pop("Quarter_int")

0      1
1      1
2      1
3      1
4      1
      ..
339    4
340    4
341    4
342    4
343    4
Name: Quarter_int, Length: 437, dtype: object

### Reading in the Irish house construction cost index data

In [70]:
path = os.getcwd() + "/RawData/Irish Datasets/"
irish_costs = pd.read_csv(path + "HSM09 — House Construction Cost Index.csv")

In [71]:
irish_costs.head()

Unnamed: 0,STATISTIC,STATISTIC Label,TLIST(M1),Month,C02196V02652,State,UNIT,VALUE
0,HSM09,House Construction Cost Index,1975M01,1975M01,-,State,Base Jan 1991=100,18.3
1,HSM09,House Construction Cost Index,1975M02,1975M02,-,State,Base Jan 1991=100,18.7
2,HSM09,House Construction Cost Index,1975M03,1975M03,-,State,Base Jan 1991=100,18.8
3,HSM09,House Construction Cost Index,1975M04,1975M04,-,State,Base Jan 1991=100,19.6
4,HSM09,House Construction Cost Index,1975M05,1975M05,-,State,Base Jan 1991=100,19.8


In [72]:
# check for missing values
irish_costs.isnull().sum()

STATISTIC          0
STATISTIC Label    0
TLIST(M1)          0
Month              0
C02196V02652       0
State              0
UNIT               0
VALUE              0
dtype: int64

In [34]:
#Check for statistical value in the columns
print(irish_costs["STATISTIC"].nunique())
print(irish_costs["STATISTIC Label"].nunique())
print(irish_costs["TLIST(M1)"].nunique())
print(irish_costs["Month"].nunique())
print(irish_costs["C02196V02652"].nunique())
print(irish_costs["State"].nunique())
print(irish_costs["UNIT"].nunique())

1
1
513
513
1
1
1


In [35]:
# Make the VALUE variable name meaningful
irish_costs.rename(columns = {"VALUE": "House Construction Cost Index"}, inplace= True)

In [36]:
# Only Month, unit and House Construction Cost Index are meaningful in the analysis
irish_costs = irish_costs.iloc[:,[3, 6, 7]]

In [37]:
irish_costs.head()

Unnamed: 0,Month,UNIT,House Construction Cost Index
0,1975M01,Base Jan 1991=100,18.3
1,1975M02,Base Jan 1991=100,18.7
2,1975M03,Base Jan 1991=100,18.8
3,1975M04,Base Jan 1991=100,19.6
4,1975M05,Base Jan 1991=100,19.8


In [38]:
irish_costs.tail()

Unnamed: 0,Month,UNIT,House Construction Cost Index
508,2017M04,Base Jan 1991=100,210.7
509,2017M05,Base Jan 1991=100,211.1
510,2017M06,Base Jan 1991=100,211.2
511,2017M07,Base Jan 1991=100,211.4
512,2017M08,Base Jan 1991=100,211.4


In [39]:
#Make the Month variable conform to a format easily convertible to datetime object
for i in range(0,len(irish_costs.Month)):
    year=irish_costs["Month"][i][0:4]
    month=irish_costs.Month[i][5:7]
    irish_costs["Month"][i] = f"{year}{month}"
    irish_costs.Month[i] = irish_costs.Month[i].replace("O", "0") #To catch any mistyped Os that should have been 0s

In [40]:
irish_costs.head()

Unnamed: 0,Month,UNIT,House Construction Cost Index
0,197501,Base Jan 1991=100,18.3
1,197502,Base Jan 1991=100,18.7
2,197503,Base Jan 1991=100,18.8
3,197504,Base Jan 1991=100,19.6
4,197505,Base Jan 1991=100,19.8


### Checking for missing date values

In [41]:
# Create a dataframe of sequential dates that should match those in the irish_costs dataframe
start_dt = date(1975, 1, 1)
end_dt = date(2017, 8, 1)

dates = []

while start_dt <= end_dt:
    # add current date to list by converting  it to iso format
    dates.append(start_dt.isoformat())
    # increment start date by timedelta
    start_dt += relativedelta(months=1)
    
dates_df = pd.DataFrame(dates, columns = ["date"])

#print(dates)

In [42]:
# extract the months from the irish_costs df, to compare with the theoretical
test_months = irish_costs["Month"]

In [43]:
# the lengths of the theoretical and actual date lists should match
len(test_months)

513

In [44]:
# The irish_costs dataframe has one too many dates represented
len(dates)

512

In [45]:
# Find the non-equal value
for i in range(0,len(dates)):
    data_year = irish_costs["Month"][i][0:4]
    test_year = dates_df['date'][i][0:4]
    data_month = irish_costs["Month"][i][4:7]
    test_month = dates_df['date'][i][5:7]
    if data_year == test_year and data_month == test_month:
        pass
    elif data_year != test_year and data_month != test_month:
        # print the index and the years and months that don't match
        print(i)
        print(data_year, test_year, data_month, test_month)

396
2007 2008 04 01
408
2008 2009 12 01
420
2009 2010 12 01
432
2010 2011 12 01
444
2011 2012 12 01
456
2012 2013 12 01
468
2013 2014 12 01
480
2014 2015 12 01
492
2015 2016 12 01
504
2016 2017 12 01


In [46]:
# index 396, 200704, is represented twice
irish_costs.iloc[380:410,]

Unnamed: 0,Month,UNIT,House Construction Cost Index
380,200609,Base Jan 1991=100,194.6
381,200610,Base Jan 1991=100,197.7
382,200611,Base Jan 1991=100,198.0
383,200612,Base Jan 1991=100,198.1
384,200701,Base Jan 1991=100,198.8
385,200702,Base Jan 1991=100,198.8
386,200703,Base Jan 1991=100,199.4
387,200704,Base Jan 1991=100,200.0
388,200705,Base Jan 1991=100,200.1
389,200706,Base Jan 1991=100,200.3


In [47]:
# drop the duplicate value
irish_costs.drop(index=irish_costs.index[396], inplace = True)

In [48]:
irish_costs.iloc[380:410,]

Unnamed: 0,Month,UNIT,House Construction Cost Index
380,200609,Base Jan 1991=100,194.6
381,200610,Base Jan 1991=100,197.7
382,200611,Base Jan 1991=100,198.0
383,200612,Base Jan 1991=100,198.1
384,200701,Base Jan 1991=100,198.8
385,200702,Base Jan 1991=100,198.8
386,200703,Base Jan 1991=100,199.4
387,200704,Base Jan 1991=100,200.0
388,200705,Base Jan 1991=100,200.1
389,200706,Base Jan 1991=100,200.3


In [49]:
#reindex after dropping the duplicate value
irish_costs = irish_costs.reset_index(drop=True)

In [50]:
irish_costs.iloc[380:410,]

Unnamed: 0,Month,UNIT,House Construction Cost Index
380,200609,Base Jan 1991=100,194.6
381,200610,Base Jan 1991=100,197.7
382,200611,Base Jan 1991=100,198.0
383,200612,Base Jan 1991=100,198.1
384,200701,Base Jan 1991=100,198.8
385,200702,Base Jan 1991=100,198.8
386,200703,Base Jan 1991=100,199.4
387,200704,Base Jan 1991=100,200.0
388,200705,Base Jan 1991=100,200.1
389,200706,Base Jan 1991=100,200.3


In [51]:
irish_costs.head()

Unnamed: 0,Month,UNIT,House Construction Cost Index
0,197501,Base Jan 1991=100,18.3
1,197502,Base Jan 1991=100,18.7
2,197503,Base Jan 1991=100,18.8
3,197504,Base Jan 1991=100,19.6
4,197505,Base Jan 1991=100,19.8


In [52]:
# Check again to make sure all dates match
for i in range(0,len(dates)):
    data_year = irish_costs["Month"][i][0:4]
    test_year = dates_df['date'][i][0:4]
    data_month = irish_costs["Month"][i][4:7]
    test_month = dates_df['date'][i][5:7]
    if data_year == test_year and data_month == test_month:
        pass
    elif data_year != test_year and data_month != test_month:
        print(i)
        print(data_year, test_year, data_month, test_month)

### Save the cleaned Irish data to file

In [53]:
irish_costs.to_csv('irish_costs.csv')
social_housing_ireland_grouped_by_quarter.to_csv('irish_completed_social_housing.csv')

## References

DATA.GOV.IE (2023). DATA.GOV.IE. url: https : / / data . gov . ie / dataset ? q = Social +
Housing+Construction+Status%5C&tags=construction%5C&sort=score+desc%2C+
metadata_created+desc (visited on 05/10/2023).

HSM09 - House Construction Cost Index (2023). DATA.GOV.IE. url: https://data.gov.ie/
dataset/hsm09-house-construction-cost-index (visited on 05/06/2023).

12fy – Building and dwelling production, 1995M01-2023M02 (2023). Tilastokeskus. url: https://pxdata.stat.fi/PxWeb/pxweb/en/StatFin/StatFin__ras/statfin_ras_pxt_12fy.px/ (visited on 05/10/2023).

11na – Building cost index by type of cost, annual data, 1990-2022 (2023). Tilastokeskus. url:
https://pxdata.stat.fi/PxWeb/pxweb/en/StatFin/StatFin__rki/statfin_rki_pxt_11na.px/ (visited on 05/10/2023).

PxWeb API (2023). stat.fi. url: https://pxdata.stat.fi/api1.html (visited on 05/10/2023).