# Pulling data from the free-of-charge statistical databases of Statistics Finland (Tilastokeskus)

In [18]:
import requests
import json
import pandas as pd

### Gathering the Finnish building cost index data

In [51]:
# PxWeb API endpoint (English) of the StatFin table queried below; the response
# identifies it as Statistics Finland's building cost index by type of cost.
url = "https://pxdata.stat.fi:443/PxWeb/api/v1/en/StatFin/rki/statfin_rki_pxt_11na.px"

In [52]:
# Request body posted to the PxWeb endpoint.
# Each entry under "query" restricts one table dimension to the listed items:
#   Perusvuosi (base year) -> the 1990 = 100 series only
#   Indeksi    (index)     -> total index, labour, materials and services
#   Tiedot     (information) -> point figure and annual change
# The year dimension (Vuosi) is not restricted here, so every available year
# is returned. The result is requested in the JSON-stat 2 format.
query = {
    "query": [
        {
            "code": "Perusvuosi",
            "selection": {"filter": "item", "values": ["1990_100"]},
        },
        {
            "code": "Indeksi",
            "selection": {
                "filter": "item",
                "values": ["Kokonaisindeksi", "Työpanokset", "Tarvikepanokset", "Palvelut"],
            },
        },
        {
            "code": "Tiedot",
            "selection": {"filter": "item", "values": ["pisteluku", "vuosimuutos"]},
        },
    ],
    "response": {"format": "json-stat2"},
}

In [55]:
# Post the query and parse the JSON-stat answer.
# The session is used as a context manager so its connections are released
# once the request has completed; `session` stays bound for later cells.
with requests.Session() as session:
    # A timeout keeps the notebook from hanging forever if the API stalls.
    response = session.post(url, json=query, timeout=30)

# Surface HTTP errors (bad query, rate limiting, ...) here rather than as a
# confusing JSON decoding failure on the next line.
response.raise_for_status()

# 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if one is present.
response_json = json.loads(response.content.decode('utf-8-sig'))

response_json

{'class': 'dataset',
 'label': 'Building cost index by type of cost, annual data by Year, Base year, Index and Information',
 'source': 'Statistics Finland, building cost index',
 'updated': '2023-01-13T06:00:00Z',
 'id': ['Vuosi', 'Perusvuosi', 'Indeksi', 'Tiedot'],
 'size': [33, 1, 4, 2],
 'dimension': {'Vuosi': {'extension': {'show': 'value'},
   'label': 'Year',
   'category': {'index': {'1990': 0,
     '1991': 1,
     '1992': 2,
     '1993': 3,
     '1994': 4,
     '1995': 5,
     '1996': 6,
     '1997': 7,
     '1998': 8,
     '1999': 9,
     '2000': 10,
     '2001': 11,
     '2002': 12,
     '2003': 13,
     '2004': 14,
     '2005': 15,
     '2006': 16,
     '2007': 17,
     '2008': 18,
     '2009': 19,
     '2010': 20,
     '2011': 21,
     '2012': 22,
     '2013': 23,
     '2014': 24,
     '2015': 25,
     '2016': 26,
     '2017': 27,
     '2018': 28,
     '2019': 29,
     '2020': 30,
     '2021': 31,
     '2022': 32},
    'label': {'1990': '1990',
     '1991': '1991',
     '1

In [160]:
# English column labels for the four cost-index series, listed in the same
# order as the "Indeksi" items requested in `query`.
colnames = [
    "Total",      # Kokonaisindeksi
    "Labour",     # Työpanokset
    "Materials",  # Tarvikepanokset
    "Services",   # Palvelut
]

In [150]:
# Inspect the flat list of observations. Judging by the response's 'id'/'size'
# entries and the values shown, it runs year by year, and within a year index
# by index, each as a (point figure, annual change) pair; the annual change is
# None for the first year.
response_json['value']

[100,
 None,
 100,
 None,
 100,
 None,
 100,
 None,
 102.2,
 2.2,
 108.4,
 8.4,
 98.3,
 -1.7,
 102.5,
 2.5,
 100.4,
 -1.7,
 105.8,
 -2.4,
 96.3,
 -2,
 102.8,
 0.4,
 100.7,
 0.3,
 105,
 -0.8,
 98.6,
 2.4,
 99,
 -3.7,
 102.2,
 1.5,
 103.3,
 -1.6,
 103,
 4.5,
 97.8,
 -1.3,
 103.5,
 1.3,
 103,
 -0.2,
 105.3,
 2.2,
 99.1,
 1.4,
 102.7,
 -0.8,
 103.5,
 0.4,
 104.3,
 -0.9,
 96.3,
 -2.9,
 105.2,
 2.5,
 106.9,
 3.3,
 107.2,
 2.7,
 96.8,
 0.6,
 107.6,
 2.3,
 109.4,
 2.3,
 109.8,
 2.5,
 98.5,
 1.7,
 109.1,
 1.4,
 113.5,
 3.8,
 110.2,
 0.3,
 99,
 0.5,
 112.3,
 3,
 117.9,
 3.9,
 113.2,
 2.8,
 100.9,
 1.9,
 115.6,
 2.9,
 122.5,
 3.9,
 116.6,
 3,
 102,
 1.1,
 116.5,
 0.8,
 125.1,
 2.1,
 117.1,
 0.4,
 101.5,
 -0.5,
 118.7,
 1.9,
 127.9,
 2.2,
 119.2,
 1.8,
 103,
 1.5,
 121.6,
 2.4,
 130.3,
 1.9,
 123,
 3.2,
 104.1,
 1,
 126,
 3.7,
 134.3,
 3.1,
 128.9,
 4.8,
 105.3,
 1.2,
 130.7,
 3.7,
 137.2,
 2.2,
 134.7,
 4.5,
 109.9,
 4.4,
 138.5,
 5.9,
 142.5,
 3.8,
 144.5,
 7.3,
 116,
 5.5,
 143.9,
 3.9,
 147.7,

In [148]:
# Scratch check: distance between the first and last year in the response
# (the series itself has one more entry than this, since both ends are included).
2022-1990

32

In [140]:
# NOTE(review): same expression as the earlier `response_json['value']` cell;
# this looks like a leftover from exploration and is a candidate for removal.
response_json['value']

[100,
 None,
 100,
 None,
 100,
 None,
 100,
 None,
 102.2,
 2.2,
 108.4,
 8.4,
 98.3,
 -1.7,
 102.5,
 2.5,
 100.4,
 -1.7,
 105.8,
 -2.4,
 96.3,
 -2,
 102.8,
 0.4,
 100.7,
 0.3,
 105,
 -0.8,
 98.6,
 2.4,
 99,
 -3.7,
 102.2,
 1.5,
 103.3,
 -1.6,
 103,
 4.5,
 97.8,
 -1.3,
 103.5,
 1.3,
 103,
 -0.2,
 105.3,
 2.2,
 99.1,
 1.4,
 102.7,
 -0.8,
 103.5,
 0.4,
 104.3,
 -0.9,
 96.3,
 -2.9,
 105.2,
 2.5,
 106.9,
 3.3,
 107.2,
 2.7,
 96.8,
 0.6,
 107.6,
 2.3,
 109.4,
 2.3,
 109.8,
 2.5,
 98.5,
 1.7,
 109.1,
 1.4,
 113.5,
 3.8,
 110.2,
 0.3,
 99,
 0.5,
 112.3,
 3,
 117.9,
 3.9,
 113.2,
 2.8,
 100.9,
 1.9,
 115.6,
 2.9,
 122.5,
 3.9,
 116.6,
 3,
 102,
 1.1,
 116.5,
 0.8,
 125.1,
 2.1,
 117.1,
 0.4,
 101.5,
 -0.5,
 118.7,
 1.9,
 127.9,
 2.2,
 119.2,
 1.8,
 103,
 1.5,
 121.6,
 2.4,
 130.3,
 1.9,
 123,
 3.2,
 104.1,
 1,
 126,
 3.7,
 134.3,
 3.1,
 128.9,
 4.8,
 105.3,
 1.2,
 130.7,
 3.7,
 137.2,
 2.2,
 134.7,
 4.5,
 109.9,
 4.4,
 138.5,
 5.9,
 142.5,
 3.8,
 144.5,
 7.3,
 116,
 5.5,
 143.9,
 3.9,
 147.7,

In [169]:
# Unpack the flat JSON-stat `value` list into one list per cost index.
#
# The values are laid out row-major in the dimension order given by
# response_json['id'] (Vuosi, Perusvuosi, Indeksi, Tiedot). One year therefore
# occupies `values_per_year` consecutive entries, and within a year each index
# occupies `n_info` entries with the point figure first.
values = response_json['value']
n_years, n_base, n_index, n_info = response_json['size']
values_per_year = n_base * n_index * n_info

# Fail loudly instead of silently misaligning the columns if the response no
# longer has the shape this cell was written for (one base year, four indices).
if n_base != 1 or n_index != 4 or len(values) != n_years * values_per_year:
    raise ValueError(
        "unexpected response shape: size={}, {} values".format(
            response_json['size'], len(values)
        )
    )

# Point figures only (offset 0 inside each index's slot); the index positions
# 0..3 are assumed to follow the order of the "Indeksi" items in `query`.
total = values[0 * n_info::values_per_year]
labour = values[1 * n_info::values_per_year]
materials = values[2 * n_info::values_per_year]
services = values[3 * n_info::values_per_year]

# Read the year labels from the response rather than assuming the series
# starts in 1990. JSON-stat allows the category index to be either a
# {label: position} mapping or a plain list of labels.
year_positions = response_json['dimension']['Vuosi']['category']['index']
if isinstance(year_positions, dict):
    year_labels = sorted(year_positions, key=year_positions.get)
else:
    year_labels = list(year_positions)
year = [int(label) for label in year_labels]

In [170]:
# Assemble the per-index lists into a table indexed by year.
# `colnames` is passed as-is: wrapping it in another list (columns=[colnames])
# makes pandas build a one-level MultiIndex, so the labels become tuples such
# as ('Total',) and finnish_costs['Total'] returns a DataFrame, not a Series.
finnish_costs = pd.DataFrame(
    list(zip(total, labour, materials, services)),
    index=year,
    columns=colnames,
)

In [171]:
# Show the assembled table: one row per year, one column per cost index.
finnish_costs

Unnamed: 0,Total,Labour,Materials,Services
1990,100.0,100.0,100.0,100.0
1991,102.2,108.4,98.3,102.5
1992,100.4,105.8,96.3,102.8
1993,100.7,105.0,98.6,99.0
1994,102.2,103.3,103.0,97.8
1995,103.5,103.0,105.3,99.1
1996,102.7,103.5,104.3,96.3
1997,105.2,106.9,107.2,96.8
1998,107.6,109.4,109.8,98.5
1999,109.1,113.5,110.2,99.0


## References

https://pandas.pydata.org/docs/reference/api/pandas.read_json.html