## Experiment with the short version of the query

In [13]:
import pandas as pd
import requests
import json

# Define the API URL
api_url = "https://pxdata.stat.fi:443/PxWeb/api/v1/en/Postinumeroalueittainen_avoin_tieto/uusin/paavo_pxt_12f8.px"

# Load the saved query file; the request payload is stored under 'queryObj'.
# The file is read as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open('sq-api_table_paavo_pxt_12f8_short.px.json', encoding='utf-8') as json_file:
    json_query = json.load(json_file)

# Send a POST request to the API with the JSON query.
# The timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.post(api_url, json=json_query['queryObj'], timeout=30)

# Stop here on failure: merely printing a message would leave `data_short`
# undefined (or stale from an earlier run) and the cells below would then
# fail, or silently use old data.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve data. Status code: {response.status_code}")

# Parse the JSON response
data_short = response.json()


In [36]:
# Inspect the parsed API response (a cell displays its last expression)
data_short

{'class': 'dataset',
 'label': '10. All data groups by Area, Information and Year',
 'source': 'Statistics Finland',
 'updated': '2023-01-25T06:00:00Z',
 'id': ['Alue', 'Tiedot', 'Vuosi'],
 'size': [2, 3, 2],
 'dimension': {'Alue': {'extension': {'show': 'value'},
   'label': 'Area',
   'category': {'index': {'KU049': 0, 'KU091': 1},
    'label': {'KU049': 'Espoo', 'KU091': 'Helsinki'}},
   'link': {'describedby': [{'extension': {'Alue': 'SCALE-TYPE=nominal'}}]}},
  'Tiedot': {'extension': {'show': 'value'},
   'label': 'Information',
   'category': {'index': {'he_vakiy': 0, 'he_miehet': 1, 'he_naiset': 2},
    'label': {'he_vakiy': 'Inhabitants, total (HE)',
     'he_miehet': 'Males (HE)',
     'he_naiset': 'Females (HE)'},
    'unit': {'he_vakiy': {'base': 'number', 'decimals': 0},
     'he_miehet': {'base': 'number', 'decimals': 0},
     'he_naiset': {'base': 'number', 'decimals': 0}}}},
  'Vuosi': {'extension': {'show': 'value'},
   'label': 'Year',
   'category': {'index': {'2020'

In [37]:
import pandas as pd

# Work on the short API response fetched above
json_data = data_short

# Pull the label views of the three dimensions in one pass:
# 'Alue' (Area), 'Tiedot' (Information) and 'Vuosi' (Year)
json_region, json_info, json_year = (
    json_data['dimension'][dim_id]['category']['label'].values()
    for dim_id in ('Alue', 'Tiedot', 'Vuosi')
)

In [38]:
# Create a nested index: one (region, information) pair per row,
# with the region varying slowest
tuples = [(alue, tiedot) for alue in json_region for tiedot in json_info]

index = pd.MultiIndex.from_tuples(tuples, names=["Region", "Information"])


In [54]:
import numpy as np

# Create a NumPy array from the flat value list
value_array = np.array(json_data["value"])

# Split the flat array into one chunk per (region, information) row.
# The row count is taken from the first two entries of the response's 'size'
# list instead of a hard-coded 6, so the cell keeps working when the query
# selects a different number of regions or information items.
split_array = np.split(value_array, json_data['size'][0] * json_data['size'][1])

In [55]:
# Inspect the chunks produced by np.split (one array per index row)
split_array

[array([292796, 297132]),
 array([145892, 147968]),
 array([146904, 149164]),
 array([656920, 658457]),
 array([312196, 313429]),
 array([344724, 345028])]

In [39]:
# Inspect the (Region, Information) MultiIndex built above
index

MultiIndex([(   'Espoo', 'Inhabitants, total (HE)'),
            (   'Espoo',              'Males (HE)'),
            (   'Espoo',            'Females (HE)'),
            ('Helsinki', 'Inhabitants, total (HE)'),
            ('Helsinki',              'Males (HE)'),
            ('Helsinki',            'Females (HE)')],
           names=['Region', 'Information'])

In [56]:

# Assemble the table: rows follow the nested index, one column per year label
df = pd.DataFrame(split_array, index=index, columns=[*json_year])

# Display the DataFrame
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2020,2021
Region,Information,Unnamed: 2_level_1,Unnamed: 3_level_1
Espoo,"Inhabitants, total (HE)",292796,297132
Espoo,Males (HE),145892,147968
Espoo,Females (HE),146904,149164
Helsinki,"Inhabitants, total (HE)",656920,658457
Helsinki,Males (HE),312196,313429
Helsinki,Females (HE),344724,345028


## Convert region_info.json to a DataFrame

In [57]:
# Load the JSON data file.
# The encoding is given explicitly: the labels contain non-ASCII characters
# (e.g. "Åland"), which a non-UTF-8 platform default would mis-decode.
with open('region_info.json', encoding='utf-8') as json_file:
    json_data = json.load(json_file)

In [60]:
import pandas as pd
import numpy as np

# Label views for the region, information and year dimensions
json_region, json_info, json_year = (
    json_data['dimension'][dim_id]['category']['label'].values()
    for dim_id in ('Alue', 'Tiedot', 'Vuosi')
)

# Create a nested index: one (region, information) pair per row
tuples = [(alue, tiedot) for alue in json_region for tiedot in json_info]
index = pd.MultiIndex.from_tuples(tuples, names=["Region", "Information"])

# Create a NumPy array from the value list
value_array = np.array(json_data["value"])

# Split the flat array into one chunk per index row; the row count is the
# product of the first two entries of the data's 'size' list
row_count = json_data['size'][0] * json_data['size'][1]
split_array = np.split(value_array, row_count)

In [65]:

# Assemble the table: rows follow the nested index, one column per year label
df = pd.DataFrame(split_array, index=index, columns=[*json_year])

# Display the DataFrame
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Region,Information,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
MK01 Uusimaa,"Inhabitants, total (HE)",1532309.0,1549058.0,1566835.0,1585473.0,1603388.0,1620261.0,1638293.0,1655624.0,1671024.0,1689725.0,1702678.0,1714741.0
MK01 Uusimaa,Males (HE),741391.0,750126.0,759593.0,769181.0,778493.0,787495.0,797682.0,806972.0,815181.0,825227.0,832256.0,839306.0
MK01 Uusimaa,Females (HE),790918.0,798932.0,807242.0,816292.0,824895.0,832766.0,840611.0,848652.0,855843.0,864498.0,870422.0,875435.0
MK01 Uusimaa,Average age of inhabitants (HE),39.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,41.0,41.0,41.0,41.0
MK01 Uusimaa,0-2 years (HE),55759.0,55952.0,55756.0,55300.0,55183.0,54830.0,53901.0,52411.0,50959.0,49553.0,48816.0,49980.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
MK21 Åland,Unemployed (PT),446.0,422.0,546.0,583.0,652.0,623.0,552.0,573.0,548.0,574.0,1404.0,797.0
MK21 Åland,Children aged 0 to 14 (PT),4582.0,4645.0,4665.0,4658.0,4696.0,4691.0,4779.0,4842.0,4953.0,4942.0,4974.0,4985.0
MK21 Åland,Students (PT),1666.0,1567.0,1543.0,1567.0,1473.0,1457.0,1436.0,1339.0,1322.0,1559.0,1694.0,1642.0
MK21 Åland,Pensioners (PT),5988.0,6142.0,6224.0,6357.0,6463.0,6586.0,6686.0,6873.0,6930.0,7063.0,7104.0,7338.0


## Query information by city and save to CSV

In [87]:
import pandas as pd
import requests
import json

# Define the API URL
api_url = "https://pxdata.stat.fi:443/PxWeb/api/v1/en/Postinumeroalueittainen_avoin_tieto/uusin/paavo_pxt_12f8.px"

# Load the saved query file; the request payload is stored under 'queryObj'.
# The file is read as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open('sq-api_table_paavo_pxt_12f8_city_2010_2012.px.json', encoding='utf-8') as json_file:
    json_query = json.load(json_file)

# Send a POST request to the API with the JSON query.
# The timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.post(api_url, json=json_query['queryObj'], timeout=30)

# Stop here on failure: merely printing a message would leave `json_data`
# bound to whatever an earlier cell loaded, and the cells below would then
# build and save a table from the wrong data without any error.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve data. Status code: {response.status_code}")

# Parse the JSON response
json_data = response.json()


In [88]:
import pandas as pd
import numpy as np

# Label views for the region, information and year dimensions
json_region, json_info, json_year = (
    json_data['dimension'][dim_id]['category']['label'].values()
    for dim_id in ('Alue', 'Tiedot', 'Vuosi')
)

# Create a nested index: one (region, information) pair per row
tuples = [(alue, tiedot) for alue in json_region for tiedot in json_info]
index = pd.MultiIndex.from_tuples(tuples, names=["Region", "Information"])

# Create a NumPy array from the value list
value_array = np.array(json_data["value"])

# Split the flat array into one chunk per index row; the row count is the
# product of the first two entries of the response's 'size' list
row_count = json_data['size'][0] * json_data['size'][1]
split_array = np.split(value_array, row_count)

In [89]:

# Assemble the table: rows follow the nested index, one column per year label
df = pd.DataFrame(split_array, index=index, columns=[*json_year])

# Display the DataFrame
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2010,2011,2012
Region,Information,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Askola,"Inhabitants, total (HE)",4864.0,4911.0,4988.0
Askola,Males (HE),2455.0,2482.0,2505.0
Askola,Females (HE),2409.0,2429.0,2483.0
Askola,Average age of inhabitants (HE),39.0,40.0,40.0
Askola,0-2 years (HE),182.0,204.0,194.0
...,...,...,...,...
Vårdö,Unemployed (PT),5.0,3.0,4.0
Vårdö,Children aged 0 to 14 (PT),69.0,66.0,65.0
Vårdö,Students (PT),23.0,23.0,16.0
Vårdö,Pensioners (PT),137.0,141.0,134.0


In [90]:
# Write the table to a CSV file in the current working directory
df.to_csv('city_info_2010_2012.csv')