# Calculate educational attainment growth of regions in Finland

Data source: Statistics Finland

Table: 12bq -- Population aged 15 or over by level of education, municipality, gender and age, 1970-2022

Link: https://pxdata.stat.fi/PxWeb/pxweb/en/StatFin/StatFin__vkour/statfin_vkour_pxt_12bq.px/

In [73]:
import pandas as pd
import numpy as np

In [74]:
# Build the base DataFrame: one row per region, holding only the region
# identifier columns taken from the population density index file.
region_columns = ['Region code', 'Region name (en)', 'Region name (fi)']
base_df = (
    pd.read_csv('../data/Indices/population_density_index.csv')
    .loc[:, region_columns]
    .copy()
)

base_df

Unnamed: 0,Region code,Region name (en),Region name (fi)
0,MK01,Uusimaa,Uusimaa
1,MK02,Southwest Finland,Varsinais-Suomi
2,MK04,Satakunta,Satakunta
3,MK05,Kanta-Häme,Kanta-Häme
4,MK06,Pirkanmaa,Pirkanmaa
5,MK07,Päijät-Häme,Päijät-Häme
6,MK08,Kymenlaakso,Kymenlaakso
7,MK09,South Karelia,Etelä-Karjala
8,MK10,South Savo,Etelä-Savo
9,MK11,North Savo,Pohjois-Savo


In [75]:
import requests
import json

# PxWeb API endpoint of table 12bq and the JSON query sent to it.
url = "https://pxdata.stat.fi:443/PxWeb/api/v1/en/StatFin/vkour/statfin_vkour_pxt_12bq.px"
query = {
  "query": [
    {
      # Vuosi (Year): the two years that are compared
      "code": "Vuosi",
      "selection": {
        "filter": "item",
        "values": ["2018", "2022"]
      }
    },
    {
      # Alue (Area): region-level aggregates
      "code": "Alue",
      "selection": {
        "filter": "agg:_Regions 2023.agg",
        "values": [
          "MK01", "MK02", "MK04", "MK05", "MK06", "MK07", "MK08",
          "MK09", "MK10", "MK11", "MK12", "MK13", "MK14", "MK15",
          "MK16", "MK17", "MK18", "MK19", "MK21"
        ]
      }
    },
    {
      # Ikä (Age): five-year age groups from 25 upwards
      "code": "Ikä",
      "selection": {
        "filter": "item",
        "values": [
          "25-29", "30-34", "35-39", "40-44", "45-49", "50-54",
          "55-59", "60-64", "65-69", "70-74", "75-79", "80-"
        ]
      }
    },
    {
      # Sukupuoli (Gender): "SSS" is presumably the total over genders -- TODO confirm
      "code": "Sukupuoli",
      "selection": {
        "filter": "item",
        "values": ["SSS"]
      }
    },
    {
      # Koulutusaste (Level of education): total plus levels 6, 7 and 8
      # (Bachelor's, Master's, Doctoral or equivalent)
      "code": "Koulutusaste",
      "selection": {
        "filter": "item",
        "values": ["SSS", "6", "7", "8"]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
}

# Send the POST request. The timeout keeps the cell from hanging forever on a
# stalled connection.
response = requests.post(url, json=query, timeout=30)

# Fail loudly on an HTTP error: merely printing the status would leave
# `json_data` undefined (or stale from a previous run) for the cells below.
response.raise_for_status()
json_data = response.json()

json_data

{'class': 'dataset',
 'label': 'Population aged 15 or over by level of education, municipality, gender and age by Year, Area, Age, Gender, Level of education and Information',
 'source': 'Statistics Finland, educational structure of population',
 'updated': '2023-10-03T05:00:00Z',
 'id': ['Vuosi', 'Alue', 'Ikä', 'Sukupuoli', 'Koulutusaste', 'Tiedot'],
 'size': [2, 19, 12, 1, 4, 1],
 'dimension': {'Vuosi': {'extension': {'show': 'value'},
   'label': 'Year',
   'category': {'index': {'2018': 0, '2022': 1},
    'label': {'2018': '2018', '2022': '2022'}}},
  'Alue': {'extension': {'show': 'value'},
   'label': 'Area',
   'category': {'index': {'MK01': 0,
     'MK02': 1,
     'MK04': 2,
     'MK05': 3,
     'MK06': 4,
     'MK07': 5,
     'MK08': 6,
     'MK09': 7,
     'MK10': 8,
     'MK11': 9,
     'MK12': 10,
     'MK13': 11,
     'MK14': 12,
     'MK15': 13,
     'MK16': 14,
     'MK17': 15,
     'MK18': 16,
     'MK19': 17,
     'MK21': 18},
    'label': {'MK01': 'MK01 Uusimaa',
    

In [76]:
# Size of every dimension of the response, in the order listed in json_data['id']
shape = json_data['size']

# Rebuild the value cube from the flat list and drop the singleton dimensions,
# leaving (year, region, age, education)
values = np.array(json_data['value']).reshape(shape).squeeze()

# Preview the first two regions with the year axis moved next to the education
# axis, so each row reads: year-1 education levels, then year-2 education levels
np.moveaxis(values, 0, -2).reshape(19, 12, 8)[:2]

array([[[126901,  31153,  15639,    187, 124583,  30141,  17071,     92],
        [126772,  29890,  27680,   1466, 140324,  32468,  29162,   1165],
        [125614,  29043,  30176,   2720, 131283,  28681,  30841,   2535],
        [118076,  23643,  27936,   3253, 125865,  27499,  31548,   3307],
        [103968,  13620,  22388,   2725, 114834,  20888,  27682,   3379],
        [114926,  11059,  20953,   2854, 105634,  12741,  22769,   2922],
        [105742,   8842,  17711,   2530, 111888,  10411,  20390,   2883],
        [ 94018,   8875,  13820,   2168,  97857,   8212,  16581,   2469],
        [ 88451,   8073,  10506,   1932,  86664,   8423,  12675,   2143],
        [ 84602,   8013,   9574,   1772,  83396,   7838,   9958,   1828],
        [ 51139,   4455,   5200,   1118,  70049,   6818,   8242,   1592],
        [ 65954,   4478,   4673,    887,  75832,   6032,   6570,   1353]],

       [[ 31301,   7629,   2466,     36,  30952,   7441,   2997,     21],
        [ 29563,   7000,   4417,    

In [77]:
import pandas as pd
import numpy as np

# Human-readable category labels of the four dimensions used below
# (Vuosi = year, Alue = area, Ikä = age, Koulutusaste = level of education)
json_year, json_region, json_age, json_education = (
    json_data['dimension'][dimension_code]['category']['label'].values()
    for dimension_code in ('Vuosi', 'Alue', 'Ik\u00e4', 'Koulutusaste')
)

In [78]:

# Row labels: one (region, age group) pair per row, regions varying slowest
tuples = [(region, age) for region in json_region for age in json_age]
index = pd.MultiIndex.from_tuples(tuples, names=["Region", "Age Group"])

# Column labels: "<year> <education level>", years varying slowest
columns = [year + " " + education for year in json_year for education in json_education]

# Rebuild the value cube from the flat list; `shape` has one entry per
# dimension, in the order given by json_data['id']
value_array = np.array(json_data['value'])
shape = json_data['size']
dimension_ids = json_data['id']

# Order the axes as (region, age, year, education, <everything else>) by
# looking the dimensions up by id rather than by position, so the layout does
# not depend on how many years / regions / age groups the query selected.
row_dimensions = ['Alue', 'Ik\u00e4']
column_dimensions = ['Vuosi', 'Koulutusaste']
other_dimensions = [
    dim for dim in dimension_ids
    if dim not in row_dimensions + column_dimensions
]
axis_order = [
    dimension_ids.index(dim)
    for dim in row_dimensions + column_dimensions + other_dimensions
]

# Flatten to a 2-D table: rows = region x age group, columns = year x education.
# The remaining dimensions must all have size 1; otherwise this reshape raises
# ValueError instead of silently producing a misaligned table.
transformed_array = value_array.reshape(shape).transpose(axis_order)
transformed_array = transformed_array.reshape(len(index), len(columns))

# Create a DataFrame from the values with the nested index
df = pd.DataFrame(data=transformed_array, index=index, columns=columns)

# Display the DataFrame
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2018 Total,2018 6 Bachelor's or equivalent level,2018 7 Master's or equivalent level,2018 8 Doctoral or equivalent level,2022 Total,2022 6 Bachelor's or equivalent level,2022 7 Master's or equivalent level,2022 8 Doctoral or equivalent level
Region,Age Group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
MK01 Uusimaa,25 - 29,126901,31153,15639,187,124583,30141,17071,92
MK01 Uusimaa,30 - 34,126772,29890,27680,1466,140324,32468,29162,1165
MK01 Uusimaa,35 - 39,125614,29043,30176,2720,131283,28681,30841,2535
MK01 Uusimaa,40 - 44,118076,23643,27936,3253,125865,27499,31548,3307
MK01 Uusimaa,45 - 49,103968,13620,22388,2725,114834,20888,27682,3379
...,...,...,...,...,...,...,...,...,...
MK21 Åland,60 - 64,1917,150,131,12,1956,135,160,4
MK21 Åland,65 - 69,1960,172,96,16,1943,170,120,13
MK21 Åland,70 - 74,1855,130,94,12,1860,158,93,15
MK21 Åland,75 - 79,1194,95,49,8,1574,104,76,7


In [79]:
# Collapse the age groups: sum every column within each region
region_groups = df.groupby(level=['Region'])
education_attainment = region_groups.sum()
education_attainment

Unnamed: 0_level_0,2018 Total,2018 6 Bachelor's or equivalent level,2018 7 Master's or equivalent level,2018 8 Doctoral or equivalent level,2022 Total,2022 6 Bachelor's or equivalent level,2022 7 Master's or equivalent level,2022 8 Doctoral or equivalent level
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MK01 Uusimaa,1206163,181144,206256,23612,1268209,200152,233489,25668
MK02 Southwest Finland,351801,44012,36427,5106,361563,49536,41981,5548
MK04 Satakunta,164031,18119,10449,584,162051,19681,11409,597
MK05 Kanta-Häme,127293,13876,9668,752,128223,15603,10742,814
MK06 Pirkanmaa,374489,50631,43914,4621,390620,58720,51585,5169
MK07 Päijät-Häme,155497,17726,11036,709,156202,19475,12473,787
MK08 Kymenlaakso,127758,13331,8069,341,124671,14590,8683,361
MK09 South Karelia,97588,10572,7573,753,96255,11555,8286,834
MK10 South Savo,105694,10930,6872,475,103034,11706,7470,509
MK11 North Savo,185846,21628,14388,2004,186609,24247,16486,2175


In [80]:
# Move the 'Region' index into a regular column. Guarded so the cell can be
# re-run: an unconditional reset_index() would add a stray 'index' column on
# the second run and raise on the third.
if 'Region' not in education_attainment.columns:
    education_attainment = education_attainment.reset_index()

# The region label starts with the four-character region code (e.g. "MK01")
education_attainment['Region code'] = education_attainment['Region'].str[:4]

# Short degree name -> suffix of the source column holding the person counts
degree_columns = {
    "Bachelor's": "6 Bachelor's or equivalent level",
    "Master's": "7 Master's or equivalent level",
    "Doctoral": "8 Doctoral or equivalent level",
}
start_year, end_year = "2018", "2022"
period = f"{start_year}-{end_year}"

# Calculate ratio of Bachelor's, Master's and Doctoral degrees to total for each year
for year in (start_year, end_year):
    for degree, source in degree_columns.items():
        education_attainment[f"{year} {degree} Percentage"] = (
            education_attainment[f"{year} {source}"]
            / education_attainment[f"{year} Total"]
            * 100
        )

# Calculate education attainment absolute growth index.
# NOTE: the "Aboslute" spelling is kept on purpose -- the column selection in
# a later cell refers to these exact column names.
for degree, source in degree_columns.items():
    education_attainment[f"{degree} Aboslute Growth {period} (persons)"] = (
        education_attainment[f"{end_year} {source}"]
        - education_attainment[f"{start_year} {source}"]
    )

# Calculate education attainment relative growth index
# (absolute growth as a percentage of the start-year count)
for degree, source in degree_columns.items():
    education_attainment[f"{degree} Relative Growth {period} (%)"] = (
        education_attainment[f"{degree} Aboslute Growth {period} (persons)"]
        / education_attainment[f"{start_year} {source}"]
        * 100
    )

# Calculate education attainment ratio growth index
# (difference between the end-year and start-year percentages, i.e. percentage points)
for degree in degree_columns:
    education_attainment[f"{degree} Ratio Growth {period} (%)"] = (
        education_attainment[f"{end_year} {degree} Percentage"]
        - education_attainment[f"{start_year} {degree} Percentage"]
    )

education_attainment

Unnamed: 0,Region,2018 Total,2018 6 Bachelor's or equivalent level,2018 7 Master's or equivalent level,2018 8 Doctoral or equivalent level,2022 Total,2022 6 Bachelor's or equivalent level,2022 7 Master's or equivalent level,2022 8 Doctoral or equivalent level,Region code,...,2022 Doctoral Percentage,Bachelor's Aboslute Growth 2018-2022 (persons),Master's Aboslute Growth 2018-2022 (persons),Doctoral Aboslute Growth 2018-2022 (persons),Bachelor's Relative Growth 2018-2022 (%),Master's Relative Growth 2018-2022 (%),Doctoral Relative Growth 2018-2022 (%),Bachelor's Ratio Growth 2018-2022 (%),Master's Ratio Growth 2018-2022 (%),Doctoral Ratio Growth 2018-2022 (%)
0,MK01 Uusimaa,1206163,181144,206256,23612,1268209,200152,233489,25668,MK01,...,2.023957,19008,27233,2056,10.493309,13.203495,8.707437,0.764054,1.310748,0.066344
1,MK02 Southwest Finland,351801,44012,36427,5106,361563,49536,41981,5548,MK02,...,1.534449,5524,5554,442,12.551122,15.246932,8.656483,1.190035,1.256545,0.08306
2,MK04 Satakunta,164031,18119,10449,584,162051,19681,11409,597,MK04,...,0.368403,1562,960,13,8.620785,9.187482,2.226027,1.098859,0.670239,0.012372
3,MK05 Kanta-Häme,127293,13876,9668,752,128223,15603,10742,814,MK05,...,0.634832,1727,1074,62,12.44595,11.108813,8.244681,1.267809,0.782516,0.044068
4,MK06 Pirkanmaa,374489,50631,43914,4621,390620,58720,51585,5169,MK06,...,1.323281,8089,7671,548,15.976378,17.468233,11.858905,1.512489,1.47955,0.089333
5,MK07 Päijät-Häme,155497,17726,11036,709,156202,19475,12473,787,MK07,...,0.503835,1749,1437,78,9.866862,13.021022,11.00141,1.068253,0.88793,0.047877
6,MK08 Kymenlaakso,127758,13331,8069,341,124671,14590,8683,361,MK08,...,0.289562,1259,614,20,9.444153,7.609369,5.865103,1.26823,0.648884,0.022651
7,MK09 South Karelia,97588,10572,7573,753,96255,11555,8286,834,MK09,...,0.866448,983,713,81,9.298146,9.415027,10.756972,1.171272,0.848209,0.094837
8,MK10 South Savo,105694,10930,6872,475,103034,11706,7470,509,MK10,...,0.494012,776,598,34,7.099726,8.701979,7.157895,1.020125,0.748246,0.044601
9,MK11 North Savo,185846,21628,14388,2004,186609,24247,16486,2175,MK11,...,1.165539,2619,2098,171,12.109303,14.581596,8.532934,1.355886,1.092621,0.087226


In [81]:
# Keep only the region code and the nine growth indicators:
# three measures (absolute, relative, ratio) for each of the three degree levels
growth_measures = (
    "Aboslute Growth 2018-2022 (persons)",
    "Relative Growth 2018-2022 (%)",
    "Ratio Growth 2018-2022 (%)",
)
degree_levels = ("Bachelor's", "Master's", "Doctoral")

columns = ["Region code"] + [
    f"{degree} {measure}"
    for measure in growth_measures
    for degree in degree_levels
]
education_index = education_attainment.loc[:, columns].copy()
education_index

Unnamed: 0,Region code,Bachelor's Aboslute Growth 2018-2022 (persons),Master's Aboslute Growth 2018-2022 (persons),Doctoral Aboslute Growth 2018-2022 (persons),Bachelor's Relative Growth 2018-2022 (%),Master's Relative Growth 2018-2022 (%),Doctoral Relative Growth 2018-2022 (%),Bachelor's Ratio Growth 2018-2022 (%),Master's Ratio Growth 2018-2022 (%),Doctoral Ratio Growth 2018-2022 (%)
0,MK01,19008,27233,2056,10.493309,13.203495,8.707437,0.764054,1.310748,0.066344
1,MK02,5524,5554,442,12.551122,15.246932,8.656483,1.190035,1.256545,0.08306
2,MK04,1562,960,13,8.620785,9.187482,2.226027,1.098859,0.670239,0.012372
3,MK05,1727,1074,62,12.44595,11.108813,8.244681,1.267809,0.782516,0.044068
4,MK06,8089,7671,548,15.976378,17.468233,11.858905,1.512489,1.47955,0.089333
5,MK07,1749,1437,78,9.866862,13.021022,11.00141,1.068253,0.88793,0.047877
6,MK08,1259,614,20,9.444153,7.609369,5.865103,1.26823,0.648884,0.022651
7,MK09,983,713,81,9.298146,9.415027,10.756972,1.171272,0.848209,0.094837
8,MK10,776,598,34,7.099726,8.701979,7.157895,1.020125,0.748246,0.044601
9,MK11,2619,2098,171,12.109303,14.581596,8.532934,1.355886,1.092621,0.087226


In [82]:
# Attach the region names to the indicators; the inner join keeps only the
# region codes present in both frames
final_df = base_df.merge(education_index, on='Region code', how='inner')
final_df

Unnamed: 0,Region code,Region name (en),Region name (fi),Bachelor's Aboslute Growth 2018-2022 (persons),Master's Aboslute Growth 2018-2022 (persons),Doctoral Aboslute Growth 2018-2022 (persons),Bachelor's Relative Growth 2018-2022 (%),Master's Relative Growth 2018-2022 (%),Doctoral Relative Growth 2018-2022 (%),Bachelor's Ratio Growth 2018-2022 (%),Master's Ratio Growth 2018-2022 (%),Doctoral Ratio Growth 2018-2022 (%)
0,MK01,Uusimaa,Uusimaa,19008,27233,2056,10.493309,13.203495,8.707437,0.764054,1.310748,0.066344
1,MK02,Southwest Finland,Varsinais-Suomi,5524,5554,442,12.551122,15.246932,8.656483,1.190035,1.256545,0.08306
2,MK04,Satakunta,Satakunta,1562,960,13,8.620785,9.187482,2.226027,1.098859,0.670239,0.012372
3,MK05,Kanta-Häme,Kanta-Häme,1727,1074,62,12.44595,11.108813,8.244681,1.267809,0.782516,0.044068
4,MK06,Pirkanmaa,Pirkanmaa,8089,7671,548,15.976378,17.468233,11.858905,1.512489,1.47955,0.089333
5,MK07,Päijät-Häme,Päijät-Häme,1749,1437,78,9.866862,13.021022,11.00141,1.068253,0.88793,0.047877
6,MK08,Kymenlaakso,Kymenlaakso,1259,614,20,9.444153,7.609369,5.865103,1.26823,0.648884,0.022651
7,MK09,South Karelia,Etelä-Karjala,983,713,81,9.298146,9.415027,10.756972,1.171272,0.848209,0.094837
8,MK10,South Savo,Etelä-Savo,776,598,34,7.099726,8.701979,7.157895,1.020125,0.748246,0.044601
9,MK11,North Savo,Pohjois-Savo,2619,2098,171,12.109303,14.581596,8.532934,1.355886,1.092621,0.087226


In [83]:
# Write the final index table to disk without the row index
output_path = '../data/Indices/education_attainment_index.csv'
final_df.to_csv(output_path, index=False)