# Calculate working age population growth of regions in Finland

Data source: Statistics Finland

Table: 12f8 -- 10. All data groups by municipality, 2010-2021

Link: https://pxdata.stat.fi/PxWeb/pxweb/en/Postinumeroalueittainen_avoin_tieto/Postinumeroalueittainen_avoin_tieto__uusin/paavo_pxt_12f8.px/

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build the base DataFrame: read the population density index file and keep
# only the region identifier columns (code plus English and Finnish names)
region_columns = ['Region code', 'Region name (en)', 'Region name (fi)']
base_df = pd.read_csv('../data/Indices/population_density_index.csv').loc[:, region_columns].copy()

base_df

Unnamed: 0,Region code,Region name (en),Region name (fi)
0,MK01,Uusimaa,Uusimaa
1,MK02,Southwest Finland,Varsinais-Suomi
2,MK04,Satakunta,Satakunta
3,MK05,Kanta-Häme,Kanta-Häme
4,MK06,Pirkanmaa,Pirkanmaa
5,MK07,Päijät-Häme,Päijät-Häme
6,MK08,Kymenlaakso,Kymenlaakso
7,MK09,South Karelia,Etelä-Karjala
8,MK10,South Savo,Etelä-Savo
9,MK11,North Savo,Pohjois-Savo


In [4]:
# Load the per-region demographic table: one row per (Region, Information)
# pair and one column per year.
# NOTE: the file name says 2011-2021, but the displayed columns span 2010-2021.
demographic_path = '../data/region_city_data/region_info_2011_2021.csv'
demographic_df = pd.read_csv(demographic_path)
demographic_df

Unnamed: 0,Region,Information,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,MK01 Uusimaa,"Inhabitants, total (HE)",1532309.0,1549058.0,1566835.0,1585473.0,1603388.0,1620261.0,1638293.0,1655624.0,1671024.0,1689725.0,1702678.0,1714741.0
1,MK01 Uusimaa,Males (HE),741391.0,750126.0,759593.0,769181.0,778493.0,787495.0,797682.0,806972.0,815181.0,825227.0,832256.0,839306.0
2,MK01 Uusimaa,Females (HE),790918.0,798932.0,807242.0,816292.0,824895.0,832766.0,840611.0,848652.0,855843.0,864498.0,870422.0,875435.0
3,MK01 Uusimaa,Average age of inhabitants (HE),39.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,41.0,41.0,41.0,41.0
4,MK01 Uusimaa,0-2 years (HE),55759.0,55952.0,55756.0,55300.0,55183.0,54830.0,53901.0,52411.0,50959.0,49553.0,48816.0,49980.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1971,MK21 Åland,Unemployed (PT),446.0,422.0,546.0,583.0,652.0,623.0,552.0,573.0,548.0,574.0,1404.0,797.0
1972,MK21 Åland,Children aged 0 to 14 (PT),4582.0,4645.0,4665.0,4658.0,4696.0,4691.0,4779.0,4842.0,4953.0,4942.0,4974.0,4985.0
1973,MK21 Åland,Students (PT),1666.0,1567.0,1543.0,1567.0,1473.0,1457.0,1436.0,1339.0,1322.0,1559.0,1694.0,1642.0
1974,MK21 Åland,Pensioners (PT),5988.0,6142.0,6224.0,6357.0,6463.0,6586.0,6686.0,6873.0,6930.0,7063.0,7104.0,7338.0


In [24]:
# Choose rows that show the population of working age (16 to 64 years old)

# Age-band rows are labelled "xx-yy years (HE)"; the two groups capture the
# lower and upper age of the band
pattern = r'^(\d{2})-(\d{2}) years \(HE\)$'

# Parse both bounds once, vectorised. Rows whose label does not match the
# pattern get NaN in both columns.
age_bounds = demographic_df['Information'].str.extract(pattern).astype(float)
band_start, band_end = age_bounds[0], age_bounds[1]

# Keep the bands lying entirely inside 16-64. Comparisons against NaN are
# False, so the non-matching rows are dropped by the same mask.
is_working_age = (band_start >= 16) & (band_end <= 64) & (band_start <= band_end)

# Sanity check: the selected bands must tile 16-64 with no gap or overlap,
# otherwise summing them per region would not give the working-age population.
selected_bands = age_bounds[is_working_age].drop_duplicates().sort_values(0)
band_starts = selected_bands[0].to_numpy()
band_ends = selected_bands[1].to_numpy()
assert len(selected_bands) > 0, 'no working-age bands found'
assert band_starts[0] == 16 and band_ends[-1] == 64, 'working-age bands do not span 16-64'
assert (band_starts[1:] == band_ends[:-1] + 1).all(), 'working-age bands have a gap or overlap'

# .copy() detaches the selection from demographic_df so that later cells can
# add columns to it without raising SettingWithCopyWarning.
working_population = demographic_df[is_working_age].copy()

working_population


Unnamed: 0,Region,Information,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
8,MK01 Uusimaa,16-17 years (HE),37284.0,36777.0,35623.0,34837.0,34086.0,33970.0,34650.0,34641.0,34646.0,35325.0,36553.0,36964.0
9,MK01 Uusimaa,18-19 years (HE),38006.0,38377.0,38153.0,37694.0,36515.0,35643.0,35041.0,35066.0,35606.0,35491.0,35307.0,36170.0
10,MK01 Uusimaa,20-24 years (HE),98150.0,100485.0,103332.0,104314.0,105051.0,104363.0,103280.0,101404.0,99076.0,97640.0,95659.0,94309.0
11,MK01 Uusimaa,25-29 years (HE),113459.0,113827.0,113300.0,114269.0,115893.0,118351.0,121965.0,125526.0,126901.0,128407.0,127804.0,125776.0
12,MK01 Uusimaa,30-34 years (HE),116553.0,117545.0,119561.0,121897.0,123341.0,124580.0,125189.0,125209.0,126772.0,129666.0,132694.0,135675.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1886,MK21 Åland,40-44 years (HE),2019.0,2001.0,1910.0,1866.0,1890.0,1903.0,1886.0,1863.0,1856.0,1841.0,1891.0,1899.0
1887,MK21 Åland,45-49 years (HE),1985.0,2049.0,2123.0,2180.0,2147.0,2074.0,2078.0,1977.0,1956.0,1949.0,1962.0,1972.0
1888,MK21 Åland,50-54 years (HE),1941.0,1956.0,1949.0,1943.0,1964.0,2026.0,2087.0,2160.0,2212.0,2168.0,2097.0,2084.0
1889,MK21 Åland,55-59 years (HE),2065.0,2069.0,2022.0,1958.0,1931.0,1947.0,1931.0,1957.0,1970.0,1990.0,2061.0,2120.0


In [25]:
# Collapse the age-band rows into one working-age total per region for the two
# comparison years.
# The region code is the first four characters of the 'Region' label.
region_codes = working_population['Region'].str[:4]

# .assign returns a new frame instead of writing a column into the filtered
# selection, which avoids SettingWithCopyWarning.
working_population = (
    working_population
    .assign(**{'Region code': region_codes})
    .loc[:, ['Region code', '2017', '2021']]
    .groupby('Region code')
    .sum()
    .reset_index()
)
working_population

Unnamed: 0,Region code,2017,2021
0,MK01,1078359.0,1108121.0
1,MK02,293018.0,292518.0
2,MK04,128027.0,121402.0
3,MK05,102201.0,98493.0
4,MK06,317608.0,324044.0
5,MK07,122107.0,117280.0
6,MK08,98782.0,91892.0
7,MK09,76809.0,72748.0
8,MK10,79210.0,71654.0
9,MK11,151157.0,144995.0


In [27]:
# Add the 2017 -> 2021 change of the working-age population, both as a head
# count and as a percentage of the 2017 level
absolute_growth_col = 'Working-age population Absolute Growth 2017-2021'
relative_growth_col = 'Working-age population Relative Growth 2017-2021 (%)'

population_2017 = working_population['2017']
working_population[absolute_growth_col] = working_population['2021'] - population_2017
working_population[relative_growth_col] = (
    working_population[absolute_growth_col] / population_2017 * 100
)
working_population

Unnamed: 0,Region code,2017,2021,Working-age population Absolute Growth 2017-2021,Working-age population Relative Growth 2017-2021 (%)
0,MK01,1078359.0,1108121.0,29762.0,2.759934
1,MK02,293018.0,292518.0,-500.0,-0.170638
2,MK04,128027.0,121402.0,-6625.0,-5.17469
3,MK05,102201.0,98493.0,-3708.0,-3.628145
4,MK06,317608.0,324044.0,6436.0,2.026397
5,MK07,122107.0,117280.0,-4827.0,-3.95309
6,MK08,98782.0,91892.0,-6890.0,-6.974955
7,MK09,76809.0,72748.0,-4061.0,-5.287141
8,MK10,79210.0,71654.0,-7556.0,-9.5392
9,MK11,151157.0,144995.0,-6162.0,-4.076556


In [28]:
# Remove the raw yearly totals so only the region code and the two growth
# columns remain. The result of drop() is reassigned rather than using
# inplace=True, so the frame displayed by earlier cells is not mutated.
working_population = working_population.drop(columns=['2017', '2021'])
working_population

Unnamed: 0,Region code,Working-age population Absolute Growth 2017-2021,Working-age population Relative Growth 2017-2021 (%)
0,MK01,29762.0,2.759934
1,MK02,-500.0,-0.170638
2,MK04,-6625.0,-5.17469
3,MK05,-3708.0,-3.628145
4,MK06,6436.0,2.026397
5,MK07,-4827.0,-3.95309
6,MK08,-6890.0,-6.974955
7,MK09,-4061.0,-5.287141
8,MK10,-7556.0,-9.5392
9,MK11,-6162.0,-4.076556


In [29]:
# Attach the growth figures to the region names; the inner join keeps only
# the region codes present in both frames
df = pd.merge(
    left=base_df,
    right=working_population,
    how='inner',
    on='Region code',
)
df

Unnamed: 0,Region code,Region name (en),Region name (fi),Working-age population Absolute Growth 2017-2021,Working-age population Relative Growth 2017-2021 (%)
0,MK01,Uusimaa,Uusimaa,29762.0,2.759934
1,MK02,Southwest Finland,Varsinais-Suomi,-500.0,-0.170638
2,MK04,Satakunta,Satakunta,-6625.0,-5.17469
3,MK05,Kanta-Häme,Kanta-Häme,-3708.0,-3.628145
4,MK06,Pirkanmaa,Pirkanmaa,6436.0,2.026397
5,MK07,Päijät-Häme,Päijät-Häme,-4827.0,-3.95309
6,MK08,Kymenlaakso,Kymenlaakso,-6890.0,-6.974955
7,MK09,South Karelia,Etelä-Karjala,-4061.0,-5.287141
8,MK10,South Savo,Etelä-Savo,-7556.0,-9.5392
9,MK11,North Savo,Pohjois-Savo,-6162.0,-4.076556


In [30]:
# Write the merged index table to disk; index=False leaves the row index out
output_path = '../data/Indices/working_age_population_index.csv'
df.to_csv(output_path, index=False)