# Calculate population density growth of regions in Finland

Data source: Statistic Finland

Table: 12f8 -- 10. All data groups by municipality, 2010-2021

Link: https://pxdata.stat.fi/PxWeb/pxweb/en/Postinumeroalueittainen_avoin_tieto/Postinumeroalueittainen_avoin_tieto__uusin/paavo_pxt_12f8.px/

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Create the base with region column DataFrame
area_values = [f"MK{idx:02d}" for idx in np.arange(1, 22) if idx != 3 and idx != 20]

base_df = pd.DataFrame({"Region": area_values})
base_df

Unnamed: 0,Region
0,MK01
1,MK02
2,MK04
3,MK05
4,MK06
5,MK07
6,MK08
7,MK09
8,MK10
9,MK11


In [3]:
# load regional area (km2) data
# data source: https://www.statista.com/statistics/526998/total-area-of-finland-by-region/

# Given data as a list of tuples
data = [
    ("Lapland", 92683.24),
    ("North Ostrobothnia", 36830.22),
    ("Kainuu", 20198.17),
    ("North Karelia", 18793.14),
    ("North Savo", 17345.53),
    ("Central Finland", 16042.57),
    ("South Ostrobothnia", 13798.45),
    ("Pirkanmaa", 13249.22),
    ("South Savo", 12651.95),
    ("Southwest Finland", 10667.89),
    ("Uusimaa", 9102.75),
    ("Satakunta", 7822.97),
    ("Ostrobothnia", 7402.91),
    ("Päijät-Häme", 5714.65),
    ("South Karelia", 5326.37),
    ("Kanta-Häme", 5199.25),
    ("Central Ostrobothnia", 5020.44),
    ("Kymenlaakso", 4559.38),
    ("Åland", 1554.18)
]

area = pd.DataFrame(data, columns=["Region Name", "Area (km2)"])
area


Unnamed: 0,Region Name,Area (km2)
0,Lapland,92683.24
1,North Ostrobothnia,36830.22
2,Kainuu,20198.17
3,North Karelia,18793.14
4,North Savo,17345.53
5,Central Finland,16042.57
6,South Ostrobothnia,13798.45
7,Pirkanmaa,13249.22
8,South Savo,12651.95
9,Southwest Finland,10667.89


In [4]:
# create a dictionary of region english-finnish names
region_name_map = pd.read_csv('../data/region_city_data/municipality_region_map.csv')
region_name_map = region_name_map[['Name of region in English','Name of region in Finnish', 'Code of region']]
region_name_map.drop_duplicates(inplace=True)

# create column MKxx where xx is value from Code of region column
region_name_map['Code of region'] = region_name_map['Code of region'].apply(lambda x: f'MK{x:02d}')
region_name_map.sort_values(by='Code of region', inplace=True)
region_name_map.reset_index(drop=True, inplace=True)
region_name_map

Unnamed: 0,Name of region in English,Name of region in Finnish,Code of region
0,Uusimaa,Uusimaa,MK01
1,Southwest Finland,Varsinais-Suomi,MK02
2,Satakunta,Satakunta,MK04
3,Kanta-Häme,Kanta-Häme,MK05
4,Pirkanmaa,Pirkanmaa,MK06
5,Päijät-Häme,Päijät-Häme,MK07
6,Kymenlaakso,Kymenlaakso,MK08
7,South Karelia,Etelä-Karjala,MK09
8,South Savo,Etelä-Savo,MK10
9,North Savo,Pohjois-Savo,MK11


In [5]:
# merge area_df and region_name_map
area_df = pd.merge(area, region_name_map, left_on='Region Name', right_on='Name of region in English', how='left')

# merge base_df and area_df
df = pd.merge(base_df, area_df, left_on='Region', right_on='Code of region', how='left')
df = df[['Region', 'Name of region in English', 'Name of region in Finnish', 'Area (km2)']]
df.columns = ['Region code', 'Region name (en)', 'Region name (fi)', 'Area (km2)']
df

Unnamed: 0,Region code,Region name (en),Region name (fi),Area (km2)
0,MK01,Uusimaa,Uusimaa,9102.75
1,MK02,Southwest Finland,Varsinais-Suomi,10667.89
2,MK04,Satakunta,Satakunta,7822.97
3,MK05,Kanta-Häme,Kanta-Häme,5199.25
4,MK06,Pirkanmaa,Pirkanmaa,13249.22
5,MK07,Päijät-Häme,Päijät-Häme,5714.65
6,MK08,Kymenlaakso,Kymenlaakso,4559.38
7,MK09,South Karelia,Etelä-Karjala,5326.37
8,MK10,South Savo,Etelä-Savo,12651.95
9,MK11,North Savo,Pohjois-Savo,17345.53


In [19]:
# Load population data by region, from 2017 to 2021
regional_df = pd.read_csv('./../data/region_city_data/region_info_2011_2021.csv')
regional_df = regional_df[['Region', 'Information', '2017', '2018', '2019', '2020', '2021']]
regional_df = regional_df[regional_df['Information'] == 'Inhabitants, total (HE)'].copy()
regional_df.drop(columns=['Information'], inplace=True)
regional_df['Region code'] = regional_df['Region'].apply(lambda x: x[:4])
regional_df.drop(columns=['Region'], inplace=True)
regional_df = regional_df.set_index('Region code').stack()

regional_df = pd.DataFrame(regional_df)
regional_df.reset_index(inplace=True)
regional_df.columns = ['Region code', 'Year', 'Population']
regional_df

Unnamed: 0,Region code,Year,Population
0,MK01,2017,1655624.0
1,MK01,2018,1671024.0
2,MK01,2019,1689725.0
3,MK01,2020,1702678.0
4,MK01,2021,1714741.0
...,...,...,...
90,MK21,2017,29489.0
91,MK21,2018,29789.0
92,MK21,2019,29884.0
93,MK21,2020,30129.0


In [25]:
# Calculate population density
df = pd.merge(df, regional_df, on='Region code', how='left')
df['Population Density'] = df['Population'] / df['Area (km2)']

df

Unnamed: 0,Region code,Region name (en),Region name (fi),Area (km2),Year,Population,Population Density
0,MK01,Uusimaa,Uusimaa,9102.75,2017,1655624.0,181.881739
1,MK01,Uusimaa,Uusimaa,9102.75,2018,1671024.0,183.573535
2,MK01,Uusimaa,Uusimaa,9102.75,2019,1689725.0,185.627970
3,MK01,Uusimaa,Uusimaa,9102.75,2020,1702678.0,187.050946
4,MK01,Uusimaa,Uusimaa,9102.75,2021,1714741.0,188.376150
...,...,...,...,...,...,...,...
90,MK21,Åland,Ahvenanmaa,1554.18,2017,29489.0,18.973993
91,MK21,Åland,Ahvenanmaa,1554.18,2018,29789.0,19.167021
92,MK21,Åland,Ahvenanmaa,1554.18,2019,29884.0,19.228146
93,MK21,Åland,Ahvenanmaa,1554.18,2020,30129.0,19.385785


In [27]:
df['Population absolute growth (persons)'] = df.groupby('Region code')['Population'].diff()
df['Population relative growth (%)'] = df['Population absolute growth (persons)'] / df['Population'] * 100

df['Population Density absolute growth (persons/km2)'] = df.groupby('Region code')['Population Density'].diff()
df['Population Density relative growth (%)'] = df['Population Density absolute growth (persons/km2)'] / df['Population Density'] * 100

df

Unnamed: 0,Region code,Region name (en),Region name (fi),Area (km2),Year,Population,Population Density,Population absolute growth (persons),Population relative growth (%),Population Density absolute growth (persons/km2),Population Density relative growth (%)
0,MK01,Uusimaa,Uusimaa,9102.75,2017,1655624.0,181.881739,,,,
1,MK01,Uusimaa,Uusimaa,9102.75,2018,1671024.0,183.573535,15400.0,0.921591,1.691796,0.921591
2,MK01,Uusimaa,Uusimaa,9102.75,2019,1689725.0,185.627970,18701.0,1.106748,2.054434,1.106748
3,MK01,Uusimaa,Uusimaa,9102.75,2020,1702678.0,187.050946,12953.0,0.760743,1.422977,0.760743
4,MK01,Uusimaa,Uusimaa,9102.75,2021,1714741.0,188.376150,12063.0,0.703488,1.325204,0.703488
...,...,...,...,...,...,...,...,...,...,...,...
90,MK21,Åland,Ahvenanmaa,1554.18,2017,29489.0,18.973993,,,,
91,MK21,Åland,Ahvenanmaa,1554.18,2018,29789.0,19.167021,300.0,1.007083,0.193028,1.007083
92,MK21,Åland,Ahvenanmaa,1554.18,2019,29884.0,19.228146,95.0,0.317896,0.061125,0.317896
93,MK21,Åland,Ahvenanmaa,1554.18,2020,30129.0,19.385785,245.0,0.813170,0.157639,0.813170


In [7]:
df.to_csv('./../data/Indices/population_density_index.csv', index=False)