# Association between Fertility & Local Municipal Characteristics in Denmark 

### Imports

In [151]:
import geopandas as gpd # for geospatial data handling
import pandas as pd
import contextily as cx # for plotting
import matplotlib.pyplot as plt # for plotting
from pyproj import CRS # for more advanced CRS modifications and transformations
import numpy as np

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=FutureWarning) 

### Data

Load the 2021 tables from Statistics Denmark (Danmarks Statistik) and the geometries of the Danish municipalities, then merge them into a single GeoDataFrame.

'Data/Educational_attainment_by_highest_education_completed_(total)_2021.xlsx'


In [152]:
# Read the four 2021 municipality-level Excel tables; the names on the left
# are unpacked in the same order as the paths listed below.
data_edu, data_fer, data_bir, data_mar = (
    pd.read_excel(excel_path)
    for excel_path in (
        'Data/Educational_attainment_by_highest_education_completed_(total)_2021.xlsx',
        'Data/fertility_rates_by_municipality_2021.xlsx',
        'Data/live_births_by_municipality_2021.xlsx',
        'Data/market_value_for_real_estate_average_dkk_2021.xlsx',
    )
)

In [153]:
# Municipality geometries; later cells use its 'navn', 'kommunekode' and
# 'municipal_id' columns together with its geometry column.
geometries = gpd.read_file('Data/municipalities_dk.gpkg')
# geometries.head()  # uncomment to preview the first rows

In [154]:
# Give every table the same name for its first column ('municipality_nm') and
# a descriptive name for its 2021 value column, in preparation for the merge.
data_edu = data_edu.rename(columns={'Unnamed: 0': 'municipality_nm', 2021: 'edu_att'})
data_fer = data_fer.rename(columns={'Unnamed: 0': 'municipality_nm', 2021: 'fer_rate'})
data_bir = data_bir.rename(columns={'Unnamed: 0': 'municipality_nm', 2021: 'live_birth'})
data_mar = data_mar.rename(columns={'Unnamed: 0': 'municipality_nm', 2021: 'market_val'})

In [155]:
# Combine the four tables into one frame: fertility + births on one side,
# education + market value on the other, then the two halves together.
# No key is passed, so every merge joins on the column names both frames share.
data = data_fer.merge(data_bir).merge(data_edu.merge(data_mar))
# data.head()  # uncomment to preview the merged table

In [156]:
# Names of the four value columns assigned in the rename step.
variables = [
    'edu_att',
    'fer_rate',
    'live_birth',
    'market_val',
]

Deal with missing values and data types.

In [None]:
# Swap the '..' placeholder (missing value) for 0 everywhere in the table.
data = data.replace('..', 0)
# Clean 'fer_rate' in one chain: strip the spaces inside the number strings,
# cast to float, then set every non-finite result (inf, -inf, NaN) to 0.
# Only 'fer_rate' is converted here; the other value columns are left as read.
data['fer_rate'] = (
    data['fer_rate']
    .str.replace(' ', '')
    .astype(float)
    .replace([np.inf, -np.inf, np.nan], 0)
)


    municipality_nm  fer_rate live_birth  edu_att market_val
0        Copenhagen    1563.9     10 285  501 212  2 399 736
1     Frederiksberg    1787.8      1 534   74 788  3 435 510
2            Dragør    2510.9        146    9 074  4 014 690
3            Tårnby    2056.7        473   28 727  2 823 345
4       Albertslund    1608.7        274   18 664  2 931 561
..              ...       ...        ...      ...        ...
93            Morsø    2080.7        180   13 103  1 321 820
94           Rebild    2400.5        380   20 166  1 997 252
95          Thisted    2063.8        410   28 630  1 422 277
96  Vesthimmerlands    1824.4        314   24 005  1 495 512
97          Aalborg    1604.3      2 488  158 831  2 042 461

[98 rows x 5 columns]


In [None]:
# data = data.replace('\.+', np.nan, regex=True)

In [None]:
# data['fer_rate'] = data['fer_rate'].astype(str).str.replace(' ', 0)
# data['fer_rate'] = data['fer_rate'].fillna(0).astype(int)

In [None]:
# Use the Danish spelling of Copenhagen in the statistics table
# (presumably the spelling used in the geometry file -- confirm).
data['municipality_nm'] = data['municipality_nm'].replace({'Copenhagen': 'København'})
# Rename the geometry table's 'navn' column to the key name used in `data`.
geometries = geometries.rename({'navn': 'municipality_nm'}, axis='columns')

In [None]:
# Add the 'kommunekode' and 'municipal_id' columns from the geometry table.
# No key is passed, so the merge joins on the column names both frames share.
data = data.merge(geometries[['municipality_nm', 'kommunekode', 'municipal_id']])

In [None]:
# Build the GeoDataFrame by joining every row to its own municipality geometry.
# Passing `geometries.geometry` directly would be aligned on the row index, and
# the index of the merged `data` is unrelated to the row order of `geometries`,
# so a municipality could end up with another municipality's geometry.
join_keys = ['municipality_nm', 'kommunekode', 'municipal_id']
geometry_col = geometries.geometry.name
# Keep one geometry per key so the left join cannot multiply the rows of `data`.
shapes = geometries[join_keys + [geometry_col]].drop_duplicates(subset=join_keys)
gdf = gpd.GeoDataFrame(
    data.merge(shapes, on=join_keys, how='left'),
    geometry=geometry_col,
    crs='EPSG:25832',
)

In [None]:
# Write the GeoDataFrame to 'Data/gdf.geojson' using the GeoJSON driver.
gdf.to_file('Data/gdf.geojson', driver='GeoJSON')