# Association between Fertility & Local Municipal Characteristics in Denmark 

### Imports

In [79]:
import geopandas as gpd # for geospatial data handling
import pandas as pd
import contextily as cx # for plotting
import matplotlib.pyplot as plt # for plotting
from pyproj import CRS # for more advanced CRS modifications and transformations
import numpy as np

import warnings
# Silence deprecation-style chatter from the geo stack so cell output stays readable.
for _category in (DeprecationWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_category)

### Data

Load the data from Statistics Denmark (Danmarks Statistik) and the geometries of the Danish municipalities, then merge them into one GeoDataFrame.

'Data/Educational_attainment_by_highest_education_completed_(total)_2021.xlsx'


In [70]:
# Read the four 2021 workbooks (education, fertility, live births, real-estate
# market value) in one pass; the order of the names matches the order of the paths.
data_edu, data_fer, data_bir, data_mar = (
    pd.read_excel(workbook)
    for workbook in (
        'Data/Educational_attainment_by_highest_education_completed_(total)_2021.xlsx',
        'Data/fertility_rates_by_municipality_2021.xlsx',
        'Data/live_births_by_municipality_2021.xlsx',
        'Data/market_value_for_real_estate_average_dkk_2021.xlsx',
    )
)

In [71]:
# Municipality layer from the GeoPackage; it is joined to the statistics further down.
geometries = gpd.read_file(filename='Data/municipalities_dk.gpkg')
# geometries.head()  # uncomment for a quick look at the loaded layer

In [72]:
# Give every table the same key column name and a descriptive name for its
# 2021 value column, so the tables can be merged on the municipality name.
for _frame, _value_column in (
    (data_edu, 'edu_att'),
    (data_fer, 'fer_rate'),
    (data_bir, 'live_birth'),
    (data_mar, 'market_val'),
):
    _frame.rename(
        columns={'Unnamed: 0': 'municipality_nm', 2021: _value_column},
        inplace=True,
    )

In [73]:
# Combine the four tables pairwise, then join the two halves; each merge uses
# pandas' default behaviour of joining on the columns the two frames share.
fertility_and_births = data_fer.merge(data_bir)
education_and_market = data_edu.merge(data_mar)
data = fertility_and_births.merge(education_and_market)
# data.head()  # uncomment to inspect the combined table

Deal with missing values and types.

In [80]:
# Turn cells that consist only of dots (presumably the source's placeholder
# for unavailable figures -- confirm against the raw workbooks) into NaN.
# The pattern is anchored because, with a non-string replacement, pandas
# replaces the whole cell whenever the regex matches anywhere in it; an
# unanchored pattern would also blank legitimate strings containing a dot,
# such as '1.8'. The raw string avoids the invalid '\.' escape sequence.
data = data.replace(r'^\s*\.+\s*$', np.nan, regex=True)

In [83]:
# data['fer_rate'] = data['fer_rate'].astype(str).str.replace(r'\D', '')

In [75]:
# Align the join key on both sides: the statistics use the English name for
# the capital, and the geometry layer calls its name column 'navn'.
data['municipality_nm'] = data['municipality_nm'].replace({'Copenhagen': 'København'})
geometries = geometries.rename({'navn': 'municipality_nm'}, axis='columns')

In [76]:
# Attach each municipality's codes AND its geometry by name in the merge.
# Passing geometries.geometry to the GeoDataFrame constructor afterwards
# aligns it on the row index; the merged frame gets a fresh index in the
# statistics' row order, so shapes would be assigned by position rather than
# by municipality (and drift further whenever the merge drops rows).
_geometry_column = geometries.geometry.name
data = pd.merge(
    data,
    geometries[['municipality_nm', 'kommunekode', 'municipal_id', _geometry_column]],
    on='municipality_nm',
)

# The geometry column now travels with its own row; the CRS is kept as before.
gdf = gpd.GeoDataFrame(data, geometry=_geometry_column, crs='EPSG:25832')

In [78]:
# Persist the merged statistics + geometries so later steps can reload them.
gdf.to_file(filename='Data/gdf.geojson', driver='GeoJSON')