In [None]:
import pandas as pd

# Explore Population Density and Sample Danish Municipalities

In [None]:
# Data source: https://www.statbank.dk/statbank5a/default.asp?w=1440
# Selection: all municipalities (kommuner) and population density for 2022.

# The municipality code is the first 3 digits of each area identifier
# (the part of `area_name` before the '-').
# To identify a municipality, find other towns that share the same code,
# then use the map above to find which municipality it is.

In [None]:
# Load the raw population/density export. The file is read as latin-1,
# presumably because it contains Danish characters -- confirm if the export changes.
df = pd.read_csv(
    filepath_or_buffer='../resources/denmark_pop_and_density.csv',
    encoding='latin-1',
)

In [None]:
# Split each area name at its FIRST '-' only: the text before it becomes the
# municipality code and everything after it the specific area name.
# n=1 matters: without it every hyphen is a split point, and keeping only the
# first two pieces truncates any area name that itself contains a hyphen.
df[['municipality_code', 'specific_area_name']] = (
    df['area_name'].str.split('-', n=1, expand=True)
)

In [None]:
# Aggregate to municipality level: total population per municipality code, and
# density as total population divided by total area.
# Rows whose density is undefined (NaN) are dropped and the result is ordered
# from the densest municipality to the sparsest.
df_density = (
    df.groupby('municipality_code')
    .agg(population=('Population', 'sum'), total_area=('Area (km2)', 'sum'))
    .reset_index()
    .assign(population_density=lambda totals: totals['population'] / totals['total_area'])
    .drop(columns='total_area')
    .dropna()
    .sort_values('population_density', ascending=False)
)

# Quick visual check of how density relates to population size.
df_density.plot.scatter(x='population', y='population_density', figsize=(5, 5))

In [None]:
# Print the number of distinct municipality codes (as a 1-tuple shape), then
# show summary statistics of the density column as the cell output.
print(pd.unique(df_density['municipality_code']).shape)
df_density.population_density.describe()

In [None]:
# Median population density across municipalities.
df_density.population_density.median()

In [None]:
# Bin municipalities into five quantile groups by population density and store
# the group label in a new column. qcut assigns labels in ascending bin order,
# so label 5 marks the least dense fifth and label 1 the densest.
df_density['population_density_quantile'] = pd.qcut(
    df_density['population_density'],
    q=5,
    labels=list(range(5, 0, -1)),
)

In [None]:
# Draw 3 municipality codes at random from every density quantile (fixed seed,
# so the draw is reproducible) and collect them in one flat list.
# observed=True restricts the grouping to categories that actually occur. This
# pins the behaviour across pandas versions (newer releases warn when a
# categorical group key is used without an explicit `observed`) and keeps
# empty categories away from .sample(3), which cannot draw from an empty group.
municips = [
    code
    for _, quantile_rows in df_density.groupby('population_density_quantile', observed=True)
    for code in quantile_rows['municipality_code'].sample(3, random_state=42).values
]

In [None]:
# Display the municipalities from densest to sparsest (the result is not stored).
df_density.sort_values(by='population_density', ascending=False)

In [None]:
# Maps 3-digit municipality codes to municipality names.
# Every code appears exactly once: repeated keys in a dict literal silently
# overwrite earlier entries, which would hide a conflicting name.
code_mappings: dict = {
    '846': 'Mariagerfjord Kommune',
    '741': 'Samsø Kommune',
    '760': 'Ringkøbing-Skjern Kommune',
    '710': 'Favrskov Kommune',
    '480': 'Nordfyns Kommune',
    '492': 'Ærø Kommune',
    '270': 'Gribskov Kommune',
    '730': 'Randers Kommune',
    '450': 'Nyborg Kommune',
    '169': 'Høje-Taastrup Kommune',
    '621': 'Kolding Kommune',
    '250': 'Frederikssund Kommune',
    '147': 'Frederiksberg Kommune',
    '230': 'Rudersdal Kommune',
    '155': 'Dragør Kommune',
    '101': 'København Kommune',
    '825': 'Læsø Kommune',
    '550': 'Tønder Kommune',
    '665': 'Lemvig Kommune',
}
# Attach municipality names through the code lookup; codes that have no entry
# in code_mappings end up as NaN.
df_density['municipality'] = df_density['municipality_code'].map(code_mappings)

# Export only the rows that received a name, densest first.
(
    df_density.dropna(subset=['municipality'])
    .sort_values(by='population_density', ascending=False)
    .to_csv('../resources/denmark_pop_density_sample.csv', index=False)
)

In [None]:
# Order the raw area-level rows from densest to sparsest (in place), then keep
# only the rows whose reported density is not zero.
df.sort_values('Population density (km2)', ascending=False, inplace=True)
df = df.loc[df['Population density (km2)'].ne(0)]
# Disabled filter: remove rows with "Rural areas" in the index.
# NOTE(review): df is loaded without an index column, so this filter would
# presumably need to target a text column instead -- confirm before re-enabling.
#df = df[~df.index.str.contains('Rural areas')]