# Indicators

## Setup

### Drive

In [1]:
from google.colab import drive

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Switch to the project folder so the relative file paths used below resolve against it.
%cd /content/drive/MyDrive/quality-of-life

/content/drive/MyDrive/quality-of-life


### Imports

In [4]:
!pip install geopandas



In [5]:
import geopandas as gpd
import pandas as pd
import numpy as np

## Districts 2021

including
- population
- area
- density

In [6]:
# Load the district boundaries from the VG250 shapefile (path is relative to the
# current working directory).
districts = gpd.read_file('VG250_KRS.shp')

In [7]:
districts.crs

<Projected CRS: EPSG:25832>
Name: ETRS89 / UTM zone 32N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Europe between 6°E and 12°E: Austria; Belgium; Denmark - onshore and offshore; Germany - onshore and offshore; Norway including - onshore and offshore; Spain - offshore.
- bounds: (6.0, 38.76, 12.01, 84.33)
Coordinate Operation:
- name: UTM zone 32N
- method: Transverse Mercator
Datum: European Terrestrial Reference System 1989 ensemble
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [8]:
districts.shape

(430, 28)

In [9]:
districts.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 430 entries, 0 to 429
Data columns (total 28 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   OBJID     430 non-null    object  
 1   BEGINN    430 non-null    object  
 2   ADE       430 non-null    int64   
 3   GF        430 non-null    int64   
 4   BSG       430 non-null    int64   
 5   ARS       430 non-null    object  
 6   AGS       430 non-null    object  
 7   SDV_ARS   430 non-null    object  
 8   GEN       430 non-null    object  
 9   BEZ       430 non-null    object  
 10  IBZ       430 non-null    int64   
 11  BEM       430 non-null    object  
 12  NBD       430 non-null    object  
 13  SN_L      430 non-null    object  
 14  SN_R      430 non-null    object  
 15  SN_K      430 non-null    object  
 16  SN_V1     430 non-null    object  
 17  SN_V2     430 non-null    object  
 18  SN_G      430 non-null    object  
 19  FK_S3     430 non-null    object  
 20  NU

In [10]:
districts.head()

Unnamed: 0,OBJID,BEGINN,ADE,GF,BSG,ARS,AGS,SDV_ARS,GEN,BEZ,...,SN_G,FK_S3,NUTS,ARS_0,AGS_0,WSK,EWZ,KFL,DLM_ID,geometry
0,DEBKGVG200000C75,2021-12-02,4,4,1,1001,1001,10010000000,Flensburg,Kreisfreie Stadt,...,0,R,DEF01,10010000000,1001000,2008-01-01,91113,56.73,DEBKGDL20000002R,"POLYGON ((526513.753 6075133.412, 526547.941 6..."
1,DEBKGVG200000C76,2021-11-30,4,4,1,1002,1002,10020000000,Kiel,Kreisfreie Stadt,...,0,R,DEF02,10020000000,1002000,2006-01-01,246243,118.65,DEBKGDL20000E43X,"POLYGON ((575841.569 6032148.032, 575869.668 6..."
2,DEBKGVG200000C77,2021-06-24,4,4,1,1003,1003,10030000000,Lübeck,Kreisfreie Stadt,...,0,R,DEF03,10030000000,1003000,2006-02-01,216277,214.19,DEBKGDL20000DYM9,"POLYGON ((623056.151 5983746.445, 623191.574 5..."
3,DEBKGVG200000C78,2021-08-24,4,4,1,1004,1004,10040000000,Neumünster,Kreisfreie Stadt,...,0,R,DEF04,10040000000,1004000,1970-04-26,79496,71.66,DEBKGDL20000E4KX,"POLYGON ((565015.652 6000637.513, 565128.417 6..."
4,DEBKGVG200000C79,2021-11-17,4,4,1,1051,1051,10510044044,Dithmarschen,Kreis,...,0,R,DEF05,10510000000,1051000,2011-08-01,133969,1428.18,DEBKGDL20000E14G,"MULTIPOLYGON (((505053.385 6023856.559, 505143..."


In [None]:
# Quick visual check of the loaded geometries; the trailing semicolon keeps the
# Axes repr out of the cell output so only the figure is shown.
districts.plot(figsize=(12, 8));

<Axes: >

In [None]:
districts['AGS'].nunique()

In [None]:
districts['AGS'].value_counts().head(10)

In [None]:
districts['EWZ'].sum()

In [None]:
%%time

# Merge all rows that share the same AGS / GEN / BEZ into a single geometry and
# sum their EWZ values; reset_index() turns the grouping keys back into columns.
dissolve_keys = ['AGS', 'GEN', 'BEZ']
districts_dissolved = (
    districts[dissolve_keys + ['EWZ', 'geometry']]
    .dissolve(by=dissolve_keys, aggfunc='sum')
    .reset_index()
)

In [None]:
districts_dissolved['AGS'].nunique()

In [None]:
districts_dissolved['AGS'].value_counts().head(10)

In [None]:
districts_dissolved['EWZ'].sum()

In [None]:
# Replace the source column codes with the readable names used in the rest of the notebook.
column_names = {
    'AGS': 'Key',
    'GEN': 'District',
    'BEZ': 'Type',
    'EWZ': 'Population',
}
districts_dissolved = districts_dissolved.rename(columns=column_names)

In [None]:
# Reproject to a cylindrical equal-area projection before measuring polygon areas.
# NOTE: this replaces the stored geometries with their equal-area versions, which
# are also what gets merged and saved further down.
districts_dissolved['geometry'] = districts_dissolved['geometry'].to_crs({'proj': 'cea'})

# Unrounded area in km² (assumes the projected coordinates are in metres, hence / 10^6).
area_km2 = districts_dissolved['geometry'].area / 10 ** 6
districts_dissolved['Area'] = area_km2.round(0).astype(int)  # km²

# Derive the density from the unrounded area: dividing by the already-rounded
# integer area would add an avoidable rounding error, most visible for small districts.
districts_dissolved['Density'] = (districts_dissolved['Population'] / area_km2).round(0).astype(int)

In [None]:
districts_dissolved

## Ideas for other potentially relevant indicators

- income per capita
- average age
- child care
- taxes
- unemployment
- property prices
- land cover
- climate risk

### Primary income per capita 2021

In [None]:
# Read sheet '1.4' of the workbook, skipping the first four rows of the sheet.
income_per_capita = pd.read_excel(
    'vgrdl_r2b3_bs2022.xlsx',
    sheet_name='1.4',
    skiprows=4,
    # Read the regional key as text rather than as a number
    # (presumably to keep leading zeros — confirm against the sheet).
    dtype={'Regional-schlüssel': str},
)

In [None]:
income_per_capita.shape

In [None]:
income_per_capita.dtypes

In [None]:
income_per_capita

In [None]:
# Keep only the rows whose 'NUTS 3' column equals 3, and only the key, name and
# 2021 columns (the year column is labelled with the integer 2021, not a string).
is_nuts3 = income_per_capita['NUTS 3'] == 3
wanted_columns = ['Regional-schlüssel', 'Gebietseinheit', 2021]
income_per_capita = income_per_capita.loc[is_nuts3, wanted_columns].reset_index(drop=True)

In [None]:
income_per_capita

In [None]:
# Rename the three remaining columns positionally so that 'Key' and 'District'
# line up with the column names used in districts_dissolved.
income_per_capita.columns = ['Key', 'District', 'Income']

In [None]:
# Store income as 64-bit integers; the cast raises if any value is missing or
# cannot be represented as an integer.
income_per_capita['Income'] = income_per_capita['Income'].astype(np.int64)

In [None]:
# Override the keys of the two city states with five-character values
# (presumably so they match the district keys used for the merge — confirm).
city_state_keys = {'Hamburg': '02000', 'Berlin': '11000'}
for city_name, city_key in city_state_keys.items():
    income_per_capita.loc[income_per_capita['District'] == city_name, 'Key'] = city_key

In [None]:
income_per_capita

In [None]:
income_per_capita['Key'].nunique()

Note: Eisenach has been part of Wartburgkreis since mid-2021 and is no longer an independent district. The income data does not yet seem to reflect this change, though.

## Merge districts and indicators

In [None]:
# Attach the income figures to the district geometries. The left join keeps every
# district even when no income row shares its Key (Income is then missing).
# validate='many_to_one' makes the merge fail loudly if a Key occurs more than once
# in the income table, instead of silently duplicating district rows.
indicators = pd.merge(
    districts_dissolved,
    income_per_capita.drop(columns='District'),
    how='left',
    on='Key',
    validate='many_to_one',
).reset_index(drop=True)

In [None]:
indicators

## Save indicators

In [None]:
# Write the merged result to 'indicators.shp' in the current working directory.
# Districts without a matching income row (left merge above) carry a missing Income.
indicators.to_file('indicators.shp')