In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pyproj

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later releases dropped the old names. Prefer the new name when this
# installation provides it and fall back to the legacy one otherwise, so the
# notebook runs on both old and new Matplotlib versions.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

In [15]:
# Locations of the boundary shapefile and of the processed indicator table,
# both relative to the notebook's directory.
shapefile_path = os.path.join("..", "geo", "shapefiles", "VG250_KRS3", "VG250_KRS.shp")
table_path = os.path.join("..", "data", "processed", "data.csv")

# Read the geometries and reproject them straight away to EPSG:4326.
df_geo = gpd.read_file(shapefile_path).to_crs(pyproj.CRS.from_epsg(4326))

# The first CSV column is used as the row index.
df_raw = pd.read_csv(table_path, index_col=0)

In [16]:
# Preview the first rows of the raw table to check which columns were loaded.
df_raw.head()

Unnamed: 0,kreis_key,kreis_name,income_pp_2000,income_pp_2001,income_pp_2002,income_pp_2003,income_pp_2004,income_pp_2005,income_pp_2006,income_pp_2007,...,population_2017,population_2018,population_2019,crimes_pp_2013,crimes_pp_2014,crimes_pp_2015,crimes_pp_2016,crimes_pp_2017,crimes_pp_2018,crimes_pp_2019
0,9361,Amberg,16120,16451,16433,16878,17318,17724,18326,18841,...,42298,42109,42089,0.072358,0.073934,0.071011,0.068852,0.067781,0.068346,0.060919
1,9561,Ansbach,17507,18162,17736,17849,18038,18022,17955,18110,...,41592,41750,41823,0.069715,0.074966,0.077122,0.079669,0.077635,0.074635,0.076298
2,9661,Aschaffenburg,17228,17972,17975,18703,19305,19802,20393,20802,...,69558,70228,70765,0.089326,0.083801,0.087085,0.079957,0.075448,0.073005,0.07573
3,9761,Augsburg,15590,16010,15969,16279,16471,16432,16547,16754,...,291218,293993,295859,0.081192,0.083163,0.09043,0.079436,0.071764,0.072131,0.072345
4,8211,Baden-Baden,25133,25229,23987,23945,23923,25003,26262,27180,...,54513,54921,55154,0.075229,0.07979,0.086138,0.079563,0.07833,0.075927,0.105396


In [26]:
# Keep only the district key and the two 2018 indicators that get mapped.
# Take an explicit copy: later cells modify `df`, and modifying a bare column
# selection of `df_raw` raises pandas' SettingWithCopyWarning.
df = df_raw[["kreis_key", "income_pp_2018", "crimes_pp_2018"]].copy()

In [27]:
# Remove fully duplicated rows, then rows with any missing value.
# Reassigning the result (instead of inplace=True) avoids mutating a frame
# that was derived from a slice of df_raw and keeps the step chainable.
df = df.drop_duplicates().dropna()

In [28]:
# Normalise column types on a fresh frame rather than setting columns through
# attribute access, which is warning-prone when `df` stems from a slice:
#   * kreis_key      -> zero-padded 5-character string (the key is later
#                       matched against the "RS" column of the geometries)
#   * income_pp_2018 -> numeric
df = df.assign(
    kreis_key=df["kreis_key"].astype(int).astype(str).str.zfill(5),
    income_pp_2018=pd.to_numeric(df["income_pp_2018"]),
)

In [29]:
# Check dtypes and non-null counts after cleaning.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 401 entries, 0 to 472
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   kreis_key       401 non-null    object 
 1   income_pp_2018  401 non-null    int64  
 2   crimes_pp_2018  401 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 12.5+ KB


In [31]:
# Attach the indicator columns to the geometries. A left join keeps every
# geometry row, including those without a matching key in `df`.
df_comb = df_geo.merge(
    right=df,
    how="left",
    left_on="RS",
    right_on="kreis_key",
)

In [32]:
# Default figure size (width, height in inches) for all following figures.
plt.rcParams.update({"figure.figsize": [16, 11]})

In [33]:
# Choropleth map of the 2018 income indicator, written to ../output as PNG.
fig, ax = plt.subplots()

df_comb.plot(
    ax=ax,
    column="income_pp_2018",
    alpha=0.9,
    legend=True,
)

ax.set(
    title='Verfügbares Einkommen der privaten Haushalte in Euro je Einwohner (2018)',
    aspect=1.3,
    facecolor='lightblue',
)

# Data-source credit, centred along the bottom edge of the figure.
fig.text(0.5, 0.02, "Datenquelle: Regionaldatenbank Deutschland, 2021", ha="center", fontsize=10)

# Create the output folder on demand so saving does not fail with
# FileNotFoundError when the directory is missing (e.g. on a fresh checkout).
income_png = os.path.join("..", "output", "income_pp_2018.png")
os.makedirs(os.path.dirname(income_png), exist_ok=True)
fig.savefig(income_png)

# Close the figure once it is on disk so it is not kept open in memory.
plt.close(fig)


In [34]:
# Choropleth map of the 2018 crime indicator, written to ../output as PNG.
fig, ax = plt.subplots()

df_comb.plot(
    ax=ax,
    column="crimes_pp_2018",
    alpha=0.9,
    legend=True,
)

ax.set(
    title='Erfasste Straftaten je Einwohner (2018)',
    aspect=1.3,
    facecolor='lightblue',
)

# Data-source credit, centred along the bottom edge of the figure.
fig.text(0.5, 0.02, "Datenquelle: BKA, 2019", ha="center", fontsize=10)

# Create the output folder on demand so saving does not fail with
# FileNotFoundError when the directory is missing (e.g. on a fresh checkout).
crimes_png = os.path.join("..", "output", "crimes_pp_2018.png")
os.makedirs(os.path.dirname(crimes_png), exist_ok=True)
fig.savefig(crimes_png)

# Close the figure once it is on disk so it is not kept open in memory.
plt.close(fig)