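# Germany

Aggregates the raw results in `deu.csv` to municipality-level turnout, joins them to the `VG250_GEM` municipality shapes, and exports the result to `data/processed/deu`.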

In [1]:
import re
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole
# notebook.
pd.options.mode.chained_assignment = None

In [460]:
# Force the administrative key columns to be read as text.
dtypes = dict.fromkeys(
    [
        'Land',
        'Regierungsbezirk',
        'Kreis',
        'Verbandsgemeinde',
        'Gemeinde',
        'Kennziffer Briefwahlzugehörigkeit',
    ],
    str,
)

# Tab-separated, UTF-16LE encoded file; the first four lines are skipped.
df = pd.read_csv(
    '../../data/raw/deu.csv',
    sep='\t',
    encoding='utf-16le',
    skiprows=4,
    dtype=dtypes,
    low_memory=False,
)

In [461]:
# Keep only the first 16 columns of the results table.
df = df.iloc[:, :16]

In [462]:
# Metadata table: same file layout (tab-separated, UTF-16LE, four skipped
# lines) and the same string dtypes for the key columns as the results table.
meta = pd.read_csv(
    '../../data/raw/deu_meta.csv',
    sep='\t',
    encoding='utf-16le',
    skiprows=4,
    dtype=dtypes,
)

In [463]:
# (rows, columns) of the results table and of the metadata table.
tuple(frame.shape for frame in (df, meta))

((86223, 16), (13621, 9))

In [464]:
# Columns used as the join key between `df` and `meta`.
id_cols = [
    'Land',
    'Regierungsbezirk',
    'Kreis',
    'Verbandsgemeinde',
    'Gemeinde',
    'Kennziffer Briefwahlzugehörigkeit',
]

In [465]:
# Inner join of results and metadata on the shared key columns; rows without
# a counterpart on either side are dropped.
df = pd.merge(df, meta, how='inner', on=id_cols)

In [466]:
# Municipality key: Land (padded to 2) + Regierungsbezirk (as is)
# + Kreis (padded to 2) + Verbandsgemeinde (padded to 4)
# + Gemeinde (padded to 3), concatenated without a separator.
df['id'] = df['Land'].str.zfill(2).str.cat([
    df['Regierungsbezirk'],
    df['Kreis'].str.zfill(2),
    df['Verbandsgemeinde'].str.zfill(4),
    df['Gemeinde'].str.zfill(3),
])

In [498]:
# Load the VG250_GEM shapefile into a GeoDataFrame.
geo = gpd.read_file(filename='../../data/shapefiles/deu/VG250_GEM.shp')

In [468]:
# https://en.wikipedia.org/wiki/Stepenitztal
# Union the Börzow, Mallentin and Papenhusen polygons (per GF value) and
# relabel the dissolved row as Stepenitztal with its own RS key.
merge_1 = (
    geo[geo.GEN.isin(['Börzow', 'Mallentin', 'Papenhusen'])]
    .dissolve(by='GF')
    .reset_index()
    .assign(GEN='Stepenitztal', RS='130745453093')
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_1], sort=True)
# Drop the three source rows; the dissolved row carries a different GEN.
geo = geo[~geo.GEN.isin(['Börzow', 'Mallentin', 'Papenhusen'])]

In [469]:
# https://de.wikipedia.org/wiki/Ganzlin
# Union the Ganzlin, Buchberg and Wendisch Priborn polygons (per GF value)
# and label the dissolved row Ganzlin with the given RS key.
merge_2 = (
    geo[geo.GEN.isin(['Ganzlin', 'Buchberg', 'Wendisch Priborn'])]
    .dissolve(by='GF')
    .reset_index()
    .assign(GEN='Ganzlin', RS='130765663166')
)

# Remove the source rows before adding the dissolved one: it is also named
# 'Ganzlin' and would otherwise be filtered out again.
geo = geo[~geo.GEN.isin(['Ganzlin', 'Buchberg', 'Wendisch Priborn'])]
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_2], sort=True)

In [470]:
# https://de.wikipedia.org/wiki/K%C3%B6lzin
# Union the Gützkow and Kölzin polygons per GF value. Rows are put in
# descending index order first; dissolve keeps the non-geometry attributes of
# the first row in each group (geopandas default aggfunc).
merge_3 = (
    geo[geo.GEN.isin(['Gützkow', 'Kölzin'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_3], sort=True)
# NOTE(review): only 'Kölzin' rows are dropped, so the pre-merge 'Gützkow' row
# is not removed and may coexist with the dissolved row -- confirm intended.
geo = geo[geo.GEN != 'Kölzin']

In [471]:
# https://de.wikipedia.org/wiki/Heinrichsruh_(Torgelow)
# Union the Heinrichsruh, Torgelow and Torgelow-Holländerei polygons per GF
# value. Rows are put in descending index order first; dissolve keeps the
# non-geometry attributes of the first row in each group.
merge_4 = (
    geo[geo.GEN.isin(['Heinrichsruh', 'Torgelow', 'Torgelow-Holländerei'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_4], sort=True)
# NOTE(review): the pre-merge 'Torgelow' row is not removed and may coexist
# with the dissolved row -- confirm intended.
geo = geo[~geo.GEN.isin(['Heinrichsruh', 'Torgelow-Holländerei'])]

In [472]:
# https://de.wikipedia.org/wiki/Friedland_(Mecklenburg)#Eingemeindungen
# Union the three polygons selected by RS key, per GF value. Rows are put in
# descending index order first; dissolve keeps the non-geometry attributes of
# the first row in each group.
merge_5 = (
    geo[geo.RS.isin(['130715152040', '130715152031', '130715152035'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_5], sort=True)
# NOTE(review): rows are selected by RS but dropped by GEN, so any other row
# named 'Glienke' or 'Eichhorst' is dropped as well, and the third selected
# row is not removed -- confirm intended.
geo = geo[~geo.GEN.isin(['Glienke', 'Eichhorst'])]

In [473]:
# https://de.wikipedia.org/wiki/Roggentin_(Mirow)
# Union the two polygons selected by RS key, per GF value. Rows are put in
# descending index order first; dissolve keeps the non-geometry attributes of
# the first row in each group.
merge_6 = (
    geo[geo.RS.isin(['130715155126', '130715155099'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_6], sort=True)
# NOTE(review): only RS '130715155126' is dropped; the pre-merge
# '130715155099' row is not removed and may coexist with the dissolved row --
# confirm intended.
geo = geo[geo.RS != '130715155126']

In [474]:
# https://en.wikipedia.org/wiki/Helpt
# Union the Woldegk and Helpt polygons per GF value. Rows are put in
# descending index order first; dissolve keeps the non-geometry attributes of
# the first row in each group.
merge_7 = (
    geo[geo.GEN.isin(['Woldegk', 'Helpt'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_7], sort=True)
# NOTE(review): the pre-merge 'Woldegk' row is not removed and may coexist
# with the dissolved row -- confirm intended.
geo = geo[geo.GEN != 'Helpt']

In [475]:
# https://en.wikipedia.org/wiki/Severin,_Germany
# Union the Severin and Domsühl polygons per GF value; dissolve keeps the
# non-geometry attributes of the first row in each group.
merge_8 = (
    geo[geo.GEN.isin(['Severin', 'Domsühl'])]
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_8], sort=True)
# NOTE(review): the pre-merge 'Domsühl' row is not removed and may coexist
# with the dissolved row -- confirm intended.
geo = geo[geo.GEN != 'Severin']

In [476]:
# https://de.wikipedia.org/wiki/Lutheran
# Union the Lutheran and Lübz polygons per GF value; dissolve keeps the
# non-geometry attributes of the first row in each group.
merge_9 = (
    geo[geo.GEN.isin(['Lutheran', 'Lübz'])]
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_9], sort=True)
# NOTE(review): the pre-merge 'Lübz' row is not removed and may coexist with
# the dissolved row -- confirm intended.
geo = geo[geo.GEN != 'Lutheran']

In [477]:
# https://de.wikipedia.org/wiki/Langhagen
# Union the Langhagen and Lalendorf polygons per GF value; dissolve keeps the
# non-geometry attributes of the first row in each group.
merge_10 = (
    geo[geo.GEN.isin(['Langhagen', 'Lalendorf'])]
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_10], sort=True)
# NOTE(review): the pre-merge 'Lalendorf' row is not removed and may coexist
# with the dissolved row -- confirm intended.
geo = geo[geo.GEN != 'Langhagen']

In [478]:
# https://en.wikipedia.org/wiki/K%C3%B6rchow
# Union the Körchow, Wittenburg and Lehsen polygons per GF value. Rows are
# put in descending index order first; dissolve keeps the non-geometry
# attributes of the first row in each group.
merge_11 = (
    geo[geo.GEN.isin(['Körchow', 'Wittenburg', 'Lehsen'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_11], sort=True)
# NOTE(review): the pre-merge 'Wittenburg' row is not removed and may coexist
# with the dissolved row -- confirm intended.
geo = geo[~geo.GEN.isin(['Körchow', 'Lehsen'])]

In [479]:
# https://en.wikipedia.org/wiki/Vitense
# Union the Vitense, Rehna and Nesow polygons per GF value. Rows are put in
# descending index order first; dissolve keeps the non-geometry attributes of
# the first row in each group.
merge_12 = (
    geo[geo.GEN.isin(['Vitense', 'Rehna', 'Nesow'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_12], sort=True)
# NOTE(review): the pre-merge 'Rehna' row is not removed and may coexist with
# the dissolved row -- confirm intended.
geo = geo[~geo.GEN.isin(['Vitense', 'Nesow'])]

In [480]:
# https://en.wikipedia.org/wiki/Cammin_(Burg_Stargard)
# Union the two polygons selected by RS key, per GF value; dissolve keeps the
# non-geometry attributes of the first row in each group.
merge_13 = (
    geo[geo.RS.isin(['130715161024', '130715161021'])]
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_13], sort=True)
# NOTE(review): only RS '130715161024' is dropped; the pre-merge
# '130715161021' row is not removed and may coexist with the dissolved row --
# confirm intended.
geo = geo[geo.RS != '130715161024']

In [481]:
# https://en.wikipedia.org/wiki/Damm,_Parchim
# Union the Damm and Parchim polygons per GF value. Rows are put in
# descending index order first; dissolve keeps the non-geometry attributes of
# the first row in each group.
merge_14 = (
    geo[geo.GEN.isin(['Damm', 'Parchim'])]
    .sort_index(ascending=False)
    .dissolve(by='GF')
    .reset_index()
)

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
geo = pd.concat([geo, merge_14], sort=True)
# NOTE(review): the pre-merge 'Parchim' row is not removed and may coexist
# with the dissolved row -- confirm intended.
geo = geo[geo.GEN != 'Damm']

In [482]:
# Aggregate the results to one row per municipality id.
mun = df.groupby('id')
eligible = mun['Wahlberechtigte (A)'].sum()
voters = mun['Wähler (B)'].sum()

# Name of the first row seen for each id, looked up in the order of the
# aggregated index.
first_name_by_id = df.drop_duplicates(subset=['id']).set_index('id')['Name']
names = eligible.index.map(first_name_by_id)

# Turnout is expressed in percent.
data = pd.DataFrame({'eligible': eligible, 'voters': voters, 'name': names}).assign(
    turnout=lambda totals: totals['voters'] / totals['eligible'] * 100
)

In [483]:
# Rewrite three names exactly (whole-value matches only).
data['name'] = (
    data['name']
    .replace('Golßen, Stadt', 'Golßen')
    .replace('Oldenbüttel einschl. Gemeinde Tackesdorf', 'Tackesdorf')
    .replace('Hennstedt einschl. Gemeinde Wiedenborstel', 'Wiedenborstel')
)

# Drop rows whose name contains 'Briefwahl '. The pattern is a literal, so
# regex matching is switched off; na=False makes a missing name count as
# "no match" instead of yielding NaN, which the `~` inversion cannot handle.
data = data[~data['name'].str.contains('Briefwahl ', regex=False, na=False)]

In [484]:
# Special case for Berlin, from website
# The figures are hard-coded, including turnout (not derived from the two
# counts); the index value is the key later matched against the shapefile RS.
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
data = pd.concat(
    [
        data,
        pd.DataFrame(
            {
                'name': ['Berlin'],
                'eligible': 2519758,
                'voters': 1177832,
                'turnout': 46.7,
            },
            index=['110000000000'],
        ),
    ],
    sort=True,
)

In [485]:
# Left join: every shape is kept; turnout columns are attached where the
# shape's RS equals an index value of `data`.
geo = geo.merge(
    data,
    how='left',
    left_on='RS',
    right_index=True,
)

In [486]:
# Municipality names whose turnout is assigned by name match.
merge_by_name = [
    'Drahnsdorf',
    'Schlepzig',
    'Kasel-Golzig',
    'Bersteland',
    'Unterspreewald',
    'Schönwald',
    'Krausnick-Groß Wasserburg',
    'Wiedenborstel',
    'Steinreich',
    'Rietzneuendorf-Staakow',
    'Tackesdorf',
    'Golßen',
]

In [487]:
# For each listed name, take the first turnout value recorded under that name
# in `data` and write it to every shape whose GEN equals the name.
for mbn in merge_by_name:
    # Named variable instead of `_`, which IPython uses for the last cell
    # output; columns are read with [] rather than attribute access.
    # .iloc[0] raises IndexError when `data` has no row with this name.
    turnout_for_name = data.loc[data['name'] == mbn, 'turnout'].iloc[0]
    geo.loc[geo['GEN'] == mbn, 'turnout'] = turnout_for_name

In [488]:
# Overall shape, then the shape of the subset that still has no turnout and
# whose BEZ is not 'Gemeindefreies Gebiet'.
(
    geo.shape,
    geo[geo['turnout'].isna() & geo['BEZ'].ne('Gemeindefreies Gebiet')].shape,
)

((11477, 28), (0, 28))

## Export

In [497]:
# Keep only the exported columns and tag every row with the country code.
# .assign builds a new frame instead of writing a column into a slice of
# `geo`, which is the pattern the chained-assignment warning exists for.
geo = geo[['name', 'turnout', 'geometry']].assign(country='deu')
geo.to_file('../../data/processed/deu')