In [1]:
import csv
from dateparser import parse

# Read the raw CSV and keep only the rows whose 'Szenarium' column is non-empty.
# newline='' is what the csv module expects for files handed to a reader;
# without it, line breaks embedded in quoted fields may be altered.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used -- confirm the file's encoding and consider passing encoding=... here
# and when writing the output.
with open('input.csv', mode='r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    # The reader is iterated directly; materialising it with list() first is unnecessary.
    data = [x for x in csv_reader if x['Szenarium'] != '']

In [2]:
def fix_location(x):
    """Fill in and harmonise the 'Bundesland' field of one record.

    The record is modified in place and also returned.

    * If 'Bundesland' is already non-empty, it is only passed through
      ``harm_land``.
    * Otherwise 'Ort' is split at its last '/' (or, if there is none, its
      last ','): the part after the delimiter becomes 'Bundesland' (via
      ``harm_land``) and the part before it becomes the new 'Ort'.
    * Otherwise, if 'Ort' is exactly 'Berlin', 'Hamburg' or 'Bremen', that
      value is copied into 'Bundesland'.

    Args:
        x: dict with string values under the keys 'Ort' and 'Bundesland'.

    Returns:
        The same dict object ``x``.
    """
    if len(x['Bundesland']) != 0:
        x['Bundesland'] = harm_land(x['Bundesland'])
        return x
    for deli in ('/', ','):
        # maxsplit=1 splits only at the LAST delimiter. Without it, rsplit
        # behaves like split and an 'Ort' such as 'A/B/C' would keep only
        # 'A' and 'C', silently dropping the middle part.
        split_1 = x['Ort'].rsplit(deli, 1)
        if len(split_1) > 1:
            # NOTE(review): whatever follows the delimiter is taken as the
            # state, even if it is not one -- the result is not validated here.
            x['Bundesland'] = harm_land(split_1[-1].strip())
            x['Ort'] = split_1[0].strip()
            return x
    if x['Ort'] in ('Berlin', 'Hamburg', 'Bremen'):
        x['Bundesland'] = x['Ort']
    return x

In [3]:
# One line per federal state: two-letter code, whitespace, full state name.
abr_text = """BW 	Baden-Württemberg
BY 	Bayern
BE 	Berlin
BB 	Brandenburg
HB 	Bremen
HH 	Hamburg
HE 	Hessen
MV 	Mecklenburg-Vorpommern
NI 	Niedersachsen
NW 	Nordrhein-Westfalen
RP 	Rheinland-Pfalz
SL 	Saarland
SN 	Sachsen
ST 	Sachsen-Anhalt
SH 	Schleswig-Holstein
TH 	Thüringen"""
# abr: list of [code, name] pairs; lands: just the full names, in the same order.
abr = [line.split() for line in abr_text.splitlines()]
lands = [fields[-1] for fields in abr]

In [4]:
# Non-canonical spellings of state names, one list per state.
# harm_land() maps any entry of a list to that state's full name.

# Baden-Württemberg
bw = [
    "Ba.-Württ.",
    "Baden-Württemb.",
    "Bad.-Württ.",
    "Bad.-Würt.",
    "Baden-Württ.",
    "Baden-Wbg.",
    "Ba-Wü",
    "BaWü",
    "BW",
]
# Rheinland-Pfalz
rp = [
    "Rheinl.-Pfalz",
    "Rheinl.-Pf.",
    "Rhld.-Pfalz",
    "Rheinl.-Pfalz",
    "Rheinland-Pf.",
]
# Sachsen-Anhalt
st = ["Sachsen-Anh.", "Sn.-Anhalt"]
# Nordrhein-Westfalen
nr = ["NRW", "Nordrhein-Westf."]
# Niedersachsen
ni = ["Nds.", "Nieders."]
# Mecklenburg-Vorpommern
mv = ["MV", "M.V.", "Meckl.-Vorp.", "Mecklenburg-Vorp."]
# Schleswig-Holstein
sh = [
    "Schleswig-Holst.",
    "Schl.-Hol.",
    "Schlesw.-Hol.",
    "Schleswig-H.",
]
# Brandenburg
bb = ["Brbg", "Brbg."]
# Berlin
be = ["Bln."]
# Hessen
he = ["Hess."]
# Sachsen
sa = ["Sachs."]

In [5]:
def harm_land(x):
    """Return the full state name for a known spelling variant.

    ``x`` is looked up in the module-level variant lists (``bw``, ``rp``,
    ``st``, ...) in a fixed order; the first list containing it decides the
    result. A value found in none of the lists is returned unchanged.
    """
    variants_to_name = (
        (bw, "Baden-Württemberg"),
        (rp, "Rheinland-Pfalz"),
        (st, "Sachsen-Anhalt"),
        (nr, "Nordrhein-Westfalen"),
        (ni, "Niedersachsen"),
        (mv, "Mecklenburg-Vorpommern"),
        (sh, "Schleswig-Holstein"),
        (bb, "Brandenburg"),
        (be, "Berlin"),
        (he, "Hessen"),
        (sa, "Sachsen"),
    )
    for variants, full_name in variants_to_name:
        if x in variants:
            return full_name
    return x

In [6]:
def harm_place(x):
    """Normalise Frankfurt place names for records located in Hessen.

    Only records whose 'Ort' contains 'Frankfurt' and whose 'Bundesland' is
    exactly 'Hessen' are touched:

    * 'Frankfurt a. M.', 'Frankfurt' and 'Frankfurt/Main' become
      'Frankfurt am Main';
    * any other 'Ort' ending in ' Frankfurt' has each ' Frankfurt' replaced
      by ' Frankfurt am Main'.

    The record is modified in place and returned.
    """
    ort = x['Ort']
    if 'Frankfurt' not in ort or x['Bundesland'] != 'Hessen':
        return x
    if ort in ('Frankfurt a. M.', 'Frankfurt', 'Frankfurt/Main'):
        x['Ort'] = 'Frankfurt am Main'
    elif ort.endswith(' Frankfurt'):
        x['Ort'] = ort.replace(' Frankfurt', ' Frankfurt am Main')
    return x

In [7]:
# Copy every row into a new dict with surrounding whitespace removed from each value.
d1 = [
    dict((column, value.strip()) for column, value in row.items())
    for row in data
]

In [8]:
# Run fix_location over every record (it edits each dict in place and returns it).
d1 = list(map(fix_location, d1))

In [9]:
# Run harm_place over every record (it edits each dict in place and returns it).
d1 = list(map(harm_place, d1))

In [10]:
# Sanity check: print every 'Bundesland' value that is not one of the full
# state names in `lands`.
for record in d1:
    land = record['Bundesland']
    if land in lands:
        continue
    print(land)

In [11]:
# Rewrite 'Datum' as an ISO date string and tidy up the 'Name' field.
for x in d1:
    parsed = parse(x['Datum'], languages=['de'])
    if parsed is None:
        # dateparser signals an unparseable string by returning None; fail
        # with the offending value instead of an opaque AttributeError.
        raise ValueError('Cannot parse Datum {!r}'.format(x['Datum']))
    x['Datum'] = parsed.date().isoformat()
    name = x['Name']
    # Upper-case the first character; slicing (rather than name[0]) keeps an
    # empty name from raising IndexError.
    name = name[:1].upper() + name[1:]
    # Expand the 'Unbek' abbreviation; the last replace corrects the form
    # 'Unbekannter Frau' (possibly produced by the first two) to 'Unbekannte Frau'.
    x['Name'] = (
        name.replace('Unbek.', 'Unbekannter')
        .replace('Unbek ', 'Unbekannter ')
        .replace('Unbekannter Frau', 'Unbekannte Frau')
    )

In [12]:
# Columns whose values are normalised to capitalised answers below.
binary_cols = [
    "Opfer mit Schusswaffe",
    "Schusswechsel",
    "Sondereinsatzbeamte",
    "Verletzte/getötete Beamte",
    "Vorbereitete Polizeiaktion",
    'Staatsanwaltliches Ermittlungsverfahren',
    'Gerichtsverfahren',
]
# (old, new) substring replacements, applied one after another in this order.
answer_spellings = (
    ('ja', 'Ja'),
    ('nein', 'Nein'),
    ('unbekannt', 'Unbekannt'),
    ('unklar', 'Unbekannt'),
    ('Unklar', 'Unbekannt'),
)
for record in d1:
    for col in binary_cols:
        answer = record[col]
        for old, new in answer_spellings:
            answer = answer.replace(old, new)
        record[col] = answer

In [13]:
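# Optional sanity check, currently disabled: print every value in the
# binary columns that is not one of 'Ja', 'Nein' or 'Unbekannt'.
# for x in d1:
#     for col in binary_cols:
#         if not x[col] in ('Ja', 'Nein', 'Unbekannt'):
#             print(x[col])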

In [14]:
# In every column, a value that is exactly 'unbekannt' is capitalised.
for record in d1:
    record.update({
        column: 'Unbekannt'
        for column, value in record.items()
        if value == 'unbekannt'
    })

In [15]:
# Prefix each 'Fall' value with 'cilip-' and the first four characters of
# 'Datum' (the year, once 'Datum' is an ISO date string).
for x in d1:
    # Skip values that already carry the prefix so that re-running this cell
    # does not stack it ('cilip-2020-cilip-2020-...').
    if not x['Fall'].startswith('cilip-'):
        x['Fall'] = 'cilip-' + x['Datum'][:4] + '-' + x['Fall']

In [16]:
# Order the records by their 'Datum' string (stable sort on a fresh copy of the list).
d1 = list(d1)
d1.sort(key=lambda record: record['Datum'])

In [17]:
# Write the cleaned records to output.csv; the header and column order come
# from the keys of the first record.
with open('output.csv', 'w', newline='') as out_file:
    columns = list(d1[0])
    csv_writer = csv.DictWriter(out_file, fieldnames=columns)
    csv_writer.writeheader()
    for record in d1:
        csv_writer.writerow(record)