In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re

## Landkreise

In [9]:
wiki_kreise = requests.get("https://de.wikipedia.org/wiki/Liste_der_Landkreise_in_Deutschland")

soup = BeautifulSoup(wiki_kreise.text)

df_kreise = pd.DataFrame()

for tr in soup.find_all('tr'):
    """Format: <tr>
                <td>REGIONAL_SCHLÜSSEL</td>
                <td>LANDKREIS</td>
                <td>KFZ-KENNZ</td>
                <td>BUNDESLAND</td>
                <td>KREISSITZ</td>
                <td>EINWOHNER</td>
                <td>FLÄCHE</td>
                <td>BeVÖLKERUNGSDICHTE</td>
                <td>KARTE</td></tr>
                <tr>"""
    tds = tr.find_all('td')
    if(len(tds)>5):
        kreis_key = tds[0].text.strip()
        kreis = tds[1].text.split(",")[0].strip()
        license_plates = tds[2].text.replace(",", " ").replace("(", " ").replace(")", " ").split()

        for license_plate in license_plates:
            df_kreise = df_kreise.append({"license_plate": license_plate.strip(), "kreis_key": kreis_key}, ignore_index = True)

df_kreise

Unnamed: 0,license_plate,kreis_key
0,AC,05334
1,MON,05334
2,AW,07131
3,AIC,09771
4,FDB,09771
...,...,...
660,HCH,08417
661,Z,14524
662,GC,14524
663,HOT,14524


## Kreisfreie Städte

In [10]:
wiki_städte = requests.get("https://de.wikipedia.org/wiki/Liste_der_kreisfreien_St%C3%A4dte_in_Deutschland")

soup = BeautifulSoup(wiki_städte.text)

df_städte = pd.DataFrame()

for tr in soup.find_all('tr'):
    """ Format:
    <tr>
        <td>WAPPEN</td>
        <td>STADT</td>
        <td>REGIONALSCHLÜSSEL
        </td>
        <td>BUNDESLAND</td>
        <td>RGIERUNGSBEZIRK</td>
        <td>KFZ-KENNZEICHEN
        </td>
        <td>FLÄCHE
        </td>
        <td>TEINWOHNER 1939
        </td>
        <td>TEINWOHNER 1950
        </td>
        <td>TEINWOHNER 1970
        </td>
        <td>TEINWOHNER 1990
        </td>
        <td>TEINWOHNER 2011
        </td>
        <td>EINWOHNER 2020</small>
        </td>
        <td>BEVDICHTE
        </td>
        <td>KARTE</td></tr>
    """
    tds = tr.find_all('td')
    if(len(tds) > 12):
        kreis_key = tds[2].text.strip()
        kreis = re.split("\(|\[", tds[1].text)[0].strip()
        license_plates = tds[5].text.replace(",", " ").replace("(", " ").replace(")", " ").split()

        if(all(df_kreise["kreis_key"] != kreis_key)): # Avoid duplicates
            for license_plate in license_plates:
                df_städte = df_städte.append({"license_plate": license_plate.strip(), "kreis_key": kreis_key}, ignore_index = True)

df_städte

Unnamed: 0,license_plate,kreis_key
0,AM,09361
1,AN,09561
2,AB,09661
3,A,09761
4,BAD,08211
...,...,...
106,WOB,03103
107,WO,07319
108,W,05124
109,WÜ,09663


In [11]:
any(df_kreise.duplicated()) or any(df_städte.duplicated()) # check for duplicates

False

In [12]:
df = pd.concat([df_städte, df_kreise])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 776 entries, 0 to 664
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   license_plate  776 non-null    object
 1   kreis_key      776 non-null    object
dtypes: object(2)
memory usage: 18.2+ KB


In [13]:
df["kreis_key"].replace({"11001": "11000"}, inplace = True) # Fix regional code of Berlin ("11000")

In [14]:
df.to_csv("license_plates.csv")