# Politicians Birthplace Map

Visualize where German MPs were born on an interactive map.

In [1]:
# Import libraries

import pandas as pd

import plotly.graph_objects as go

from pathlib import Path

import yaml

from geopy.geocoders import Nominatim

from geopy.extra.rate_limiter import RateLimiter

import time



# Database connection

import sys

sys.path.insert(0, str(Path.cwd().parent / 'src'))

from xminer.io.db import engine

from sqlalchemy import text



print('‚úÖ Libraries imported successfully')

‚úÖ Libraries imported successfully


In [2]:
# Configuration

PARAMS_FILE = Path("../src/xminer/config/parameters.yml")



with PARAMS_FILE.open("r", encoding="utf-8") as f:

    params = yaml.safe_load(f) or {}



YEAR = int(params.get("year", 2025))

MONTH = int(params.get("month", 12))

YM = f"{YEAR:04d}{MONTH:02d}"



# Graphics directory

GRAPHICS_BASE_DIR = Path(params.get("graphics_base_dir", "../outputs"))

GRAPHICS_DIR = GRAPHICS_BASE_DIR / YM / "graphics" / "birthplace_map"

GRAPHICS_DIR.mkdir(parents=True, exist_ok=True)



print(f"Output: {GRAPHICS_DIR}")

Output: /Users/margespinderi/Documents/PoliMetrics/xminer/outputs/202512/graphics/birthplace_map


## 1. Load Birthplace Data

In [3]:
# Get birthplace counts

query = """

SELECT 

    geburtsort,

    COUNT(*) as mp_count,

    STRING_AGG(DISTINCT partei_kurz, ', ' ORDER BY partei_kurz) as parties

FROM politicians_12_2025

WHERE geburtsort IS NOT NULL

GROUP BY geburtsort

ORDER BY mp_count DESC

"""



with engine.connect() as conn:

    df_birthplaces = pd.read_sql(text(query), conn)



print(f"Loaded {len(df_birthplaces)} unique birthplaces")

print(f"Total MPs: {df_birthplaces['mp_count'].sum()}")

df_birthplaces.head(20)

Loaded 396 unique birthplaces
Total MPs: 628


Unnamed: 0,geburtsort,mp_count,parties
0,Berlin,21,"AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD"
1,M√ºnchen,18,"AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, CSU, DIE LINK..."
2,Hamburg,12,"AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD"
3,Karlsruhe,8,"AfD, B√úNDNIS 90/DIE GR√úNEN, CDU"
4,M√ºnster,8,"AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, SPD"
5,Stuttgart,7,"B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD"
6,D√ºsseldorf,6,"AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD"
7,Hannover,6,"AfD, B√úNDNIS 90/DIE GR√úNEN, SPD"
8,Essen,6,"AfD, B√úNDNIS 90/DIE GR√úNEN, DIE LINKE., SPD"
9,Bielefeld,5,"AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD"


## 2. Geocode Cities

Get latitude/longitude coordinates for each city.

In [4]:
# Initialize geocoder

geolocator = Nominatim(user_agent="polimetrics_birthplace_map")

geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)



# Try to load cached geocoding results

CACHE_FILE = GRAPHICS_DIR / "geocode_cache.csv"



if CACHE_FILE.exists():

    print("Loading cached geocoding results...")

    df_geo_cache = pd.read_csv(CACHE_FILE)

    # Merge with birthplace data

    df_birthplaces = df_birthplaces.merge(

        df_geo_cache[['geburtsort', 'lat', 'lon']], 

        on='geburtsort', 

        how='left'

    )

    missing_count = df_birthplaces['lat'].isna().sum()

    print(f"Loaded {len(df_geo_cache)} cached locations, {missing_count} need geocoding")

else:

    print("No cache found, will geocode all locations")

    df_birthplaces['lat'] = None

    df_birthplaces['lon'] = None

Loading cached geocoding results...
Loaded 396 cached locations, 14 need geocoding


In [5]:
# Geocode missing locations

def geocode_city(city_name):

    """Geocode a German city."""

    try:

        # Try with ", Germany" suffix for better results

        location = geocode(f"{city_name}, Germany")

        if location:

            return location.latitude, location.longitude

        return None, None

    except Exception as e:

        print(f"Error geocoding {city_name}: {e}")

        return None, None



# Geocode missing cities

missing_mask = df_birthplaces['lat'].isna()

missing_cities = df_birthplaces[missing_mask]['geburtsort'].tolist()



if len(missing_cities) > 0:

    print(f"Geocoding {len(missing_cities)} cities...")

    for idx, city in enumerate(missing_cities):

        if idx % 10 == 0:

            print(f"  Progress: {idx}/{len(missing_cities)}")

        

        lat, lon = geocode_city(city)

        mask = df_birthplaces['geburtsort'] == city

        df_birthplaces.loc[mask, 'lat'] = lat

        df_birthplaces.loc[mask, 'lon'] = lon

    

    print("‚úÖ Geocoding complete")

    

    # Save cache

    df_birthplaces[['geburtsort', 'lat', 'lon']].to_csv(CACHE_FILE, index=False)

    print(f"üíæ Saved geocoding cache to {CACHE_FILE}")

else:

    print("‚úÖ All cities already geocoded")



# Check success rate

success_count = df_birthplaces['lat'].notna().sum()

total_count = len(df_birthplaces)

print(f"\nGeocoding success: {success_count}/{total_count} ({100*success_count/total_count:.1f}%)")



# Show failed geocodings

failed = df_birthplaces[df_birthplaces['lat'].isna()]

if len(failed) > 0:

    print(f"\n‚ö†Ô∏è  Failed to geocode {len(failed)} cities:")

    print(failed[['geburtsort', 'mp_count']].to_string(index=False))

Geocoding 14 cities...
  Progress: 0/14


RateLimiter caught an error, retrying (0/2 tries). Called with (*('Tetouan, Germany',), **{}).
Traceback (most recent call last):
  File "/Users/margespinderi/Documents/PoliMetrics/xminer/.venv/lib/python3.14/site-packages/urllib3/connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
  File "/Users/margespinderi/Documents/PoliMetrics/xminer/.venv/lib/python3.14/site-packages/urllib3/connection.py", line 571, in getresponse
    httplib_response = super().getresponse()
  File "/Users/margespinderi/.local/share/uv/python/cpython-3.14.2-macos-aarch64-none/lib/python3.14/http/client.py", line 1450, in getresponse
    response.begin()
    ~~~~~~~~~~~~~~^^
  File "/Users/margespinderi/.local/share/uv/python/cpython-3.14.2-macos-aarch64-none/lib/python3.14/http/client.py", line 336, in begin
    version, status, reason = self._read_status()
                              ~~~~~~~~~~~~~~~~~^^
  File "/Users/margespinderi/.local/share/uv/python/cpython-3.14.2-macos-aarch

  Progress: 10/14


‚úÖ Geocoding complete
üíæ Saved geocoding cache to /Users/margespinderi/Documents/PoliMetrics/xminer/outputs/202512/graphics/birthplace_map/geocode_cache.csv

Geocoding success: 382/396 (96.5%)

‚ö†Ô∏è  Failed to geocode 14 cities:
             geburtsort  mp_count
              Karaganda         1
 Kemmern / Lkr. Bamberg         1
    Marburg an der Lahn         1
       Meran (S√ºdtirol)         1
                Niteroi         1
               Nyk√∂ping         1
              Pinarba≈üi         1
              Sosnowitz         1
                Tetouan         1
   Veer√üen jetzt Uelzen         1
Walsum (jetzt Duisburg)         1
                  Zakho         1
       Ahlen/Westfahlen         1
                Craiova         1


## 3. Create Interactive Map

In [6]:
# Filter out failed geocodings

df_map = df_birthplaces[df_birthplaces['lat'].notna()].copy()



print(f"Creating map with {len(df_map)} cities")

print(f"Total MPs on map: {df_map['mp_count'].sum()}")

Creating map with 382 cities
Total MPs on map: 614


In [7]:
# Filter to cities with 2+ MPs for better readability
df_filtered = df_map[df_map['mp_count'] >= 2].copy()
df_plot = df_filtered.sort_values('mp_count', ascending=False)

print(f"Showing {len(df_plot)} cities with 2+ MPs")
print(f"Covering {df_plot['mp_count'].sum()} MPs out of {df_map['mp_count'].sum()} total")

# Create German map
fig_map_de = go.Figure()

fig_map_de.add_trace(go.Scattermapbox(
    lat=df_plot['lat'],
    lon=df_plot['lon'],
    mode='markers',
    marker=go.scattermapbox.Marker(
        size=16,
        sizemode='diameter',
        color=df_plot['mp_count'],
        colorscale=[
            [0, '#0066CC'],
            [0.3, '#00AA00'],
            [0.6, '#FFAA00'],
            [0.85, '#FF4400'],
            [1, '#CC0000']
        ],
        showscale=True,
        opacity=0.85,
        colorbar=dict(
            title=dict(text="<b>Anzahl<br>MdBs</b>", font=dict(color='#000000', size=19)),
            tickfont=dict(color='#000000', size=16),
            bgcolor='rgba(255, 255, 255, 0.95)',
            thickness=26,
            len=0.68,
            x=0.98,
            bordercolor='#000000',
            borderwidth=2
        )
    ),
    customdata=[[city, count, parties] for city, count, parties in
                zip(df_plot['geburtsort'], df_plot['mp_count'], df_plot['parties'])],
    hovertemplate='<b style="font-size:16px">%{customdata[0]}</b><br><b style="font-size:15px">%{customdata[1]} MdBs</b><br><span style="font-size:13px">%{customdata[2]}</span><extra></extra>',
    name=''
))

fig_map_de.update_layout(
    title=dict(
        text="<b>Geburtsorte der Bundestagsabgeordneten</b><br><sub>St√§dte mit 2+ MdBs | Farbe zeigt Anzahl | Dez. 2025</sub>",
        x=0.5,
        xanchor='center',
        font=dict(size=26, color='#000000', family='Arial Black')
    ),
    mapbox=dict(
        style='open-street-map',
        center=dict(lat=51.2, lon=10.3),
        zoom=6.0
    ),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    font=dict(color='#000000', size=14, family='Arial'),
    height=1350,
    width=1080,
    margin=dict(b=15, t=135, l=10, r=110),
    showlegend=False
)

# Create English map
fig_map_en = go.Figure()

fig_map_en.add_trace(go.Scattermapbox(
    lat=df_plot['lat'],
    lon=df_plot['lon'],
    mode='markers',
    marker=go.scattermapbox.Marker(
        size=16,
        sizemode='diameter',
        color=df_plot['mp_count'],
        colorscale=[
            [0, '#0066CC'],
            [0.3, '#00AA00'],
            [0.6, '#FFAA00'],
            [0.85, '#FF4400'],
            [1, '#CC0000']
        ],
        showscale=True,
        opacity=0.85,
        colorbar=dict(
            title=dict(text="<b>Number<br>of MPs</b>", font=dict(color='#000000', size=19)),
            tickfont=dict(color='#000000', size=16),
            bgcolor='rgba(255, 255, 255, 0.95)',
            thickness=26,
            len=0.68,
            x=0.98,
            bordercolor='#000000',
            borderwidth=2
        )
    ),
    customdata=[[city, count, parties] for city, count, parties in
                zip(df_plot['geburtsort'], df_plot['mp_count'], df_plot['parties'])],
    hovertemplate='<b style="font-size:16px">%{customdata[0]}</b><br><b style="font-size:15px">%{customdata[1]} MPs</b><br><span style="font-size:13px">%{customdata[2]}</span><extra></extra>',
    name=''
))

fig_map_en.update_layout(
    title=dict(
        text="<b>Birthplaces of Members of Parliament</b><br><sub>Cities with 2+ MPs | Color shows count | Dec. 2025</sub>",
        x=0.5,
        xanchor='center',
        font=dict(size=26, color='#000000', family='Arial Black')
    ),
    mapbox=dict(
        style='open-street-map',
        center=dict(lat=51.2, lon=10.3),
        zoom=6.0
    ),
    plot_bgcolor='#FFFFFF',
    paper_bgcolor='#FFFFFF',
    font=dict(color='#000000', size=14, family='Arial'),
    height=1350,
    width=1080,
    margin=dict(b=15, t=135, l=10, r=110),
    showlegend=False
)

# Save maps
output_de = GRAPHICS_DIR / "birthplace_map_de.png"
output_en = GRAPHICS_DIR / "birthplace_map_en.png"

fig_map_de.write_image(output_de, width=1080, height=1350, scale=2)
fig_map_en.write_image(output_en, width=1080, height=1350, scale=2)

print(f"‚úÖ Saved: {output_de}")
print(f"‚úÖ Saved: {output_en}")

fig_map_de.show()


Showing 108 cities with 2+ MPs
Covering 340 MPs out of 614 total



*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



‚úÖ Saved: /Users/margespinderi/Documents/PoliMetrics/xminer/outputs/202512/graphics/birthplace_map/birthplace_map_de.png
‚úÖ Saved: /Users/margespinderi/Documents/PoliMetrics/xminer/outputs/202512/graphics/birthplace_map/birthplace_map_en.png


In [8]:
# Create Top 10 Bar Charts
top_10 = df_map.nlargest(10, 'mp_count').sort_values('mp_count', ascending=True)

# German bar chart
fig_bar_de = go.Figure()

fig_bar_de.add_trace(go.Bar(
    x=top_10['mp_count'],
    y=top_10['geburtsort'],
    orientation='h',
    marker=dict(
        color=top_10['mp_count'],
        colorscale='Hot',
        showscale=False,
        colorbar=dict(title="MdBs")
    ),
    text=top_10['mp_count'],
    textposition='outside',
    textfont=dict(size=22, color='white', family='Arial Black'),
    hovertemplate='<b>%{y}</b><br>%{x} MdBs<extra></extra>'
))

fig_bar_de.update_layout(
    title=dict(
        text="<b>Top 10 Geburtsorte der Bundestagsabgeordneten</b><br><sub>Anzahl MdBs pro Stadt (Dezember 2025)</sub>",
        x=0.5,
        xanchor='center',
        font=dict(size=26, color='white', family='Arial Black')
    ),
    xaxis=dict(
        title=dict(text="<b>Anzahl MdBs</b>", font=dict(size=22, color='white')),
        tickfont=dict(size=20, color='white'),
        gridcolor='#444444'
    ),
    yaxis=dict(
        tickfont=dict(size=20, color='white')
    ),
    plot_bgcolor='#000000',
    paper_bgcolor='#000000',
    height=1350,
    width=1080,
    margin=dict(l=150, r=120, t=140, b=100)
)

# English bar chart
fig_bar_en = go.Figure()

fig_bar_en.add_trace(go.Bar(
    x=top_10['mp_count'],
    y=top_10['geburtsort'],
    orientation='h',
    marker=dict(
        color=top_10['mp_count'],
        colorscale='Hot',
        showscale=False,
        colorbar=dict(title="MPs")
    ),
    text=top_10['mp_count'],
    textposition='outside',
    textfont=dict(size=22, color='white', family='Arial Black'),
    hovertemplate='<b>%{y}</b><br>%{x} MPs<extra></extra>'
))

fig_bar_en.update_layout(
    title=dict(
        text="<b>Top 10 MP Birthplaces</b><br><sub>Number of MPs per City (December 2025)</sub>",
        x=0.5,
        xanchor='center',
        font=dict(size=26, color='white', family='Arial Black')
    ),
    xaxis=dict(
        title=dict(text="<b>Number of MPs</b>", font=dict(size=22, color='white')),
        tickfont=dict(size=20, color='white'),
        gridcolor='#444444'
    ),
    yaxis=dict(
        tickfont=dict(size=20, color='white')
    ),
    plot_bgcolor='#000000',
    paper_bgcolor='#000000',
    height=1350,
    width=1080,
    margin=dict(l=150, r=120, t=140, b=100)
)

# Save bar charts
output_bar_de = GRAPHICS_DIR / "top_10_birthplaces_de.png"
output_bar_en = GRAPHICS_DIR / "top_10_birthplaces_en.png"

fig_bar_de.write_image(output_bar_de, width=1080, height=1350, scale=2)
fig_bar_en.write_image(output_bar_en, width=1080, height=1350, scale=2)

print(f"‚úÖ Saved bar chart: {output_bar_de}")
print(f"‚úÖ Saved bar chart: {output_bar_en}")

fig_bar_de.show()


‚úÖ Saved bar chart: /Users/margespinderi/Documents/PoliMetrics/xminer/outputs/202512/graphics/birthplace_map/top_10_birthplaces_de.png
‚úÖ Saved bar chart: /Users/margespinderi/Documents/PoliMetrics/xminer/outputs/202512/graphics/birthplace_map/top_10_birthplaces_en.png


In [9]:
print("=" * 80)

print("BIRTHPLACE MAP SUMMARY")

print("=" * 80)



print(f"\nTotal unique birthplaces: {len(df_birthplaces)}")

print(f"Successfully geocoded: {len(df_map)}")

print(f"Total MPs on map: {df_map['mp_count'].sum()}")



print("\nTop 10 birthplaces:")

print("-" * 80)

top_10 = df_map.nlargest(10, 'mp_count')[['geburtsort', 'mp_count', 'parties']]

for _, row in top_10.iterrows():

    print(f"{row['geburtsort']:20} {row['mp_count']:>3} MPs - {row['parties']}")



print("\n" + "=" * 80)

print(f"‚úÖ Visualization saved to: {GRAPHICS_DIR}")

print("=" * 80)

BIRTHPLACE MAP SUMMARY

Total unique birthplaces: 396
Successfully geocoded: 382
Total MPs on map: 614

Top 10 birthplaces:
--------------------------------------------------------------------------------
Berlin                21 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD
M√ºnchen               18 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, CSU, DIE LINKE., SPD
Hamburg               12 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD
Karlsruhe              8 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, CDU
M√ºnster                8 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, SPD
Stuttgart              7 MPs - B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD
D√ºsseldorf             6 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD
Hannover               6 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, SPD
Essen                  6 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, DIE LINKE., SPD
Bielefeld              5 MPs - AfD, B√úNDNIS 90/DIE GR√úNEN, CDU, DIE LINKE., SPD

‚úÖ Visualization save