# Peaks

In [11]:
import requests, re, json, urllib.parse
from bs4 import BeautifulSoup
import pandas as pd

headers = {"User-Agent": "Mozilla/5.0"}
base_list = "https://www.peakbagger.com/list.aspx?cid=3482&lid=1100"

def real_photo(soup, base_url=None):
    """Return the absolute URL of the first peak photo found in ``soup``.

    Every ``<img>`` tag is scanned in document order and the first one whose
    ``src`` contains the substring ``"pbphoto"`` is selected.

    Args:
        soup: Parsed page; any object exposing ``find_all("img")`` whose items
            support ``.get("src", default)``.
        base_url: URL that a relative ``src`` is resolved against. When omitted
            the module-level ``base_list`` is used, which is the original
            behaviour.

    Returns:
        The absolute photo URL, or ``""`` when no matching image exists.
    """
    if base_url is None:
        base_url = base_list
    for img in soup.find_all("img"):
        src = img.get("src", "")
        if "pbphoto" in src:
            return urllib.parse.urljoin(base_url, src)
    return ""

# Download the list page and locate the table that holds one row per country.
list_resp = requests.get(base_list, headers=headers, timeout=30)
list_resp.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
html = list_resp.text
table = BeautifulSoup(html, "html.parser").find("table", class_="gray")
if table is None:
    # Without this guard the loop below would die with an opaque
    # AttributeError on None.
    raise RuntimeError(f"No <table class='gray'> found at {base_list}")

# Each data row becomes a dict: rank, country, peak name, elevation, peak page URL.
rows = []
for tr in table.select("tr"):
    tds = tr.find_all("td")
    # Rows with fewer than four <td> cells are skipped.
    if len(tds) < 4:
        continue
    rows.append({
        "rank": int(tds[0].text.strip(" .")),
        "country": tds[1].text.strip(),
        "peak": tds[2].text.strip(),
        "elev_m": int(tds[3].text),
        # The peak link is resolved against the list URL to make it absolute.
        "peak_url": urllib.parse.urljoin(base_list, tds[2].a["href"])
    })

# Visit every peak's own page and add coordinates, prominence, isolation,
# description and photo URL to the row dict in place.
for row in rows:
    resp = requests.get(row["peak_url"], headers=headers, timeout=30)
    resp.raise_for_status()  # surface HTTP errors instead of parsing an error page
    soup = BeautifulSoup(resp.text, "html.parser")
    info = soup.find("table", class_="gray")
    # A page without the info table contributes no detail fields instead of
    # raising AttributeError on None.
    info_rows = info.find_all("tr") if info is not None else []
    for tr in info_rows:
        cells = tr.find_all("td")
        if len(cells) < 2:
            continue
        label = cells[0].get_text(strip=True)
        value = cells[1].get_text(" ", strip=True)
        if label.startswith("Latitude/Longitude"):
            # Only a leading "<number>, <number>" pair is accepted.
            m = re.match(r"([-0-9.]+)\s*,\s*([-0-9.]+)", value)
            if m:
                row["lat_dd"], row["lon_dd"] = map(float, m.groups())
        elif label.startswith("Prominence"):
            p = re.search(r"(\d+)\s*m", value)
            if p:
                row["prominence_m"] = int(p.group(1))
        elif label.startswith("Isolation"):
            i = re.search(r"([\d.]+)\s*km", value)
            if i:
                row["isolation_km"] = float(i.group(1))
            else:
                # No "<number> km" found: keep the first raw token (a string),
                # or None when the cell is empty (previously an IndexError).
                tokens = value.split()
                row["isolation_km"] = tokens[0] if tokens else None
    # The description is the text of all <p> tags inside the first <td> whose
    # style attribute contains "padding:10px".
    desc_td = soup.find("td", style=re.compile("padding:10px"))
    if desc_td:
        paras = desc_td.find_all("p")
        row["description"] = " ".join(para.get_text(" ", strip=True) for para in paras)
    else:
        row["description"] = ""
    row["photo_url"] = real_photo(soup)

# Persist the enriched rows; non-ASCII characters are written unescaped.
pd.DataFrame(rows).to_json("peaks.json", orient="records", indent=2, force_ascii=False)


In [14]:
import os
import json
import requests
import pandas as pd
from urllib.parse import urlparse

# Download each peak photo into images/ and record the local path per row.
df = pd.read_json("peaks.json")

os.makedirs("images", exist_ok=True)

paths = []
for _, row in df.iterrows():
    url = row.get("photo_url", "")
    local_path = ""  # stays empty when there is no URL or the download fails
    # Non-string values (e.g. NaN for a missing field) count as "no URL";
    # previously they reached urlparse() outside the try block and crashed.
    if isinstance(url, str) and url:
        parsed = urlparse(url)
        name = os.path.basename(parsed.path)
        slug = row["peak"].lower().replace(" ", "_").replace("/", "_")
        filename = f"{slug}_{name}"
        local_path = os.path.join("images", filename)
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            with open(local_path, "wb") as f:
                f.write(r.content)
        except (requests.RequestException, OSError) as exc:
            # Best effort: a failed download leaves an empty path, but only
            # network/file errors are tolerated (a bare except would also
            # swallow KeyboardInterrupt and programming errors).
            print(f"Skipping {url}: {exc}")
            local_path = ""
    paths.append(local_path)

df["image_path"] = paths

df.to_json("peaks_with_images.json", orient="records", indent=2, force_ascii=False)


# Capitals

In [17]:
import requests
import pandas as pd
import json

# SPARQL query: for every item that is an instance of Q3624078 and has a
# capital (P36) with coordinates (P625), return labels plus the optional
# English description and image (P18) of the capital.
q = """
SELECT ?countryLabel ?capitalLabel ?coord ?desc ?image WHERE {
  ?country wdt:P31 wd:Q3624078; wdt:P36 ?capital.
  ?capital wdt:P625 ?coord.
  OPTIONAL { ?capital schema:description ?desc FILTER(LANG(?desc)="en") }
  OPTIONAL { ?capital wdt:P18 ?image         }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
"""
r = requests.get(
    "https://query.wikidata.org/sparql",
    params={"format":"json","query":q},
    timeout=60,  # do not hang forever on a stalled connection
)
r.raise_for_status()  # a throttled/failed query should not reach .json()
rows = []
for b in r.json()["results"]["bindings"]:
    # The coordinate literal is parsed as "Point(<lon> <lat>)": longitude first.
    lon, lat = b["coord"]["value"].removeprefix("Point(").removesuffix(")").split(" ")
    rows.append({
        "country":      b["countryLabel"]["value"],
        "capital":      b["capitalLabel"]["value"],
        # Optional bindings are absent from the result when unmatched.
        "description":  b.get("desc",{}).get("value",""),
        "lat_dd":       float(lat),
        "lon_dd":       float(lon),
        "image_url":    b.get("image",{}).get("value","")
    })

df = pd.DataFrame(rows)
df.to_json("capitals.json", orient="records", indent=2, force_ascii=False)
df.head()


Unnamed: 0,country,capital,description,lat_dd,lon_dd,image_url
0,Uzbekistan,Tashkent,capital of Uzbekistan,41.311111,69.279722,http://commons.wikimedia.org/wiki/Special:File...
1,Switzerland,Bern,"city in Switzerland, capital of the canton of ...",46.94798,7.44743,http://commons.wikimedia.org/wiki/Special:File...
2,Singapore,Singapore,sovereign island country and city-state in mar...,1.3,103.8,http://commons.wikimedia.org/wiki/Special:File...
3,North Macedonia,Skopje,capital city of North Macedonia,41.996111,21.431667,http://commons.wikimedia.org/wiki/Special:File...
4,Germany,Berlin,"federated state, capital and largest city of G...",52.516667,13.383333,http://commons.wikimedia.org/wiki/Special:File...


In [18]:
import os
import requests
import pandas as pd
from urllib.parse import urlparse

# Download each capital's image into capital_images/ and record the local path.
df = pd.read_json("capitals.json")

os.makedirs("capital_images", exist_ok=True)

paths = []
for _, row in df.iterrows():
    url = row.get("image_url","")
    local = ""  # stays empty when there is no URL or the download fails
    # Non-string values (e.g. NaN for a missing field) count as "no URL";
    # previously they reached urlparse() outside the try block and crashed.
    if isinstance(url, str) and url:
        p = urlparse(url)
        # Fall back to ".jpg" when the URL path carries no extension.
        ext = os.path.splitext(p.path)[1] or ".jpg"
        slug = row["capital"].lower().replace(" ","_").replace("/","_")
        fname = f"{slug}{ext}"
        local = os.path.join("capital_images", fname)
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            with open(local, "wb") as f:
                f.write(r.content)
        except (requests.RequestException, OSError) as exc:
            # Best effort: a failed download leaves an empty path, but only
            # network/file errors are tolerated (a bare except would also
            # swallow KeyboardInterrupt and programming errors).
            print(f"Skipping {url}: {exc}")
            local = ""
    paths.append(local)

df["image_path"] = paths

df.to_json("capitals_with_images.json", orient="records", indent=2, force_ascii=False)


# Mapping

## Polygons

In [35]:
import pandas as pd
import geopandas as gpd
import requests, zipfile, io
from math import radians, sin, cos, atan2

# Inputs are the JSON files written by the two image-download cells above.
peaks = pd.read_json("peaks_with_images.json")
caps  = pd.read_json("capitals_with_images.json")

def hav(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points using the haversine formula.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        Distance along a sphere of radius 6371 (kilometres). NaN inputs
        propagate to a NaN result.
    """
    R = 6371
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    # Rounding can push `a` a hair outside [0, 1] for (near-)antipodal points;
    # (1 - a) ** 0.5 would then be complex and atan2 would raise TypeError.
    # Explicit comparisons (rather than min/max) leave NaN untouched.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    return R * 2 * atan2(a ** 0.5, (1 - a) ** 0.5)

# Pair each peak with the capital of the same country (columns present in both
# frames get the _pk / _cp suffixes) and compute their separation row by row.
df = peaks.merge(caps, on="country", suffixes=("_pk","_cp"))
df["dist_km"] = df.apply(lambda r: hav(r.lat_dd_pk, r.lon_dd_pk, r.lat_dd_cp, r.lon_dd_cp), axis=1)

# Download and unpack the Natural Earth map-subunits shapefile.
r = requests.get(
    "https://naturalearth.s3.amazonaws.com/10m_cultural/ne_10m_admin_0_map_subunits.zip",
    timeout=120,  # do not hang forever on a stalled connection
)
r.raise_for_status()  # an error body is not a zip archive; fail here with a clear message
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall("ne_subunits")
sub = gpd.read_file("ne_subunits/ne_10m_admin_0_map_subunits.shp")
# Dissolve the subunits by their ADMIN value, rename that column to "country"
# so it can be joined with the distance table, then reproject to EPSG:3857.
cn = sub.dissolve(by="ADMIN").reset_index()[["ADMIN","geometry"]].rename(columns={"ADMIN":"country"})
gdf = cn.merge(df, on="country").to_crs(epsg=3857)

def get_coords(p):
    """Flatten the exterior ring(s) of a geometry into two coordinate lists.

    Args:
        p: Geometry exposing ``geom_type``; a ``"Polygon"`` must provide
            ``exterior.coords.xy``, anything else must provide ``geoms`` whose
            members do.

    Returns:
        ``(xs, ys)`` lists. For a single polygon these are the exterior ring
        coordinates. Otherwise the rings of all parts are concatenated, each
        followed by a ``None`` separator (including after the last part).
    """
    if p.geom_type != "Polygon":
        all_x, all_y = [], []
        for piece in p.geoms:
            ring_x, ring_y = piece.exterior.coords.xy
            all_x.extend(ring_x)
            all_x.append(None)
            all_y.extend(ring_y)
            all_y.append(None)
        return all_x, all_y

    ring_x, ring_y = p.exterior.coords.xy
    return list(ring_x), list(ring_y)

# get_coords returns an (xs, ys) pair per geometry; zip(*...) transposes those
# pairs into one sequence of xs and one of ys, stored as two new columns.
gdf["xs"], gdf["ys"] = zip(*gdf.geometry.apply(get_coords))


## Match data

In [109]:
import json

# Maps alternative country / territory names to one canonical country name.
# Each key appears exactly once: a dict literal silently keeps only the LAST
# value of a repeated key, which previously hid a conflicting entry
# ("Cook Islands" was listed as both "Cook Islands" and "New Zealand"; the
# effective value, "New Zealand", is kept here). Key order and every value are
# the same as the dict the original literal produced.
country_map = {
  "Falkland Islands": "United Kingdom",
  "South Georgia and the Islands": "United Kingdom",
  "South Georgia": "United Kingdom",
  "Anguilla": "United Kingdom",
  "Bermuda": "United Kingdom",
  "British Indian Ocean Territory": "United Kingdom",
  "British Virgin Islands": "United Kingdom",
  "Gibraltar": "United Kingdom",
  "Guernsey": "United Kingdom",
  "Isle of Man": "United Kingdom",
  "Jersey": "United Kingdom",
  "Montserrat": "United Kingdom",
  "Pitcairn Islands": "United Kingdom",
  "Pitcairn Island": "United Kingdom",
  "Saint Helena": "United Kingdom",
  "Ascension Island": "United Kingdom",
  "Turks and Caicos Islands": "United Kingdom",
  "United States": "United States of America",
  "United States Virgin Islands": "United States of America",
  "United States Minor Outlying Islands": "United States of America",
  "U.S. Minor Pacific Islands": "United States of America",
  "Navassa": "United States of America",
  "Northern Mariana Islands": "United States of America",
  "Puerto Rico": "United States of America",
  "American Samoa": "United States of America",
  "French Polynesia": "France",
  "French Southern and Antarctic Lands": "France",
  "French Southern Territories": "France",
  "Reunion": "France",
  "Guadeloupe": "France",
  "Martinique": "France",
  "Mayotte": "France",
  "New Caledonia": "France",
  "French Guiana": "France",
  "Wallis and Futuna": "France",
  "Saint Pierre and Miquelon": "France",
  "Curaçao": "Netherlands",
  "Aruba": "Netherlands",
  "Sint Maarten": "Netherlands",
  "Bonaire": "Netherlands",
  "Saba": "Netherlands",
  "Sint Eustatius": "Netherlands",
  "Kingdom of the Netherlands": "Netherlands",
  "Greenland": "Denmark",
  "Faroe Islands": "Denmark",
  "Kingdom of Denmark": "Denmark",
  "Christmas Island": "Australia",
  "Cocos Islands": "Australia",
  "Indian Ocean Territories": "Australia",
  "Ashmore and Cartier Islands": "Australia",
  "Heard and McDonald Islands": "Australia",
  "Coral Sea Islands": "Australia",
  "Coral Sea Islands Territory": "Australia",
  "People's Republic of China": "China",
  "PRC": "China",
  "Hong Kong S.A.R.": "China",
  "Hong Kong": "China",
  "Macao S.A.R": "China",
  "Macau": "China",
  "Czech Republic": "Czechia",
  "Cape Verde": "Cabo Verde",
  "Congo DRC": "Democratic Republic of the Congo",
  "Democratic Republic of the Congo": "Democratic Republic of the Congo",
  "Congo Republic": "Republic of the Congo",
  "Eswatini": "eSwatini",
  "Bahamas": "Bahamas",
  "The Bahamas": "Bahamas",
  "Gambia": "Gambia",
  "The Gambia": "Gambia",
  "Palestinian Authority": "Palestine",
  "State of Palestine": "Palestine",
  "Sixth Republic of South Korea": "South Korea",
  "Republic of Serbia": "Serbia",
  "Sao Tome and Principe": "São Tomé and Príncipe",
  "Ivory Coast": "Ivory Coast",
  "Côte d'Ivoire": "Ivory Coast",
  "Fiji Islands": "Fiji",
  "Portuguese Republic": "Portugal",
  "Republic of Portugal": "Portugal",
  "Republic of Poland": "Poland",
  "Polish Republic": "Poland",
  "United Republic of Tanzania": "Tanzania",
  "Somaliland": "Somalia",
  "East Timor": "Timor Leste",
  "Timor-Leste": "Timor Leste",
  "Jan Mayen": "Norway",
  "Bouvet Island": "Norway",
  "Svalbard": "Norway",
  "Tristan da Cunha": "United Kingdom",
  "Tokelau": "New Zealand",
  "Cook Islands": "New Zealand",
  "Cayman Islands": "United Kingdom",
  "French Southern Lands": "France",
  "Guam": "United States of America",
  "Micronesia": "Federated States of Micronesia",
  "Niue": "New Zealand",
  "Norfolk Island": "Australia",
  "Northern Marianas": "United States of America",
  "Saint Barthelemy": "France",
  "Saint Helena, Ascension, Tristan da Cunha": "United Kingdom",
  "Saint Martin": "France",
  "U.S. Virgin Islands": "United States of America",
  "Vatican City": "Vatican",
  "Kingdom of Lesotho": "Lesotho",
  "Heard Island and McDonald Islands": "Australia"
}

# Written as UTF-8 with non-ASCII names (e.g. "Curaçao") left unescaped.
with open("country_map.json", "w", encoding="utf-8") as f:
    json.dump(country_map, f, ensure_ascii=False, indent=2)


In [86]:
import pandas as pd, json

caps = pd.read_json("caps_clean.json")

# country_map.json is written as UTF-8 with unescaped non-ASCII names by the
# cell that builds country_map, so it must be read back as UTF-8 explicitly;
# the platform default encoding is not guaranteed to be UTF-8.
with open("country_map.json", encoding="utf-8") as f:
    cmap = json.load(f)
# Ensure Kosovo has an entry (mapping to itself) without overwriting an existing one.
cmap.setdefault("Kosovo", "Kosovo")
with open("country_map.json", "w", encoding="utf-8") as f:
    json.dump(cmap,f,indent=2)

# Append a hand-written Kosovo record when the capitals table lacks one.
if "Kosovo" not in caps.country.values:
    # len(caps) is used as the new row label.
    # NOTE(review): assumes caps has a default 0..n-1 index — confirm.
    # NOTE(review): the key here is "photo_url" while the capitals table built
    # earlier in this notebook uses "image_url" — confirm against the schema
    # of caps_clean.json.
    caps.loc[len(caps)] = {
        "country":    "Kosovo",
        "capital":    "Pristina",
        "lat_dd":     42.6629,
        "lon_dd":     21.1655,
        "description":"Capital of Kosovo",
        "photo_url":  None
    }
    caps.to_json("caps_clean.json", orient="records", indent=2)


## Text

In [57]:
# Build a short text summary: the five largest and the single smallest
# peak-to-capital distances.
# NOTE: `dist` is not created in this cell; the only definition visible in the
# notebook is in the Plot cell, which must have been executed first.
_summary_cols = ['country', 'capital', 'peak', 'dist_km']


def _describe(row):
    """Format one itertuples() row as 'Country: N km (Peak→Capital)'."""
    return f"{row.country}: {row.dist_km:.0f} km ({row.peak}→{row.capital})"


top5 = dist.nlargest(5, 'dist_km')[_summary_cols].reset_index(drop=True)
min1 = dist.nsmallest(1, 'dist_km')[_summary_cols].reset_index(drop=True)

lines_top = list(map(_describe, top5.itertuples()))
lines_min = list(map(_describe, min1.itertuples()))

_top_text = "\n".join(lines_top)
_min_text = "\n".join(lines_min)
annotation = f"Top 5 countries by distance:\n{_top_text}\n\nShortest distance:\n{_min_text}"

print(annotation)




Top 5 countries by distance:
United Kingdom: 12255 km (Mount Paget→London)
Netherlands: 6985 km (Mount Scenery→Amsterdam)
Australia: 6036 km (Big Ben→Canberra)
United States of America: 5439 km (Denali→Washington, D.C.)
Canada: 4431 km (Mount Logan→Ottawa)

Shortest distance:
San Marino: 0 km (Monte Titano→San Marino)


## Plot

In [63]:
import json
import pandas as pd
import geopandas as gpd
import numpy as np
from bokeh.io import output_file, save
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper, Div
from bokeh.layouts import column
from bokeh.palettes import Blues256

# Colours: (choropleth palette, background, peak marker, capital marker, text).
palette = (Blues256[::-1][15:], "white", "#005285", "#7dbaff", "gray")
show_peaks, show_caps = True, True
map_palette, back_color, peak_color, cap_color, text_color = palette
# Steepness and midpoint of the logistic colour transform applied further down.
K, MID = 0.0008, 1500

# Read explicitly as UTF-8 so the result does not depend on the platform's
# default encoding.
with open("country_map.json", encoding="utf-8") as f:
    cmap = json.load(f)
# Map an alias to its canonical country name; unknown names pass through unchanged.
unify = lambda n: cmap.get(n, n)

peaks = pd.read_json("peaks_with_images.json")
caps  = pd.read_json("caps_clean.json")
sub   = gpd.read_file("ne_subunits/ne_10m_admin_0_map_subunits.shp")

peaks["country"] = peaks["country"].apply(unify)
caps["country"]  = caps["country"].apply(unify)
sub = sub[sub.ADMIN != "Antarctica"].assign(country=sub.ADMIN.apply(unify))

# Drop peaks at or south of 60°S (rows with a missing latitude are dropped too).
peaks = peaks[peaks.lat_dd > -60]
# A peak listed as "A/B" becomes one row per country. explode() repeats the
# original index label on every produced row, so the index is rebuilt here:
# with repeated labels, the .loc[idxmax] lookup below returns every row that
# shares a label and shared peaks end up duplicated.
peaks = (
    peaks.assign(country=peaks.country.str.split("/"))
    .explode("country")
    .reset_index(drop=True)
)
# The names produced by the split have not been through unify() yet (the call
# above saw the combined "A/B" string), so normalise them before grouping.
peaks["country"] = peaks["country"].str.strip().apply(unify)
# Keep only the highest peak per country.
peaks = peaks.loc[peaks.groupby("country")["elev_m"].idxmax()].reset_index(drop=True)

def hav(a, b, c, d):
    """Vectorised great-circle distance using the haversine formula.

    Args:
        a, b: Latitude and longitude of the first point(s), in degrees.
        c, d: Latitude and longitude of the second point(s), in degrees.
            Scalars or equally shaped array-likes are accepted.

    Returns:
        Distance(s) along a sphere of radius 6371 (kilometres); NaN inputs
        yield NaN.
    """
    R = 6371
    lat1, lon1, lat2, lon2 = np.radians([a, b, c, d])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    h = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push h a hair outside [0, 1] for (near-)antipodal points,
    # which would turn sqrt(1 - h) into NaN. np.clip leaves genuine NaNs as NaN.
    h = np.clip(h, 0.0, 1.0)
    return R * 2 * np.arctan2(np.sqrt(h), np.sqrt(1 - h))

# Pair each country's peak with its capital; columns present in both frames
# get the _pk / _cp suffixes. hav() is called once on whole columns.
dist = peaks.merge(caps, on="country", suffixes=("_pk", "_cp"))
dist["dist_km"] = hav(dist.lat_dd_pk, dist.lon_dd_pk, dist.lat_dd_cp, dist.lon_dd_cp)

# One geometry per unified country name, then attach the distance data.
# how="left" keeps countries that have no row in `dist`.
# NOTE(review): explode() is presumably used to split multi-part geometries
# into one row per polygon, since get_coords below reads only `.exterior` —
# confirm against the geopandas version in use.
cn = sub.dissolve(by="country").reset_index()
cn = cn.merge(dist[["country", "dist_km", "peak", "capital", "elev_m"]], on="country", how="left").explode(ignore_index=True)

# PROJ string for the target projection ("robin", centred on lon_0=0, metres).
crs_proj = "+proj=robin +lon_0=0 +datum=WGS84 +units=m +no_defs"
cn = cn.to_crs(crs_proj)

def get_coords(poly):
    """Return the exterior ring of ``poly`` as two plain lists ``(xs, ys)``.

    Only ``poly.exterior.coords.xy`` is read.
    """
    ring_x, ring_y = poly.exterior.coords.xy
    return [*ring_x], [*ring_y]

# Store each polygon's exterior ring as plain coordinate lists, then drop the
# geometry column before the frame is handed to ColumnDataSource.
cn["xs"], cn["ys"] = zip(*cn.geometry.apply(get_coords))
cn = cn.drop(columns="geometry")

# Project the peak points from EPSG:4326 lon/lat into crs_proj and keep the
# projected coordinates as plain x / y columns.
peaks = gpd.GeoDataFrame(peaks, geometry=gpd.points_from_xy(peaks.lon_dd, peaks.lat_dd), crs="EPSG:4326").to_crs(crs_proj)
peaks["x"], peaks["y"] = peaks.geometry.x, peaks.geometry.y
peaks = peaks.drop(columns="geometry")

# Same treatment for the capitals.
caps = gpd.GeoDataFrame(caps, geometry=gpd.points_from_xy(caps.lon_dd, caps.lat_dd), crs="EPSG:4326").to_crs(crs_proj)
caps["x"], caps["y"] = caps.geometry.x, caps.geometry.y
caps = caps.drop(columns="geometry")

# Logistic transform of (dist_km + 100): K sets the steepness, MID the midpoint.
cn["dist_color"] = 1 / (1 + np.exp(-K * ((cn["dist_km"] + 100) - MID)))

gsrc   = ColumnDataSource(cn)
# The colour scale spans exactly the observed range of transformed values.
mapper = LinearColorMapper(palette=map_palette, low=cn.dist_color.min(), high=cn.dist_color.max())

# Axis ranges: the full extent of all rings, inset by 0.1% of the span on
# every side (pad is added to the minimum and subtracted from the maximum).
xs_all = np.hstack(cn["xs"].to_list())
ys_all = np.hstack(cn["ys"].to_list())
pad_x  = 0.001 * (xs_all.max() - xs_all.min())
pad_y  = 0.001 * (ys_all.max() - ys_all.min())
x_rng  = (xs_all.min() + pad_x, xs_all.max() - pad_x)
y_rng  = (ys_all.min() + pad_y, ys_all.max() - pad_y)

# Figure that stretches in both directions, with pan / wheel-zoom / reset
# tools, a hidden toolbar, and background and border in back_color.
p = figure(
    x_range=x_rng,
    y_range=y_rng,
    match_aspect=True,
    sizing_mode="stretch_both",
    tools="pan,wheel_zoom,reset",
    active_scroll="wheel_zoom",
    toolbar_location=None,
    background_fill_color=back_color,
    border_fill_color=back_color,
    min_border=0,
)

# Hide the outline, axes and grid lines.
p.outline_line_color = None
p.axis.visible       = False
p.grid.grid_line_color = None

# Centred title; min_border_top reserves space above the plot area.
p.title.text            = "Distance from Highest Peak to Capital"
p.title.align           = "center"
p.title.text_font_size  = "30pt"
p.title.text_font_style = "normal"
p.title.text_color      = "gray"
p.min_border_top        = 50

# Country polygons, filled according to the transformed distance. The returned
# renderer is kept so the hover tool can target it directly instead of relying
# on it being the first entry of p.renderers.
country_patches = p.patches(
    "xs", "ys", source=gsrc,
    fill_color={"field": "dist_color", "transform": mapper},
    line_color=None, line_width=0,
)

if show_peaks:
    p.scatter("x", "y", source=ColumnDataSource(peaks),
              marker="triangle", size=6,
              fill_color=peak_color, line_color=peak_color,
              legend_label="Highest peak")

if show_caps:
    p.scatter("x", "y", source=ColumnDataSource(caps),
              marker="star", size=6,
              fill_color=cap_color, line_color=cap_color,
              legend_label="Capital city")

# HTML tooltip template; the @name placeholders refer to columns of gsrc.
tooltip = """
<div>
  <style>
    :host{background:transparent!important;border:1px solid #fff!important;border-radius:8px!important;box-shadow:none!important;padding:0!important;}
    .bk-tooltip-arrow{display:none!important;}
    .inner-box{background-color:#222;color:#eee;padding:6px;border-radius:8px;display:flex;flex-direction:column;gap:4px;}
  </style>
  <div class="inner-box">
    <div style="display:flex;gap:10px;"><span style="font-weight:bold;">@country</span><span>@capital</span><span>@peak</span></div>
    <div style="display:flex;gap:10px;"><span>Elevation&nbsp;@elev_m&nbsp;m</span><span>Distance&nbsp;@dist_km{0.0}&nbsp;km</span></div>
  </div>
</div>
"""
# Hovering applies to the country polygons only, not to the point markers.
p.add_tools(HoverTool(renderers=[country_patches], tooltips=tooltip,
                      point_policy="follow_mouse", show_arrow=False))

# Legend: bottom-left, fully transparent background, no border.
p.legend.location              = "bottom_left"
p.legend.background_fill_alpha = 0.0
p.legend.label_text_color      = text_color
p.legend.label_text_font_size  = "10pt"
p.legend.border_line_color     = None

# Right-aligned footer with data-source, framework and source-code links.
credit_div = Div(
    text="""
    <div style='text-align:right; font-size:10pt; color:#555; width:100%;'>
      Data: <a href='https://www.peakbagger.com' target='_blank'>Peakbagger</a> |
      <a href='https://www.wikipedia.org' target='_blank'>Wikipedia</a> |
      <a href='https://www.naturalearthdata.com' target='_blank'>Natural&nbsp;Earth</a>
      &nbsp;&bull;&nbsp; Map framework: <a href='https://bokeh.org' target='_blank'>Bokeh</a> |
      Source code: <a href='https://github.com/jan-mate/peak_capital_distance' target='_blank'>GitHub</a>
    </div>
    """,
    sizing_mode="stretch_width",
    margin=(0, 10, 10, 0)  # top, right, bottom, left
)

# Map on top, credits underneath.
layout = column(p, credit_div, sizing_mode="stretch_both")

# Write the layout to a standalone HTML file in the working directory.
output_file("peak_capital_distance.html",
            title="Peak–Capital Distance Map")

save(layout)


'/home/mat/Documents/peak_capital_map/peak_capital_distance.html'