In [1]:
import requests


def get_download_id(trajectory_id):
    """Register a geodata download for one trajectory and return its confirmation id.

    The trajectory's polygon is looked up in the module-level ``trajectories``
    frame and the coordinates of its exterior ring are posted to the
    geodownload backend.

    Args:
        trajectory_id: Value matched against the ``trajectoryid`` column.

    Returns:
        The ``items.confirmation`` value of the backend's JSON reply.

    Raises:
        IndexError: If no trajectory with this id exists.
        ValueError: If the reply contains no ``items.confirmation`` value.
        requests.RequestException: On network errors, timeouts or an HTTP
            error status.
    """
    polygon = trajectories[trajectories['trajectoryid'] == trajectory_id].geometry.values[0]
    coords = [[x, y] for x, y in polygon.exterior.coords]
    url = "https://mein.wien.gv.at/geodownload-backend/app/register"
    payload = {
        "data": {
            "coords": coords,
            "dataset": "KAPPAZUNDER 2020",
            "option": 2
        }
    }
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.post(url, json=payload, timeout=60)
    response.raise_for_status()

    # Fail with a clear message instead of an AttributeError on None (missing
    # "items") or a None id that would later be formatted into request URLs.
    confirmation = (response.json().get('items') or {}).get('confirmation')
    if not confirmation:
        raise ValueError(f"Register response for trajectory {trajectory_id} contains no confirmation id")
    return confirmation


def request_confirm_email(download_id):
    """Ask the geodownload service to send its confirmation mail for a download.

    Within one HTTP session the confirmation page is fetched first, then the
    backend's confirm endpoint is PATCHed with the module-level
    ``EMAIL_ADDRESS``.

    Args:
        download_id: Confirmation id of the registered download.

    Raises:
        requests.HTTPError: If the PATCH request returns an error status.
    """
    browser_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
    page_headers = {
        "User-Agent": browser_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }
    api_headers = {
        "User-Agent": browser_agent,
        "Accept": "application/json, text/plain, */*",
        "Origin": "https://mein.wien.gv.at",
        "Referer": "https://mein.wien.gv.at/",
    }

    session = requests.Session()
    try:
        # The page is requested only for its effect on the session; its response is not inspected.
        session.get(
            f"https://mein.wien.gv.at/geodownload-ui/confirm/{download_id}",
            headers=page_headers,
        )
        confirm_response = session.patch(
            f"https://mein.wien.gv.at/geodownload-backend/app/confirm/{download_id}",
            json={"mail": EMAIL_ADDRESS},
            headers=api_headers,
        )
        confirm_response.raise_for_status()
    finally:
        session.close()


def confirm_email():
    """PATCH the fixed mail-confirmation endpoint of the geodownload backend.

    Raises:
        requests.HTTPError: If the backend answers with an error status.
    """
    # NOTE(review): the id in this URL is hard-coded; presumably it is the
    # token tied to the configured mail address — confirm.
    requests.patch(
        "https://mein.wien.gv.at/geodownload-backend/app/mail/2d4bf8b8-88cb-4c9c-b29b-bf2f5ef50c8f"
    ).raise_for_status()


def download(download_id):
    """Stream ``<download_id>.tar`` from the download server into the working directory.

    Progress is reported through a tqdm bar sized from the ``content-length``
    header (0 if the header is absent).

    Args:
        download_id: Id of the download; also used as the local file stem.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = f"https://www.wien.gv.at/ogdgeodata/download/{download_id}.tar"
    headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" }
    with requests.get(url, headers=headers, stream=True) as r:
        r.raise_for_status()
        total_size_in_bytes = int(r.headers.get('content-length', 0))
        block_size = 8*1024
        # Both the bar and the file are context-managed so that an interrupted
        # transfer does not leave a dangling progress bar or open file handle.
        with tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True, desc=f"Downloading {download_id}.tar") as progress_bar, \
                open(download_id + ".tar", "wb") as f:
            for chunk in r.iter_content(chunk_size=block_size):
                progress_bar.update(len(chunk))
                f.write(chunk)


def extract_and_remove_tar(download_id):
    """Unpack ``<download_id>.tar`` into the folder ``<download_id>`` and delete the archive.

    Args:
        download_id: File stem of the archive and name of the target folder.

    Raises:
        tarfile.TarError: If the archive is unreadable or, where extraction
            filters are available, contains a member the ``data`` filter
            rejects (e.g. a path escaping the target folder). The archive is
            not deleted in that case.
    """
    tar_path = download_id + ".tar"
    with tarfile.open(tar_path, "r") as tar:
        # The archive comes from the network: use the "data" extraction filter
        # where the running Python provides it, so members cannot write
        # outside the destination folder.
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=download_id, filter="data")
        else:
            tar.extractall(path=download_id)
    os.remove(tar_path)


def remove_irrelevant_trajectories(download_id, trajectory_id):
    """Delete every trajectory folder of a download except the requested one.

    Expects the layout ``<download_id>/<single folder>/Bild-Rohdaten/`` and
    removes all sub-directories of ``Bild-Rohdaten`` whose name is not
    ``Trajektorie_<trajectory_id>``. Plain files are left alone.

    Args:
        download_id: Folder the archive was extracted into.
        trajectory_id: Id of the trajectory folder to keep.

    Raises:
        RuntimeError: If ``download_id`` does not contain exactly one sub-folder.
        FileNotFoundError: If the ``Bild-Rohdaten`` folder is missing.
    """
    entries = []
    for candidate in os.listdir(download_id):
        if os.path.isdir(os.path.join(download_id, candidate)):
            entries.append(candidate)
    if len(entries) != 1:
        raise RuntimeError(f"Expected exactly one subfolder in {download_id}, found: {entries}")
    (only_entry,) = entries
    inner_folder = os.path.join(download_id, only_entry)

    raw_images_dir = os.path.join(inner_folder, "Bild-Rohdaten")
    if not os.path.isdir(raw_images_dir):
        raise FileNotFoundError(f"No 'Bild-Rohdaten' folder found in {inner_folder}")

    folder_to_keep = f"Trajektorie_{trajectory_id}"
    for name in os.listdir(raw_images_dir):
        if name == folder_to_keep:
            continue
        candidate_path = os.path.join(raw_images_dir, name)
        if os.path.isdir(candidate_path):
            shutil.rmtree(candidate_path)


def remove_top_and_bottom_facing_images(download_id):
    """Delete the sub-folders of the trajectory folder whose name ends in ``0`` or ``5``.

    The trajectory folder is located as
    ``<download_id>/<first sub-folder>/Bild-Rohdaten/<first sub-folder>``.
    Going by the function name, the folders ending in 0 and 5 presumably hold
    the top- and bottom-facing sensor images.

    Args:
        download_id: Folder the archive was extracted into.

    Raises:
        FileNotFoundError: If an expected folder level is missing.
    """
    def first_subdirectory(parent):
        """Return the alphabetically first sub-directory of ``parent``."""
        # Only directories qualify: os.listdir() order is arbitrary and stray
        # files (e.g. OS metadata files) must not be mistaken for the folder.
        subdirs = sorted(
            name for name in os.listdir(parent)
            if os.path.isdir(os.path.join(parent, name))
        )
        if not subdirs:
            raise FileNotFoundError(f"No subfolder found in {parent}")
        return os.path.join(parent, subdirs[0])

    bild_rohdaten_path = os.path.join(first_subdirectory(download_id), "Bild-Rohdaten")
    trajectory_folder = first_subdirectory(bild_rohdaten_path)
    for name in os.listdir(trajectory_folder):
        path = os.path.join(trajectory_folder, name)
        # rmtree() fails on plain files, so only matching directories are removed.
        if name.endswith(("0", "5")) and os.path.isdir(path):
            shutil.rmtree(path)


def set_exif_tags(download_id):
    """Write GPS position and capture time into the EXIF data of every JPG of a download.

    All ``.jpg`` files below the trajectory folder are looked up by file name
    in the module-level ``points`` frame; its ``lat``, ``lon`` and ``epoch``
    values are written as GPS latitude/longitude and ``DateTimeOriginal``.
    Images without a matching row are skipped with a warning. Each image is
    re-saved in place as JPEG.

    Args:
        download_id: Folder the archive was extracted into.
    """
    def deg_to_dms_rational(deg_float):
        """
        Convert decimal degrees to EXIF DMS rational format.

        The sign is dropped; the hemisphere is expressed through the *Ref tags.
        """
        deg_abs = abs(deg_float)
        deg = int(deg_abs)
        min_float = (deg_abs - deg) * 60
        min_ = int(min_float)
        sec = round((min_float - min_) * 60 * 10000)
        # Rounding can produce exactly 60 seconds; carry it over so the
        # minutes/seconds fields stay below 60.
        if sec >= 60 * 10000:
            sec -= 60 * 10000
            min_ += 1
        if min_ >= 60:
            min_ -= 60
            deg += 1
        return ((deg, 1), (min_, 1), (sec, 10000))

    bild_rohdaten_path = os.path.join(download_id, os.listdir(download_id)[0], "Bild-Rohdaten")
    trajectory_folder = os.path.join(bild_rohdaten_path, os.listdir(bild_rohdaten_path)[0])

    jpg_paths = []
    for root, _, files in os.walk(trajectory_folder):
        for file in files:
            if file.lower().endswith(".jpg"):
                jpg_paths.append(os.path.join(root, file))

    # The image name may be a regular column or, after set_index("image_name"),
    # the index of `points`; support both.
    if "image_name" in points.columns:
        image_names = points["image_name"]
    else:
        image_names = points.index

    for img_path in tqdm(jpg_paths, desc="Tagging images"):
        file = os.path.basename(img_path)
        row = points.loc[image_names == file]
        if row.empty:
            print(f"⚠️ No metadata found for {file}, skipping.")
            continue

        lat = float(row["lat"].iloc[0])
        lon = float(row["lon"].iloc[0])
        timestamp = row["epoch"].iloc[0].strftime("%Y:%m:%d %H:%M:%S")

        # img_path already is the full path collected above. It must not be
        # rebuilt from `root`, which after the walk points at the last visited
        # directory only.
        with Image.open(img_path) as img:
            # Load existing EXIF or create new. An image without EXIF gets an
            # empty structure rather than handing empty bytes to piexif.load().
            raw_exif = img.info.get("exif")
            if raw_exif:
                exif_dict = piexif.load(raw_exif)
            else:
                exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "Interop": {}, "1st": {}, "thumbnail": None}

            # GPS (hemisphere derived from the sign of the coordinate)
            exif_dict["GPS"] = {
                piexif.GPSIFD.GPSLatitudeRef: b"N" if lat >= 0 else b"S",
                piexif.GPSIFD.GPSLatitude: deg_to_dms_rational(lat),
                piexif.GPSIFD.GPSLongitudeRef: b"E" if lon >= 0 else b"W",
                piexif.GPSIFD.GPSLongitude: deg_to_dms_rational(lon),
            }

            # DateTimeOriginal
            exif_dict["Exif"][piexif.ExifIFD.DateTimeOriginal] = timestamp.encode("utf-8")

            # Insert EXIF back into image
            exif_bytes = piexif.dump(exif_dict)
            img.save(img_path, "jpeg", exif=exif_bytes)

In [2]:
import imaplib
import email
from email.header import decode_header
import re

class ReadyToDownloadChecker:
    """Caches notification mails from a Gmail inbox and extracts download ids from them.

    On construction the checker logs in via IMAP and fetches every inbox mail
    whose subject matches the filter. ``refresh()`` reconnects and adds the
    matching mails that are still unseen; ``get_ids()`` returns the download
    GUIDs found in the cached mail bodies.
    """

    # Exact URL pattern with GUID
    GUID_REGEX = re.compile(
        r'https://www\.wien\.gv\.at/ogdgeodata/download/'
        r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.tar'
    )

    def __init__(self, email_address, app_password, email_subject_filter):
        """Log in and load all mails matching ``email_subject_filter``.

        Args:
            email_address: Login name for the IMAP server.
            app_password: Password used for the IMAP login.
            email_subject_filter: Text used in the IMAP SUBJECT search.
        """
        self.email_address = email_address
        self.app_password = app_password
        self.subject_filter = email_subject_filter
        self.imap = None
        self.emails = []  # List of (subject, body)
        self._connect()
        self._fetch_emails_by_subject()

    def _connect(self):
        """(Re)open the IMAP connection and select the inbox."""
        if self.imap:
            try:
                self.imap.logout()
            except Exception:
                # Best effort: the old connection may already be dead. Unlike a
                # bare except this lets KeyboardInterrupt/SystemExit through.
                pass
        self.imap = imaplib.IMAP4_SSL("imap.gmail.com")
        self.imap.login(self.email_address, self.app_password)
        self.imap.select("inbox")

    def _fetch_emails_by_subject(self):
        """Fetch and cache every mail whose subject matches the filter."""
        search_criterion = f'(SUBJECT "{self.subject_filter}")'
        status, messages = self.imap.search(None, search_criterion)
        if status != "OK":
            return
        for num in messages[0].split():
            self._fetch_email(num)

    @staticmethod
    def _decode_bytes(raw, charset):
        """Decode ``raw`` with ``charset``, falling back to UTF-8 if it is missing or unknown."""
        try:
            return raw.decode(charset or "utf-8", errors="ignore")
        except LookupError:
            return raw.decode("utf-8", errors="ignore")

    @classmethod
    def _decode_subject(cls, raw_subject):
        """Join all encoded-word fragments of a Subject header into one string."""
        fragments = []
        for fragment, charset in decode_header(raw_subject):
            if isinstance(fragment, bytes):
                fragment = cls._decode_bytes(fragment, charset)
            fragments.append(fragment)
        return "".join(fragments)

    @classmethod
    def _decode_part(cls, part):
        """Return the decoded text payload of a message part ('' if it has none)."""
        payload = part.get_payload(decode=True)
        if payload is None:
            return ""
        return cls._decode_bytes(payload, part.get_content_charset())

    def _fetch_email(self, num):
        """Fetch message ``num`` and append its ``(subject, body)`` to the cache.

        Only ``text/plain`` parts of multipart messages contribute to the body.
        Responses that are not OK or carry no message data are ignored.
        """
        status, msg_data = self.imap.fetch(num, "(RFC822)")
        if status != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
            return
        msg = email.message_from_bytes(msg_data[0][1])
        subject = self._decode_subject(msg.get("Subject") or "")
        if msg.is_multipart():
            body = "".join(
                self._decode_part(part)
                for part in msg.walk()
                if part.get_content_type() == "text/plain"
            )
        else:
            body = self._decode_part(msg)
        self.emails.append((subject, body))

    def refresh(self):
        """Fetch only unseen emails with matching subject"""
        try:
            self._connect()
            search_criterion = f'(UNSEEN SUBJECT "{self.subject_filter}")'
            status, messages = self.imap.search(None, search_criterion)
            if status == "OK":
                for num in messages[0].split():
                    self._fetch_email(num)
        except Exception as e:
            # A failed refresh is reported but not fatal; the cache keeps its old content.
            print("Error refreshing mail:", e)

    def get_ids(self):
        """Return a deduplicated list of GUIDs from cached emails"""
        guids = set()
        for _, body in self.emails:
            matches = self.GUID_REGEX.findall(body)
            guids.update(matches)
        return list(guids)


In [3]:
import tarfile
import os
import shutil
import piexif
from PIL import Image
from tqdm import tqdm
from datetime import timedelta
from time import sleep
import geopandas as gpd
import pandas as pd
from urllib.error import HTTPError


# Run configuration.
# Passwords are read from the environment so that no secret is stored in the
# notebook. NOTE(review): the passwords that used to be written here should be
# treated as leaked and rotated.
MAPILLARY_USER = os.environ.get("MAPILLARY_USER", "eliasgander")
MAPILLARY_EMAIL = os.environ.get("MAPILLARY_EMAIL", "daring.64tum@icloud.com")
MAPILLARY_PASSWORD = os.environ.get("MAPILLARY_PASSWORD")
EMAIL_ADDRESS = os.environ.get("EMAIL_ADDRESS", "tomturbo657@gmail.com")
GMAIL_APP_PASSWORD = os.environ.get("GMAIL_APP_PASSWORD")
DOWNLOAD_READY_SUBJECT = "Download-Link zu Ihren Geodaten"
IS_DEBUG = True

# Fail early with a clear message instead of a confusing login error later on.
_missing_secrets = [
    name
    for name, value in (
        ("MAPILLARY_PASSWORD", MAPILLARY_PASSWORD),
        ("GMAIL_APP_PASSWORD", GMAIL_APP_PASSWORD),
    )
    if not value
]
if _missing_secrets:
    raise RuntimeError(f"Missing required environment variables: {', '.join(_missing_secrets)}")

# Load the per-image metadata and the trajectory table from their GeoPackages.
points = gpd.read_file("points.gpkg")
trajectories_path = "trajectories.gpkg"
trajectories = gpd.read_file(trajectories_path)
# From here on image_name is the index of `points`, no longer a column.
points = points.set_index("image_name")
if IS_DEBUG:
  # Debug runs are restricted to three fixed trajectories.
  trajectories = trajectories[trajectories['trajectoryid'].isin(['17720', '16101', '16471'])]
# Disabled debug helpers that would assign a download id and a seven-day expiry by hand:
#   trajectories.loc[trajectories['trajectoryid'] == '17720', ['download_id', 'download_expires_at']] = ['', pd.Timestamp.now() + timedelta(days=7)]
#   trajectories.loc[trajectories['trajectoryid'] == '16101', ['download_id', 'download_expires_at']] = ['', pd.Timestamp.now() + timedelta(days=7)]
#   trajectories.loc[trajectories['trajectoryid'] == '16471', ['download_id', 'download_expires_at']] = ['', pd.Timestamp.now() + timedelta(days=7)]

# IPython shell escape: authenticates the mapillary_tools CLI. The {NAME}
# placeholders are filled in from the Python variables of the same name.
!mapillary_tools authenticate --user_name {MAPILLARY_USER} --user_email {MAPILLARY_EMAIL} --user_password {MAPILLARY_PASSWORD}

# Constructing the checker already connects to the mailbox and fetches the
# mails matching the subject filter.
ready_to_download = ReadyToDownloadChecker(EMAIL_ADDRESS, GMAIL_APP_PASSWORD, DOWNLOAD_READY_SUBJECT)

def save_trajectories():
    """Write the trajectories table back to its GeoPackage; does nothing while IS_DEBUG is set."""
    if IS_DEBUG:
        return
    trajectories.to_file(trajectories_path, driver="GPKG")

sensors_completed_column_names = [f"is_sensor{i}_completed" for i in range(1, 5)]

while not trajectories[sensors_completed_column_names].to_numpy().all():
    print(f"Number of uncompleted trajectories: {trajectories[~trajectories[sensors_completed_column_names].to_numpy().all(axis=1)].shape[0]}")

    expiring_trajectories = trajectories[trajectories['download_expires_at'].notna() & (trajectories['download_expires_at'] < pd.Timestamp.now() + timedelta(hours=5))]
    if not expiring_trajectories.empty:
        print(f"Resetting expiring trajectories with IDs: {expiring_trajectories['trajectoryid'].tolist()}")
        trajectories.loc[expiring_trajectories.index, ['download_id', 'download_expires_at']] = [None, None]

    ready_to_download.refresh()
    trajectories_to_download = trajectories[trajectories['download_id'].isin(ready_to_download.get_ids())]
    if len(trajectories_to_download) <= 5:
        print(f"Only {len(trajectories_to_download)} downloadable trajectories left")
        trajectories_to_prepare = trajectories[trajectories['download_id'].isna()]
        if trajectories_to_prepare.empty:
            print("No trajectories left to prepare")
        else:
            trajectories_to_prepare = trajectories_to_prepare.sample(min(10, len(trajectories)))
            print(f"Preparing {len(trajectories_to_prepare)} more trajectories with ids: {trajectories_to_prepare['trajectoryid'].tolist()}")
            for index, trajectory in trajectories_to_prepare.iterrows():
                trajectory_id = trajectory['trajectoryid']
                try:
                    download_id = get_download_id(trajectory_id)
                    request_confirm_email(download_id)
                    sleep(60)
                    confirm_email()
                    trajectories.loc[trajectories['trajectoryid'] == trajectory_id, ['download_id', 'download_expires_at']] = [download_id, pd.Timestamp.now() + timedelta(days=7)]
                    save_trajectories()
                    print(f"Successfully prepared trajectoryid {trajectory_id} with downloadid {download_id}")
                except Exception as e:
                    print(f"Error preparing trajectoryid {trajectory_id} with downloadid {download_id}: {e}")

    if trajectories_to_download.empty:
        print("No trajectories ready for download. Sleeping five minutes.")
        sleep(300)
        continue

    trajectory_to_download = trajectories_to_download.sort_values('download_expires_at').iloc[0]
    trajectory_id = trajectory_to_download['trajectoryid']
    download_id = trajectory_to_download['download_id']
    try:
        download(download_id)
    except HTTPError as e:
        if e.code == 404:
          print(f"Resetting downloadid {download_id} of trajectory {trajectory_id} because 404 error: {e}")
          trajectories.loc[trajectories['trajectoryid'] == trajectory_id, ['download_id', 'download_expires_at']] = [None, None]
          save_trajectories()
        else:
          print(f"Sleeping for five minutes because download of trajectory {trajectory_id} with downloadid {download_id} failed with message: {e}")
          sleep(300)
        continue

    extract_and_remove_tar(download_id)
    remove_irrelevant_trajectories(download_id, trajectory_to_download['original_trajectoryid'])
    remove_top_and_bottom_facing_images(download_id)
    set_exif_tags(download_id)
    for i in range(1, 5):
        if trajectories[trajectories['trajectoryid'] == trajectory_id][f'is_sensor{i}_completed'].iloc[0]:
          continue
        !mapillary_tools process_and_upload \
              --overwrite_all_EXIF_tags \
                save_trajectories Teledyne \
              --device_model Ladybug6 \
              --offset_angle {(i - 1) * 90} \
              --interpolate_directions \
              --user_name {MAPILLARY_USER} \
              --noresume \
              {download_id}/*/Bild-Rohdaten/Sensor_*{i}
        trajectories.loc[trajectories['trajectoryid'] == trajectory_id, f'is_sensor{i}_completed'] = True
        commit_trajectories()
    shutil.rmtree(download_id)


print("All trajectories completed.")

Existing Mapillary profiles:
       Profile name                              User ID                         Username
    1. eliasgander                       855778393453994                              N/A
11:23:39.607 - INFO    - Profile "eliasgander" updated: {'user_upload_token': '[REDACTED]', 'MAPSettingsUserKey': '855778393453994'}


TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond