In [1]:
from pathlib import Path
import re
import yaml
from urllib.parse import urlparse, parse_qs, urlencode

import requests

In [74]:
# Markdown index whose "[Playground](...)" links are rewritten further down.
index_path = "../modis/indexdb/README.md"
# Load the whole file as one string so links can be replaced in place.
indices = Path(index_path).read_text()

In [75]:
# Collect the target of every "[Playground](...)" markdown link.
# Raw string: "\[" and "\(" are regex escapes, not Python string escapes.
example_links = re.findall(r"(?<=\[Playground\]\()(.*?)(?=\))", indices)

In [76]:
def to_sh(url, dataset_id="AWS_LOTL2", zoom="11"):
    """Rewrite a link so it opens the same evalscript in EO Browser.

    Only the ``evalscripturl`` query parameter of ``url`` is carried over;
    location and time range are replaced by fixed demo values.

    Args:
        url: Source link; its query string must contain ``evalscripturl``.
        dataset_id: Value for the ``datasetId`` parameter of the new link.
        zoom: Map zoom level of the new link (converted with ``str``).

    Returns:
        The EO Browser URL (host ``apps.sentinel-hub.com``, path
        ``/eo-browser/``, fragment ``custom-script``).

    Raises:
        KeyError: If ``url`` has no ``evalscripturl`` query parameter.
    """
    parts = urlparse(url)
    old_query = parse_qs(parts.query)
    query = {
        "datasetId": [dataset_id],
        "lat": ["42.86691"],
        "lng": ["10.78033"],
        "zoom": [str(zoom)],
        "fromTime": ["2020-07-05T00:00:00.000Z"],
        "toTime": ["2020-07-05T23:59:59.999Z"],
        # parse_qs yields a list per key; doseq=True below expands it again.
        "evalscripturl": old_query["evalscripturl"],
    }
    new_parts = parts._replace(
        netloc="apps.sentinel-hub.com",
        path="/eo-browser/",
        query=urlencode(query, doseq=True),
        fragment="custom-script",
    )
    return new_parts.geturl()

In [77]:
def modis_to_sh(url):
    """Turn a link into an EO Browser link for the MODIS dataset.

    The ``evalscripturl`` query parameter of ``url`` is kept; every other
    parameter is replaced by fixed values (location, zoom 8, one-day range).
    Raises ``KeyError`` when ``url`` carries no ``evalscripturl``.
    """
    source = urlparse(url)
    script_urls = parse_qs(source.query)["evalscripturl"]
    browser_params = {
        "datasetId": "MODIS",
        "lat": "42.86691",
        "lng": "10.78033",
        "zoom": "8",
        "fromTime": "2020-07-05T00:00:00.000Z",
        "toTime": "2020-07-05T23:59:59.999Z",
        # Stays a list: doseq=True emits one key=value pair per element.
        "evalscripturl": script_urls,
    }
    encoded = urlencode(browser_params, doseq=True)
    target = source._replace(
        netloc="apps.sentinel-hub.com",
        path="/eo-browser/",
        query=encoded,
        fragment="custom-script",
    )
    return target.geturl()

In [78]:
# Swap every collected Playground link for its EO Browser (MODIS) equivalent.
# Note: this rebinds `indices`, so the cell is not idempotent on re-run.
for url in example_links:
    indices = indices.replace(url, modis_to_sh(url))
    
# Replace every occurrence of the word "Playground" (link labels included),
# then overwrite the index file in place with the rewritten text.
indices = indices.replace("Playground", "EO Browser")
with open(index_path, "w") as fs:
    fs.write(indices)

In [2]:
from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class ExampleDefinition:
    """Parameters of one browser example, as stored in a README's front matter.

    Annotations describe the intended types; dataclasses do not enforce them,
    so callers may pass raw query-string values (strings) as well.
    """
    zoom: int
    lat: float
    lng: float
    datasetId: str  # dataset identifier, e.g. "S2L2A" or "S2L1C"
    fromTime: str
    toTime: str
    platform: list
    evalscripturl: Optional[str] = None
    evalscript: Optional[str] = None
    name: Optional[str] = None
    additionalQueryParams: Optional[list] = None

    def dict(self):
        """Return the fields as a plain dict, omitting unset/empty ones.

        ``None``, empty strings and empty containers are left out. Numeric
        zeros are kept, because ``zoom=0`` or ``lat=0.0`` are real values.
        """
        def is_blank(value):
            # Only "nothing there" counts as blank, not falsy numbers.
            return value is None or (hasattr(value, "__len__") and len(value) == 0)

        return {key: value for key, value in asdict(self).items() if not is_blank(value)}

In [3]:
# Maps the "datasetId" found in Copernicus (CDSE) browser links to the
# dataset id stored in the example definition.
cdse_lookup = {
    "S2_L2A_CDAS": "S2L2A",
    "S2_L1C_CDAS": "S2L1C",
    "S1_CDAS_IW_VVVH": "S1_AWS_IW_VVVH",
    "S3SLSTR_CDAS": "S3SLSTR",
    "S3OLCI_CDAS": "S3OLCI",
    "S5_AER_AI_CDAS": "S5_AER_AI",
    "S5_CH4_CDAS": "S5_CH4",
    "S5_NO2_CDAS": "S5_NO2",
    "S5_CLOUD_CDAS": "S5_CLOUD",
    "S5_CO_CDAS": "S5_CO",
    "S5_HCHO_CDAS": "S5_HCHO",
    "S5_O3_CDAS": "S5_O3",
    "S5_SO2_CDAS": "S5_SO2",
}

# Maps the "source" parameter of Sentinel Playground links to a dataset id.
playground_lookup = {
    "S2": "S2L2A",
    "S2L2A": "S2L2A",
    "S1_AWS_IW_VVVH": "S1_AWS_IW_VVVH",
    "S1-AWS-IW-VVVH": "S1_AWS_IW_VVVH",
}

# Maps the "datasource" display name used by some EO Browser links to a
# dataset id.
datasource_lookup = {
    "Sentinel-2 L1C": "S2L1C",
    "Sentinel-2 L2A": "S2L2A",
    # NOTE(review): this key looks like a typo for "Sentinel-2 L2A"; kept so
    # existing lookups keep resolving. Confirm and remove if unused.
    "Sentinel-1 L2A": "S2L2A",
    "Sentinel-1 AWS (S1-AWS-IW-VVVH)": "S1_AWS_IW_VVVH",
}

In [4]:
def get_urls_to_convert(urls):
    """Pick the links worth converting from ``urls``.

    Links containing "sentinel-playground" are used only as a fallback: if
    any other link is present, just those others are returned (as a new
    list); otherwise ``urls`` itself is returned unchanged.
    """
    preferred = list(filter(lambda link: "sentinel-playground" not in link, urls))
    return preferred if preferred else urls
        
def resolve_redirect(url, timeout=30):
    """Follow the redirects of ``url`` and return the final URL.

    Args:
        url: Link to request (e.g. a short link).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the notebook indefinitely.

    Returns:
        The URL of the final response, as reported by ``requests``.
    """
    response = requests.get(url, timeout=timeout)
    return response.url

def parse_url(url):
    """Dispatch ``url`` to the parser matching the service it points to.

    Short links (``sentinelshare.page.link``, ``link.dataspace.copernicus.eu``)
    are resolved to their target first and then parsed.

    Returns:
        The value returned by the selected parser.

    Raises:
        ValueError: If ``url`` matches none of the known services.
        Any exception raised while resolving or parsing is re-raised after the
        offending URL has been printed.
    """
    try:
        # Find which service it is; short links are checked before the
        # generic host patterns they would otherwise also match.
        if "sentinelshare.page.link" in url:
            url = resolve_redirect(url)
            return parse_sh(url)
        if "link.dataspace.copernicus.eu" in url:
            url = resolve_redirect(url)
            return parse_cdse(url)
        if "sentinel-playground" in url:
            return parse_playground(url)
        if "apps.sentinel-hub.com/eo-browser" in url:
            return parse_sh(url)
        if "dataspace.copernicus.eu/browser" in url:
            return parse_cdse(url)
    except BaseException:
        # Print the URL being processed so the failing link can be found.
        print(url)
        raise
    # Fail loudly instead of returning None, which callers would only
    # trip over later with an unrelated AttributeError.
    raise ValueError(f"Unrecognised example link: {url}")

def parse_sh(url):
    """Build an ExampleDefinition from the query string of an EO Browser URL.

    Viewer-only parameters are discarded, the known fields are extracted, and
    whatever remains is kept as ``additionalQueryParams``. ``lat`` and ``lng``
    are required (``KeyError`` otherwise); ``zoom`` defaults to "10".
    """
    params = parse_qs(urlparse(url).query)
    for dropped in ("demSource3D", "themeId", "visualizationUrl"):
        params.pop(dropped, None)

    # "datasource" is only a fallback for links without "datasetId".
    datasource_name = params.pop("datasource", [None])[0]
    fallback_dataset = datasource_lookup.get(datasource_name, None)

    zoom = params.pop("zoom", ["10"])[0]
    lat = params.pop("lat")[0]
    lng = params.pop("lng")[0]
    script_url = params.pop("evalscripturl", [None])[0]
    script = params.pop("evalscript", [None])[0]
    dataset = params.pop("datasetId", [fallback_dataset])[0]
    # "time" stands in for a missing fromTime/toTime. It is only read for the
    # start so it is still available for the end, where it is always removed.
    start = params.pop("fromTime", params.get("time"))[0]
    end = params.pop("toTime", params.pop("time", None))[0]
    # Everything not consumed above is passed through, first value only.
    leftovers = [[key, values[0]] for key, values in params.items()]

    return ExampleDefinition(
        zoom=zoom,
        lat=lat,
        lng=lng,
        evalscripturl=script_url,
        evalscript=script,
        datasetId=dataset,
        fromTime=start,
        toTime=end,
        additionalQueryParams=leftovers,
        platform=["CDSE", "EOB"],
    )

def parse_playground(url):
    """Build an ExampleDefinition from a Sentinel Playground URL.

    ``source`` (default "S2L2A") is translated through ``playground_lookup``
    and ``time`` (default "2023-07-01|2023-08-01") is split on "|" into the
    start and end of the range. ``lat`` and ``lng`` are required; unconsumed
    parameters end up in ``additionalQueryParams``.
    """
    params = parse_qs(urlparse(url).query)
    for dropped in ("preset", "layers", "gain", "gamma", "atmFilter", "showDates", "temporal"):
        params.pop(dropped, None)

    source_name = params.pop("source", ["S2L2A"])[0]
    dataset = playground_lookup[source_name]
    time_range = params.pop("time", ["2023-07-01|2023-08-01"])[0]
    # Exactly two parts are expected; anything else raises ValueError here.
    start, end = time_range.split("|")

    zoom = params.pop("zoom", ["10"])[0]
    lat = params.pop("lat")[0]
    lng = params.pop("lng")[0]
    script_url = params.pop("evalscripturl", [None])[0]
    script = params.pop("evalscript", [None])[0]
    # Everything not consumed above is passed through, first value only.
    leftovers = [[key, values[0]] for key, values in params.items()]

    return ExampleDefinition(
        zoom=zoom,
        lat=lat,
        lng=lng,
        evalscripturl=script_url,
        evalscript=script,
        datasetId=dataset,
        fromTime=start,
        toTime=end,
        additionalQueryParams=leftovers,
        platform=["CDSE", "EOB"],
    )

def parse_cdse(url):
    """Build an ExampleDefinition from a Copernicus (CDSE) browser URL.

    ``datasetId`` is required and translated through ``cdse_lookup``;
    ``zoom``, ``lat``, ``lng``, ``fromTime`` and ``toTime`` are required too
    (``KeyError`` when absent). Unconsumed parameters end up in
    ``additionalQueryParams``.
    """
    params = parse_qs(urlparse(url).query)
    for dropped in ("demSource3D", "themeId", "visualizationUrl", "dateMode"):
        params.pop(dropped, None)

    dataset = cdse_lookup[params.pop("datasetId")[0]]

    zoom = params.pop("zoom")[0]
    lat = params.pop("lat")[0]
    lng = params.pop("lng")[0]
    script_url = params.pop("evalscripturl", [None])[0]
    script = params.pop("evalscript", [None])[0]
    start = params.pop("fromTime")[0]
    end = params.pop("toTime")[0]
    # Everything not consumed above is passed through, first value only.
    leftovers = [[key, values[0]] for key, values in params.items()]

    return ExampleDefinition(
        zoom=zoom,
        lat=lat,
        lng=lng,
        evalscripturl=script_url,
        evalscript=script,
        datasetId=dataset,
        fromTime=start,
        toTime=end,
        additionalQueryParams=leftovers,
        platform=["CDSE", "EOB"],
    )

def split_frontmatter_content(content):
    """Split a markdown document into parsed YAML front matter and body.

    The document is cut at the first two "---\\n" delimiters only, so further
    "---" lines inside the body (e.g. horizontal rules) stay part of the body.

    Args:
        content: Full text of the document, starting with a "---" line.

    Returns:
        Tuple ``(front_matter, body)``: the front matter as loaded by
        ``yaml.safe_load`` and the text after the closing delimiter.
    """
    # maxsplit=2 -> [text before opening delimiter, front matter, body]
    fm_split = content.split("---\n", 2)
    front_matter = yaml.safe_load(fm_split[1])
    content = fm_split[-1]
    return front_matter, content

In [5]:
too_many = []

def parse_example(path):
    """Move the example link of a README into its YAML front matter.

    Looks for the section starting with "## Evaluate", collects the link
    targets inside it, converts the first usable link into an example
    definition stored under the ``examples`` front-matter key, removes that
    section from the body, and rewrites the file in place.

    Args:
        path: Path of the markdown file to rewrite.

    Returns:
        ``-1`` if the file has no "## Evaluate" section; the number of
        candidate links if there are none or more than two (file untouched);
        ``None`` after the file has been rewritten.
    """
    with open(path, 'r') as original:
        fm, content = split_frontmatter_content(original.read())

    start_examples = content.find("## Evaluate")
    if start_examples == -1:
        return -1
    # Skip the "##" of the section's own heading, then cut at the next "##".
    # partition() also copes with the section being the last one in the file
    # (next_heading and remaining_content are then empty strings).
    examples, next_heading, remaining_content = content[start_examples + 2:].partition("##")
    # Raw string: "\(" and "\)" are regex escapes, not Python string escapes.
    urls = re.findall(r"(?<=\()(.*?)(?=\))", examples)
    to_convert = get_urls_to_convert(urls)
    if len(to_convert) > 2 or len(to_convert) == 0:
        return len(to_convert)
    try:
        # Use the filtered list so a playground link is not picked when a
        # better link is available.
        example = parse_url(to_convert[0]).dict()
    except BaseException:
        # Print the file being processed so the failing README can be found.
        print(path)
        raise
    fm["examples"] = [example]
    new_fm = yaml.dump(fm, sort_keys=False)
    # The closing delimiter needs its own line; the body is appended as read.
    new_content = "---\n" + new_fm + "---\n" + content[:start_examples] + next_heading + remaining_content

    with open(path, "w") as fs:
        fs.write(new_content)


In [7]:
# Every markdown file below ../slstr/, searched recursively.
readmeas = [*Path("../slstr/").rglob("*.md")]

In [8]:
# Convert each README in turn, keeping one status value per file.
output = list(map(parse_example, readmeas))

In [174]:
# EO Browser (EOB) link with the evalscript embedded directly in the URL, single-date range
eob_eval = "https://apps.sentinel-hub.com/eo-browser/?zoom=10&lat=42.76703&lng=11.22847&themeId=DEFAULT-THEME&visualizationUrl=https://services.sentinel-hub.com/ogc/wms/bd86bcc0-f318-402b-a145-015f85b9427e&evalscript=Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEzY2M1OV0sCiAgICBbMC4yLCAweDkxYmY1MV0sCiAgICBbMC4yNSwgMHg3ZmIyNDddLAogICAgWzAuMywgMHg3MGEzM2ZdLAogICAgWzAuMzUsIDB4NjA5NjM1XSwKICAgIFswLjQsIDB4NGY4OTJkXSwKICAgIFswLjQ1LCAweDNmN2MyM10sCiAgICBbMC41LCAweDMwNmQxY10sCiAgICBbMC41NSwgMHgyMTYwMTFdLAogICAgWzAuNiwgMHgwZjU0MGFdLAogICAgWzEsIDB4MDA0NDAwXSwKXTsKCmNvbnN0IHZpc3VhbGl6ZXIgPSBuZXcgQ29sb3JSYW1wVmlzdWFsaXplcihyYW1wKTsKCmZ1bmN0aW9uIGV2YWx1YXRlUGl4ZWwoc2FtcGxlcykgewogICAgbGV0IHZhbCA9IGluZGV4KHNhbXBsZXMuQjA4LCBzYW1wbGVzLkIwNCk7CiAgICAvLyBUaGUgbGlicmFyeSBmb3IgdGlmZnMgd29ya3Mgd2VsbCBvbmx5IGlmIHRoZXJlIGlzIG9ubHkgb25lIGNoYW5uZWwgcmV0dXJuZWQuCiAgICAvLyBTbyB3ZSBlbmNvZGUgdGhlICJubyBkYXRhIiBhcyBOYU4gaGVyZSBhbmQgaWdub3JlIE5hTnMgb24gZnJvbnRlbmQuCgogICAgLy8gVE9ETzogQ0hFQ0sgSUYgVEhJUyBpbmRleFZhbCBJUyBTVElMTCBORUNFU1NBUlkKICAgIGNvbnN0IGluZGV4VmFsID0gc2FtcGxlcy5kYXRhTWFzayA9PT0gMSA/IHZhbCA6IE5hTjsKICAgIGNvbnN0IGltZ1ZhbHMgPSB2aXN1YWxpemVyLnByb2Nlc3ModmFsKTsKCiAgICByZXR1cm4gewogICAgICAgIGRlZmF1bHQ6IGltZ1ZhbHMuY29uY2F0KHNhbXBsZXMuZGF0YU1hc2spLAogICA
gICAgIGluZGV4OiBbaW5kZXhWYWxdLAogICAgICAgIGVvYnJvd3NlclN0YXRzOiBbdmFsLCBpc0Nsb3VkKHNhbXBsZXMpID8gMSA6IDBdLAogICAgICAgIGRhdGFNYXNrOiBbc2FtcGxlcy5kYXRhTWFza10KICAgIH07Cn0KCmZ1bmN0aW9uIGlzQ2xvdWQoc2FtcGxlcykgewogICAgY29uc3QgTkdEUiA9IGluZGV4KHNhbXBsZXMuQjAzLCBzYW1wbGVzLkIwNCk7CiAgICBjb25zdCBiUmF0aW8gPSAoc2FtcGxlcy5CMDMgLSAwLjE3NSkgLyAoMC4zOSAtIDAuMTc1KTsKICAgIHJldHVybiBiUmF0aW8gPiAxIHx8IChiUmF0aW8gPiAwICYmIE5HRFIgPiAwKTsKfQo%3D&datasetId=S2L2A&fromTime=2020-07-12T00:00:00.000Z&toTime=2020-07-12T23:59:59.999Z&demSource3D=%22MAPZEN%22#custom-script"
# Break the sample link into its query parameters.
parts = urlparse(eob_eval)
query = parse_qs(parts.query)
# Drop the viewer-only parameters (each must be present, else KeyError).
del query["demSource3D"], query["themeId"], query["visualizationUrl"]
# Display what is left.
query

{'zoom': ['10'],
 'lat': ['42.76703'],
 'lng': ['11.22847'],
 'evalscript': ['Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEzY2M1OV0sCiAgICBbMC4yLCAweDkxYmY1MV0sCiAgICBbMC4yNSwgMHg3ZmIyNDddLAogICAgWzAuMywgMHg3MGEzM2ZdLAogICAgWzAuMzUsIDB4NjA5NjM1XSwKIC

In [119]:
# Parse the sample EO Browser link and show the resulting fields as a dict.
parse_sh(eob_eval).dict()

{'zoom': '10',
 'lat': '42.76703',
 'lng': '11.22847',
 'datasetId': 'S2L2A',
 'fromTime': '2020-07-12T00:00:00.000Z',
 'toTime': '2020-07-12T23:59:59.999Z',
 'platform': ['CDSE', 'EOB'],
 'evalscript': 'Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEz

In [175]:
# Round trip: parse a URL's query string and rebuild the URL from it.
# NOTE(review): `url` is not assigned in this cell; it relies on whatever an
# earlier cell left in the kernel — confirm which link was intended.
parts = urlparse(url)
query = parse_qs(parts.query)
build_url = parts._replace(query=urlencode(query, doseq=True)).geturl()

In [176]:
# Display the rebuilt URL.
build_url

'https://apps.sentinel-hub.com/eo-browser/?zoom=10&lat=42.76703&lng=11.22847&evalscript=Ly9WRVJTSU9OPTMKZnVuY3Rpb24gc2V0dXAoKSB7CiAgICByZXR1cm4gewogICAgICAgIGlucHV0OiBbIkIwMyIsICJCMDQiLCAiQjA4IiwgImRhdGFNYXNrIl0sCiAgICAgICAgb3V0cHV0OiBbCiAgICAgICAgICAgIHsgaWQ6ICJkZWZhdWx0IiwgYmFuZHM6IDQgfSwKICAgICAgICAgICAgeyBpZDogImluZGV4IiwgYmFuZHM6IDEsIHNhbXBsZVR5cGU6ICJGTE9BVDMyIiB9LAogICAgICAgICAgICB7IGlkOiAiZW9icm93c2VyU3RhdHMiLCBiYW5kczogMiwgc2FtcGxlVHlwZTogJ0ZMT0FUMzInIH0sCiAgICAgICAgICAgIHsgaWQ6ICJkYXRhTWFzayIsIGJhbmRzOiAxIH0KICAgICAgICBdCiAgICB9Owp9Cgpjb25zdCByYW1wID0gWwogICAgWy0wLjUsIDB4MGMwYzBjXSwKICAgIFstMC4yLCAweGJmYmZiZl0sCiAgICBbLTAuMSwgMHhkYmRiZGJdLAogICAgWzAsIDB4ZWFlYWVhXSwKICAgIFswLjAyNSwgMHhmZmY5Y2NdLAogICAgWzAuMDUsIDB4ZWRlOGI1XSwKICAgIFswLjA3NSwgMHhkZGQ4OWJdLAogICAgWzAuMSwgMHhjY2M2ODJdLAogICAgWzAuMTI1LCAweGJjYjc2Yl0sCiAgICBbMC4xNSwgMHhhZmMxNjBdLAogICAgWzAuMTc1LCAweGEzY2M1OV0sCiAgICBbMC4yLCAweDkxYmY1MV0sCiAgICBbMC4yNSwgMHg3ZmIyNDddLAogICAgWzAuMywgMHg3MGEzM2ZdLAogICAgWzAuMzUsIDB4NjA5