In [1]:
import json
import pathlib
import urllib.parse
from typing import NamedTuple
from collections.abc import Iterator

import shapely
import requests
from tqdm.auto import tqdm

### Tile generator

In [2]:
class BBox(NamedTuple):
    """Axis-aligned bounding box ``(left, bottom, right, top)``.

    The field order matches the ``(minx, miny, maxx, maxy)`` tuple returned by
    a shapely geometry's ``bounds``, so a box can be built with
    ``BBox(*geometry.bounds)`` and expanded again with
    ``shapely.Polygon.from_bounds(*bbox)``.

    The defaults span -180..180 horizontally and -90..90 vertically (the
    whole longitude/latitude range).  Fields are annotated as ``float``
    because boxes derived from geometry bounds hold non-integer values;
    plain integers remain valid.
    """

    left: float = -180  # minimum x
    bottom: float = -90  # minimum y
    right: float = +180  # maximum x
    top: float = +90  # maximum y


# Default box, displayed as a quick sanity check of the defaults.
bbox = BBox()
bbox

BBox(left=-180, bottom=-90, right=180, top=90)

In [3]:
def tile_generator(
    xstep: int = 90, ystep: int = 90, margin: float = 0.1
) -> Iterator[BBox]:
    """Yield boxes that tile the -180..180 x -90..90 range, each shrunk inward.

    The range is cut into cells of ``xstep`` x ``ystep`` and walked column by
    column (increasing x), bottom to top inside each column.  Every cell is
    turned into a polygon, buffered by ``-margin`` and the bounds of the
    result are yielded.

    NOTE(review): the margin presumably keeps neighbouring boxes from both
    matching products that lie on a shared edge -- confirm.

    Args:
        xstep: cell width; must divide 360 exactly.
        ystep: cell height; must divide 180 exactly.
        margin: distance by which every cell is shrunk on each side.

    Yields:
        One ``BBox`` per cell, built from the shrunk polygon's bounds.

    Raises:
        AssertionError: when iteration starts, if a step does not divide its
            range evenly.
    """
    assert 360 % xstep == 0
    assert 180 % ystep == 0
    for west in range(-180, +180, xstep):
        east = west + xstep
        for south in range(-90, +90, ystep):
            north = south + ystep
            cell = shapely.Polygon.from_bounds(west, south, east, north)
            shrunk = cell.buffer(-margin)
            yield BBox(*shrunk.bounds)


# Preview the boxes produced with the default steps and margin.
list(tile_generator())

[BBox(left=-179.9, bottom=-89.9, right=-90.1, top=-0.1),
 BBox(left=-179.9, bottom=0.1, right=-90.1, top=89.9),
 BBox(left=-89.9, bottom=-89.9, right=-0.1, top=-0.1),
 BBox(left=-89.9, bottom=0.1, right=-0.1, top=89.9),
 BBox(left=0.1, bottom=-89.9, right=89.9, top=-0.1),
 BBox(left=0.1, bottom=0.1, right=89.9, top=89.9),
 BBox(left=90.1, bottom=-89.9, right=179.9, top=-0.1),
 BBox(left=90.1, bottom=0.1, right=179.9, top=89.9)]

### Query URL

In [4]:
# Root of the OData v1 catalogue API; the trailing slash lets relative
# resource names such as "Products" be joined onto the path.
CDES_ODATA_ENDPOINT_URL = "https://catalogue.dataspace.copernicus.eu/odata/v1/"

In [5]:
# Parsed form of the endpoint, so path and query can be swapped via _replace().
base_url = urllib.parse.urlparse(CDES_ODATA_ENDPOINT_URL)

In [6]:
# Dataset identifier used both in the catalogue filter and (with "/" replaced)
# as the output file name.
dataset = "COP-DEM_GLO-90-DGED/2024_1"

In [7]:
# Clauses of the OData $filter, later joined with " and ".
# NOTE: the ``any(att: ...)`` lambda is deliberately split over the last two
# entries -- the second entry opens the parenthesis and the third closes it --
# so that the " and " inserted by the join ends up *inside* the lambda.
filter_parts = [
    "Collection/Name eq 'CCM'",
    "Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'dataset'",
    f"att/OData.CSC.StringAttribute/Value eq '{dataset}')",
]

In [8]:
# Complete $filter expression (trailing underscore avoids shadowing the
# built-in ``filter``).
filter_ = " and ".join(filter_parts)
filter_

"Collection/Name eq 'CCM' and Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'dataset' and att/OData.CSC.StringAttribute/Value eq 'COP-DEM_GLO-90-DGED/2024_1')"

In [9]:
# Query parameters in the list-valued form used by parse_qs/urlencode.
# Empty lists are placeholders: with ``doseq=True`` they produce no output,
# so only $filter and $expand appear in the encoded query.
query_params = {
    "$filter": [filter_],
    "$orderby": [],
    "$top": [],
    "$skip": [],
    "$count": [],
    "$expand": ["Attributes"],
}

In [10]:
# Encode the parameters, then undo the percent/plus encoding so that the
# query string stays human readable (it is *not* percent-encoded).
query = urllib.parse.unquote_plus(
    urllib.parse.urlencode(query_params, doseq=True)
)
query

"$filter=Collection/Name eq 'CCM' and Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'dataset' and att/OData.CSC.StringAttribute/Value eq 'COP-DEM_GLO-90-DGED/2024_1')&$expand=Attributes"

In [11]:
# Address the "Products" resource below the endpoint path and attach the
# query built above.
url = base_url._replace(
    path=urllib.parse.urljoin(base_url.path, "Products"),
    query=query,
)

In [12]:
# Show the full request URL as a string.
url.geturl()

"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq 'CCM' and Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'dataset' and att/OData.CSC.StringAttribute/Value eq 'COP-DEM_GLO-90-DGED/2024_1')&$expand=Attributes"

### Query data

In [13]:
def _override_query(url, overrides) -> str:
    """Return ``url`` as a string with some query parameters replaced.

    Args:
        url: parsed URL (result of ``urllib.parse.urlparse``).
        overrides: mapping of parameter name to list of values.  An empty
            list removes the parameter, because ``urlencode(doseq=True)``
            emits nothing for it.

    The query is passed through ``unquote_plus`` so that it keeps the same
    non-percent-encoded form as the query of the input URL.
    """
    params = urllib.parse.parse_qs(url.query)
    params.update(overrides)
    query = urllib.parse.unquote_plus(
        urllib.parse.urlencode(params, doseq=True)
    )
    return url._replace(query=query).geturl()


def _fetch_json(session, url_str: str, timeout: float):
    """GET ``url_str`` and return its decoded JSON body.

    Raises ``requests.HTTPError`` for 4xx/5xx responses.
    """
    # No ``json=`` argument here: in requests it is a request *body* payload,
    # which is meaningless on a GET.
    response = session.get(url_str, timeout=timeout)
    response.raise_for_status()
    return response.json()


def query_all(url, page_size: int = 1000, timeout: float = 300.0):
    """Download every product matched by ``url``, one spatial tile at a time.

    A first request asks the service for the total number of matching
    products (``$count``).  The products are then fetched with one paginated
    query per box produced by ``tile_generator()``: the original ``$filter``
    is extended with an ``OData.CSC.Intersects`` clause for the box and the
    ``@odata.nextLink`` of every page is followed until there is none.

    Args:
        url: query URL (string or parsed URL) whose query contains a
            ``$filter`` parameter.
        page_size: value of ``$top`` for the per-tile queries.
        timeout: timeout in seconds passed to every HTTP request, so that a
            stalled connection fails instead of hanging forever.

    Returns:
        List with the ``"value"`` entries of all pages of all tiles.

    Raises:
        KeyError: if ``url`` has no ``$filter`` parameter or the count
            response lacks ``@odata.count``.
        requests.HTTPError: if the service answers with an error status.
        AssertionError: if the number of collected items differs from the
            count announced by the service.
    """
    if isinstance(url, str):
        url = urllib.parse.urlparse(url)

    base_filter = urllib.parse.parse_qs(url.query)["$filter"][0]

    # One session for all requests so that connections are reused.
    with requests.Session() as session:
        # Count the matching products: a single item, no expansion, only the
        # total is of interest.
        count_url_str = _override_query(
            url,
            {"$top": ["1"], "$count": ["True"], "$expand": []},
        )
        count_data = _fetch_json(session, count_url_str, timeout)
        target_count = int(count_data["@odata.count"])

        items = []
        with tqdm(total=target_count, unit="tiles") as pbar:
            for bbox in tile_generator():
                polygon = shapely.Polygon.from_bounds(*bbox)
                polygon_wkt = shapely.to_wkt(polygon)

                # Restrict the original filter to the current box.
                tile_filter = (
                    f"{base_filter} and "
                    "OData.CSC.Intersects("
                    f"area=geography'SRID=4326;{polygon_wkt}')"
                )
                next_url_str = _override_query(
                    url,
                    {
                        "$filter": [tile_filter],
                        "$orderby": [],
                        "$top": [str(page_size)],
                        "$skip": [],
                        "$count": [],
                        "$expand": ["Attributes"],
                    },
                )

                # Follow the pagination links until the last page.
                while next_url_str:
                    page = _fetch_json(session, next_url_str, timeout)
                    page_items = page["value"]
                    items.extend(page_items)
                    pbar.update(len(page_items))
                    next_url_str = page.get("@odata.nextLink", "")

    assert len(items) == target_count, (
        f"collected {len(items)} items, expected {target_count}"
    )

    return items

In [14]:
# Download all products of the dataset (network bound; shows a progress bar).
items = query_all(url)

  0%|          | 0/26479 [00:00<?, ?tiles/s]

### Save results

In [15]:
# Payload written to disk: the dataset identifier plus the raw catalogue items.
data = {
    "dataset": dataset,
    "tiles": items,
}

In [16]:
# File name derived from the dataset identifier ("/" is not valid in a file
# name).  NOTE: ``with_suffix`` would replace anything after a "." in the
# identifier; the current identifier contains none.
outpath = pathlib.Path(dataset.replace("/", "-")).with_suffix(".json")
# write_text returns the number of characters written (the value displayed).
outpath.write_text(json.dumps(data, indent=2))

106941438

### Perform checks

In [17]:
def get_attribute(name, attributes) -> str:
    """Return the ``"Value"`` of the first attribute called ``name``.

    Args:
        name: attribute name compared against each entry's ``"Name"``.
        attributes: iterable of mappings with ``"Name"`` and ``"Value"`` keys.

    Raises:
        KeyError: if no entry has the requested name.
    """
    not_found = object()
    entry = next(
        (candidate for candidate in attributes if candidate["Name"] == name),
        not_found,
    )
    if entry is not_found:
        raise KeyError(f"'{name}' is not in the attributes list")
    return entry["Value"]


def get_grid_id(attributes) -> str:
    """Return the grid identifier of a product, e.g. ``"N37_E051"``.

    The ``gridId`` attribute is used when present.  Otherwise ``"no gridId"``
    is printed and the identifier is rebuilt from the ``eopIdentifier``
    attribute.

    Args:
        attributes: iterable of mappings with ``"Name"`` and ``"Value"`` keys.

    Raises:
        KeyError: if neither attribute is present, or if ``eopIdentifier``
            does not have the expected layout.  A malformed identifier is
            reported as ``KeyError`` (instead of leaking ``IndexError``) so
            that callers handling ``KeyError`` see every failure.
    """
    try:
        return get_attribute("gridId", attributes)
    except KeyError:
        # Diagnostic: makes the number of products without gridId visible.
        print("no gridId")

    try:
        eop_id = get_attribute("eopIdentifier", attributes)
    except KeyError:
        raise KeyError("'gridId' is not in the attributes list") from None

    try:
        # 'urn:eop:DLR:CDEM90:Copernicus_DSM_30_N37_00_E051_00:V3386'
        product_name = eop_id.split(":")[4]
        # 'Copernicus_DSM_30_N37_00_E051_00'
        parts = product_name.split("_")
        return f"{parts[3]}_{parts[5]}"
    except IndexError:
        raise KeyError(
            f"cannot derive 'gridId' from eopIdentifier {eop_id!r}"
        ) from None


# Group the downloaded products by grid id.  The mapping is needed below:
# grid ids holding more than one product are the duplicated tiles.
import collections

grid_ids = collections.defaultdict(list)
for idx, item in enumerate(data["tiles"]):
    try:
        grid_id = get_grid_id(item["Attributes"])
    except KeyError as err:
        # Re-raise with the position of the offending item.
        raise ValueError(f"invalid item n. {idx}") from err
    else:
        grid_ids[grid_id].append(item)

print()
print(f"len(data['tiles']): {len(data['tiles'])}")
print(f"len(grid_ids):      {len(grid_ids)}")
print(
    f"len(data['tiles'] - len(grid_ids): {len(data['tiles']) - len(grid_ids)}"
)

no gridId
no gridId
no gridId

len(data['tiles']): 26479
len(grid_ids):      26097
len(data['tiles'] - len(grid_ids): 382


In [18]:
# Number of products per grid id, restricted to ids that occur more than once.
dups = {k: len(v) for k, v in grid_ids.items() if len(v) > 1}
# Smallest and largest multiplicity among the duplicated grid ids.
min(dups.values()), max(dups.values())

(2, 2)

In [19]:
# Index the products by eopIdentifier; products sharing an identifier
# collapse to the last one, so the length counts distinct identifiers.
eop_ids = dict(
    (get_attribute("eopIdentifier", item["Attributes"]), item)
    for item in items
)
len(eop_ids)

26097

In [20]:
def get_bbox(item):
    """Return the footprint bounds of a catalogue item as an integer ``BBox``.

    The ``"GeoFootprint"`` entry of ``item`` is converted to a shapely
    geometry and each of its bounds is converted with ``int()`` (which
    truncates towards zero).
    """
    footprint = shapely.geometry.shape(item["GeoFootprint"])
    corners = [int(value) for value in footprint.bounds]
    return BBox(*corners)


# Map each integer footprint box to its grid id; items sharing a box
# collapse to one entry, hence the comparison with len(items).
bbox_map = dict(
    (get_bbox(item), get_grid_id(item["Attributes"])) for item in items
)
len(bbox_map), len(items)

no gridId
no gridId
no gridId


(26097, 26479)

In [21]:
def bbox_to_grid_id(bbox) -> str:
    """Build the grid id (e.g. ``"S05_W072"``) from a box's lower-left corner.

    ``bbox.bottom`` gives the ``N``/``S`` part (two digits) and ``bbox.left``
    the ``E``/``W`` part (three digits); negative values select ``S``/``W``.
    Both values must be integers, as required by the ``d`` format code.
    """
    lat_letter = "S" if bbox.bottom < 0 else "N"
    lon_letter = "W" if bbox.left < 0 else "E"
    lat_part = f"{lat_letter}{abs(bbox.bottom):02d}"
    lon_part = f"{lon_letter}{abs(bbox.left):03d}"
    return "_".join((lat_part, lon_part))


# Cross-check: the grid id stored in (or derived from) the attributes must
# match the one computed from the lower-left corner of the footprint.
for idx, item in enumerate(items):
    bbox = get_bbox(item)
    grid_id = get_grid_id(item["Attributes"])
    expected = bbox_to_grid_id(bbox)
    assert grid_id == expected, (
        f"item n. {idx}, grid_id: {grid_id}, {expected}, bbox: {bbox}"
    )
print("OK")

no gridId
no gridId
no gridId
OK


### Pandas

In [22]:
import pandas as pd

In [23]:
# Load the delivery sheet (path is relative to the working directory).
# The first two rows of the worksheet are skipped, so the third row
# provides the column names.
df = pd.read_excel(
    "COP-DEM_delivery_sheet_2024_1_LatLon_v3.1.xlsx",
    sheet_name="COP-DEM_delivery_sheet_v0.9_ADS",
    skiprows=2,
)

In [24]:
# Display the sheet (pandas truncates rows and columns of large frames).
df

Unnamed: 0,eopIdentifier,CPP filename,parentIdentifier (dataset),tsxx:tileVersion,Updated (Y/N),eopIdentifier.1,CPP filename.1,parentIdentifier (dataset).1,tsxx:tileVersion.1,Updated (Y/N).1,...,Updated (Y/N).3,eopIdentifier.3,CPP filename.3,parentIdentifier (dataset).4,tsxx:tileVersion.4,Updated (Y/N).4,eopIdentifier.4,CPP filename.4,parentIdentifier (dataset).5,tsxx:tileVersion.5
0,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140222T21412...,COP-DEM_EEA-10-INSP/2019_1,1.0,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140222T21412...,COP-DEM_EEA-10-INSP/2019_1,1,N,...,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140222T21412...,COP-DEM_EEA-10-INSP/2019_1,1,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140222T21412...,COP-DEM_EEA-10-INSP/2019_1,1
1,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140207T09235...,COP-DEM_EEA-10-INSP/2019_1,1.0,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140207T09235...,COP-DEM_EEA-10-INSP/2019_1,1,N,...,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140207T09235...,COP-DEM_EEA-10-INSP/2019_1,1,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140207T09235...,COP-DEM_EEA-10-INSP/2019_1,1
2,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20110522T214904_20140323T09232...,COP-DEM_EEA-10-INSP/2019_1,1.0,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20110522T214904_20140323T09232...,COP-DEM_EEA-10-INSP/2019_1,1,N,...,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20110522T214904_20140323T09232...,COP-DEM_EEA-10-INSP/2019_1,1,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20110522T214904_20140323T09232...,COP-DEM_EEA-10-INSP/2019_1,1
3,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20101225T214024_20130706T21420...,COP-DEM_EEA-10-INSP/2019_1,1.0,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20101225T214024_20130706T21420...,COP-DEM_EEA-10-INSP/2019_1,1,N,...,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20101225T214024_20130706T21420...,COP-DEM_EEA-10-INSP/2019_1,1,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20101225T214024_20130706T21420...,COP-DEM_EEA-10-INSP/2019_1,1
4,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20110608T214027_20140116T09230...,COP-DEM_EEA-10-INSP/2019_1,1.0,Y,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20110608T214027_20140116T09230...,COP-DEM_EEA-10-INSP/2020_1,2,N,...,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20110608T214027_20140116T09230...,COP-DEM_EEA-10-INSP/2020_1,2,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20110608T214027_20140116T09230...,COP-DEM_EEA-10-INSP/2020_1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108255,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1.0,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1,N,...,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1
108256,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1.0,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,N,...,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1
108257,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1.0,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1,N,...,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1
108258,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1.0,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,N,...,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1


In [25]:
# List the column names; repeated headers carry ".N" suffixes.
df.columns

Index(['eopIdentifier', 'CPP filename', 'parentIdentifier (dataset)',
       'tsxx:tileVersion', 'Updated (Y/N)', 'eopIdentifier.1',
       'CPP filename.1', 'parentIdentifier (dataset).1', 'tsxx:tileVersion.1',
       'Updated (Y/N).1', 'eopIdentifier.2', 'CPP filename.2',
       'parentIdentifier (dataset).2', 'tsxx:tileVersion.2', 'Updated (Y/N).2',
       'eopIdentifier22', 'CPP filename22', 'parentIdentifier (dataset).3',
       'tsxx:tileVersion.3', 'Updated (Y/N).3', 'eopIdentifier.3',
       'CPP filename.3', 'parentIdentifier (dataset).4', 'tsxx:tileVersion.4',
       'Updated (Y/N).4', 'eopIdentifier.4', 'CPP filename.4',
       'parentIdentifier (dataset).5', 'tsxx:tileVersion.5'],
      dtype='object')

In [26]:
# Select the last group of columns of the sheet and give them short names.
# The numeric suffixes are not aligned across columns (".4" vs ".5") because
# of irregular headers in the sheet (e.g. 'eopIdentifier22').
# NOTE(review): presumably this group is the most recent release -- confirm
# against the spreadsheet.
names = [
    "Updated (Y/N).4",
    "eopIdentifier.4",
    "CPP filename.4",
    "parentIdentifier (dataset).5",
    "tsxx:tileVersion.5",
]
df1 = df[names]
new_names = ["updated", "eop_id", "filename", "parent_id", "tile_version"]
col_map = dict(zip(names, new_names))
# rename() returns a new frame, so df/df1 are left untouched.
df2 = df1.rename(columns=col_map)
df2

Unnamed: 0,updated,eop_id,filename,parent_id,tile_version
0,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140222T21412...,COP-DEM_EEA-10-INSP/2019_1,1
1,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140207T09235...,COP-DEM_EEA-10-INSP/2019_1,1
2,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20110522T214904_20140323T09232...,COP-DEM_EEA-10-INSP/2019_1,1
3,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20101225T214024_20130706T21420...,COP-DEM_EEA-10-INSP/2019_1,1
4,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20110608T214027_20140116T09230...,COP-DEM_EEA-10-INSP/2020_1,2
...,...,...,...,...,...
108255,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1
108256,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1
108257,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1
108258,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1


In [27]:
# Dataset name = part of ``parent_id`` before the first "/" (release removed).
# The vectorised ``.str`` accessor replaces ``apply`` with a lambda and
# propagates missing values instead of raising on them.
df2["dataset"] = df2["parent_id"].str.split("/").str[0]
df2

Unnamed: 0,updated,eop_id,filename,parent_id,tile_version,dataset
0,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140222T21412...,COP-DEM_EEA-10-INSP/2019_1,1,COP-DEM_EEA-10-INSP
1,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20101230T214901_20140207T09235...,COP-DEM_EEA-10-INSP/2019_1,1,COP-DEM_EEA-10-INSP
2,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N02_00_W0...,DEM1_SAR_INS_10_20110522T214904_20140323T09232...,COP-DEM_EEA-10-INSP/2019_1,1,COP-DEM_EEA-10-INSP
3,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20101225T214024_20130706T21420...,COP-DEM_EEA-10-INSP/2019_1,1,COP-DEM_EEA-10-INSP
4,N,urn:eop:DLR:CDEM10:Copernicus_DSM_03_N03_00_W0...,DEM1_SAR_INS_10_20110608T214027_20140116T09230...,COP-DEM_EEA-10-INSP/2020_1,2,COP-DEM_EEA-10-INSP
...,...,...,...,...,...,...
108255,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1,COP-DEM_GLO-90-DTED
108256,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,COP-DEM_GLO-90-DGED
108257,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DTE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DTED/2019_1,1,COP-DEM_GLO-90-DTED
108258,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,COP-DEM_GLO-90-DGED


In [28]:
# Keep only the rows of the dataset queried from the catalogue.  The name is
# derived from ``dataset`` (release suffix removed) rather than hard-coded,
# so that both halves of the comparison stay in sync.  ``copy()`` makes df3
# an independent frame that can be extended with new columns.
dataset_name = dataset.split("/")[0]
df3 = df2[df2["dataset"] == dataset_name].copy()
df3

Unnamed: 0,updated,eop_id,filename,parent_id,tile_version,dataset
55309,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_N00_00_E0...,DEM1_SAR_DGE_90_20110730T174744_20130920T05222...,COP-DEM_GLO-90-DGED/2020_1,2,COP-DEM_GLO-90-DGED
55311,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_N00_00_E0...,DEM1_SAR_DGE_90_20101225T050449_20141127T17395...,COP-DEM_GLO-90-DGED/2020_1,2,COP-DEM_GLO-90-DGED
55313,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_N00_00_E0...,DEM1_SAR_DGE_90_20110504T173021_20140930T05051...,COP-DEM_GLO-90-DGED/2020_1,2,COP-DEM_GLO-90-DGED
55315,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_N00_00_E0...,DEM1_SAR_DGE_90_20110412T173020_20141004T17222...,COP-DEM_GLO-90-DGED/2020_1,2,COP-DEM_GLO-90-DGED
55317,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_N00_00_E0...,DEM1_SAR_DGE_90_20110412T173020_20141128T17225...,COP-DEM_GLO-90-DGED/2019_1,1,COP-DEM_GLO-90-DGED
...,...,...,...,...,...,...
108250,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,COP-DEM_GLO-90-DGED
108252,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,COP-DEM_GLO-90-DGED
108254,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,COP-DEM_GLO-90-DGED
108256,N,urn:eop:DLR:CDEM90:Copernicus_DSM_30_S90_00_W1...,DEM1_SAR_DGE_90_20130522T072046_20140523T06294...,COP-DEM_GLO-90-DGED/2019_1,1,COP-DEM_GLO-90-DGED


In [29]:
def compute_grid_id(s):
    """Extract the grid id from an identifier made of ``_``-separated fields.

    The 4th and 6th fields are joined with an underscore, e.g.
    ``'urn:eop:DLR:CDEM90:Copernicus_DSM_30_N37_00_E051_00:V3386'`` gives
    ``'N37_E051'``.

    Raises:
        IndexError: if ``s`` has fewer than six fields.
    """
    fields = s.split("_")
    lat_field = fields[3]
    lon_field = fields[5]
    return "_".join((lat_field, lon_field))


# Add the grid id derived from each eopIdentifier as a new column.
df3["grid_id"] = df3.eop_id.apply(compute_grid_id)
df3.grid_id

55309     N00_E006
55311     N00_E009
55313     N00_E010
55315     N00_E011
55317     N00_E012
            ...   
108250    S90_W176
108252    S90_W177
108254    S90_W178
108256    S90_W179
108258    S90_W180
Name: grid_id, Length: 26475, dtype: object

In [30]:
# Distinct grid ids listed in the delivery sheet for this dataset.
grid_ids_from_df = set(df3["eop_id"].map(compute_grid_id))
len(grid_ids_from_df)

26475

In [31]:
# Distinct grid ids returned by the catalogue query.
grid_ids_keys = set(grid_ids)
len(grid_ids_keys)

26097

In [32]:
# Grid ids present in the delivery sheet but absent from the catalogue.
len(grid_ids_from_df - grid_ids_keys)

378