# This colab retrieves urls of (Cloud Optimized) GeoTIFFs for a given region from the Open Buildings 2.5D Temporal Dataset.





In [6]:
#@title Imports (need to be run only once)

!sudo apt-get install -y \
    g++ \
    libgdal-dev \
    libproj-dev \
    libgeos-dev \
    swig
!pip install rasterio shapely s2geometry

import functools
import glob
import gzip
import json
import multiprocessing
from multiprocessing.pool import ThreadPool
import os
import shutil
import tempfile
from typing import Optional, Tuple, Iterable, Callable, Any

import geopandas as gpd
from google.auth import credentials
from google.cloud import storage
from IPython import display
import pandas as pd
import pyproj
import rasterio
from rasterio.transform import Affine
import s2geometry as s2
import shapely
from shapely.geometry import Polygon, box
import tqdm.notebook
from shapely.ops import transform

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
g++ is already the newest version (4:11.2.0-1ubuntu1).
swig is already the newest version (4.0.2-1ubuntu1).
libgdal-dev is already the newest version (3.8.4+dfsg-1~jammy0).
libgeos-dev is already the newest version (3.12.1-1~jammy0).
libproj-dev is already the newest version (9.3.1-1~jammy0).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
Collecting rasterio
  Using cached rasterio-1.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting s2geometry
  Using cached s2geometry-0.9.0.tar.gz (1.1 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.me

In [7]:
# @title Prepare urls of GeoTIFFs of the given region

# @markdown First, select a region from either the [Natural Earth low res](https://www.naturalearthdata.com/downloads/110m-cultural-vectors/110m-admin-0-countries/) (fastest), [Natural Earth high res](https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/) or [World Bank high res](https://datacatalog.worldbank.org/dataset/world-bank-official-boundaries) shapefiles:
region_border_source = "Natural Earth (High Res 10m)"  # @param ["Natural Earth (Low Res 110m)", "Natural Earth (High Res 10m)", "World Bank (High Res 10m)"]
region = "GHA (Ghana)"  # @param ["", "ABW (Aruba)", "AGO (Angola)", "AIA (Anguilla)", "ARG (Argentina)", "ATG (Antigua and Barbuda)", "BDI (Burundi)", "BEN (Benin)", "BFA (Burkina Faso)", "BGD (Bangladesh)", "BHS (The Bahamas)", "BLM (Saint Barthelemy)", "BLZ (Belize)", "BOL (Bolivia)", "BRA (Brazil)", "BRB (Barbados)", "BRN (Brunei)", "BTN (Bhutan)", "BWA (Botswana)", "CAF (Central African Republic)", "CHL (Chile)", "CIV (Ivory Coast)", "CMR (Cameroon)", "COD (Democratic Republic of the Congo)", "COG (Republic of Congo)", "COL (Colombia)", "COM (Comoros)", "CPV (Cape Verde)", "CRI (Costa Rica)", "CUB (Cuba)", "CUW (Cura\u00e7ao)", "CYM (Cayman Islands)", "DJI (Djibouti)", "DMA (Dominica)", "DOM (Dominican Republic)", "DZA (Algeria)", "ECU (Ecuador)", "EGY (Egypt)", "ERI (Eritrea)", "ETH (Ethiopia)", "FLK (Falkland Islands)", "GAB (Gabon)", "GHA (Ghana)", "GIN (Guinea)", "GMB (Gambia)", "GNB (Guinea Bissau)", "GNQ (Equatorial Guinea)", "GRD (Grenada)", "GTM (Guatemala)", "GUY (Guyana)", "HND (Honduras)", "HTI (Haiti)", "IDN (Indonesia)", "IND (India)", "IOT (British Indian Ocean Territory)", "JAM (Jamaica)", "KEN (Kenya)", "KHM (Cambodia)", "KNA (Saint Kitts and Nevis)", "LAO (Laos)", "LBR (Liberia)", "LCA (Saint Lucia)", "LKA (Sri Lanka)", "LSO (Lesotho)", "MAF (Saint Martin)", "MDG (Madagascar)", "MDV (Maldives)", "MEX (Mexico)", "MOZ (Mozambique)", "MRT (Mauritania)", "MSR (Montserrat)", "MUS (Mauritius)", "MWI (Malawi)", "MYS (Malaysia)", "MYT (Mayotte)", "NAM (Namibia)", "NER (Niger)", "NGA (Nigeria)", "NIC (Nicaragua)", "NPL (Nepal)", "PAN (Panama)", "PER (Peru)", "PHL (Philippines)", "PRI (Puerto Rico)", "PRY (Paraguay)", "RWA (Rwanda)", "SDN (Sudan)", "SEN (Senegal)", "SGP (Singapore)", "SHN (Saint Helena)", "SLE (Sierra Leone)", "SLV (El Salvador)", "SOM (Somalia)", "STP (Sao Tome and Principe)", "SUR (Suriname)", "SWZ (Eswatini)", "SXM (Sint Maarten)", "SYC (Seychelles)", "TCA (Turks and Caicos Islands)", "TGO (Togo)", "THA (Thailand)", "TLS (East Timor)", "TTO (Trinidad and Tobago)", "TUN (Tunisia)", "TZA (United Republic of Tanzania)", "UGA (Uganda)", "URY (Uruguay)", "VCT (Saint Vincent and the Grenadines)", "VEN (Venezuela)", "VGB (British Virgin Islands)", "VIR (United States Virgin Islands)", "VNM (Vietnam)", "ZAF (South Africa)", "ZMB (Zambia)", "ZWE (Zimbabwe)"]
# @markdown **or** specify an area of interest in [WKT format](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) (assumes crs='EPSG:4326'); this [tool](https://arthur-e.github.io/Wicket/sandbox-gmaps3.html) might be useful.
your_own_wkt_polygon = "MULTIPOLYGON(((103.7548510655084 22.34333445531191,103.75404029507267 22.34523065002736,103.75402629729437 22.345267614220347,103.7524626962978 22.350251956234498,103.7524576671559 22.350271988983078,103.75199103404898 22.35233461771343,103.75198701073546 22.35235456664298,103.75144068346417 22.355713752323716,103.75114953343369 22.359104597879735,103.75114752177693 22.359146591214625,103.75129182114557 22.365498377662206,103.7512958444591 22.365541376825476,103.75155623426606 22.36774016577061,103.7515592517512 22.36776120434757,103.75170868169025 22.368728208085017,103.75171563866988 22.368770201419906,103.75176016400442 22.369034098462173,103.75176720480309 22.36907508596868,103.75179126747543 22.369213864401814,103.75181121640497 22.369327858284947,103.75221921187944 22.371377917182738,103.75222323519296 22.371395938274556,103.75243385032768 22.3722958071978,103.7524429027831 22.372332771390788,103.75333717274155 22.375483742383373,103.75334915886309 22.37552079039539,103.7541846076288 22.377877617502065,103.75419156460843 22.377895638593884,103.75428347273633 22.37813173548835,103.754290513535 22.378149672761136,103.75659674197452 22.38311462168087,103.75661568507569 22.383149658036128,103.75968254488468 22.388043518050182,103.7597055951184 22.388075536920297,103.76230656719429 22.391252416987697,103.76264348133209 22.391944721011804,103.76264750464561 22.39195276763885,103.76360544682778 22.393764413604526,103.76363838770725 22.393823422202853,103.7655438632527 22.396933799844938,103.76555593319327 22.396951820936756,103.76682681749685 22.39874721477654,103.76716143191048 22.40074292004834,103.76716637723335 22.40076488063465,103.76730024778155 22.401341643434815,103.76730527692345 22.401362682011776,103.76858717500207 22.40573437806056,103.7692286892089 22.407311094289316,103.7695755857671 22.408301310354112,103.77019574154082 22.409687919446945,103.7703040956514 22.40995423262095,103.77037275616016 22.410083707233067,103.77111256343845 22.411737845664874,103.77112555538837 22.411763913383737,103.77252577719989 22.414357244828185,103.77266977829638 22.41460425951465,103.77519124949599 22.418461471912167,103.7780936137722 22.422040883330023,103.78134649526152 22.425305031994913,103.78491584962127 22.42821975564298,103.78536092867968 22.428546733685703,103.78966459957677 22.431345441955365,103.7896855543347 22.4313574280769,103.78992328736713 22.431479452758754,103.79028634687005 22.43171562190473,103.79392233274345 22.43363372552511,103.79771804467401 22.43521235965316,103.80164186909997 22.436438376229063,103.8056611254417 22.437301564058842,103.80567914653352 22.437304581543984,103.80986886901243 22.43779573158499,103.81149046631883 22.43857846758648,103.8115284363402 22.43859447702154,103.81487733264952 22.439852323977654,103.81809963373306 22.440778420060383,103.81839094319662 22.44092254195441,103.81963286729406 22.441546510935623,103.81971728099872 22.441578731814563,103.81985093500462 22.441644855545118,103.82257296189533 22.44266875089196,103.82387936663994 22.443167408181054,103.82394765324496 22.443185843533858,103.82405050504059 22.443224531423134,103.82691802444207 22.443987755430914,103.82826759747091 22.444352100018158,103.82831800057433 22.444360375540793,103.82838639452761 22.4443785793957,103.83140182864659 22.44486669930422,103.83275287835451 22.44508852378881,103.8327841241372 22.44509045678716,103.83281558390293 22.44509554930033,103.83282463635835 22.44509655512871,103.83713279704904 22.445359484375174,103.83728953968433 22.445369181147953,103.8372908437872 22.445369130048295,103.83729216670426 22.4453692107866,103.84176470138019 22.445197158259933,103.84179815963806 22.44519251645195,103.84183138869281 22.445191214412542,103.84326785597443 22.444988619159624,103.84619808239489 22.444582096248563,103.84626504132272 22.444565904677052,103.84633217979089 22.44455643565878,103.84641516063229 22.44454042622372,103.849027099055 22.443955516100758,103.84907009821826 22.443944535807603,103.85028841568025 22.44361517837358,103.85033040901514 22.443603192252045,103.8513508976211 22.443298824533418,103.85143790177602 22.443271750986174,103.85178729509597 22.443157580532276,103.85345787056693 22.442933609558917,103.85348293245741 22.442929586245395,103.85889755628713 22.44171414271883,103.8589215285302 22.441707185739197,103.86265452946917 22.44044433586035,103.86626286822447 22.438860045087207,103.86971892712153 22.436966439339738,103.8697619262848 22.436940371620874,103.87499772851952 22.433231868077502,103.87504273933955 22.433194903884516,103.87794915114509 22.430589223022718,103.88061906389949 22.42774170336153,103.88063800700066 22.42771965895619,103.88292315490409 22.424829161972955,103.88382397577487 22.423475411763608,103.8841747897293 22.423207827921026,103.88503648365645 22.422428778524665,103.88577546348789 22.421856509171075,103.88693817621048 22.420709476125527,103.88696262941289 22.42068736821461,103.88698719608549 22.42066111730444,103.88698919653457 22.42065914383227,103.8880546862887 22.419802981083816,103.89089663839695 22.41707834935245,103.89348982368453 22.414115968566424,103.89351781924113 22.4140810160302,103.89652601493798 22.409846169806478,103.89784883685529 22.407437666051532,103.89802640629081 22.40713589194763,103.89804836687712 22.40709691609788,103.89813635310244 22.406933094876674,103.898539907541 22.406580936646883,103.90059036795813 22.404395882738402,103.90230458429942 22.403571253682085,103.90233752517888 22.403554238418646,103.90427657398808 22.40242016591497,103.90486005430482 22.402097868621226,103.90486566904606 22.402107517907066,103.90488972510816 22.40209352012877,103.90513845045321 22.401944090811345,103.90864392023884 22.40000777283655,103.9086879252305 22.399979777279956,103.90881600049892 22.399897993219042,103.90884097857037 22.399881983783985,103.91361908719168 22.396354444564047,103.91393609076962 22.39608639130062,103.91760211240562 22.39261602788271,103.91839633546888 22.391674397385533,103.91868183111359 22.391354778821896,103.91894237119821 22.39109589717378,103.91932423170934 22.390635597428076,103.92159465317933 22.388093811370968,103.92452691839613 22.383955086884022,103.92697530094615 22.379512942968162,103.92698728706769 22.37948796489671,103.9273991155204 22.3786054134276,103.92752378607585 22.37833052707545,103.92769605112476 22.378007255672756,103.92770711523694 22.377983199610654,103.9284173046058 22.37621719195773,103.9284585922024 22.376132941861925,103.92848357027385 22.37607502291101,103.92924950747549 22.374182092861634,103.9292574702835 22.374161054284674,103.92932100156709 22.37396999449243,103.9293443619243 22.37391190495828,103.92963236411727 22.373080923077854,103.92961826125871 22.37307603530077,103.93041707977446 22.370673721180232,103.93128854670849 22.36710346464383,103.93129659333553 22.3670634829657,103.93192612771887 22.362970885620364,103.93217719088263 22.358837771315308,103.9320476613254 22.354699065157018,103.93204564966864 22.354673081257186,103.93159305145116 22.350901771392685,103.93105395248087 22.348293109175444,103.93150617864237 22.347646155459923,103.93507635850217 22.345176353679925,103.93509840290751 22.345159338416487,103.93564863301327 22.34469461397926,103.93623328323292 22.344556785546658,103.9362762823962 22.344544799425123,103.93688310175354 22.344358517270585,103.93941286548579 22.345132993568413,103.93946885659898 22.34514799717509,103.94105643749303 22.345542658312496,103.94111745774812 22.345556656090793,103.94250677553192 22.34585238177397,103.94256779578701 22.345864367895505,103.94275944494866 22.345901582113193,103.94297343493663 22.345942653438733,103.94489030277381 22.346267834677434,103.94494428223024 22.346275797485447,103.94904971588191 22.346690373740664,103.94910470116672 22.346693391225806,103.9532479070178 22.34672991642453,103.95330197029325 22.346727904767768,103.95732234470358 22.34639755926553,103.9604947082567 22.3458478833701,103.96084439469067 22.345821761305274,103.96185340819446 22.34568379517907,103.96508958854389 22.34512041398867,103.96512755856526 22.345112367361626,103.96890376209987 22.344139576810523,103.97258168163064 22.34284383666829,103.97613375430605 22.341234857395666,103.97628177871606 22.341159923181312,103.97778470548728 22.340363096908018,103.97780465441683 22.34035203279583,103.97799435829107 22.340246219981058,103.97884637874846 22.339768283862217,103.978845184811 22.339766155420566,103.97924021563601 22.339564998412367,103.9806427572906 22.338666747091576,103.98139446980521 22.3382824450497,103.98516102422911 22.335857081865893,103.98866803921415 22.333069580350863,103.9918808270046 22.329947511579167,103.99291243298403 22.328723301068603,103.99473934445116 22.32719398544592,103.99788827001164 22.323970910226326,104.00023326697156 22.321033323445864,104.00302888559528 22.318371278671023,104.00304891834386 22.318350323913094,104.00311091539155 22.318285343879392,104.00313094814013 22.31826430530243,104.00628129108487 22.31457720198442,104.00901698715025 22.310572784600442,104.00904791637295 22.31052182262916,104.01167724484701 22.305502606154864,104.01177229562897 22.305289538176243,104.01288701624395 22.302552843741157,104.0129069651735 22.302498864284733,104.01433964264793 22.297870559786276,104.01434668344659 22.297842564229683,104.01476233820983 22.296032946192962,104.01476837318012 22.29600394480799,104.01544518373788 22.291732418389003,104.01570923476756 22.28741567337303,104.01555809222442 22.283093501880497,104.01499314935323 22.278805746053553,104.0140196138451 22.27459193078783,104.01326803474753 22.272347281280073,104.01315356371451 22.271822766505757,104.01314954040099 22.27180918782262,104.01168545428597 22.26763307072755,104.01167740765892 22.267613289436063,104.01017671151646 22.264422451863485,104.00978164325437 22.262888377641765,104.00826491834022 22.258736213770955,104.00634851364231 22.25475270874284,104.00405088518548 22.250976225888436,104.00139416038778 22.247443134802072,103.99840392496132 22.244187461082298,103.99510897650791 22.241240558646762,103.99154104718234 22.23863080777686,103.98773449809444 22.236383341800106,103.9837259883928 22.234519805042442,103.97955412221714 22.233058144381562,103.97525907691929 22.232012436408652,103.97088221613338 22.231392751863137,103.9664656914214 22.23120505864594,103.96320455758762 22.23138689968393,103.96340391606167 22.231328246065555,103.96281428619781 22.231408661242405,103.96205203633068 22.231451164345348,103.95876935694542 22.231960318682297,103.95729664306927 22.23216117103741,103.9572626125424 22.232168211836075,103.95613443717251 22.232486968758714,103.9511273256902 22.2319720162529,103.95109832430522 22.23197101042452,103.95104604940407 22.231969227808285,103.95101604219072 22.231968221979905,103.94763192052211 22.23198198153244,103.94497248649226 22.232193081508644,103.944583972916 22.23216698614058,103.94455899484456 22.23216698614058,103.94281662061373 22.232200696889144,103.9427906367139 22.232201702717525,103.94272569895305 22.23220769718794,103.94167210787101 22.232066695532975,103.94164612397118 22.232063678047833,103.93812753926693 22.231794225937712,103.93810054953872 22.23179322010933,103.93642301671207 22.23176197257933,103.93639703281224 22.23176197257933,103.93472468641599 22.231793027125413,103.93469761286875 22.231794032953793,103.93302931532716 22.231887024118475,103.93297634169912 22.231890963612965,103.93116532007322 22.232062477754347,103.93095635922715 22.23208653381645,103.92851945018002 22.232434784584107,103.9284963999463 22.232438724078598,103.92831223886081 22.232470592237185,103.92828927244612 22.232474615550707,103.92383831744887 22.233489195819306,103.92353329999246 22.233575194145846,103.92329072112285 22.233644321938353,103.92326976636492 22.233650356908637,103.92290851048433 22.23375603509915,103.92288847773575 22.233761986250403,103.91831214702056 22.2353948619872,103.91809909617166 22.23549794763494,103.91570485617426 22.23586374063704,103.91141499967024 22.23695883073632,103.90725336973306 22.238469766676918,103.90326017059857 22.240381951768143,103.90108019892227 22.241703319361676,103.90000372697122 22.24221433467035,103.89658391991222 22.244224040937358,103.89656388716364 22.244237032887273,103.8956730398637 22.244829929136383,103.8956540129435 22.2448429210863,103.89490845979351 22.245434761724415,103.89360067691992 22.24620792430174,103.89358768497 22.246216892938133,103.8927490112939 22.246819418652986,103.89127223225756 22.24754411176589,103.89119319091066 22.24758711092916,103.89096054166373 22.247725554049836,103.89074206624336 22.24783474637851,103.89059796917842 22.24791176725419,103.88933337697729 22.248305375107446,103.88493894277283 22.250205323194756,103.88492494499454 22.250212280174388,103.88473194133894 22.250308781310153,103.88471794356064 22.250315822108817,103.88309013664245 22.251176731685952,103.88300908363878 22.251221742505983,103.87928398271339 22.253534482974235,103.8763295205557 22.255794446063437,103.87530742974477 22.25538640267949,103.87527046555178 22.255373410729575,103.87511449105271 22.255326117484014,103.87256535332341 22.254334458683584,103.87233635972876 22.2542594406502,103.87122330647932 22.2539374458993,103.8681114114629 22.252904773230753,103.86808140424955 22.25289681042274,103.86421771322175 22.25205117846613,103.86029473740663 22.251547727854454,103.8602827512851 22.251546722026074,103.8565159868449 22.251388953226776,103.85647499933839 22.251388953226776,103.85006729192104 22.251847034159386,103.85003929636444 22.25185105747291,103.84647914125915 22.25250969587357,103.84298338250834 22.253452128721346,103.84286536531168 22.253489092914332,103.84215162234561 22.25374417341315,103.84084592551385 22.253814300528962,103.83984226675565 22.25385855059541,103.83973276370291 22.253874086863227,103.83949375651775 22.253886923598483,103.8366090337914 22.254317280941176,103.83552331324962 22.254471322720732,103.83543823051247 22.254491947221346,103.83528801823374 22.254514356639252,103.83248358379768 22.255208169042877,103.831283883557 22.255498982657702,103.8312242098187 22.25551973606551,103.83116018372606 22.25553557601408,103.83113319399784 22.255543622641124,103.82818767531185 22.25657578585783,103.82716373580394 22.256931892828536,103.827139214999 22.256943185383843,103.82711311704315 22.25695233058132,103.82324408302824 22.258734478884293,103.82322306412378 22.25874668859128,103.82320150946934 22.25875661514663,103.82232243836621 22.259269854612004,103.8195606920154 22.2608741301243,103.81949864113396 22.26091850871593,103.81943436301694 22.26095603704134,103.81819177283784 22.261853176734853,103.81609588389557 22.26335214979233,103.81599904826696 22.2634363054949,103.81589762541661 22.26350953194269,103.81464760745372 22.264610784896238,103.81288064381295 22.26614637741252,103.81275631396252 22.266276994986846,103.81262446482303 22.266393152721054,103.81189686150032 22.267171515865446,103.81166409244148 22.26733352230073,103.81165080904836 22.26734481600663,103.81134748855808 22.26755192820677,103.80846748758196 22.26988776317706,103.80831443403005 22.27002271181812,103.8050101169737 22.27325206294446,103.80205398303931 22.276802906797396,103.79947713301391 22.280637885787257,103.79730667732815 22.28471665300569,103.79556545083373 22.288996296705953,103.79513240946605 22.290481018906068,103.79512095989831 22.29051224817694,103.79510392956624 22.29057866486478,103.79427177256345 22.29343179176728,103.79402020359359 22.2948050938811,103.79399808498273 22.294891354375995,103.79396758595367 22.29509233088087,103.79382437583371 22.29587410758635,103.79381928839129 22.29589490836811,103.79381537674733 22.295923233137703,103.79344189399279 22.29796205635412,103.79028828929236 22.299184391307907,103.79027035201958 22.29919235411592,103.78909044346044 22.299802500605473,103.78802984829072 22.3002558846857,103.78574183307626 22.301534111801093,103.78551544358619 22.301651180832895,103.78525623588168 22.30180539649244,103.78388307374216 22.302572529975414,103.78086851368587 22.30470183402036,103.77948343747113 22.305607720164772,103.77947044552121 22.305616772620198,103.7755228434859 22.308707414076473,103.7719431500168 22.312217602263413,103.77192311726822 22.312239562849722,103.76899320784389 22.315801823158854,103.76644293690528 22.319645023112592,103.76429904330753 22.323728867868006,103.764287057186 22.323754851767838,103.76246379147909 22.32835665164424,103.76205287351347 22.329857209540638,103.76162466033699 22.330640849144647,103.7616135962248 22.330663815559337,103.76121685821201 22.331509671447517,103.76120587791885 22.33153372750962,103.7605465432629 22.333229504433948,103.75795008871646 22.337074075493838,103.75793215144367 22.337104082707192,103.7567915368833 22.339371229237738,103.75552937476469 22.34174805505539,103.75515365850899 22.342626763931744,103.75499336450812 22.342945372864655,103.75497634924469 22.342986360371164,103.7548510655084 22.34333445531191)))"  # @param {type:"string"}

# @markdown Second, specify which years to download
download_2016 = False  # @param { type: "boolean" }
download_2017 = False  # @param { type: "boolean" }
download_2018 = False  # @param { type: "boolean" }
download_2019 = False  # @param { type: "boolean" }
download_2020 = False  # @param { type: "boolean" }
download_2021 = False  # @param { type: "boolean" }
download_2022 = False  # @param { type: "boolean" }
download_2023 = True  # @param { type: "boolean" }

_GCS_BUCKET = "open-buildings-temporal-data"

_DATASET_VERSION = 'v1'

_GCS_MANIFESTS_FOLDER = "manifests"

_LOCAL_DOWNLOAD_URL_FILE_PATH = "/tmp/downloadable_urls.txt"

_MAX_NUM_THREADS = 8

_MANIFEST_S2_LEVEL = 2


def get_years_as_list() -> list[int]:
  years_to_download = []
  for year in range(2016, 2024):
    should_download = globals()[f"download_{year}"]
    if should_download:
      years_to_download.append(year)
  return years_to_download


def get_region_geometry(
    region_border_source: str, region: str, your_own_wkt_polygon: str
) -> shapely.geometry.base.BaseGeometry:
  """Returns the shapely geometry of the requested region."""

  if your_own_wkt_polygon:
    region_df = gpd.GeoDataFrame(
        geometry=gpd.GeoSeries.from_wkt([your_own_wkt_polygon]), crs="EPSG:4326"
    )
    if not isinstance(
        region_df.iloc[0].geometry, shapely.geometry.polygon.Polygon
    ) and not isinstance(
        region_df.iloc[0].geometry, shapely.geometry.multipolygon.MultiPolygon
    ):
      raise ValueError(
          "`your_own_wkt_polygon` must be a POLYGON or MULTIPOLYGON."
      )
    print(f"Preparing your_own_wkt_polygon.")
    return region_df.iloc[0].geometry

  if not region:
    raise ValueError("Please select a region or set your_own_wkt_polygon.")

  if region_border_source == "Natural Earth (Low Res 110m)":
    url = (
        "https://naciscdn.org/naturalearth/"
        "110m/cultural/ne_110m_admin_0_countries.zip"
    )
    !wget -N {url}
    display.clear_output()
    region_shapefile_path = os.path.basename(url)
  elif region_border_source == "Natural Earth (High Res 10m)":
    url = (
        "https://naciscdn.org/naturalearth/"
        "10m/cultural/ne_10m_admin_0_countries.zip"
    )
    !wget -N {url}
    display.clear_output()
    region_shapefile_path = os.path.basename(url)
  elif region_border_source == "World Bank (High Res 10m)":
    url = (
        "https://datacatalogfiles.worldbank.org/ddh-published/"
        "0038272/DR0046659/wb_countries_admin0_10m.zip"
    )
    !wget -N {url}
    !unzip -o {os.path.basename(url)}
    display.clear_output()
    region_shapefile_path = "WB_countries_Admin0_10m"

  region_iso_a3 = region.split(" ")[0]
  region_df = (
      gpd.read_file(region_shapefile_path)
      .query(f'ISO_A3 == "{region_iso_a3}"')
      .dissolve(by="ISO_A3")[["geometry"]]
  )
  print(f"Preparing {region} from {region_border_source}.")
  return region_df.iloc[0].geometry


def get_bounding_box_s2_covering_tokens(
    region_geometry: shapely.geometry.base.BaseGeometry,
) -> list[str]:
  """Returns the s2_tokens of the bounding box of the provided geometry."""
  region_bounds = region_geometry.bounds
  s2_lat_lng_rect = s2.S2LatLngRect_FromPointPair(
      s2.S2LatLng_FromDegrees(region_bounds[1], region_bounds[0]),
      s2.S2LatLng_FromDegrees(region_bounds[3], region_bounds[2]),
  )
  coverer = s2.S2RegionCoverer()
  # NOTE: Should be kept in-sync with manifest s2 cell level.
  coverer.set_fixed_level(_MANIFEST_S2_LEVEL)
  coverer.set_max_cells(1000000)
  return [cell.ToToken() for cell in coverer.GetCovering(s2_lat_lng_rect)]


def get_matching_manifest_blobs(s2_token: str) -> list[storage.Blob]:
  """Returns a list of manifest blobs for the given s2_token."""
  matching_manifest_blobs = []
  token_manifest_blobs = list(
      storage_client.list_blobs(
          _GCS_BUCKET,
          prefix=os.path.join(_DATASET_VERSION, _GCS_MANIFESTS_FOLDER, f'{s2_token}_'),
      )
  )
  for year in get_years_as_list():
    filtered_token_manifests = [
        blob for blob in token_manifest_blobs if f'_{str(year)}_' in blob.name
    ]
    matching_manifest_blobs.extend(filtered_token_manifests)
  return matching_manifest_blobs


def multithreaded_fn(progress_bar_desc: str, fn: Callable, items: Iterable[Any]):
  """Run `fn` on `items` using multithreading and display a progress bar."""
  total_num_items = len(items)
  fn_results = []
  with tqdm.notebook.tqdm(
      total=len(items), desc=progress_bar_desc
  ) as pbar:
    with ThreadPool(processes=_MAX_NUM_THREADS) as pool:
      for result in pool.map(fn, items):
        fn_results.extend(result)
        pbar.update(1)
  return fn_results



def multithreaded_get_matching_manifest_blobs(
    s2_tokens: list[str],
) -> list[storage.Blob]:
  """Returns a list of manifest blobs for the given s2_tokens."""
  return multithreaded_fn("Fetching matching manifests",
                          get_matching_manifest_blobs, s2_tokens)


def extract_tile_polygons(
    manifest_bytes: bytes,
) -> list[str]:
  """Extracts GeoTIFF urls from a manifest."""
  tile_polys = []
  manifest = json.loads(manifest_bytes)
  crs = None
  for tileset in manifest["tilesets"]:
    for source in tileset["sources"]:
      # All tiles in a manifest should have the same projection
      if crs is None:
        crs = tileset["crs"]
      affine_transform = source["affineTransform"]
      transform = Affine.translation(
          affine_transform["translateX"], affine_transform["translateY"]
      ) * Affine.scale(affine_transform["scaleX"], affine_transform["scaleY"])
      dimensions = source["dimensions"]
      width = dimensions["width"]
      height = dimensions["height"]

      corners = [(0, 0), (width, 0), (width, height), (0, height)]
      corners = [transform * corner for corner in corners]

      uri = source["uris"][0]
      object_path = manifest["uriPrefix"] + uri
      tile_polys.append((object_path, Polygon(corners)))

  return tile_polys, crs


def extract_geotiff_urls(
    manifest_blob: storage.Blob,
    region_geometry: shapely.geometry.base.BaseGeometry,
) -> list[str]:
  """Extracts GeoTIFF urls from a manifest intersecting `region_geometry`."""
  manifest_bytes = manifest_blob.download_as_bytes()
  tile_polys, crs = extract_tile_polygons(manifest_bytes)
  # EPSG:4326 is the standard WGS84 lat/lon coordinate system. We transform
  # region_geometry from EPSG:4326 to manifest's projection before doing
  # intersection check.
  transformer = pyproj.Transformer.from_crs("epsg:4326", crs, always_xy=True)
  region_geometry = transform(transformer.transform, region_geometry)
  geotiff_urls = []
  for (url, poly) in tile_polys:
    if poly.intersects(region_geometry):
      geotiff_urls.append(url)

  return geotiff_urls


def multithreaded_extract_geotiff_urls(
    manifest_blobs: list[storage.Blob],
    region_geometry: shapely.geometry.base.BaseGeometry,
) -> list[str]:
  """Extracts GeoTIFF urls from manifests."""
  return multithreaded_fn(
      "Extracting urls",
      lambda manifest_blob: extract_geotiff_urls(
          manifest_blob, region_geometry
      ),
      manifest_blobs,
  )


def write_to_file(filename: str, urls: list[str]) -> None:
  """Writes urls to file."""
  with open(filename, "w") as f:
    for url in urls:
      f.write(f"{url}\n")


# Clear output after pip install.
display.clear_output()
storage_client = storage.Client(credentials=credentials.AnonymousCredentials())
geometry = get_region_geometry(
    region_border_source, region, your_own_wkt_polygon
)
s2_tokens = get_bounding_box_s2_covering_tokens(geometry)

region_manifest_blobs = multithreaded_get_matching_manifest_blobs(s2_tokens)

geotiff_urls = multithreaded_extract_geotiff_urls(
    region_manifest_blobs, geometry
)

write_to_file(_LOCAL_DOWNLOAD_URL_FILE_PATH, geotiff_urls)

print(f"Finished writing urls to file. File contains {len(geotiff_urls)} urls")

Preparing your_own_wkt_polygon.


Fetching matching manifests:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting urls:   0%|          | 0/7 [00:00<?, ?it/s]

Finished writing urls to file. File contains 9 urls


In [8]:
# @title Download text file with urls

from google.colab import files

files.download(_LOCAL_DOWNLOAD_URL_FILE_PATH)

file_name = os.path.basename(_LOCAL_DOWNLOAD_URL_FILE_PATH)
print(f"""
Download the GeoTIFFs using the gsutil command like so:

cat {file_name} | gsutil -m cp -I /my/target


Alternatively, GeoTIFFs can also be loaded in Earth Engine directly. Example: https://code.earthengine.google.com/ca39918365011141c154e9fa26b2c563

""")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


Download the GeoTIFFs using the gsutil command like so:

cat downloadable_urls.txt | gsutil -m cp -I /my/target


Alternatively, GeoTIFFs can also be loaded in Earth Engine directly. Example: https://code.earthengine.google.com/ca39918365011141c154e9fa26b2c563


