# -*- coding: utf-8 -*-
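"""Generate randomized tile request URLs for load-testing titiler-xarray.

Writes a `zarr_info.csv` summary plus one siege-style URL file per dataset
under `urls/`. Example invocation (flags as defined in get_arguments below):

    python gen_test_urls.py --env dev --numurls 100 --datasets external
"""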
import argparse
import csv
import json
import math
import random
import sys

import click
import morecantile

sys.path.append('..')
import helpers.zarr_helpers as zarr_helpers
from titiler_xarray.titiler.xarray.reader import xarray_open_dataset, get_variable
def get_arguments():
    parser = argparse.ArgumentParser(description="Generate test tile URLs for load testing.")
    parser.add_argument("--env", default="dev", help="Environment to run the script in. Options are 'dev' and 'prod'. Default is 'dev'.")
    parser.add_argument("--numurls", default=10, help="Number of URLs to generate.", type=int)
    parser.add_argument("--datasets", default="external", help="Which test datasets to load.", type=str)
    parser.add_argument("--quiet", action="store_true", help="Suppress the per-zoom progress output.")
    args = parser.parse_args()
    return args
def _percentage_split(size, percentages):
"""Freely copied from TileSiege https://github.com/bdon/TileSiege"""
prv = 0
cumsum = 0.0
for zoom, p in percentages.items():
cumsum += p
nxt = int(cumsum * size)
yield zoom, prv, nxt
prv = nxt
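
# Web Mercator tile matrix set used to address tiles as zoom/x/y.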
tms = morecantile.tms.get("WebMercatorQuad")
# ##########################################
# INPUTS
minzoom = 0
maxzoom = 6
default_bounds = [-180, -90, 180, 90]
# Seed the RNG so generated URL sets are reproducible across runs.
random.seed(3857)
# Per-zoom request weights (index = zoom level, 0-20), adapted from TileSiege;
# only zooms minzoom through maxzoom are actually used below.
distribution = [
    2, 2, 6, 12, 16, 27, 38, 41, 49, 56, 72,
    71, 99, 135, 135, 136, 102, 66, 37, 6, 2,
]
def generate_extremas(bounds: list[float]):
    """Return per-zoom min/max tile indices covering `bounds`, plus the
    total distribution weight across minzoom..maxzoom."""
    w, s, e, n = bounds
    extremas = {}
    total_weight = 0
    for zoom in range(minzoom, maxzoom + 1):
        total_weight += distribution[zoom]
ul_tile = tms.tile(w, n, zoom, truncate=True)
lr_tile = tms.tile(e, s, zoom, truncate=True)
minmax = tms.minmax(zoom)
extremas[zoom] = {
"x": {
"min": max(ul_tile.x, minmax["x"]["min"]),
"max": min(lr_tile.x, minmax["x"]["max"]),
},
"y": {
"min": max(ul_tile.y, minmax["y"]["min"]),
"max": min(lr_tile.y, minmax["y"]["max"]),
},
}
return extremas, total_weight
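
# Note: with the default global bounds, each zoom's extrema covers the full
# WebMercatorQuad range, i.e. x and y in [0, 2**zoom - 1].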
def main():
    args = get_arguments()
    # Load the dataset definitions produced by the 01-generate-datasets step.
    with open(f'../01-generate-datasets/{args.datasets}-datasets.json', 'r') as f:
        sources = json.load(f)
    # Skip pyramid (multiscale) datasets for now.
    sources = [item for item in sources.items() if 'pyramid' not in item[0]]
HOST = f"https://{args.env}-titiler-xarray.delta-backend.com"
numurls = args.numurls
print(f"Running script for HOST: {HOST}")
# Prepare the CSV file
csv_file = "zarr_info.csv"
for idx, dataset in enumerate(sources):
key, value = dataset
collection_name = key
source = value['dataset_url']
variable = value["variable"]
reference = value.get("extra_args", {}).get("reference", False)
multiscale = value.get("extra_args", {}).get("multiscale", False)
consolidated = value.get("extra_args", {}).get("consolidated", True)
drop_dim = value.get("extra_args", {}).get("drop_dim", None)
# some datasets will only be accessible via a special role the titiler-xarray lambda has
protected = value.get("extra_args", {}).get("protected", False)
bounds = value.get("extra_args", {}).get("bounds", default_bounds)
array_specs = {
'collection_name': collection_name,
'source': source,
'chunks': 'N/A',
'shape_dict': 'N/A',
'dtype': 'N/A',
'chunk_size_mb': 'N/A',
'compression': 'N/A',
'number_of_spatial_chunks': 'N/A',
'number_coordinate_chunks': 'N/A'
}
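        # Fill in real chunk/shape metadata when the store can be opened
        # anonymously; protected datasets keep the 'N/A' placeholders.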
if not protected:
ds = xarray_open_dataset(source, reference=reference, consolidated=consolidated)
if not multiscale:
da = get_variable(ds, variable=variable, drop_dim=drop_dim)
lat_extent, lon_extent = zarr_helpers.get_lat_lon_extents(da)
bounds = [lon_extent[0], lat_extent[0], lon_extent[1], lat_extent[1]]
array_specs.update(zarr_helpers.get_array_chunk_information(da, multiscale=multiscale))
        # Append one metadata row per dataset; write the header only on the first pass.
        mode = "w" if idx == 0 else "a"
        with open(csv_file, mode, newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=array_specs.keys())
            if idx == 0:
                writer.writeheader()
            writer.writerow(array_specs)
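        # Each per-collection file uses the TileSiege/siege URL-list format:
        # variable definitions first, then one templated tile URL per line.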
with open(f"urls/{collection_name}_urls.txt", "w") as f:
f.write(f"HOST={HOST}\n")
f.write("PATH=tiles/\n")
f.write("EXT=.png\n")
query_string = f"QUERYSTRING=?reference={reference}&variable={variable}&url={source}&consolidated={consolidated}"
if protected:
query_string += "&anon=false"
if drop_dim:
query_string += f"&drop_dim={drop_dim}"
f.write(f"{query_string}\n")
rows = 0
extremas, total_weight = generate_extremas(bounds=bounds)
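            # Split the requested URL count across zooms in proportion to the
            # per-zoom distribution weights.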
for zoom, start, end in _percentage_split(
numurls,
{
zoom: distribution[zoom] / total_weight
for zoom in range(minzoom, maxzoom + 1)
},
):
extrema = extremas[zoom]
rows_for_zoom = end - start
rows += rows_for_zoom
for sample in range(rows_for_zoom):
x = random.randint(extrema["x"]["min"], extrema["x"]["max"])
y = random.randint(extrema["y"]["min"], extrema["y"]["max"])
f.write(
f"$(HOST)/$(PATH){zoom}/{x}/{y}$(EXT)$(QUERYSTRING)\n"
)
                # Echo a per-zoom histogram (bar scaled to the share of all
                # requested URLs) unless --quiet is set.
                if not args.quiet:
                    p1 = " " if zoom < 10 else ""
                    p2 = " " * (len(str(10000)) - len(str(rows_for_zoom)))
                    bar = "█" * math.ceil(rows_for_zoom / numurls * 60)
                    click.echo(f"{p1}{zoom} | {p2}{rows_for_zoom} {bar}", err=True)
if __name__ == "__main__":
main()