# Create a Downhole Collection object

This notebook shows how to create a Downhole Collection object using made-up data.

Executing the cell below will create a ServiceManagerWidget and open a browser window for you to log in.

Once logged in, a widget will be displayed below allowing you to select an organisation and workspace to work with.

__Required:__ You must provide credentials via the `EVO_CLIENT_ID`, `EVO_BASE_URI` and `EVO_DISCOVERY_URL` environment variables, or by filling in the default values in the code cell below.

References:

- https://developer.seequent.com/docs/data-structures/geoscience-objects/schemas/downhole-collection
- https://github.com/SeequentEvo/evo-schemas/blob/main/schema/objects/downhole-collection/1.3.1/downhole-collection.schema.json

In [None]:
import os
import pyarrow as pa

from evo.notebooks import ServiceManagerWidget
from evo.objects import ObjectAPIClient
from evo.objects import ObjectMetadata

# Credentials can be provided from .env or filled into second params below.
# Each value is read from the process environment; when the variable is unset,
# os.getenv returns its second argument (an empty string here), which you can
# replace with a literal value.
# NOTE(review): nothing in this cell loads a .env file, so the variables must
# already be present in the kernel's environment — confirm how .env is loaded.
client_id = os.getenv("EVO_CLIENT_ID", "")
base_uri = os.getenv("EVO_BASE_URI", "")
discovery_url = os.getenv("EVO_DISCOVERY_URL", "")

# Build the widget with the three credentials and await its login(); the result
# is kept as `manager`, which later cells use to obtain the environment,
# connector and cache.
manager = await ServiceManagerWidget.with_auth_code(
    client_id=client_id, base_uri=base_uri, discovery_url=discovery_url
).login()

In [None]:
## Helper functions.

def save_vector(array, pa_type) -> dict:
    """Save a 1D vector as a single-column table and return its reference dict.

    The values are converted to a pyarrow array of type ``pa_type``, wrapped in
    a one-column table whose column is named ``c0``, and handed to the
    notebook-level ``data_client.save_table``. The dict returned by that call
    gets ``"width"`` set to 1 before it is returned.

    Note: ``data_client`` is a notebook-level variable, so the cell that
    creates it must run before this function is called.
    """
    single_column = {"c0": pa.array(array, type=pa_type)}
    packed_ref = data_client.save_table(pa.table(single_column))
    packed_ref["width"] = 1
    return packed_ref


def save_matrix(columns: list[pa.Array], width: int) -> dict:
    """Save a multi-column matrix as a table and return its reference dict.

    ``columns`` holds one pyarrow array per matrix column. The columns are
    named ``c0`` .. ``c{width-1}``, the table is passed to the notebook-level
    ``data_client.save_table``, and the dict returned by that call gets
    ``"width"`` set to ``width`` before it is returned.

    Note: ``data_client`` is a notebook-level variable, so the cell that
    creates it must run before this function is called.
    """
    column_names = []
    for index in range(width):
        column_names.append(f"c{index}")
    # Explicit names are supplied because the columns are passed as a plain list.
    packed_ref = data_client.save_table(pa.table(columns, names=column_names))
    packed_ref["width"] = width
    return packed_ref


def assert_packed_refs(obj, path="root"):
    """Recursively assert that every packed array or lookup table holds a string ``data`` reference.

    A dict is treated as a packed array when it has ``length``, ``width`` and
    ``data_type`` keys, and as a packed lookup table when it has ``length``
    plus ``keys_data_type`` or ``values_data_type``. Lookup tables are matched
    even without a ``width`` key, because the lookup-table literal used in this
    notebook carries none; requiring it would let an inline ``"data": []``
    placeholder slip through unchecked.

    Args:
        obj: Any nested structure of dicts and lists; other values are ignored.
        path: Dotted/indexed location of ``obj``, used only in the failure message.

    Raises:
        AssertionError: If a packed dict's ``data`` value is not a ``str``.
    """
    if isinstance(obj, dict):
        has_length = "length" in obj
        is_array = has_length and "width" in obj and "data_type" in obj
        is_lookup_table = has_length and ("keys_data_type" in obj or "values_data_type" in obj)
        if is_array or is_lookup_table:
            assert isinstance(obj.get("data"), str), f"'data' must be a string ref at {path}"
        # Keep descending: packed dicts may themselves sit inside other dicts.
        for key, value in obj.items():
            assert_packed_refs(value, f"{path}.{key}")
    elif isinstance(obj, list):
        for index, item in enumerate(obj):
            assert_packed_refs(item, f"{path}[{index}]")


def display_objects(objects: list[ObjectMetadata]):
    """Print how many objects were found, then one line per object.

    Each object line shows the object's ``path``, its ``schema_id`` in angle
    brackets, and its ``id`` in parentheses.
    """
    count = len(objects)
    plural_suffix = "" if count == 1 else "s"
    print(f"Found {count} object{plural_suffix}")
    for metadata in objects:
        print(f"{metadata.path}: <{metadata.schema_id}> ({metadata.id})")

In [None]:
# Resolve the environment and connector from the logged-in manager.
environment = manager.get_environment()
connector = manager.get_connector()

# Build the object API client and query the service health.
# NOTE(review): service_health is not read again in the visible cells —
# presumably this is a connectivity check; confirm.
object_client = ObjectAPIClient(environment, connector)
service_health = await object_client.get_service_health()
# data_client is the notebook-level client that save_vector / save_matrix call
# to save tables; it is created with the manager's cache.
data_client = object_client.get_data_client(manager.cache)

# List the objects (50 per request) and print them.
all_objects = await object_client.list_all_objects(limit_per_request=50)
display_objects(all_objects)

In [None]:
## Create a bare minimum Downhole Collection object.
#
# The literal below lays out the object's structure. Most of the array / table
# entries are replaced further down in this cell with the reference dicts
# returned by the save helpers, so their inline "data" values are placeholders.

minimal_downhole_collection = {
    "name": "Sample downhole collection (minimal)",
    "uuid": None,
    "schema": "/objects/downhole-collection/1.3.1/downhole-collection.schema.json",
    "type": "downhole",
    "distance_unit": "m",
    "desurvey": "balanced_tangent",
    # NOTE(review): these extents do not enclose the coordinates saved later in
    # this cell (x 1000-1010, y 2000-2010, z 48-50) — confirm whether the box
    # should match the sample data.
    "bounding_box": {
        "min_x": 0.0,
        "max_x": 1.0,
        "min_y": 0.0,
        "max_y": 1.0,
        "min_z": 0.0,
        "max_z": 0.0,
    },
    "coordinate_reference_system": {"epsg_code": 32650},
    "location": {
        "hole_id": {
            "values": {
                "data_type": "int32",
                "length": 2,
                "width": 1,
                # Intended values, shown for reference only:
                # "data": [1, 2],
                "data": [],
            },
            # Lookup table: note that it has no "width" key.
            "table": {
                "keys_data_type": "int32",
                "values_data_type": "string",
                "length": 2,
                # Flattened key/value pairs: [key1, value1, key2, value2]
                # "data": [1, "ABC-001", 2, "ABC-002"],
                "data": [],
            },
        },
        "coordinates": {
            "data_type": "float64",
            "length": 2,  # two holes
            "width": 3,  # x, y, z
            # Intended values, shown for reference only:
            # "data": [1000.0, 2000.0, 50.0, 1010.0, 2010.0, 48.0],
            "data": [],
        },
        # path: [distance, azimuth, dip]
        "path": {
            "data_type": "float64",
            "length": 2,  # one row per hole
            "width": 3,
            "data": [0.0, 0.0, 90.0, 0.0, 90.0, 90.0],
            "attributes": [],
        },
        # holes rows [hole_index, offset, count]
        "holes": {
            "data_type": "int32/uint64/uint64",
            "length": 2,
            "width": 3,
            "data": [1, 0, 1, 2, 1, 1],
        },
        "attributes": [],
    },
    # A single distance-type collection with one distance value per hole.
    "collections": [
        {
            "name": "Minimal distance collection",
            "collection_type": "distance",
            "distance": {
                "values": {
                    "data_type": "float64",
                    "length": 2,
                    "width": 1,
                    "data": [1.0, 2.0],
                },
                "attributes": [],
                "unit": "m",
            },
            # Same [hole_index, offset, count] rows as location.holes.
            "holes": {
                "data_type": "int32/uint64/uint64",
                "length": 2,
                "width": 3,
                "data": [1, 0, 1, 2, 1, 1],
            },
        }
    ],
}

# Replace each inline array / table in the literal with the reference dict
# returned by saving the data through data_client.

# location.hole_id.values (1D, int32)
minimal_downhole_collection["location"]["hole_id"]["values"] = save_vector([1, 2], pa.int32())

# location.hole_id.table (2 columns: keys int32, values string), keys then values.
# Saved directly with data_client.save_table, so no "width" is set on the result here.
hole_id_keys = pa.array([1, 2], type=pa.int32())
hole_id_vals = pa.array(["ABC-001", "ABC-002"], type=pa.string())
hole_id_table = pa.table({"keys": hole_id_keys, "values": hole_id_vals})
minimal_downhole_collection["location"]["hole_id"]["table"] = data_client.save_table(hole_id_table)

# location.coordinates (2 rows x 3 cols float64), passed as one array per column: x, y, z
coords_x = pa.array([1000.0, 1010.0], type=pa.float64())
coords_y = pa.array([2000.0, 2010.0], type=pa.float64())
coords_z = pa.array([50.0, 48.0], type=pa.float64())
minimal_downhole_collection["location"]["coordinates"] = save_matrix([coords_x, coords_y, coords_z], width=3)

# location.distances (columns: final, target, current), one row per hole.
# This key is not present in the literal above; it is added here.
dist_final = pa.array([10.0, 12.0], type=pa.float64())
dist_target = pa.array([10.0, 12.0], type=pa.float64())
dist_current = pa.array([10.0, 12.0], type=pa.float64())
minimal_downhole_collection["location"]["distances"] = save_matrix([dist_final, dist_target, dist_current], width=3)

# location.path (2 rows x 3 cols float64): distance, azimuth, dip
# NOTE(review): this assignment replaces the whole "path" dict, so the
# "attributes": [] entry from the literal is dropped — confirm that is intended.
path_distance = pa.array([0.0, 0.0], type=pa.float64())
path_azimuth = pa.array([0.0, 90.0], type=pa.float64())
path_dip = pa.array([90.0, 90.0], type=pa.float64())
minimal_downhole_collection["location"]["path"] = save_matrix([path_distance, path_azimuth, path_dip], width=3)

# location.holes (2 rows x 3 cols: int32, uint64, uint64) rows = [hole_index, offset, count]
holes_idx = pa.array([1, 2], type=pa.int32())
holes_offset = pa.array([0, 1], type=pa.uint64())
holes_count = pa.array([1, 1], type=pa.uint64())
minimal_downhole_collection["location"]["holes"] = save_matrix([holes_idx, holes_offset, holes_count], width=3)

# collections[0].distance.values (1D float64)
minimal_downhole_collection["collections"][0]["distance"]["values"] = save_vector([1.0, 2.0], pa.float64())

# collections[0].holes (same structure as location.holes; reuses the same arrays)
minimal_downhole_collection["collections"][0]["holes"] = save_matrix([holes_idx, holes_offset, holes_count], width=3)

# Fail fast if any packed array still carries inline data instead of a string reference.
assert_packed_refs(minimal_downhole_collection)

print(minimal_downhole_collection)

In [None]:
import time

from evo.notebooks import FeedbackWidget

# time.time() embeds the current timestamp in the file name, so each run of
# this cell uses a different object path.
downhole_collection_path = f"test/sample-downhole-collection.{time.time()}.json"

# Upload the referenced blobs saved to the cache.
await data_client.upload_referenced_data(minimal_downhole_collection, fb=FeedbackWidget("Uploading data"))

# Create our Downhole Collection object.
new_downhole_collection = await object_client.create_geoscience_object(
    downhole_collection_path, minimal_downhole_collection
)

# Show the path, schema id and creation time reported for the new object.
print(f"{new_downhole_collection.path}: <{new_downhole_collection.schema_id}>")
print(f"\tCreated at: {new_downhole_collection.created_at}")

In [None]:
# Rebuild the clients from the manager and list the objects again.
# NOTE(review): this repeats the earlier setup cell; presumably it is re-run so
# the listing reflects the object just created and any workspace change made
# in the widget — confirm.
environment = manager.get_environment()
connector = manager.get_connector()

object_client = ObjectAPIClient(environment, connector)
service_health = await object_client.get_service_health()
data_client = object_client.get_data_client(manager.cache)

# List the objects (50 per request) and print them.
all_objects = await object_client.list_all_objects(limit_per_request=50)
display_objects(all_objects)

In [None]:
object = await object_client.download_object_by_path(path="/test/sample-downhole-collection.1758832895.8249798.json")
print(object.as_dict())