<a href="https://colab.research.google.com/github/chadbr/ecimtest/blob/main/ecim_both.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Init

In [None]:
# google-api-core==2.19.1
# google-auth==2.33.0
# google-cloud-core==2.4.1
# google-cloud-storage==2.18.2
# google-crc32c==1.5.0
# google-resumable-media==2.7.2
# googleapis-common-protos==1.63.2

!pip install osdu-api==0.26.0 \
  --extra-index-url https://community.opengroup.org/api/v4/projects/148/packages/pypi/simple
!pip install requests==2.32.3
!pip install tenacity==9.0.0
!pip install urllib3==2.2.2



### Required

In [None]:
import pprint
import requests

import io
import os
import pprint
import urllib3
import json

from pprint import pprint

from IPython.display import display
from PIL import Image

from osdu_api.auth.refresh_token import BaseTokenRefresher

from osdu_api.clients.storage.record_client import RecordClient
from osdu_api.clients.search.search_client import SearchClient
from osdu_api.clients.schema.schema_client import SchemaClient
from osdu_api.clients.dataset.dataset_dms_client import DatasetDmsClient
from osdu_api.clients.dataset.dataset_registry_client import DatasetRegistryClient

from osdu_api.model.storage.legal import Legal
from osdu_api.model.storage.record import Record
from osdu_api.model.storage.record_ancestry import RecordAncestry
from osdu_api.model.storage.acl import Acl
from osdu_api.model.search.query_request import QueryRequest
from osdu_api.model.storage.query_records_request import QueryRecordsRequest
from osdu_api.model.dataset.create_dataset_registries_request import CreateDatasetRegistriesRequest
from google.colab import userdata


# Silence urllib3 warnings for the rest of the session
# (presumably the TLS-verification noise from the demo endpoints — confirm).
urllib3.disable_warnings()
print("\ndone")

#### Option 1: Credentials for Cimpl


Credentials for remote Cimpl

In [None]:
# %%script echo skip the optional part

# CLOUD_PROVIDER = "baremetal"
# os.environ["KEYCLOAK_AUTH_URL"] = "https://keycloak.ecim24.com/realms/osdu/protocol/openid-connect/token"
# os.environ["KEYCLOAK_CLIENT_ID"] = "datafier"
# os.environ["KEYCLOAK_CLIENT_SECRET"] = userdata.get("datafierpassword")
# os.environ["CLOUD_PROVIDER"] = CLOUD_PROVIDER

# schema = 'https'
# BASE_URL = f"{schema}://osdu.ecim24.com"

# print("\ndone")

Credentials for local Cimpl

In [None]:
# %%script echo skip the optional part

# CLOUD_PROVIDER = "baremetal"
# os.environ["KEYCLOAK_AUTH_URL"] = "http://keycloak.localhost/realms/osdu/protocol/openid-connect/token"
# os.environ["KEYCLOAK_CLIENT_ID"] = "osdu-admin"
# os.environ["KEYCLOAK_CLIENT_SECRET"] = ""
# os.environ["CLOUD_PROVIDER"] = CLOUD_PROVIDER

# schema = 'http'
# BASE_URL = f"{schema}://osdu.localhost"

# print("\ndone")

#### Option 2: Credentials for GC

In [None]:
# %%script echo skip the optional part

# Interactive Google login; --no-launch-browser prints a URL to open manually
# instead of trying to start a browser on the notebook host.
!gcloud auth application-default login --no-launch-browser


# Provider name handed to every OSDU client created later in the notebook.
CLOUD_PROVIDER = "gc"

# Also exported as an environment variable
# (presumably read by osdu_api when it selects the auth flow — confirm).
os.environ["CLOUD_PROVIDER"] = CLOUD_PROVIDER

# NOTE: `schema` holds the URL scheme (http/https), not an OSDU schema; later
# cells prefix every service host with f"{schema}://".
schema = 'https'
BASE_URL = f"{schema}://gcp.ecim24.com"


#### Loading the config

In [None]:
# Fetch the environment description published by the platform and flatten its
# list of {key, value} entries into a plain dict used by every later cell.
config_url = BASE_URL + "/api/config/v1/postman-environment"
config_http_response = requests.get(config_url)
# Stop here with the HTTP status rather than a confusing JSON/KeyError below.
config_http_response.raise_for_status()
config_response = config_http_response.json()
config = {entity['key']: entity['value'] for entity in config_response['values']}
# Always work in the "osdu" partition, overriding any value from the endpoint.
config['data-partition-id'] = "osdu"

# pprint(config_response)
print(config['SEISMICDMS_HOST'])
print(f"data partition is {config['data-partition-id']}")
print("\ndone")

In [None]:
# One token refresher is shared by all OSDU clients created below; later cells
# also read token_refresher.access_token directly for raw `requests` calls.
token_refresher = BaseTokenRefresher()
# Request a token right away (presumably so auth problems surface here — confirm).
token_refresher.refresh_token()
#print("\ndone")

#### OSDU clients and default values

In [None]:
# Every OSDU client takes the same provider / token / partition triple, so it
# is written once and unpacked into each constructor.
_common_client_args = dict(
    provider=CLOUD_PROVIDER,
    token_refresher=token_refresher,
    data_partition_id=config['data-partition-id'],
)

search_client = SearchClient(
    search_url=f"{schema}://{config['SEARCH_HOST']}", **_common_client_args
)

schema_client = SchemaClient(
    schema_url=f"{schema}://{config['SCHEMA_HOST']}/api/schema-service/v1", **_common_client_args
)

# The DMS client and the registry client both talk to the Dataset service host.
dataset_client = DatasetDmsClient(
    dataset_url=f"{schema}://{config['DATASET_HOST']}", **_common_client_args
)
dataset_registry_client = DatasetRegistryClient(
    dataset_url=f"{schema}://{config['DATASET_HOST']}", **_common_client_args
)

# storage_client and record_client are two names for the same RecordClient.
storage_client = record_client = RecordClient(
    storage_url=f"{schema}://{config['STORAGE_HOST']}", **_common_client_args
)

# Probe the index with a one-record wildcard search. The ACL of whatever record
# comes back is reused as the default ACL for everything this notebook creates.
query_request = QueryRequest(
    kind="*:*:*:*",
    query="",
    limit=1,
    returned_fields=["*"]
)
search_result = search_client.query_records(query_request)
# Surface authentication / routing problems as an HTTP error, not a KeyError.
search_result.raise_for_status()
response = search_result.json()
pprint(response)

# An empty index would otherwise fail below with an unexplained IndexError.
if not response.get('results'):
    raise RuntimeError(
        "The search returned no records, so no default ACL can be derived; "
        "load at least one record into the partition first."
    )

default_acl = response['results'][0]['acl']
default_legal_tags = ["osdu-default-data-tag"]

pprint(default_acl)
pprint(default_legal_tags)

urllib3.disable_warnings()

print("\ndone")

# Slot 1 - Using Core APIs

#### All to register and make successful connection to workbook and both OSDU instances

In [None]:
# Connectivity check: call the Search service's /info endpoint with plain
# `requests` and show what it reports.
info_url = f"{schema}://{config['SEARCH_HOST']}/info"
search_response = requests.get(info_url)
pprint(search_response.json())
print("\ndone")

#### All able to execute and understand the result on some of the core APIs

In [None]:
# List all records: wildcard search across every kind, capped at two hits.
query_request = QueryRequest(kind="*:*:*:*", query="", limit=2, returned_fields=["*"])
response = search_client.query_records(query_request)
pprint(response.json())
print("\ndone")

In [None]:
# Narrow the search to a single schema type/kind (Agreement master data),
# again capped at two hits.
query_request = QueryRequest(
    kind="osdu:wks:master-data--Agreement:1.0.0", query="", limit=2, returned_fields=["*"]
)
response = search_client.query_records(query_request)
pprint(response.json())
print("\ndone")

In [None]:
# Schemas are themselves stored in the platform: fetch the definition of the
# Agreement kind from the Schema service.
schema_response = schema_client.get_schema_by_id('osdu:wks:master-data--Agreement:1.0.0')
response = schema_response.json()
# Serialise with indentation so the nested definition is readable.
schema_json = json.dumps(response, indent=4)
print(schema_json)
print("\ndone")

#### Show that the platform is generic and extensible, and can store any data type in addition to the defined "well-known" schemas






In [None]:
# Ask the Dataset service where and how a generic file should be uploaded.
storage_instructions_response = dataset_client.storage_instructions(kind_sub_type='dataset--File.Generic')
response = storage_instructions_response.json()
pprint(response)

In [None]:
# # get the file from Google Drive - OPTION #1

# from google.colab import drive
# drive.mount('/content/drive')

# with open('/content/drive/MyDrive/image.jpg', 'rb') as file_content:
#     file_content = file_content.read()


In [None]:
# get a file from the bucket

from google.cloud import storage as google_storage

# The demo bucket is read with an anonymous (credential-less) client.
gs_client = google_storage.Client.create_anonymous_client()
bucket = gs_client.bucket("ecim-demo-2024")
blob = bucket.get_blob("image.jpg")
# get_blob() returns None when the object does not exist; fail with a clear
# message instead of an AttributeError on the next line.
if blob is None:
    raise FileNotFoundError("image.jpg was not found in bucket ecim-demo-2024")
file_content = blob.download_as_bytes()

In [None]:
# All to upload an image

def upload_file_and_metadata(file_content: bytes) -> str:
    """Upload a file through the Dataset service and register its metadata.

    Steps: request storage instructions for a generic file, PUT the bytes to
    the signed URL they contain, then register a ``dataset--File.Generic``
    record that points at the uploaded file.

    Relies on notebook globals: ``dataset_client``, ``dataset_registry_client``,
    ``default_acl``, ``default_legal_tags`` and ``config``.

    Args:
        file_content: Raw bytes of the file to upload.

    Returns:
        The id of the newly registered dataset record.

    Raises:
        requests.HTTPError: if any of the three service calls returns an error
            status (assumes the clients return ``requests.Response`` objects,
            as the ``.json()`` / ``.raise_for_status()`` calls in this notebook
            suggest).
    """
    storage_instruction = dataset_client.storage_instructions(kind_sub_type="dataset--File.Generic")
    storage_instruction.raise_for_status()
    storage_location = storage_instruction.json()['storageLocation']

    # Upload the bytes. no_auth=True is passed because the target is a signed
    # URL (presumably it carries its own authorisation — confirm).
    upload_response = dataset_client.put_file(
        url=storage_location['signedUrl'], data=file_content, no_auth=True
    )
    # Without this check a failed upload would still be registered below.
    upload_response.raise_for_status()

    partition = config['data-partition-id']
    record_list = [
        Record(
            kind='osdu:wks:dataset--File.Generic:1.0.0',
            acl=Acl(owners=default_acl['owners'], viewers=default_acl['viewers']),
            legal=Legal(legaltags=default_legal_tags, other_relevant_data_countries=["US"], status="compliant"),
            data={
                "DatasetProperties": {
                    "FileSourceInfo": {
                        "FileSource": storage_location['fileSource'],
                        "Name": "image.jpg"
                    }
                },
                # Later cells search for records by this exact name.
                "Name": "Dataset with images",
                "ResourceSecurityClassification": f"{partition}:reference-data--ResourceSecurityClassification:RESTRICTED:",
                "SchemaFormatTypeID": f"{partition}:reference-data--SchemaFormatType:TabSeparatedColumnarText:"
            },
            ancestry=RecordAncestry(parents=[]),
        )
    ]

    response = dataset_registry_client.register_dataset(CreateDatasetRegistriesRequest(dataset_registries=record_list))
    response.raise_for_status()
    registered_dataset = response.json()

    pprint(storage_location)
    pprint(registered_dataset)
    registered_dataset_id = registered_dataset["datasetRegistries"][0]["id"]
    return registered_dataset_id


# Upload the image bytes fetched earlier; the returned dataset id is used by
# the following cell to download the file again.
dataset_id = upload_file_and_metadata(file_content)


In [None]:
# Download the file again, starting from nothing but the dataset id.
# Step 1: ask the Dataset service for retrieval instructions; the part needed
# here is the signed URL inside them.
retrieval_response = dataset_client.retrieval_instructions(dataset_id)
retrieval_response.raise_for_status()
retrieval_instructions = retrieval_response.json()
first_dataset = retrieval_instructions["datasets"][0]
signed_url = first_dataset["retrievalProperties"]["signedUrl"]

# Step 2: fetch the bytes from the signed URL and render them as an image.
response = requests.get(signed_url)
response.raise_for_status()
image_file = io.BytesIO(response.content)
display(Image.open(image_file))



In [None]:
# Everyone searches for their own image - and will probably also see the
# images uploaded by the other attendees, since all share the same name.
query_request = QueryRequest(
    kind="*:*:*:*", query='data.Name:"Dataset with images"', limit=10, returned_fields=["*"]
)
response = search_client.query_records(query_request)
pprint(response.json())

# Slot 2 - Investigating well data

#### All to search/list wells/wellbores/well logs/trajectories/... that are uploaded in the **instances**

In [None]:
# Wells: up to five Well master-data records of version 1.0.0.
# (A wildcard kind such as "*:*:master-data--Well:*" is the broader alternative.)
query_request = QueryRequest(
    kind="osdu:wks:master-data--Well:1.0.0", query="", limit=5, returned_fields=["*"]
)
response = search_client.query_records(query_request)
pprint(response.json())

In [None]:
# Well logs: up to five WellLog work-product components, any schema version.
query_request = QueryRequest(
    kind="osdu:wks:work-product-component--WellLog:*.*.*", query="", limit=5, returned_fields=["*"]
)
response = search_client.query_records(query_request)
pprint(response.json())

In [None]:
# Wellbores: up to five Wellbore master-data records of version 1.0.0.
query_request = QueryRequest(
    kind="osdu:wks:master-data--Wellbore:1.0.0", query="", limit=5, returned_fields=["*"]
)
response = search_client.query_records(query_request)
pprint(response.json())

In [None]:
# Trajectories: up to five WellboreTrajectory work-product components, any
# schema version.
query_request = QueryRequest(
    kind="osdu:wks:work-product-component--WellboreTrajectory:*.*.*",
    query="",
    limit=5,
    returned_fields=["*"],
)
response = search_client.query_records(query_request)
pprint(response.json())

#### For one of the few wells/wellbores that we have prepped a bit

In [None]:
# Use lineage to find the well logs that belong to one specific wellbore:
# filter WellLog records on the wellbore id stored in data.WellboreID.
query_request = QueryRequest(
    kind="osdu:wks:work-product-component--WellLog:*.*.*",
    query='data.WellboreID:"osdu:master-data--Wellbore:7233:"',
    limit=5,
    returned_fields=["*"],
)
response = search_client.query_records(query_request)
pprint(response.json())


In [None]:
# By usage show that the platform can handle and store versions for some of these data.
# Flow: create a record, update it twice, and list the record's versions after
# each step so the growing version history is visible.
record = Record(
    kind='osdu:wks:dataset--File.Generic:1.0.0',
    acl=Acl(owners=default_acl['owners'], viewers=default_acl['viewers']),
    legal=Legal(legaltags=default_legal_tags, other_relevant_data_countries=["US"], status="compliant"),
    data={
        "msg": "Initial Message from AutoTest while testing update",
        "weight": 777.0
    })
# create
response = storage_client.create_update_records(records=[record])
stored_record = response.json()
recordId = stored_record['recordIds'][0]
pprint("the new record")
pprint(stored_record)
print("\n")

# get versions
versions = storage_client.get_record_versions(recordId=recordId).json()
pprint("current versions")
pprint(versions)
print("\n")


# update
pprint("updating the record")
record.data["weight"] = 778
# Setting the id makes this call target the record created above.
record.id = recordId
# Send the modified record itself; an empty list would not submit this change.
response = storage_client.create_update_records(records=[record])

# get versions
versions = storage_client.get_record_versions(recordId=recordId).json()
pprint("current versions")
pprint(versions)
print("\n")

# update
pprint("updating the record")
record.data["weight"] = 779
record.id = recordId
response = storage_client.create_update_records(records=[record])

# get versions
versions = storage_client.get_record_versions(recordId=recordId).json()
pprint("current versions")
pprint(versions)
# The last list entry is taken as the version to inspect
# (presumably the most recent one — confirm the ordering).
checking_version = versions["versions"][-1]
pprint("current version")
pprint(checking_version)
print("\n")

# get specific version
version = storage_client.get_specific_record(recordId=recordId, version=checking_version).json()
pprint("specific version")
pprint(version)

In [None]:
# By usage show the difference between index/Storage/File APIs, for a data object
# In practice the user seeing the indexed data, the stored data and the payload data (CSV, LAS, DLIS, Parquet)

# upload file and metadata using dataset
# The helper expects raw bytes. image_file is a BytesIO that an earlier cell
# already handed to Image.open, so its read position may no longer be at the
# start; getvalue() returns the complete buffer regardless of position.
upload_file_and_metadata(image_file.getvalue())

# NOTE: indexing takes a while (about 10 sec according to the original note),
# so the search below may still return a record uploaded by an earlier cell.
# search
query_request = QueryRequest(
    kind="*:*:*:*",
    query="data.Name:\"Dataset with images\"",
    limit=2,
    returned_fields=["*"]
)
images_with_metadata = search_client.query_records(query_request).json()
pprint(images_with_metadata)

# get record from storage
record_id = images_with_metadata['results'][0]['id']
images_with_metadata = storage_client.query_record(recordId=record_id)
pprint(images_with_metadata.json())

# download using File Service
file_host = config["FILE_HOST"]
# Every other service host in this notebook is prefixed with the URL scheme;
# add it here too unless the configured value already carries one.
if "://" not in file_host:
    file_host = f"{schema}://{file_host}"
url = f'{file_host}/files/{record_id}/downloadURL'
downloadUrlObject = requests.get(url, headers={'Data-Partition-Id': config['data-partition-id'], 'Authorization': f'Bearer {token_refresher.access_token}'}).json()
pprint(downloadUrlObject)

file_response = requests.get(downloadUrlObject['SignedUrl'], headers={'Authorization': f'Bearer {token_refresher.access_token}'}, allow_redirects=True)
file_response.raise_for_status()
file_content = file_response.content
file_object = io.BytesIO(file_content)

print("\n", 24 * "*")
print("\nDisplay image:\n")
display(Image.open(file_object))



# Slot 3 - Wellbore DDMS

For one of the few well/wellbores that we have prepped a bit

In [None]:
# Wellbore DDMS walkthrough, part 1: the well.
# Goals of this slot:
#   - see that the DDMS is an added capability on top of Index/Storage/File
#   - see that the same data can be read through both the core APIs and the DDMS
#   - basic well info (well header, wellbore header, ...) matches across both
#   - added DDMS functionality (e.g. extracting one curve or a curve interval)
#     is still to be determined

# Create a new well through the Wellbore DDMS.
url = f"{schema}://{config['WELLBORE_DDMS_HOST']}/ddms/v3/wells"
headers = {
    'data-partition-id': config['data-partition-id'],
    'Authorization': f'Bearer {token_refresher.access_token}',
}
body = [
    {
        "acl": default_acl,
        "data": {
            "FacilityName": "Faciliity_AutoTest_999956686160",
            "ExtensionProperties": {"UWI": "20-000-00000-00"},
        },
        "id": f"{config['data-partition-id']}:master-data--Well:AutoTest_999956686160",
        "kind": "osdu:wks:master-data--Well:1.0.0",
        "legal": {
            "legaltags": default_legal_tags,
            "otherRelevantDataCountries": ["FR", "US"],
            "status": "compliant",
        },
    }
]

well_post_response = requests.post(url, json=body, headers=headers)
uploaded_well = well_post_response.json()
pprint(uploaded_well)

# Read the well back through Search, filtering on its record id ...
query_request = QueryRequest(
    kind="osdu:wks:master-data--Well:1.0.0",
    query='id:"osdu:master-data--Well:AutoTest_999956686160"',
    limit=5,
    returned_fields=["*"],
)
downloaded_well1 = search_client.query_records(query_request).json()
pprint(downloaded_well1)

# ... and through the Wellbore DDMS, using the id returned by the POST above.
well_record_id = uploaded_well['recordIds'][0]
url = f"{schema}://{config['WELLBORE_DDMS_HOST']}/ddms/v3/wells/{well_record_id}"
downloaded_well2 = requests.get(url, headers=headers).json()
pprint(downloaded_well2)


# Create a new wellbore through the Wellbore DDMS, then read it back via both
# Search and the DDMS.

url = f"{schema}://{config['WELLBORE_DDMS_HOST']}/ddms/v3/wellbores"
headers = {'data-partition-id': config['data-partition-id'], 'Authorization': f'Bearer {token_refresher.access_token}'}
# A dict literal keeps only the last value given for a repeated key, so the
# "data" payload is written exactly once here.
body = [{
        "acl": default_acl,
        "data": {
            "ExtensionProperties": {
                "airGap": {
                    "unitKey": "ft",
                    "value": 35.0
                },
                "country": "United States of America",
                "operator": "Francois Vinyes",
                "uwi": "33-089-00300-00-01",
                "wellboreType": "bypass"
            },
            "SpatialLocation": {
                "Wgs84Coordinates": {
                    "features": [{
                            "geometry": {
                                # Six positions, three numbers each.
                                "coordinates": [
                                    [-103.2380248, 46.8925081, 5301.0],
                                    [-103.2380248, 46.8925081, 2801.0],
                                    [-103.2378748, 46.892608100000004, 301.0],
                                    [-103.23742477750001, 46.89270811, -2199.0],
                                    [-103.23667470999663, 46.892808120001, -4699.0],
                                    [-103.2356245974865, 46.892908130002, -7199.0]
                                ],
                                "type": "LineString"
                            },
                            "properties": {
                                "name": "Newton 2-31-Lat-1"
                            },
                            "type": "Feature"
                        }
                    ],
                    "type": "FeatureCollection"
                }
            },
            "FacilityName": "Faciliity_AutoTest_99995668616",
            # Links the wellbore to the AutoTest well id.
            "WellID": "osdu:master-data--Well:AutoTest_999956686160:"
        },
        "id": f"{config['data-partition-id']}:master-data--Wellbore:AutoTest_999956686160",
        "kind": "osdu:wks:master-data--Wellbore:1.0.0",
        "legal": {
            "legaltags": default_legal_tags,
            "otherRelevantDataCountries": [
                "FR",
                "US"
            ]
        },
        "meta": [{
                "kind": "Unit",
                "name": "Measure depth default unit",
                "persistableReference": "persistableReference",
                "propertyNames": ["symbol"]
            }
        ]
    }]

uploaded_wellbore = requests.post(url, json=body, headers=headers).json()
pprint(uploaded_wellbore)

# Get the wellbore using Search
query_request = QueryRequest(
    kind="osdu:wks:master-data--Wellbore:1.0.0",
    query="id:\"osdu:master-data--Wellbore:AutoTest_999956686160\"",
    limit=5,
    returned_fields=["*"]
)
downloaded_wellbore1 = search_client.query_records(query_request).json()
# pprint(downloaded_wellbore1)

# Get the wellbore using Wellbore DDMS (id taken from the POST response above)
url = f"{schema}://{config['WELLBORE_DDMS_HOST']}/ddms/v3/wellbores/{uploaded_wellbore['recordIds'][0]}"
downloaded_wellbore2 = requests.get(url, headers=headers).json()
pprint(downloaded_wellbore2)

# Create a trajectory through the Wellbore DDMS, then read it back via both
# Search and the DDMS.
url = f"{schema}://{config['WELLBORE_DDMS_HOST']}/ddms/v3/wellboretrajectories"
headers = {'data-partition-id': config['data-partition-id'], 'Authorization': f'Bearer {token_refresher.access_token}'}
body = [
    {
        "acl": default_acl,
        "legal": {
            "legaltags": default_legal_tags,
            "otherRelevantDataCountries": [
                "FR",
                "US"
            ]
        },
        "id": f"{config['data-partition-id']}:work-product-component--WellboreTrajectory:AutoTest_999956686160",
        # The kind authority is "osdu", as in every other kind in this notebook
        # and in the search a few lines below; it is not the data partition id.
        "kind": "osdu:wks:work-product-component--WellboreTrajectory:1.1.0",
        "data": {
            "Name": "Wellbore_Trajectory_AutoTest_999956686160",
            # Reference to the AutoTest wellbore: its record id followed by a
            # trailing colon, with a single colon between the segments.
            "WellboreID": f"{config['data-partition-id']}:master-data--Wellbore:AutoTest_999956686160:",
            "TopDepthMeasuredDepth": 0,
            "BaseDepthMeasuredDepth": 2.0,
            "VerticalMeasurement": {
                "EffectiveDateTime": "2021-08-17T14:13:08.174Z",
                "VerticalMeasurement": 0
            },
            # One entry per station property: property type, unit and display name.
            "AvailableTrajectoryStationProperties": [
                {
                    "TrajectoryStationPropertyTypeID": f"{config['data-partition-id']}:reference-data--TrajectoryStationPropertyType:MD:",
                    "StationPropertyUnitID": f"{config['data-partition-id']}:reference-data--UnitOfMeasure:m:",
                    "Name": "Measured Depth"
                },
                {
                    "TrajectoryStationPropertyTypeID": f"{config['data-partition-id']}:reference-data--TrajectoryStationPropertyType:INCL:",
                    "StationPropertyUnitID": f"{config['data-partition-id']}:reference-data--UnitOfMeasure:dega:",
                    "Name": "Inclination"
                },
                {
                    "TrajectoryStationPropertyTypeID": f"{config['data-partition-id']}:reference-data--TrajectoryStationPropertyType:AZIM_TN:",
                    "StationPropertyUnitID": f"{config['data-partition-id']}:reference-data--UnitOfMeasure:dega:",
                    "Name": "Azimuth"
                },
                {
                    "TrajectoryStationPropertyTypeID": f"{config['data-partition-id']}:reference-data--TrajectoryStationPropertyType:TVD:",
                    "StationPropertyUnitID": f"{config['data-partition-id']}:reference-data--UnitOfMeasure:m:",
                    "Name": "True Vertical Depth"
                }
            ]
        }
    }
]
uploaded_trajectories = requests.post(url, json=body, headers=headers).json()
pprint(uploaded_trajectories)

# Get the trajectory using Search
query_request = QueryRequest(
    kind="osdu:wks:work-product-component--WellboreTrajectory:1.1.0",
    query="id:\"osdu:work-product-component--WellboreTrajectory:AutoTest_999956686160\"",
    limit=5,
    returned_fields=["*"]
)
downloaded_trajectories1 = search_client.query_records(query_request).json()
pprint(downloaded_trajectories1)

# Get the trajectory using Wellbore DDMS (id taken from the POST response above)
url = f"{schema}://{config['WELLBORE_DDMS_HOST']}/ddms/v3/wellboretrajectories/{uploaded_trajectories['recordIds'][0]}"
downloaded_trajectories2 = requests.get(url, headers=headers).json()
pprint(downloaded_trajectories2)
