# Init

### Required

In [None]:
!pip install -r requirements.txt

In [None]:
# Authenticate the current user through google.colab's auth helper.
# NOTE(review): presumably the resulting credentials are what the token
# refresher created further down relies on — confirm.
from google.colab import auth
auth.authenticate_user()
print('done')

In [2]:
import pprint
import requests

import os
import pprint
import urllib3
import json

from pprint import pprint

from osdu_api.auth.refresh_token import BaseTokenRefresher

from osdu_api.clients.storage.record_client import RecordClient
from osdu_api.clients.search.search_client import SearchClient
from osdu_api.clients.schema.schema_client import SchemaClient
from osdu_api.clients.dataset.dataset_dms_client import DatasetDmsClient
from osdu_api.clients.dataset.dataset_registry_client import DatasetRegistryClient

from osdu_api.model.storage.legal import Legal
from osdu_api.model.storage.record import Record
from osdu_api.model.storage.record_ancestry import RecordAncestry
from osdu_api.model.storage.acl import Acl
from osdu_api.model.search.query_request import QueryRequest
from osdu_api.model.storage.query_records_request import QueryRecordsRequest
from osdu_api.model.dataset.create_dataset_registries_request import CreateDatasetRegistriesRequest

urllib3.disable_warnings()


In [None]:
# Deployment to talk to and the cloud provider identifier. CLOUD_PROVIDER is
# exported through the environment as well as kept as a Python constant.
BASE_URL = "https://ecim.gcp.gnrg-osdu.projects.epam.com"
CLOUD_PROVIDER = "gc"
os.environ["CLOUD_PROVIDER"] = CLOUD_PROVIDER

# Fetch the environment description published by the deployment. The timeout
# keeps the cell from hanging forever on an unreachable host, and
# raise_for_status() reports an HTTP error here instead of as a confusing
# JSON decode error or KeyError a few lines later.
config_url = BASE_URL + "/api/config/v1/postman-environment"
config_http_response = requests.get(config_url, timeout=30)
config_http_response.raise_for_status()
config_response = config_http_response.json()

# Flatten the list of {"key": ..., "value": ...} entries into a plain dict.
config = {entity['key']: entity['value'] for entity in config_response['values']}

# pprint(config_response)
print(config['SEISMICDMS_HOST'])

In [None]:
# Create the token refresher object and call refresh_token() once up front.
# NOTE(review): which credentials BaseTokenRefresher uses is not visible here;
# presumably it depends on the CLOUD_PROVIDER environment variable — confirm.
token_refresher = BaseTokenRefresher()
token_refresher.refresh_token()

In [None]:
# Keyword arguments that are identical for every OSDU client built in this cell.
_shared_client_args = dict(
    provider=CLOUD_PROVIDER,
    token_refresher=token_refresher,
    data_partition_id=config['data-partition-id'],
)

# Both dataset clients are pointed at the same host.
_dataset_url = f"https://{config['DATASET_HOST']}"

search_client = SearchClient(search_url=f"https://{config['SEARCH_HOST']}", **_shared_client_args)
schema_client = SchemaClient(schema_url=f"https://{config['SCHEMA_HOST']}", **_shared_client_args)
dataset_client = DatasetDmsClient(dataset_url=_dataset_url, **_shared_client_args)
dataset_registry_client = DatasetRegistryClient(dataset_url=_dataset_url, **_shared_client_args)

# One RecordClient instance, reachable under two names.
record_client = RecordClient(storage_url=f"https://{config['STORAGE_HOST']}", **_shared_client_args)
storage_client = record_client

urllib3.disable_warnings()
print('done')

In [None]:
# Look up one existing File.Generic record and reuse its ACL for every record
# this notebook creates later on.
query_request = QueryRequest(
    kind="osdu:wks:dataset--File.Generic:1.0.0",
    query="",
    limit=1,
    returned_fields=["*"]
)
response = search_client.query_records(query_request).json()

# An empty result set (or an error payload without 'results') would otherwise
# show up as a bare IndexError/KeyError; fail with a message that says what is missing.
acl_source_records = response.get('results') or []
if not acl_source_records:
  raise RuntimeError(
      "No 'osdu:wks:dataset--File.Generic:1.0.0' record found to copy the default ACL from; "
      f"search response was: {response!r}"
  )
default_acl = acl_source_records[0]['acl']
default_legal_tags = ["osdu-default-data-tag"]

# pprint(response)
pprint(default_acl)
pprint(default_legal_tags)

# Slot 1 - Using Core APIs

#### All to register and make successful connection to workbook and both OSDU instances

In [None]:
# Plain GET (no auth headers) against the Search service's /info endpoint.
# The timeout stops the cell from hanging on an unreachable host.
search_response = requests.get(f"https://{config['SEARCH_HOST']}/info", timeout=30)
# Report an HTTP error status directly instead of failing later while decoding JSON.
search_response.raise_for_status()

pprint(search_response.json())

#### All able to execute and understand the result on some of the core APIs

In [None]:
# List all records
# The kind is a wildcard in every position and the query string is empty, so
# nothing is filtered; at most two records are requested, with all fields ("*").
query_request = QueryRequest(
    kind= "*:*:*:*",
    query="",
    limit=2,
    returned_fields=["*"]
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())


In [None]:
# List a specific subset of records based on schema type/kind
# Same request as the "list all" search, but restricted to one exact kind
# (File.Generic, version 1.0.0); at most two records are requested.
query_request = QueryRequest(
    kind="osdu:wks:dataset--File.Generic:1.0.0",
    query="",
    limit=2,
    returned_fields=["*"]
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())

In [None]:
# Find the description of specific schema/kind to show that schemas are a part of the platform
# The lookup was previously made with an empty id; use the File.Generic kind
# that the rest of this notebook searches for and creates records of.
schema_id = 'osdu:wks:dataset--File.Generic:1.0.0'
response = schema_client.get_schema_by_id(schema_id).json()
pprint(response)

In [None]:
# Extract some data from a specific data type
# ???

#### Show that the platform is generic and extendable and can store any data type in addition to the defined "well-known" schemas

In [None]:
# Ask the Dataset service for storage instructions for the File.Generic
# dataset sub-type and print the decoded JSON body.
response = dataset_client.storage_instructions(kind_sub_type='dataset--File.Generic').json()
pprint(response)

In [None]:
# All to upload an image

def upload_file_and_metadata(file_path='image.jpeg', dataset_name="Dataset with images"):
  """Upload a local file via the Dataset service and register a File.Generic record for it.

  Steps: request storage instructions, PUT the file content to the returned
  signed URL, then register a dataset record whose FileSource points at the
  uploaded content. The storage location and the registration response are
  pretty-printed; nothing is returned.

  Args:
    file_path: Path of the local file to upload. Its base name is stored as
      the record's file name. Defaults to 'image.jpeg'.
    dataset_name: Value stored in the record's data.Name field. Defaults to
      "Dataset with images", which the search cells in this notebook query for.

  Raises:
    RuntimeError: If the upload response reports an HTTP error status.
  """
  storage_instruction = dataset_client.storage_instructions(kind_sub_type="dataset--File.Generic")
  storage_location = storage_instruction.json()['storageLocation']

  # Upload the raw bytes to the signed URL; no_auth=True is passed because the
  # request goes to the signed URL rather than to an OSDU service endpoint.
  with open(file_path, 'rb') as file_content:
    upload_response = dataset_client.put_file(url=storage_location['signedUrl'], data=file_content, no_auth=True)

  # Do not register metadata for content that never arrived. The status is read
  # defensively because the return type of put_file is not visible in this file.
  upload_status = getattr(upload_response, 'status_code', None)
  if upload_status is not None and upload_status >= 400:
    raise RuntimeError(f"Upload of {file_path!r} failed with HTTP status {upload_status}")

  partition = config['data-partition-id']
  dataset_record = Record(
      kind='osdu:wks:dataset--File.Generic:1.0.0',
      acl=Acl(owners=default_acl['owners'], viewers=default_acl['viewers']),
      legal=Legal(legaltags=default_legal_tags, other_relevant_data_countries=["US"], status="compliant"),
      data={
          "DatasetProperties": {
              "FileSourceInfo": {
                  "FileSource": storage_location['fileSource'],
                  "Name": os.path.basename(file_path)
              }
          },
          "Name": dataset_name,
          "ResourceSecurityClassification": f"{partition}:reference-data--ResourceSecurityClassification:RESTRICTED:",
          "SchemaFormatTypeID": f"{partition}:reference-data--SchemaFormatType:TabSeparatedColumnarText:"
      },
      ancestry=RecordAncestry(parents=[]))

  registered_dataset = dataset_registry_client.register_dataset(
      CreateDatasetRegistriesRequest(dataset_registries=[dataset_record]))

  pprint(storage_location)
  pprint(registered_dataset.json())

upload_file_and_metadata()

In [None]:
# All to find their image and probably the other attendees' images
# Searches every kind for records whose data.Name equals the name written by
# upload_file_and_metadata(); up to ten matches are requested.
query_request = QueryRequest(
    kind="*:*:*:*",
    query="data.Name:\"Dataset with images\"",
    limit=10,
    returned_fields=["*"],
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())

In [None]:
# Show images in workbook and maybe on stage as they get uploaded
# ???

# Slot 2 - Investigating well data

#### All to search/list wells/wellbores/well logs/trajectories/... that are uploaded in the **instances**

In [None]:
# Wells
# Search for Well master-data records of one exact kind version; the trailing
# comment on the kind line keeps a wildcard alternative for reference.
query_request = QueryRequest(
    kind="osdu:wks:master-data--Well:1.0.0", # "*:*:master-data--Well:*",
    query="",
    limit=5,
    returned_fields=["*"]
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())

In [None]:
# Well logs
# Search for WellLog work-product-component records; the version part of the
# kind is a wildcard ("*.*.*"), so any schema version matches.

query_request = QueryRequest(
    kind="osdu:wks:work-product-component--WellLog:*.*.*",
    query="",
    limit=5,
    returned_fields=["*"]
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())

In [None]:
# Wellbores
# Search for Wellbore master-data records of one exact kind version (1.0.0).

query_request = QueryRequest(
    kind="osdu:wks:master-data--Wellbore:1.0.0",
    query="",
    limit=5,
    returned_fields=["*"]
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())

In [None]:
# Trajectories
# Search for WellboreTrajectory work-product-component records; the version
# part of the kind is a wildcard ("*.*.*"), so any schema version matches.

query_request = QueryRequest(
    kind="osdu:wks:work-product-component--WellboreTrajectory:*.*.*",
    query="",
    limit=5,
    returned_fields=["*"]
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())

#### For one of the few wells/wellbores that we have prepped a bit

In [None]:
# Use lineage to find well logs/trajectories/... that belong to a specific well/wellbore
# Well logs
# Same WellLog search as before, but filtered on data.WellboreID so only logs
# that reference the hard-coded wellbore id are returned.

query_request = QueryRequest(
    kind="osdu:wks:work-product-component--WellLog:*.*.*",
    query="data.WellboreID:\"osdu:master-data--Wellbore:7233:\"",
    limit=5,
    returned_fields=["*"]
)
response = search_client.query_records(query_request)

# Print the decoded JSON body of the search response.
pprint(response.json())


In [None]:
# By usage show that the platform can handle and store versions for some of these data
# Flow: create a record, then re-send it twice with a changed "weight" and list
# the record's versions after every write; finally fetch one specific version.
record = Record(
    kind='osdu:wks:dataset--File.Generic:1.0.0',
    acl=Acl(owners=default_acl['owners'], viewers=default_acl['viewers']),
    legal=Legal(legaltags=default_legal_tags, other_relevant_data_countries=["US"], status="compliant"),
    data= {
        "msg": "Initial Message from AutoTest while testing update",
        "weight": 777.0
    })
# create
response = storage_client.create_update_records(records=[record])
stored_record = response.json()
recordId = stored_record['recordIds'][0]
pprint("the new record")
pprint(stored_record)

# get versions
versions = storage_client.get_record_versions(recordId=recordId).json()
pprint("current versions")
pprint(versions)

# update twice: the record is re-sent under the id returned by the create call.
# The first update used to send an empty list (records=[]), so nothing was
# written and the version lookup below could not find a third entry.
for new_weight in (778, 779):
  record.data["weight"] = new_weight
  record.id = recordId
  response = storage_client.create_update_records(records=[record])

  # get versions
  versions = storage_client.get_record_versions(recordId=recordId).json()
  pprint("current versions")
  pprint(versions)

# Take the last entry of the version list (index 2 once all three writes
# succeeded) without failing with an IndexError if fewer versions exist.
checking_version = versions["versions"][-1]

pprint(checking_version)

# get specific version
version = storage_client.get_specific_record(recordId=recordId, version=checking_version).json()
pprint("specific version")
pprint(version)

In [21]:
# By usage show the difference between index/Storage/File APIs, for a data object
# In practice the user seeing the indexed data, the stored data and the payload data (CSV, LAS, DLIS, Parquet)

# upload file and metadata using dataset
upload_file_and_metadata()

# 10 sec for indexing
# NOTE(review): nothing in this cell actually waits; the search below runs
# immediately after the upload, so the new record may not be indexed yet.
# search
query_request = QueryRequest(
    kind= "*:*:*:*",
    query="data.Name:\"Dataset with images\"",
    limit=2,
    returned_fields=["*"]
)
images_with_metadata = search_client.query_records(query_request).json()
pprint(images_with_metadata)

# get record from storage
# NOTE(review): a freshly built Record (no id, no FileSource) is passed to
# QueryRecordsRequest, and the raw response object is printed rather than its
# JSON body; the captured output of this cell is "<Response [400]>".
# Presumably the request expects the ids of existing records (e.g. taken from
# the search result above) — confirm against the Storage API before fixing.
record = Record(  kind='osdu:wks:dataset--File.Generic:1.0.0',
                  acl=Acl(owners=default_acl['owners'], viewers=default_acl['viewers']),
                  legal=Legal(legaltags=["osdu-default-data-tag"], other_relevant_data_countries=["US"], status="compliant"),
                  data =   {
                    "DatasetProperties": {
                        "FileSourceInfo": {
                            # "FileSource": storage_location['fileSource'],
                            "Name": "image.jpeg"
                        }
                    },
                    "Name": "Dataset with images",
                })
records_request = QueryRecordsRequest(records=[record])
# Note: this rebinds images_with_metadata from the search result to the storage response.
images_with_metadata = storage_client.query_records(query_records_request=records_request)
pprint(images_with_metadata)

# download using file service
# TODO: the download step is not implemented yet.

<Response [400]>


# Slot 3 - Wellbore DDMS

For one of the few well/wellbores that we have prepped a bit

In [None]:
# By usage see that the DDMS is an added capability to Index/Storage/File
# By usage see that same data can be extracted through both core and DDMS

# wellbore ddms collection

In [None]:
# Show examples on using some of the DDMS APIs. To be determined
# Get basic well info to show that it's the same info as you get through the core APIs like (well header, wellbore header, ...)
# Demonstrate use of some added functionality in the DDMS
# Most likely related to logs and extraction of 1 specific curve, a specific curve interval, etc.