## Seismic Data Bank Custom Schema Generation and Ingestion to OSDU

This notebook documents a summarized version of the process of generating custom schemas and ingesting them into OSDU, using the Seismic Data Bank (SDB) as an example.

Note that the notebook imports and uses utilities hosted in the official [schema_generator repository](https://github.com/equinor/osdu_custom_schema_generator) and provides ONLY a summarized version of the process; check the main repository for details if required.

In [1]:
import os  # os.environ supplies the service URLs and the refresh token read in later cells
import requests  # HTTP session used for the SDB token and metadata requests

# Load the variables defined in a .env file into the process environment.
# Later cells read 'ds_security_url', 'sdb_refresh_token' and 'sdb_metadata_url' from os.environ.
from dotenv import load_dotenv
load_dotenv()

True

#### 1. SDB API Authentication

In [2]:
"""[POST] Access token request: Let's get an access token by sending a request using an offline token (refresh token)."""

# Token endpoint, built from the security service base URL found in the environment.
access_token_uri = os.environ['ds_security_url'] + '/protocol/openid-connect/token'

# Form fields of the refresh-token grant; the refresh token itself comes from the environment.
body = {
    "grant_type": "refresh_token",
    "refresh_token": os.environ['sdb_refresh_token'],
    "client_id": 'enterprise-search'
}

headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}

# One session is shared by the token request and the metadata request that follows.
session = requests.session()
session.headers.update({"User-Agent": "osdu/sismic_metadata"})

# `data` and `headers` are passed by keyword: the third positional parameter of
# Session.post is `json`, so a positional call would hand the headers dict to `json`
# and the headers would never be sent as headers.
# The timeout matches the one used for the metadata request so the cell cannot hang forever.
response = session.post(access_token_uri, data=body, headers=headers, timeout=10)
print(f"iEnergy access_token response code: {response}")

# Fail with the HTTP error itself instead of a KeyError on a missing 'access_token'.
response.raise_for_status()

sdb_access_token = response.json()['access_token']


"""[GET] Metadata request: Let's send a request to SDB API to retrieve the entities metadata."""

# URL of the SDB metadata document, built from the base URL found in the environment.
metadata_uri = "{}/{}".format(os.environ['sdb_metadata_url'], "$metadata?")

# Keyword arguments for the GET request: bearer authentication plus a 10 second timeout.
payload = {
    'headers': {
        'Authorization': f'Bearer {sdb_access_token}',
        'Accept': '*/*'
    },
    'timeout': 10
}

# The User-Agent header is already set on `session` right after it is created earlier
# in this cell, so it is not set again here.
response = session.get(metadata_uri, **payload)
print(f"iEnergy metadata response code: {response}")

# Stop on an HTTP error so that an error body is never passed on as if it were metadata.
response.raise_for_status()

# Raw response text (XML, per the recorded output); only a short preview is printed.
response_metadata = response.text
print(f"SDB entities metadata: {response_metadata[:200]}")

iEnergy access_token response code: <Response [200]>
iEnergy metadata response code: <Response [200]>
SDB entities metadata: <?xml version='1.0' encoding='utf-8'?><edmx:Edmx Version="1.0" xmlns:edmx="http://schemas.microsoft.com/ado/2007/06/edmx" xmlns:dsds="http://www.lgc.com/dsdataserver/annotation"><edmx:DataServices m:D


#### 2. Custom Schema Generation

In [3]:
"""SDB response for the metadata endpoint does not give an JSON format response, so it has to be parsed
into a JSON in order to use these dimensions when constructing the custom schema for each SDB entity."""

from schema_generator.utils.xml_parse_to_json import XMLParseToJSON

# SDB entity type whose properties are extracted; it also names the schema built below.
sdb_entity_name = 'seismicsurveys'

# Hand the raw metadata text and the entity name to the parser, then keep the
# properties it exposes for that entity.
# NOTE(review): `get_properties` is read as an attribute (no call) - presumably a
# property on XMLParseToJSON; confirm against the schema_generator repository.
metadata_parser = XMLParseToJSON(
    metadata_xml=response_metadata,
    entity_type_name=sdb_entity_name,
)
seismic_surveys_metadata_parsed = metadata_parser.get_properties

In [4]:
"""Let's contruct the custom schema for the seismic surveys entity.
The process should be repeated for each SDB entity."""

from schema_generator.utils.schema_util_methods import (
    common_properties,
    get_required_properties,
    get_schema_meta,
)

schema_version = '1.0.0'

# Step 1: schema skeleton carrying the schema metadata and identity.
seismic_surveys_custom_schema = get_schema_meta(
    source='iEnergy',
    authority='eqn',
    schema_name=sdb_entity_name,
    schema_version=schema_version,
)

# Step 2: names of the required properties.
req_attrs = get_required_properties()

# Step 3: wrap the parsed SDB properties as an object definition and place it
# under `data.allOf` of the common properties.
custom_attributes = {
    'properties': seismic_surveys_metadata_parsed,
    'type': 'object',
}

schema_prop = common_properties(entity_type=sdb_entity_name)
schema_prop['data'] = {'allOf': [custom_attributes]}

# Check point: the custom attributes are the first `allOf` entry.
assert schema_prop['data']['allOf'][0] == custom_attributes

# Step 4: attach the required attributes and the properties to the schema body.
schema_section = seismic_surveys_custom_schema['schema']
schema_section['required'] = req_attrs
schema_section['properties'] = schema_prop


"""Add legal and ACL schema definitions retrieved from OSDU Schema API."""

import json

from libs.osdu_service.osdu_http_client import OsduHttpClient

# Client used for every OSDU call in the rest of the notebook.
# NOTE(review): the recorded output shows an interactive browser sign-in prompt
# while this cell runs - presumably triggered by this client; confirm.
client = OsduHttpClient("npequinor-dev", client_type="public-client")

# Kinds of the two abstract schemas referenced by the custom schema.
acl_kind = "osdu:wks:AbstractAccessControlList:1.0.0"
legaltag_kind = "osdu:wks:AbstractLegalTags:1.0.0"
schema_endpoint = "schema-service/v1/schema/"

# Fetch both definitions from the Schema service (ACL first, then legal tags).
acl_schema = client.app_get_returning_json(
    service_relative_uri=schema_endpoint + acl_kind
)
legaltag_schema = client.app_get_returning_json(
    service_relative_uri=schema_endpoint + legaltag_kind
)

# Register them under `definitions`, keyed by kind (legal tags first, then ACL).
schema_definitions = seismic_surveys_custom_schema['schema']['definitions']
schema_definitions[legaltag_kind] = legaltag_schema
schema_definitions[acl_kind] = acl_schema

# Let's have a look at the schema we would send to OSDU.
print(json.dumps(seismic_surveys_custom_schema, indent=4))


A local browser window will be open for you to sign in. CTRL+C to cancel.
{
    "schemaInfo": {
        "schemaIdentity": {
            "authority": "eqn",
            "source": "iEnergy",
            "entityType": "seismicsurveys",
            "schemaVersionMajor": 1,
            "schemaVersionMinor": 0,
            "schemaVersionPatch": 0
        },
        "status": "DEVELOPMENT"
    },
    "schema": {
        "x-osdu-inheriting-from-kind": [],
        "x-osdu-license": "Copyright 2021, The Open Group \\nLicensed under the Apache License,\n             Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. \n             You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 .\n             Unless required by applicable law or agreed to in writing, \n             software distributed under the License is distributed on an \"AS IS\" BASIS, \n             WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, \n             e

#### 3. Custom Schema Ingestion to OSDU

In [5]:
"""[POST] Request to create a new schema in OSDU"""
try:
    # NOTE(review): the trailing `__` in the URI differs from the schema-service paths
    # used earlier and looks like a deliberate way to keep the demo from creating a
    # schema - confirm; drop it to really send the request.
    osdu_response = client.app_post_returning_json(
        service_relative_uri="schema-service/v1/schema__",
        payload=json.dumps(seismic_surveys_custom_schema)
    )
except Exception as exc:
    # Catch `Exception` rather than using a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate, and report why the request did not go through.
    print("Not sent to OSDU as part of the DEMO.")
    print(f"Reason: {type(exc).__name__}: {exc}")

Not sent to OSDU as part of the DEMO.
