## Monitor schemas counts - OSDU

This notebook is intended to run once a day to gatter the counts by kind, time stamp them and append it to the spark table "OSDU_schema_counts", which is connected to the [OSDU metrics Power BI report](https://app.powerbi.com/groups/488759ef-47c7-4ee5-8fd8-25fc16dc5468/reports/ec64e727-9c8b-4189-bcbc-1b2f486908f5/ReportSection89b287e681d8e41e0363).

Find here the [synapse notebook](https://web.azuresynapse.net/en/authoring/analyze/notebooks/00_Monitor_Schemas?workspace=%2Fsubscriptions%2F36edefc2-154f-412b-aef5-5bf1ad68b170%2FresourceGroups%2Fosdu-common-rg%2Fproviders%2FMicrosoft.Synapse%2Fworkspaces%2Fosdu-synapse).

In [1]:
from libs.osdu_service.osdu_http_client import OsduHttpClient
from datetime import datetime
from libs.utilities import response2df, format_date_new
import pandas as pd

# Adding .env file variables as environment variables
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
"""Time stamp to be attached."""

base = datetime.utcnow().isoformat()
base = format_date_new(base)


"""OSDU environments to loop through."""
osdu_envs = [ "npequinor-test", "npequinor-dev", "equinor-data"]


"""Payload to be sent as the body of the requests."""
realtive_uri = "search/v2/query"
payload = {
    "kind": "*:*:*:*",
    "limit": 1000,
    'aggregateBy': "kind",
    "query": "*"
    }


"""OSDU aggregated records."""
full_resp = []
for env in osdu_envs:
    try:
        print(f'Working on OSDU environment {env}')

        # Osdu Public Client
        osdu_env = OsduHttpClient(env, client_type="public-client")
        # osdu_env = OsduHttpClient(env, client_type="private-client")
        # osdu_env = OsduHttpClient(env, client_type = "token-client")
        
        # POST request to OSDU search API
        resp = osdu_env.app_post_returning_json(
            realtive_uri,
            payload
        )
        
        resp = resp['aggregations']
        for record in resp:
            record['kind'] = record.pop('key')
            record['captureDate'] = base
            record['osdu_env'] = env

        full_resp.extend(resp)
    except:
        continue

"""Let's flatten the response to a DataFrame."""
df_counts = response2df(full_resp)


Working on OSDU environment npequinor-test
Working on OSDU environment npequinor-dev
Working on OSDU environment equinor-data
650/650

In [3]:
df_counts['count'] =df_counts['count'].astype(int)
df_counts.sum()

count                                                   28857292
kind           osdu:wks:dataset--File.Generic:1.0.0eqnr:smda-...
captureDate    2023-10-06 12:33:51.6702362023-10-06 12:33:51....
osdu_env       npequinor-testnpequinor-testnpequinor-testnpeq...
dtype: object

In [4]:
"""Seismic Databank Records"""
df_counts[df_counts.kind.isin([i for i in df_counts.kind if "iEnergy" in i])].head(5)

Unnamed: 0,count,kind,captureDate,osdu_env
8,18875,eqnr:iEnergy-sdb:seismicpoststackdatasets:1.0.0,2023-10-06 12:33:51.670236,npequinor-test
10,18824,eqnr:iEnergy-sdb:poststackcubegeometries:1.0.0,2023-10-06 12:33:51.670236,npequinor-test
11,18824,eqnr:iEnergy-sdb:poststackcubes:1.0.0,2023-10-06 12:33:51.670236,npequinor-test
17,4486,eqnr:iEnergy-diskos:navigationsets:1.0.0,2023-10-06 12:33:51.670236,npequinor-test
20,3245,eqnr:iEnergy-sdb:navigationsets:1.0.0,2023-10-06 12:33:51.670236,npequinor-test
