In [2]:
# Silence every Python warning in this kernel so warnings do not clutter cell output.
import warnings
warnings.filterwarnings('ignore')

# Also set PYTHONWARNINGS in this process's environment.
# NOTE(review): presumably so that child processes inherit the setting — confirm.
import os
os.environ["PYTHONWARNINGS"] = "ignore"


# Set up environment variables
# load_dotenv() runs before DATA_DIR, AZURE_KEY and AZURE_ENDPOINT are read from
# the environment further down in this cell.
# NOTE(review): presumably these values come from a local .env file — confirm.
from dotenv import load_dotenv
load_dotenv()

import pandas as pd
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential


# Set flags (currently unused)
DEBUG = False

# Read configuration from environment variables via the IPython %env magic.
# data_dir is later concatenated directly with file names
# (f'{data_dir}physionet_schema.csv'), so DATA_DIR must end with a path separator.
# azure_key and azure_endpoint are consumed by azure_auth_client().
data_dir = %env DATA_DIR
azure_key = %env AZURE_KEY
azure_endpoint = %env AZURE_ENDPOINT

# Load SQL extension
%load_ext sql
# SqlMagic options.
# NOTE(review): going by the option names, autopandas should make queries return
# pandas DataFrames, and feedback/displaycon = False should suppress status and
# connection-string output — confirm against the installed sql extension.
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False


# Have DuckDB use in-memory storage (comment out to use the $DATABASE_URL file instead)
%sql duckdb:///:memory:

In [3]:
# Build the DuckDB tables listed in the schema metadata file from parquet files.

# Load schema metadata from ${data_dir}physionet_schema.csv
# The file is read as tab-delimited despite its .csv extension. Only the 'schema'
# and 'table' columns are kept, de-duplicated so each table appears once.
tables = pd.read_csv(f'{data_dir}physionet_schema.csv', delimiter='\t', usecols=['schema', 'table']).drop_duplicates().reset_index(drop=True)
# display(tables)


# Load data from parquet
# Each (schema, table) pair is loaded from
# {data_dir}parquet/<schema>/<table>.parquet into a DuckDB table of the same name.

# VSCode dies on FutureWarnings, suppress them
with warnings.catch_warnings():
    warnings.simplefilter(action='ignore', category=FutureWarning)
    for (schema, table) in tables.values:
        print(f'Loading {schema}.{table}')
        # Drop any existing copy first so re-running the cell rebuilds the table.
        # NOTE(review): {{name}} is presumably the sql extension's template syntax
        # for substituting the Python variable of that name — confirm.
        # NOTE(review): the DROP runs before the schema is guaranteed to exist;
        # confirm DuckDB accepts DROP TABLE IF EXISTS on a missing schema.
        %sql DROP TABLE IF EXISTS {{schema}}.{{table}}
        %sql CREATE SCHEMA IF NOT EXISTS {{schema}}
        %sql CREATE TABLE {{schema}}.{{table}} AS SELECT * FROM read_parquet('{{data_dir}}parquet/{{schema}}/{{table}}.parquet')

Loading mimiciv_note.radiology_detail
Loading mimiciv_note.discharge_detail
Loading mimiciv_note.discharge


: 

: 

In [None]:
def azure_auth_client():
    """Build a Text Analytics client from the notebook-level Azure settings.

    Reads the module-level ``azure_key`` and ``azure_endpoint`` values; it takes
    no arguments of its own.

    Returns:
        The ``TextAnalyticsClient`` constructed for ``azure_endpoint`` with an
        ``AzureKeyCredential`` wrapping ``azure_key``.
    """
    key_credential = AzureKeyCredential(azure_key)
    return TextAnalyticsClient(endpoint=azure_endpoint, credential=key_credential)

In [None]:

# Create the Text Analytics client once; later cells pass it to azure_health().
azure_client = azure_auth_client()

In [None]:
def azure_health(client, documents):
    """Run healthcare entity analysis on ``documents`` and print the findings.

    Submits ``documents`` to ``client.begin_analyze_healthcare_entities``, waits
    for the poller to finish, then prints every entity and entity relation of
    each successfully analyzed document. A separator line follows each document.

    Documents that the service flags with ``is_error`` are reported with their
    id, error code and error message instead of being dropped silently.

    Args:
        client: Object exposing ``begin_analyze_healthcare_entities``; in this
            notebook, the client returned by ``azure_auth_client()``.
        documents: Documents to analyze, passed through to the client unchanged.

    Returns:
        None. All results are written to standard output.
    """
    poller = client.begin_analyze_healthcare_entities(documents)
    result = poller.result()

    for doc in result:
        if doc.is_error:
            # Surface the failure so a document missing from the output is
            # explainable rather than silently skipped.
            print(f"Document {doc.id} failed: {doc.error.code} - {doc.error.message}")
            print("------------------------------------------")
            continue

        for entity in doc.entities:
            print(f"Entity: {entity.text}")
            print(f"...Normalized Text: {entity.normalized_text}")
            print(f"...Category: {entity.category}")
            print(f"...Subcategory: {entity.subcategory}")
            print(f"...Offset: {entity.offset}")
            print(f"...Confidence score: {entity.confidence_score}")
        for relation in doc.entity_relations:
            print(f"Relation of type: {relation.relation_type} has the following roles")
            for role in relation.roles:
                print(f"...Role '{role.name}' with entity '{role.entity.text}'")
        print("------------------------------------------")

In [None]:
# Test the function
# Smoke test: send one short sample sentence through azure_health() using the
# shared azure_client. This calls the client's healthcare-entities analysis, and
# azure_health() prints whatever entities and relations come back.
documents = [
    """
    Patient needs to take 50 mg of ibuprofen.
    """
]

azure_health(azure_client, documents)