In [43]:
# https://github.com/MicrosoftDocs/azure-ai-docs/blob/main/articles/ai-services/language-service/custom-named-entity-recognition/how-to/call-api.md#tab/client
# https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_custom_entities.py
# https://learn.microsoft.com/en-us/python/api/overview/azure/ai-textanalytics-readme?view=azure-python

In [44]:
import os
import json
import openai
import pandas as pd
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

from dotenv import load_dotenv
from openai import AzureOpenAI
# Read variables from a local .env file before any of the environment lookups
# below run; override=True is passed so .env values take precedence over
# variables already set in the process (python-dotenv semantics).
load_dotenv(override=True)

# Azure OpenAI client. os.getenv returns None (instead of raising) when a
# variable is missing. This client is not referenced by the cells visible in
# this section of the notebook.
aoai_client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"), 
  api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
  api_version="2024-07-01-preview"
)

# Azure AI Language settings. os.environ[...] raises KeyError right here if a
# variable is missing, so a misconfigured environment fails in this cell.
AZURE_LANGUAGE_ENDPOINT = os.environ["AZURE_LANGUAGE_ENDPOINT"]
AZURE_LANGUAGE_KEY = os.environ["AZURE_LANGUAGE_KEY"]
CUSTOM_ENTITIES_PROJECT_NAME = os.environ["CUSTOM_ENTITIES_PROJECT_NAME"]
CUSTOM_ENTITIES_DEPLOYMENT_NAME = os.environ["CUSTOM_ENTITIES_DEPLOYMENT_NAME"]
# Relative path to a sample text file; not used by the cells visible here.
path_to_sample_document = "../data/processed/text/full_names_0.txt"

# SDK client used by sample_recognize_custom_entities, authenticated with the
# Language resource key.
text_analytics_client = TextAnalyticsClient(
    endpoint=AZURE_LANGUAGE_ENDPOINT,
    credential=AzureKeyCredential(AZURE_LANGUAGE_KEY),
)


In [123]:
# Sanity check: display the deployment name that was read from the environment.
CUSTOM_ENTITIES_DEPLOYMENT_NAME

'customNER'

In [45]:
# Load the processed full_names parquet file (path is relative to the notebook's
# working directory).
names_df = pd.read_parquet("../data/processed/full_names.parquet")

### Extracting first and last names using the SDK

In [46]:
def sample_recognize_custom_entities(document: str):
    """Run custom entity recognition on one document through the SDK client.

    Starts a long-running job on ``text_analytics_client`` against the project
    and deployment named by ``CUSTOM_ENTITIES_PROJECT_NAME`` and
    ``CUSTOM_ENTITIES_DEPLOYMENT_NAME``, then waits for it to finish.

    Args:
        document: Text to analyse. It is submitted as a one-element batch.

    Returns:
        The value of ``poller.result()``. Per the SDK documentation this is an
        iterable holding one result (or error) object per submitted document.
        The previous ``-> None`` annotation was wrong: the function has always
        returned this value.
    """
    poller = text_analytics_client.begin_recognize_custom_entities(
        [document],
        project_name=CUSTOM_ENTITIES_PROJECT_NAME,
        deployment_name=CUSTOM_ENTITIES_DEPLOYMENT_NAME,
    )

    # result() blocks until the service-side job has completed.
    return poller.result()

In [47]:
# Smoke-test the SDK helper on a single example name.
custom_document_results = sample_recognize_custom_entities("John F. Kennedy")

In [48]:
# Materialise the returned iterable so the per-document results are displayed.
list(custom_document_results)



### Extracting first and last names using the REST API

In [232]:
import requests
import time

In [347]:
def GetFirstLastNamesAPI(
    document,
    project_name=None,
    deployment_name=None,
    poll_interval=2,
    timeout=300,
    request_timeout=30,
):
    """Extract name entities from one document via the Language REST API.

    Submits a ``CustomEntityRecognition`` job to the analyze-text jobs
    endpoint, polls the job URL returned in the ``operation-location`` header
    until the job finishes, and returns the entities of the single document.

    Args:
        document: Text to analyse; converted with ``str()`` before sending.
        project_name: Custom NER project. Defaults to
            ``CUSTOM_ENTITIES_PROJECT_NAME`` so this helper targets the same
            project as the SDK helper in this notebook.
        deployment_name: Deployment of that project. Defaults to
            ``CUSTOM_ENTITIES_DEPLOYMENT_NAME``.
        poll_interval: Seconds to sleep between status polls.
        timeout: Maximum total seconds to wait for the job to finish.
        request_timeout: Per-request timeout, in seconds, for each HTTP call.

    Returns:
        A list of entity dicts as returned by the service, e.g.
        ``{"text", "category", "offset", "length", "confidenceScore"}``.

    Raises:
        requests.HTTPError: If submitting or polling returns an HTTP error.
        RuntimeError: If the job ends without succeeding, or the service
            returns no result for the document.
        TimeoutError: If the job has not finished within ``timeout`` seconds.
    """
    # Resolve defaults at call time so the notebook's configuration cell is the
    # single source of truth for the project/deployment being queried.
    if project_name is None:
        project_name = CUSTOM_ENTITIES_PROJECT_NAME
    if deployment_name is None:
        deployment_name = CUSTOM_ENTITIES_DEPLOYMENT_NAME

    headers = {"Ocp-Apim-Subscription-Key": AZURE_LANGUAGE_KEY, "Content-Type": "application/json"}

    # Strip a trailing slash so the path is not joined as "//language/...".
    base_url = AZURE_LANGUAGE_ENDPOINT.rstrip("/")
    url = f"{base_url}/language/analyze-text/jobs?api-version=2022-10-01-preview"
    body = {
        "displayName": "Extracting entities",
        "analysisInput": {
            "documents": [
                {
                    "id": "1",
                    "language": "en",
                    "text": str(document),
                }
            ]
        },
        "tasks": [
            {
                "kind": "CustomEntityRecognition",
                "taskName": "Entity Recognition",
                "parameters": {
                    "projectName": project_name,
                    "deploymentName": deployment_name,
                },
            }
        ],
    }

    submit_response = requests.post(url, headers=headers, json=body, timeout=request_timeout)
    # Surface 4xx/5xx here instead of failing later on a missing header.
    submit_response.raise_for_status()
    job_url = submit_response.headers["operation-location"]

    # Terminal states that will never turn into "succeeded"; polling on them
    # would spin forever. The deadline below covers any other stuck state.
    failure_states = ("failed", "cancelled", "partiallyCompleted")
    deadline = time.monotonic() + timeout

    while True:
        poll_response = requests.get(job_url, headers=headers, timeout=request_timeout)
        poll_response.raise_for_status()
        job = poll_response.json()
        status = job["status"]

        if status == "succeeded":
            break
        if status in failure_states:
            raise RuntimeError(
                f"Entity recognition job ended with status {status!r}: {job.get('errors')}"
            )
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Entity recognition job still {status!r} after {timeout} seconds"
            )
        time.sleep(poll_interval)

    results = job["tasks"]["items"][0]["results"]
    documents = results["documents"]
    if not documents:
        # The service reports per-document failures under "errors".
        raise RuntimeError(
            f"Service returned no result for the document: {results.get('errors')}"
        )

    return documents[0]["entities"]

In [348]:
response = GetFirstLastNamesAPI("Chellete, Travis M.")

In [349]:
response

[{'text': 'Chellete',
  'category': 'last_name',
  'offset': 0,
  'length': 8,
  'confidenceScore': 1.0},
 {'text': 'Travis',
  'category': 'first_name',
  'offset': 10,
  'length': 6,
  'confidenceScore': 1.0}]