### AI Language Studio를 활용하여, NER(Named Entity Recognition)하기
NER(Named Entity Recognition)를 사용하여 텍스트에서 엔터티를 식별하고 이를 사람, 위치, 이벤트, 제품 및 조직과 같은 사전 정의된 클래스 또는 유형으로 분류하는 기능

In [None]:
import requests # type: ignore
import uuid
import gradio as gr # type: ignore
import os
from dotenv import load_dotenv # type: ignore

# Load settings (presumably from a local .env file) before reading them below.
load_dotenv()

# NOTE: the variable and the environment key are both spelled "ENDPINT";
# the rest of the notebook refers to them by exactly these names.
LANGUAGE_ENDPINT = os.environ.get("LANGUAGE_ENDPINT")
LANGUAGE_APIKEY = os.environ.get("LANGUAGE_APIKEY")

#### 1. Named Entity Recognition
텍스트에서 명명된 엔터티(사람, 장소, 조직 등)를 식별하여 가져오기

In [None]:
# Named Entity Recognition
def request_ner(text="", language="ko"):
    """Run Named Entity Recognition on ``text`` via the Language REST endpoint.

    Posts a single-document ``EntityRecognition`` request to the URL held in
    ``LANGUAGE_ENDPINT``, authenticated with ``LANGUAGE_APIKEY``.

    Args:
        text: The text to analyse.
        language: Language code sent along with the document.

    Returns:
        The decoded JSON response body when the service answers with
        HTTP 200, otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when no response arrives within the timeout.
    """
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": LANGUAGE_APIKEY,
    }
    body = {
        "kind": "EntityRecognition",
        "parameters": {"modelVersion": "latest"},
        "analysisInput": {
            "documents": [
                {
                    # Only one document is sent per call, so a random id suffices.
                    "id": str(uuid.uuid4()),
                    "language": language,
                    "text": text,
                }
            ]
        },
    }

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    response = requests.post(
        LANGUAGE_ENDPINT, headers=headers, json=body, timeout=30
    )
    if response.status_code != 200:
        return None
    return response.json()

# request_ner("""Canada’s draft deal to participate in Europe’s defense industry will bring contracts to Canadian manufacturers and help lessen dependence on the United States.""", language="en")

#### 2. PiiEntityRecognition
NER(명명된 엔터티 인식)을 사용하여 텍스트에서 민감한 개인 식별 정보(PII)를 식별하고 이를 사람, 주소, 이메일, 전화번호, 여권 번호, 은행 계좌 번호 등과 같은 사전 정의된 클래스 또는 유형으로 분류.
민감한 정보는 * 표기로 지움.

In [None]:
def request_pii(text="", model_type="EntityRecognition", language="ko"):
    """Send ``text`` to the Language REST endpoint for entity/PII analysis.

    NOTE: the default ``model_type`` is the plain ``"EntityRecognition"``
    kind. Pass ``model_type="PiiEntityRecognition"`` to actually request
    PII detection.

    Args:
        text: The text to analyse.
        model_type: Value sent as the request's ``kind`` field.
        language: Language code sent along with the document.

    Returns:
        The decoded JSON response body when the service answers with
        HTTP 200, otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when no response arrives within the timeout.
    """
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": LANGUAGE_APIKEY,
    }
    body = {
        "kind": model_type,
        "parameters": {"modelVersion": "latest"},
        "analysisInput": {
            "documents": [
                {
                    # Only one document is sent per call, so a random id suffices.
                    "id": str(uuid.uuid4()),
                    "language": language,
                    "text": text,
                }
            ]
        },
    }

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    response = requests.post(
        LANGUAGE_ENDPINT, headers=headers, json=body, timeout=30
    )
    if response.status_code != 200:
        return None
    return response.json()

# request_pii("""[파이낸셜뉴스] 이재명 더불어민주당 대표가 20일 이재용 삼성전자 회장과 만나 지금까지 해온 것처럼 삼성이 경제성장 견인차 역할을 잘 해 달라고 당부했다. 양측은 이날 '청년취업 지원' 등 경제 현안에 대한 논의도 이어갈 예정이다. 이재명 대표는 이날 오전 10시 서울 강남구 삼성 청년 소프트웨어 아카데미(SSAFY)를 방문, 청년 취업 지원을 위한 현장 간담회에 참석했다.""", model_type="PiiEntityRecognition", language="en")

#### 3. Extract key phrases
Key Phrase Extraction API는 구조화되지 않은 텍스트를 평가하고, 각 JSON 문서에 대해 Key Phrase 목록을 반환합니다. 이 기능은 텍스트에서 주요 요점을 빠르게 식별해야 하는 경우에 유용

In [None]:
def request_Extract_key_phrases(text="", model_type="EntityRecognition", language="en"):
    """Send ``text`` to the Language REST endpoint, e.g. for key phrase extraction.

    NOTE: the default ``model_type`` is ``"EntityRecognition"``. Pass
    ``model_type="KeyPhraseExtraction"`` to request key phrases.

    Args:
        text: The text to analyse.
        model_type: Value sent as the request's ``kind`` field.
        language: Language code sent along with the document.

    Returns:
        The decoded JSON response body when the service answers with
        HTTP 200, otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when no response arrives within the timeout.
    """
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": LANGUAGE_APIKEY,
    }
    body = {
        "kind": model_type,
        "parameters": {"modelVersion": "latest"},
        "analysisInput": {
            "documents": [
                {
                    # Only one document is sent per call, so a random id suffices.
                    "id": str(uuid.uuid4()),
                    "language": language,
                    "text": text,
                }
            ]
        },
    }

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    response = requests.post(
        LANGUAGE_ENDPINT, headers=headers, json=body, timeout=30
    )
    if response.status_code != 200:
        return None
    return response.json()

# request_Extract_key_phrases("""Donald Trump and Volodymyr Zelenskyy had a “very good telephone call” on Wednesday, according to Trump, in the first conversation between the US president and his Ukrainian counterpart since their disastrous showdown in the White House three weeks ago. Zelenskyy described the call as “positive, very substantive and frank”, and said he had signed up to a partial ceasefire that Trump agreed with Vladimir Putin a day earlier. The White House said Trump had promised to help with a Ukrainian request to source more air defence batteries for Kyiv. The last encounter between Trump and Zelenskyy ended in an angry exchange of words between the two presidents and the US vice-president, JD Vance. It marked a low point in US-Ukrainian relations and spooked other allies that Washington may be about to abandon Kyiv.""", model_type="KeyPhraseExtraction", language="en")

#### 4. EntityLinking / Find linked entities
이 사전 구축된 기능은 기술 자료(knowledge base)에 연결하여 텍스트에서 발견된 엔터티가 정확히 무엇을 가리키는지 명확히 구분합니다(모호성 해소). Text Analytics의 엔터티 연결 기능은 기술 자료로 Wikipedia를 사용하며 해당 링크를 제공 -> 입력한 데이터에 대해서 위키피디아 검색 결과 링크를 보내줌.

In [None]:
def request_EntityLinking(text="", model_type="EntityRecognition", language="en"):
    """Send ``text`` to the Language REST endpoint, e.g. for entity linking.

    NOTE: the default ``model_type`` is ``"EntityRecognition"``. Pass
    ``model_type="EntityLinking"`` to request linked entities.

    Args:
        text: The text to analyse.
        model_type: Value sent as the request's ``kind`` field.
        language: Language code sent along with the document.

    Returns:
        The decoded JSON response body when the service answers with
        HTTP 200, otherwise ``None``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when no response arrives within the timeout.
    """
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": LANGUAGE_APIKEY,
    }
    body = {
        "kind": model_type,
        "parameters": {"modelVersion": "latest"},
        "analysisInput": {
            "documents": [
                {
                    # Only one document is sent per call, so a random id suffices.
                    "id": str(uuid.uuid4()),
                    "language": language,
                    "text": text,
                }
            ]
        },
    }

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    response = requests.post(
        LANGUAGE_ENDPINT, headers=headers, json=body, timeout=30
    )
    if response.status_code != 200:
        return None
    return response.json()

# request_EntityLinking("""Donald Trump and Volodymyr Zelenskyy had a “very good telephone call” on Wednesday, according to Trump, in the first conversation between the US president and his Ukrainian counterpart since their disastrous showdown in the White House three weeks ago. Zelenskyy described the call as “positive, very substantive and frank”, and said he had signed up to a partial ceasefire that Trump agreed with Vladimir Putin a day earlier. The White House said Trump had promised to help with a Ukrainian request to source more air defence batteries for Kyiv. The last encounter between Trump and Zelenskyy ended in an angry exchange of words between the two presidents and the US vice-president, JD Vance. It marked a low point in US-Ukrainian relations and spooked other allies that Washington may be about to abandon Kyiv.""", model_type="EntityLinking", language="en")

#### 5. LanguageDetection
주어진 문장에서 어떤 언어가 가장 많이 쓰였는지, 그래서 결과적으로 이 내용은 어떤 언어로 이루어져 있는지 확인.

In [None]:
def request_language_detection(text=""):
    """Detect the dominant language of ``text`` via the Language REST endpoint.

    Args:
        text: The text whose language should be detected.

    Returns:
        The ``iso6391Name`` code reported for the document. Falls back to
        ``"ko"`` when the service does not answer with HTTP 200 or when the
        response carries no detection result for the document.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when no response arrives within the timeout.
    """
    fallback_language = "ko"

    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": LANGUAGE_APIKEY,
    }
    body = {
        "kind": "LanguageDetection",
        "parameters": {"modelVersion": "latest"},
        "analysisInput": {
            "documents": [
                {
                    # Only one document is sent per call, so a random id suffices.
                    "id": str(uuid.uuid4()),
                    "text": text,
                }
            ]
        },
    }

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    response = requests.post(
        LANGUAGE_ENDPINT, headers=headers, json=body, timeout=30
    )
    if response.status_code != 200:
        return fallback_language

    try:
        documents = response.json()["results"]["documents"]
        return documents[0]["detectedLanguage"]["iso6391Name"]
    except (KeyError, IndexError, TypeError, ValueError):
        # A 200 response can still lack a usable result (for example an empty
        # "documents" list); use the same fallback as for HTTP errors rather
        # than crashing the caller.
        return fallback_language

# request_language_detection("Donald Trump and Volodymyr Zelenskyy had a “very good telephone call” on Wednesday, according to Trump, in the first conversation between the US president and his Ukrainian counterpart since their disastrous showdown in the White House three weeks ago. Zelenskyy described the call as “positive, very substantive and frank”, and said he had signed up to a partial ceasefire that Trump agreed with Vladimir Putin a day earlier. The White House said Trump had promised to help with a Ukrainian request to source more air defence batteries for Kyiv. The last encounter between Trump and Zelenskyy ended in an angry exchange of words between the two presidents and the US vice-president, JD Vance. It marked a low point in US-Ukrainian relations and spooked other allies that Washington may be about to abandon Kyiv. 기록을 갈아치울 태세다. 그는 통산 164홈런을 기록, 1위 타이론 우즈를 10개 차이로 뒤쫓고 있다. 시즌 초반 우즈의 기록을 넘을 수 있을 것으로 기대된다. 통산 타점도 521개로 이 부문 1위 제이 데이비스와 70개의 격차를 보이고 있으나, 경신이 유력하다.지난 시즌까지 통산 938승을 기록 중인 김경문 감독은 1천승 달성을 앞두고 있다. 43년 역사의 KBO 리그에서 1천승은 김응용, 김성근 감독 단 2명 만이 달성한 바 있다.")

#### 6. CLU
CLU(Conversational Language Understanding): 입력 문장의 의도(intent)와 엔터티를 예측합니다. 예를 들어 문장이 단순 대화인지 요청인지 등을 구분합니다. email 데이터를 직접 학습시켜서 생성한 모델을 사용합니다.

In [None]:
# Value sent as the "Apim-Request-Id" header of CLU requests (None when unset).
APIM_REQUEST_ID = os.environ.get("APIM_REQUEST_ID")

def request_CLU(text=""):
    """Query the ``CLU_Email_project`` conversation model for ``text``.

    Posts a ``Conversation`` request (deployment ``email-v1``, language
    ``en``) to the URL held in ``LANGUAGE_ENDPINT``.

    Args:
        text: The utterance to classify.

    Returns:
        A ``(top_intent, intent_list, entity_list)`` tuple taken from the
        response's ``result.prediction`` object when the service answers
        with HTTP 200, otherwise the empty string ``""``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when no response arrives within the timeout.
        KeyError: If a 200 response lacks the expected prediction fields.
    """
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": LANGUAGE_APIKEY,
        "Apim-Request-Id": APIM_REQUEST_ID,
    }
    body = {
        "kind": "Conversation",
        "analysisInput": {
            "conversationItem": {
                "id": "12345",
                "text": text,
                "modality": "text",
                "language": "en",
                "participantId": "user123",
            }
        },
        "parameters": {
            "projectName": "CLU_Email_project",
            "verbose": True,
            "deploymentName": "email-v1",
            "stringIndexType": "TextElement_V8",
        },
    }

    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    response = requests.post(
        LANGUAGE_ENDPINT, headers=headers, json=body, timeout=30
    )
    if response.status_code != 200:
        return ""

    prediction = response.json()["result"]["prediction"]
    return (
        prediction["topIntent"],
        prediction["intents"],
        prediction["entities"],
    )

# request_CLU("Hello, how can I help you today? Send my email to jane")

#### 7. Custom Named Entity Recognition
대출 관련 문서 데이터로 직접 학습시킨 커스텀 모델을 사용하여 엔터티를 인식하기.
참고: 이 API는 비동기로 동작하므로 POST 요청에는 202(Accepted)가 반환됩니다. 응답 헤더의 Operation-Location URL로 GET 요청을 보내 결과를 조회해야 합니다.

In [None]:
import time

# URL the custom NER job is submitted to. The name and the environment key
# are both spelled "ENPOINT"; keep them as-is so existing settings still match.
CNER_ENPOINT = os.environ.get("CNER_ENPOINT")

def Custom_Named_Entity_Recognition(text, poll_interval=2, max_wait=300):
    """Run the ``loanDemo`` custom entity recognition model on ``text``.

    The job is asynchronous: the POST to ``CNER_ENPOINT`` is expected to
    answer 202 with an ``Operation-Location`` header, and that URL is then
    polled with GET until the job reaches a terminal state.

    Args:
        text: The text to analyse (sent with language ``en``).
        poll_interval: Seconds to wait between two status polls.
        max_wait: Maximum number of seconds to keep polling before giving up.

    Returns:
        The list of entities found in the document, or ``None`` when the job
        is not accepted, fails, is cancelled, does not finish within
        ``max_wait`` seconds, or returns an unexpected payload.

    Raises:
        requests.exceptions.RequestException: If a request cannot be
            completed, including when no response arrives within the timeout.
    """
    headers = {
        "Ocp-Apim-Subscription-Key": LANGUAGE_APIKEY,
        "Content-Type": "application/json",
    }
    body = {
        "tasks": [
            {
                "kind": "CustomEntityRecognition",
                "parameters": {
                    "projectName": "loanDemo",
                    "deploymentName": "loanDeployment",
                    "stringIndexType": "TextElement_v8",
                },
            }
        ],
        "displayName": "CustomTextPortal_CustomEntityRecognition",
        "analysisInput": {
            "documents": [
                {
                    # Only one document is sent per call, so a random id suffices.
                    "id": str(uuid.uuid4()),
                    "text": text,
                    "language": "en",
                }
            ]
        },
    }

    # Submit the job; requests has no default timeout, so set one explicitly.
    response = requests.post(CNER_ENPOINT, headers=headers, json=body, timeout=30)
    if response.status_code != 202:
        print(f"Error: unexpected status code {response.status_code}")
        return None

    # .get() so that a missing header reaches the check below instead of
    # raising KeyError first.
    result_url = response.headers.get("Operation-Location")
    if not result_url:
        print("Error: Operation-Location not found in response headers")
        return None

    # Poll for the result, but never longer than max_wait seconds.
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(result_url, headers=headers, timeout=30)
        result_data = result_response.json()
        result_status = result_data.get("status", "")

        # "partiallySucceeded" is also terminal; whatever results exist are
        # read below.
        if result_status in ("succeeded", "partiallySucceeded"):
            break
        # Accept both spellings of the cancelled state.
        if result_status in ("failed", "canceled", "cancelled"):
            print(f"Processing failed: {result_data}")
            return None
        if time.monotonic() >= deadline:
            print(f"Error: job did not finish within {max_wait} seconds")
            return None
        time.sleep(poll_interval)

    try:
        return result_data["tasks"]["items"][0]["results"]["documents"][0]["entities"]
    except (KeyError, IndexError, TypeError):
        print("Error: Unexpected response format")
        return None

# Try the custom NER model on a sample loan sentence and show what it finds.
sample_loan_sentence = "The loan amount given by lender to borrower is eight hundred eighty-six thousand seven hundred fifty-seven Dollars ($886,757.00)"
entities = Custom_Named_Entity_Recognition(sample_loan_sentence)
print(entities)


[{'text': 'eight hundred eighty-six thousand seven hundred fifty-seven Dollars', 'category': 'LoanAmountWords', 'offset': 47, 'length': 67, 'confidenceScore': 1.0}, {'text': '$886,757.00', 'category': 'LoanAmountNumbers', 'offset': 116, 'length': 11, 'confidenceScore': 1.0}]


#### 8. Gradio로 화면 구성하기 - 라디오 활용

In [None]:
def change_radio(radio_type):
    """Return the selected radio value unchanged."""
    selected_type = radio_type
    return selected_type

def click_send(radio_type, text):
    """Dispatch ``text`` to the analysis selected by ``radio_type``.

    Args:
        radio_type: One of ``"LanuageDetection"``, ``"NER"`` or ``"PII"``
            (spelled exactly as in the radio choices).
        text: The text to analyse.

    Returns:
        Whatever the selected request function returns, or ``""`` for an
        unrecognised ``radio_type``.
    """
    print(radio_type, text)

    if radio_type == "LanuageDetection":
        return request_language_detection(text)
    if radio_type == "NER":
        return request_ner(text)
    if radio_type == "PII":
        # request_pii defaults to the plain "EntityRecognition" kind, so the
        # PII kind has to be requested explicitly; otherwise this option
        # would just repeat NER.
        return request_pii(text, model_type="PiiEntityRecognition")
    return ""
        

# Build the demo UI: a radio to pick the analysis type, an input box,
# a send button and an output box.
with gr.Blocks() as demo:
    # These labels must match the strings compared in click_send.
    type_choice_list = ["LanuageDetection", "NER", "PII"]
    
    type_radio = gr.Radio(label="타입선택", choices=type_choice_list, value="LanuageDetection")
    input_textbox = gr.Textbox(label="입력 데이터", lines=10)
    send_button = gr.Button("전송")
    output_textbox = gr.Textbox(label="결과 출력", lines=10)

    # change_radio returns its input, so this handler writes the selected
    # value back into the same radio.
    type_radio.change(change_radio, inputs=[type_radio], outputs=[type_radio])
    # Run the selected analysis on the input text and show the result.
    send_button.click(click_send, inputs=[type_radio, input_textbox], outputs=[output_textbox])

# Start the Gradio app.
demo.launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.




LanuageDetection Canada’s draft deal to participate in Europe’s defense industry will bring contracts to Canadian manufacturers and help lessen dependence on the United States.
NER Canada’s draft deal to participate in Europe’s defense industry will bring contracts to Canadian manufacturers and help lessen dependence on the United States.
PII Canada’s draft deal to participate in Europe’s defense industry will bring contracts to Canadian manufacturers and help lessen dependence on the United States.
