# Imports and Asset Definitions

In [1]:
import os 
import json
import requests
import pandas as pd 
from sentence_transformers import SentenceTransformer
import faiss
from tqdm import tqdm
from dotenv import load_dotenv
import base64
from openai import OpenAI
from fpdf import FPDF
import textwrap
from src.extraction import *
# Load the .env file
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Working locations, resolved from the directory the notebook runs in.
PROGRAM_PATH = os.path.abspath(os.getcwd())
ASSETS_PATH = os.path.join(PROGRAM_PATH, "assets")

# Media type embedded in the data URL built by message_formatter().
MIME_TYPE = 'application/pdf'

# OpenAI SDK client pointed at the Upstage information-extraction endpoint.
# The key is read from the environment; it is None when the variable is unset.
UPSTAGE_API_KEY = os.environ.get("UPSTAGE_API_KEY")
client = OpenAI(
    api_key=UPSTAGE_API_KEY,
    base_url="https://api.upstage.ai/v1/information-extraction",
)

# Load the sentence-embedding model that is later passed to mapping().
model_name = "sentence-transformers/all-MiniLM-L12-v2"
model = SentenceTransformer(model_name)
# Description attached to each extracted field, listed in schema order.
_FIELD_DESCRIPTIONS = {
    "procedure": "Type of imaging study performed.",
    "finding": "Clinical observations or results identified during examination or testing.",
    "body_structure": "body part examined",
    "disorder": "Diseases or pathological conditions diagnosed or suspected in a patient. Usually find in impression of Radiology text",
    "morphologic_abnormality": "Structural abnormalities or deviations in tissue or organ morphology.",
    "regime_therapy": "Planned treatments or therapeutic programs administered to a patient.",
    "cell_structure": "Microscopic structural components found within cells or tissues.",
}

# Structured-output specification sent as ``response_format``: a JSON schema
# describing an object in which every field is a required string.
RESPONSE_FORMAT = {
    "type": "json_schema",
    "json_schema": {
        "name": "document_schema",
        "schema": {
            "type": "object",
            "properties": {
                field: {"type": "string", "description": text}
                for field, text in _FIELD_DESCRIPTIONS.items()
            },
            "required": list(_FIELD_DESCRIPTIONS),
        },
    },
}

def message_formatter(coded):
    """Build the chat ``messages`` list carrying one base64-encoded document.

    ``coded`` is interpolated, together with the module-level ``MIME_TYPE``,
    into a ``data:`` URL. That URL becomes the only ``image_url`` content part
    of a single user message.

    Returns:
        A one-element list holding the user message dict.
    """
    data_url = f"data:{MIME_TYPE};base64,{coded}"
    attachment = {"type": "image_url", "image_url": {"url": data_url}}
    user_message = {"role": "user", "content": [attachment]}
    return [user_message]


In [3]:
def extraction_and_mapping(text_input):
    """Extract structured fields from ``text_input`` and map them.

    The text is passed through ``text_to_pdf`` and ``encode_to_base64``, sent
    to the ``information-extract`` model with ``RESPONSE_FORMAT``, and the
    JSON content of the first choice is handed to ``mapping`` along with
    ``ASSETS_PATH`` and the embedding ``model``.

    Returns:
        Whatever ``mapping`` returns (displayed by the UI as a dataframe).
    """
    # Convert the input into the file format the request expects.
    encoded_pdf = encode_to_base64(text_to_pdf(text_input))

    # Information extraction
    completion = client.chat.completions.create(
        model="information-extract",
        messages=message_formatter(encoded_pdf),
        response_format=RESPONSE_FORMAT,
    )

    # The message content is a JSON document; decode it before mapping.
    raw_content = completion.choices[0].message.content
    extracted_fields = json.loads(raw_content)
    return mapping(extracted_fields, ASSETS_PATH, model)

In [4]:
import gradio as gr

# Multi-line text box that receives the report text.
report_input = gr.Textbox(
    label="Input Text (예: 판독문 등)",
    lines=15,
    placeholder="여기에 Radiology 판독문 입력...",
)

# Table that displays the value returned by extraction_and_mapping().
mapping_output = gr.Dataframe(
    headers=["Hierarchy", "Input Description", "Match #", "Concept Name", "Concept ID"],
    label="SNOMED 매핑 결과",
    wrap=True,
)

# Wire the input and output components to the extraction pipeline.
demo = gr.Interface(
    fn=extraction_and_mapping,
    inputs=report_input,
    outputs=mapping_output,
    title="🧠 SNOMED CT Term Mapping Assistant",
    description="자연어 판독문을 SNOMED CT 개념으로 자동 매핑합니다. Upstage Information Extraction API 기반",
)

demo.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




# Text Input

In [None]:
# User input: a sample chest X-ray radiology report.
text = """Chest PA-Lat XR
 Imaging Study
 Xray Chest PA and Lateral
 Exam: 2 views of the chest XXXX/XXXX.
 Comparison: None.
 Indication: Positive TB test
 Findings:
 The cardiac silhouette and mediastinum size are within normal limits.
 There is no pulmonary edema. There is no focal consolidation. There
 are no XXXX of a pleural effusion. There is no evidence of
 pneumothorax.
 Impression:
 Normal chest x-XXXX.
 This examination and reported findings have been reviewed and
 confirmed by the undersigned"""