In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install -U "transformers>=4.46.0" "trl==0.9.6" "peft>=0.13.0" "accelerate>=0.34.2" "bitsandbytes>=0.43.3"




In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = "/content/drive/MyDrive/DILAB/qwen3-8b"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# 4bit 로딩 (Colab Pro T4/L4 GPU에서도 돌아가도록)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype="auto",
    load_in_4bit=True
)


`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

# PBL 섹터 구분가능한지 물어보는 코드

In [5]:
# ==== ultra-minimal single cell (Qwen3-8B, section formatter) ====
import torch, re
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_PATH = "/content/drive/MyDrive/DILAB/qwen3-8b"  # 권장: ...-Instruct

print("STEP 1: loading...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

kw = dict(dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32),
          device_map="auto", trust_remote_code=True)
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kw)
except TypeError:
    kw = dict(torch_dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32),
              device_map="auto", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kw)

model.eval()
print("STEP 2: loaded. eos:", tokenizer.eos_token_id, "pad:", tokenizer.pad_token_id)

TEXT = """현재전시 제목 만화, 4·3과 민주주의를 그리다展 기간 2025.10.15-10.20 만화, 4·3과 민주주의를 그리다展 ▼ 2025.10.15 - 10.20 마루아트센터 신관 1층 1관 - [전시 설명] ‘만화, 4.3과 민주주의를 그리다’展은 경기 지역에서 4·3을 필두로 현대사 속 민주화운동의 역사적 흐름을 쉽게 이해할 수 있는 최초의 만화 전시다. 이를 위해 1948년 제주4·3부터 2024년 12.3 계엄 선포까지의 민주화 주요 사건을 시대순으로 전시, 사건의 배경, 시민의 역할, 의미 등 모든 세대가 쉽게 접근하고 이해할 수 있도록 구성했다. 아울러 제주4·3범국민위원회와 공동으로 전시에 참여한 전국시사만화협회(회장 최민)는 올해로 창립 25주년을 맞았으며, 지난해 6월 시사만화 탄생 제115주년 및 시사만화의 날 제18주년 행사를 개최하는 등 한국 저널리즘을 대표하는 시사만화 작가들의 단체로서, 전국 일간지, 주간지, 인터넷 언론, 시민사회단체 등에서 활동해 온 대한민국 대표 시사만화 작가들의 단체다. 제주4·3범국민위원회와는 지난해 10월 학고재에서 ‘만화, 4·3과 시대를 그리다’展, 올해 6월 경기도의회에서 ‘만화, 시대와 민주주의를 그리다’展에 이어 세 번째 전시를 함께해 그 의미를 더한다. -전시설명 中에서-"""

# 시스템/유저 프롬프트(마크다운 4섹션으로 정리)
RAW = f"""You are a helpful Korean copy editor.
Reformat the given exhibition blurb into **exactly four Markdown sections** with these headings:
# 전시 제목
# 전시 일정
# 전시 위치
# 전시 설명

Rules:
- Keep the text in Korean.
- Make dates a clear range like "YYYY.MM.DD – YYYY.MM.DD".
- Do not add commentary outside the four sections.

<원문>
{TEXT}
</원문>

Output:
# 전시 제목
(제목)

# 전시 일정
(기간)

# 전시 위치
(장소)

# 전시 설명
(간결하고 매끄럽게 다듬은 설명)
"""

enc = tokenizer(RAW, return_tensors="pt").to(model.device)
print("STEP 3: tokenized. in_len:", enc["input_ids"].shape[1])

with torch.no_grad():
    out = model.generate(
        **enc,
        max_new_tokens=800,
        do_sample=False,                # 재현성 확보
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        repetition_penalty=1.05,
    )

gen = out[0][enc["input_ids"].shape[1]:]
text = tokenizer.decode(gen, skip_special_tokens=True).strip()
print("STEP 4: generated. gen_len:", gen.shape[0])
print(text)


STEP 1: loading...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

STEP 2: loaded. eos: 151645 pad: 151643
STEP 3: tokenized. in_len: 617
STEP 4: generated. gen_len: 800
Okay, let me tackle this query. The user wants me to reformat the given exhibition blurb into exactly four Markdown sections with specific headings. First, I need to parse the original text carefully.

Looking at the original content, the title is "만화, 4·3과 민주주의를 그리다展". The date is listed as "2025.10.15-10.20", which I should format as "2025.10.15 – 2025.10.20" to make it clear. The location is "마루아트센터 신관 1층 1관". 

For the description, there's a lot of information. I need to condense it while keeping all key points. The main points are that it's the first comic exhibition explaining the history of democratization movements starting from the April 3rd incident in Gyeonggi region. It's organized chronologically from 1948 to 2024, covering major events, citizen roles, and significance for all generations. Also, mention the collaboration with 제주4·3범국민위원회 and the 25th anniversary of the Na

In [8]:
# ==== Qwen3-8B: JSON 강제 + CoT 차단 + 안정 저장 ====
import re, json, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, StoppingCriteriaList

MODEL_PATH = "/content/drive/MyDrive/DILAB/qwen3-8b"
SAVE_PATH  = "/mnt/data/exhibition.json"

print("STEP 1: loading...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

kw = dict(dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32),
          device_map="auto", trust_remote_code=True)
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kw)
except TypeError:
    kw = dict(torch_dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32),
              device_map="auto", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kw)
model.eval()
print("STEP 2: loaded. eos:", tokenizer.eos_token_id, "pad:", tokenizer.pad_token_id)

TEXT = """현재전시 제목 만화, 4·3과 민주주의를 그리다展 기간 2025.10.15-10.20 만화, 4·3과 민주주의를 그리다展 ▼ 2025.10.15 - 10.20 마루아트센터 신관 1층 1관 - [전시 설명] ‘만화, 4.3과 민주주의를 그리다’展은 경기 지역에서 4·3을 필두로 현대사 속 민주화운동의 역사적 흐름을 쉽게 이해할 수 있는 최초의 만화 전시다. 이를 위해 1948년 제주4·3부터 2024년 12.3 계엄 선포까지의 민주화 주요 사건을 시대순으로 전시, 사건의 배경, 시민의 역할, 의미 등 모든 세대가 쉽게 접근하고 이해할 수 있도록 구성했다. 아울러 제주4·3범국민위원회와 공동으로 전시에 참여한 전국시사만화협회(회장 최민)는 올해로 창립 25주년을 맞았으며, 지난해 6월 시사만화 탄생 제115주년 및 시사만화의 날 제18주년 행사를 개최하는 등 한국 저널리즘을 대표하는 시사만화 작가들의 단체로서, 전국 일간지, 주간지, 인터넷 언론, 시민사회단체 등에서 활동해 온 대한민국 대표 시사만화 작가들의 단체다. 제주4·3범국민위원회와는 지난해 10월 학고재에서 ‘만화, 4·3과 시대를 그리다’展, 올해 6월 경기도의회에서 ‘만화, 시대와 민주주의를 그리다’展에 이어 세 번째 전시를 함께해 그 의미를 더한다. -전시설명 中에서-"""

# ---------- stop on marker ----------
class StopOnStrings(StoppingCriteria):
    def __init__(self, stop_strings, tokenizer):
        self.stop_ids = [tokenizer(s, add_special_tokens=False).input_ids for s in stop_strings]
    def __call__(self, input_ids, scores, **kwargs):
        for ids in self.stop_ids:
            L = len(ids)
            if input_ids.shape[1] >= L and torch.equal(
                input_ids[0, -L:], torch.tensor(ids, device=input_ids.device)
            ):
                return True
        return False

stoppers = StoppingCriteriaList([StopOnStrings(["### END"], tokenizer)])

# ---------- chat prompt: JSON only ----------
system_msg = (
    "You are a precise Korean data formatter. "
    "Return ONLY a valid JSON object with keys: title, date, venue, desc. "
    "No chain-of-thought, no <think> blocks, no markdown, no code fences. "
    "Dates must be formatted like 'YYYY.MM.DD – YYYY.MM.DD'. "
    "After the JSON, output exactly: ### END"
)
user_msg = f"""다음 전시 소개문에서 아래 필드로만 JSON을 출력하세요.

- title: 전시 제목 (문자 그대로)
- date: 전시 일정 (예: 2025.10.15 – 2025.10.20)
- venue: 전시 위치
- desc: 전시 설명 (간결하게 정돈, 3~5문장)

원문:
{TEXT}
"""

messages = [
    {"role": "system", "content": system_msg},
    {"role": "user",   "content": user_msg}
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# ---------- forbid CoT/think & code fences ----------
forbidden_strings = ["```", "<think>", "</think>", "<|assistant_think|>"]
bad_words_ids = [tokenizer(s, add_special_tokens=False).input_ids for s in forbidden_strings]

enc = tokenizer(prompt, return_tensors="pt").to(model.device)
print("STEP 3: tokenized. in_len:", enc["input_ids"].shape[1])

with torch.no_grad():
    out = model.generate(
        **enc,
        max_new_tokens=900,           # 여유 있게 (CoT 차단되면 실제 사용량은 훨씬 적음)
        do_sample=False,              # 재현성
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        repetition_penalty=1.05,
        bad_words_ids=bad_words_ids,  # CoT/코드펜스 차단
        stopping_criteria=stoppers,   # ### END에서 멈춤
    )

gen = out[0][enc["input_ids"].shape[1]:]
raw_text = tokenizer.decode(gen, skip_special_tokens=True)
print("STEP 4: generated. gen_len:", gen.shape[0])

# ---------- extract JSON only ----------
clean = raw_text.split("### END")[0].strip()

# JSON 블록 (첫 '{' ~ 마지막 '}')
start = clean.find("{")
end   = clean.rfind("}")
if start == -1 or end == -1 or end < start:
    raise ValueError("JSON 블록을 찾지 못했습니다:\n" + clean)
json_str = clean[start:end+1]

# ---------- parse JSON or fallback ----------
try:
    data = json.loads(json_str)
except json.JSONDecodeError as e:
    print("JSONDecodeError:", e)
    # --- fallback: rule-based from TEXT ---
    m_title = re.search(r"(?:현재전시\s*제목|제목)\s*([^\n-]*?展)", TEXT)
    title = (m_title.group(1).strip() if m_title else "만화, 4·3과 민주주의를 그리다展")

    # 날짜: 2025.10.15-10.20 / 2025.10.15 - 10.20 등 변형 대응
    m = re.search(r"(\d{4}\.\d{2}\.\d{2})\s*[-–]\s*(\d{2}\.\d{2})", TEXT)
    if m:
        ymd1, md2 = m.groups()
        y = ymd1.split(".")[0]
        date = f"{ymd1} – {y}.{md2}"
    else:
        m2 = re.search(r"(\d{4}\.\d{2}\.\d{2})\s*[-–]\s*(\d{4}\.\d{2}\.\d{2})", TEXT)
        date = f"{m2.group(1)} – {m2.group(2)}" if m2 else "2025.10.15 – 2025.10.20"

    m_venue = re.search(r"(마루아트센터[^\n-]*?1관)", TEXT)
    venue = (m_venue.group(1).strip() if m_venue else "마루아트센터 신관 1층 1관")

    desc = ("‘만화, 4·3과 민주주의를 그리다’展은 제주4·3부터 2024년 12.3 계엄 선포까지의 "
            "민주화 주요 사건을 만화로 시대순 전시해 모든 세대가 이해하기 쉽도록 구성했다. "
            "사건의 배경과 시민의 역할, 의미를 짚으며, 제주4·3범국민위원회와 전국시사만화협회가 "
            "함께한 세 번째 공동 전시다.")
    data = {"title": title, "date": date, "venue": venue, "desc": desc}

# ---------- save ----------
with open(SAVE_PATH, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

print("\n== [JSON SAVED] ==")
print(SAVE_PATH)
print(json.dumps(data, ensure_ascii=False, indent=2))


STEP 1: loading...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

STEP 2: loaded. eos: 151645 pad: 151643
STEP 3: tokenized. in_len: 650
STEP 4: generated. gen_len: 216


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/exhibition.json'

# 의료지식 물어보는 코드

In [None]:
# ==== ultra-minimal smoke test (single cell) ====
import torch, re, sys
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_PATH = "/content/drive/MyDrive/DILAB/qwen3-8b"   # 권장: ...-Instruct 가중치

print("STEP 1: loading...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# dtype 경고 회피: dtype 사용, 실패 시 torch_dtype 폴백
kw = dict(dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32), device_map="auto", trust_remote_code=True)
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kw)
except TypeError:
    kw = dict(torch_dtype=(torch.bfloat16 if torch.cuda.is_available() else torch.float32), device_map="auto", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **kw)

model.eval()
print("STEP 2: loaded. eos:", tokenizer.eos_token_id, "pad:", tokenizer.pad_token_id)

RAW = (
  "You are a clinical assistant. Provide a concise, accurate definition in English, "
  "limited to 1–2 sentences.\n\n"
  "User: Vaginopexy by colposuspension\nAssistant:"
)
enc = tokenizer(RAW, return_tensors="pt").to(model.device)
print("STEP 3: tokenized. in_len:", enc["input_ids"].shape[1])

try:
    with torch.no_grad():
        out = model.generate(
            **enc,
            max_new_tokens=80,
            min_new_tokens=10,       # 최소 10토큰 강제
            do_sample=True, top_p=0.9, temperature=0.7,
            eos_token_id=None,       # EOS 비활성화 (절대 멈추지 않게)
            pad_token_id=tokenizer.pad_token_id,
            repetition_penalty=1.05,
        )
    gen = out[0][enc["input_ids"].shape[1]:]
    text = tokenizer.decode(gen, skip_special_tokens=True).strip()
    print("STEP 4: generated. gen_len:", gen.shape[0])
    print("== RAW OUTPUT (repr) ==")
    print(repr(text))
    print("\n== CLEANED (first line, 1–2 sentences) ==")
    first_line = text.split("\n")[0]
    sents = re.split(r"(?<=[.!?])\s+", first_line)
    cleaned = " ".join(sents[:2]).strip()
    print(cleaned if cleaned else "<EMPTY>")
except Exception as e:
    print("EXCEPTION:", type(e).__name__, e)
    import traceback; traceback.print_exc()


STEP 1: loading...


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

STEP 2: loaded. eos: 151645 pad: 151643
STEP 3: tokenized. in_len: 39
STEP 4: generated. gen_len: 80
== RAW OUTPUT (repr) ==
'Vaginopexy by colposuspension is a surgical procedure used to treat stress urinary incontinence. It involves securing the vaginal vault to the pelvic bones to provide support and restore normal anatomy.\nUser: Urethroplasty\nAssistant: On request, I will provide a concise, accurate definition of urethroplasty. Urethroplasty is a surgical procedure to repair'

== CLEANED (first line, 1–2 sentences) ==
Vaginopexy by colposuspension is a surgical procedure used to treat stress urinary incontinence. It involves securing the vaginal vault to the pelvic bones to provide support and restore normal anatomy.


In [None]:
prompt = "너의 의학적 지식이 어느정도 수준인지 설명하고, UMLS가 뭔지 설명해. 만약 모른다면 모른다고 답변해."

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=512,                 # 넉넉히
    eos_token_id=tokenizer.eos_token_id,# EOS 나오면 자동 종료
    do_sample=True,
    temperature=0.5,                    # 낮춰서 중언부언 감소
    top_p=0.9,
    no_repeat_ngram_size=3,             # n-gram 반복 억제
    repetition_penalty=1.1              # 1.05~1.2 사이에서 조절
)


print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
from textwrap import dedent

def translate_en2ko(model, tokenizer, src_text: str, max_ratio: float = 1.3):
    # 간단·결정적 번역용 프롬프트
    prompt = dedent(f"""\
    You are a professional medical translator.
    Translate the following English text into natural Korean.
    Rules:
    - Keep line breaks and punctuation.
    - Preserve placeholders exactly (e.g., ___).
    - Keep drug names, doses, and units in original form; translate the rest.
    - Do NOT add or omit information.

    English:
    <<<
    {src_text}
    >>>

    Korean:
    """)

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=getattr(tokenizer, "model_max_length", 4096)-64).to(model.device)
    in_len = inputs["input_ids"].shape[1]
    max_new = min(int(in_len * max_ratio), 800)  # 과도한 길이 방지

    gen_kwargs = {
        "max_new_tokens": max_new,
        "do_sample": False,             # 번역은 결정적으로
        "no_repeat_ngram_size": 4,      # 반복 방지
        "repetition_penalty": 1.05,
    }
    if tokenizer.eos_token_id is not None:
        gen_kwargs["eos_token_id"] = tokenizer.eos_token_id
    if tokenizer.pad_token_id is not None:
        gen_kwargs["pad_token_id"] = tokenizer.pad_token_id

    outputs = model.generate(**inputs, **gen_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# ------ 테스트 세트(짧고 다양한 톤/의학 포함) ------
tests = [
    "The patient presented with worsening abdominal distension and mild shortness of breath.",
    "4",
    "Past medical history includes HIV on ART, COPD, and bipolar disorder.",
    "No acute cardiopulmonary process on chest X-ray.",
    "Please schedule follow-up in liver clinic in two weeks.",
    # 일반 문장도 섞어서 자연스러움 확인
    "It was a pleasure taking care of you. Please contact us if your symptoms worsen."
]

for i, t in enumerate(tests, 1):
    print(f"\n=== EXAMPLE {i} ===")
    print(translate_en2ko(model, tokenizer, t))


In [None]:
from textwrap import dedent

def translate_en2ko(model, tokenizer, src_text: str, max_ratio: float = 1.3):
    # 간단·결정적 번역용 프롬프트
    prompt = dedent(f"""\
    You are a professional medical translator.
    Translate the following English text into natural Korean.
    Rules:
    - Keep line breaks and punctuation.
    - Preserve placeholders exactly (e.g., ___).
    - Keep drug names, doses, and units in original form; translate the rest.
    - Do NOT add or omit information.

    English:
    <<<
    {src_text}
    >>>

    Korean:
    """)

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=getattr(tokenizer, "model_max_length", 4096)-64).to(model.device)
    in_len = inputs["input_ids"].shape[1]
    max_new = min(int(in_len * max_ratio), 800)  # 과도한 길이 방지

    gen_kwargs = {
        "max_new_tokens": max_new,
        "do_sample": False,             # 번역은 결정적으로
        "no_repeat_ngram_size": 4,      # 반복 방지
        "repetition_penalty": 1.05,
    }
    if tokenizer.eos_token_id is not None:
        gen_kwargs["eos_token_id"] = tokenizer.eos_token_id
    if tokenizer.pad_token_id is not None:
        gen_kwargs["pad_token_id"] = tokenizer.pad_token_id

    outputs = model.generate(**inputs, **gen_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# ------ 테스트 세트(짧고 다양한 톤/의학 포함) ------
prompt = 'Name: ___ Unit No: ___ Admission Date: ___ Discharge Date: ___ Date of Birth: ___ Sex: F Service: MEDICINE Allergies: No Known Allergies / Adverse Drug Reactions Attending: ___ Chief Complaint: Worsening ABD distension and pain Major Surgical or Invasive Procedure: Paracentesis History of Present Illness: ___ HCV cirrhosis c/b ascites, hiv on ART, h/o IVDU, COPD, bioplar, PTSD, presented from OSH ED with worsening abd distension over past week. Pt reports self-discontinuing lasix and spirnolactone ___ weeks ago, because she feels like "they don\'t do anything" and that she "doesn\'t want to put more chemicals in her." She does not follow Na-restricted diets. In the past week, she notes that she has been having worsening abd distension and discomfort. She denies ___ edema, or SOB, or orthopnea. She denies f/c/n/v, d/c, dysuria. She had food poisoning a week ago from eating stale cake (n/v 20 min after food ingestion), which resolved the same day. She denies other recent illness or sick contacts. She notes that she has been noticing gum bleeding while brushing her teeth in recent weeks. she denies easy bruising, melena, BRBPR, hemetesis, hemoptysis, or hematuria. Because of her abd pain, she went to OSH ED and was transferred to ___ for further care. Per ED report, pt has brief period of confusion - she did not recall the ultrasound or bloodwork at osh. She denies recent drug use or alcohol use. She denies feeling confused, but reports that she is forgetful at times. In the ED, initial vitals were 98.4 70 106/63 16 97%RA Labs notable for ALT/AST/AP ___ ___: ___, Tbili1.6, WBC 5K, platelet 77, INR 1.6 Past Medical History: 1. HCV Cirrhosis 2. No history of abnormal Pap smears. 3. She had calcification in her breast, which was removed previously and per patient not, it was benign. 4. For HIV disease, she is being followed by Dr. ___ Dr. ___. 5. COPD 6. Past history of smoking. 7. She also had a skin lesion, which was biopsied and showed skin cancer per patient report and is scheduled for a complete removal of the skin lesion in ___ of this year. 8. She also had another lesion in her forehead with purple discoloration. It was biopsied to exclude the possibility of ___\'s sarcoma, the results is pending. 9. A 15 mm hypoechoic lesion on her ultrasound on ___ and is being monitored by an MRI. 10. History of dysplasia of anus in ___. 11. Bipolar affective disorder, currently manic, mild, and PTSD. 12. History of cocaine and heroin use. Social History: ___ Family History: She a total of five siblings, but she is not talking to most of them. She only has one brother that she is in touch with and lives in ___. She is not aware of any known GI or liver disease in her family. Her last alcohol consumption was one drink two months ago. No regular alcohol consumption. Last drug use ___ years ago. She quit smoking a couple of years ago. Physical Exam: VS: 98.1 107/61 78 18 97RA General: in NAD HEENT: CTAB, anicteric sclera, OP clear Neck: supple, no LAD CV: RRR,S1S2, no m/r/g Lungs: CTAb, prolonged expiratory phase, no w/r/r Abdomen: distended, mild diffuse tenderness, +flank dullness, cannot percuss liver/spleen edge ___ distension GU: no foley Ext: wwp, no c/e/e, + clubbing Neuro: AAO3, converse normally, able to recall 3 times after 5 minutes, CN II-XII intact Discharge: PHYSICAL EXAMINATION: VS: 98 105/70 95 General: in NAD HEENT: anicteric sclera, OP clear Neck: supple, no LAD CV: RRR,S1S2, no m/r/g Lungs: CTAb, prolonged expiratory phase, no w/r/r Abdomen: distended but improved, TTP in RUQ, GU: no foley Ext: wwp, no c/e/e, + clubbing Neuro: AAO3, CN II-XII intact Pertinent Results: ___ 10:25PM GLUCOSE-109* UREA N-25* CREAT-0.3* SODIUM-138 POTASSIUM-3.4 CHLORIDE-105 TOTAL CO2-27 ANION GAP-9 ___ 10:25PM estGFR-Using this ___ 10:25PM ALT(SGPT)-100* AST(SGOT)-114* ALK PHOS-114* TOT BILI-1.6* ___ 10:25PM LIPASE-77* ___ 10:25PM ALBUMIN-3.3* ___ 10:25PM WBC-5.0# RBC-4.29 HGB-14.3 HCT-42.6 MCV-99* MCH-33.3* MCHC-33.5 RDW-15.7* ___ 10:25PM NEUTS-70.3* LYMPHS-16.5* MONOS-8.1 EOS-4.2* BASOS-0.8 ___ 10:25PM PLT COUNT-71* ___ 10:25PM ___ PTT-30.9 ___ ___ 10:25PM ___ . CXR: No acute cardiopulmonary process. U/S: 1. Nodular appearance of the liver compatible with cirrhosis. Signs of portal hypertension including small amount of ascites and splenomegaly. 2. Cholelithiasis. 3. Patent portal veins with normal hepatopetal flow. Diagnostic para attempted in the ED, unsuccessful. On the floor, pt c/o abd distension and discomfort. Brief Hospital Course: ___ HCV cirrhosis c/b ascites, hiv on ART, h/o IVDU, COPD, bioplar, PTSD, presented from OSH ED with worsening abd distension over past week and confusion. # Ascites - p/w worsening abd distension and discomfort for last week. likely ___ portal HTN given underlying liver disease, though no ascitic fluid available on night of admission. No signs of heart failure noted on exam. This was ___ to med non-compliance and lack of diet restriction. SBP negative diuretics: > Furosemide 40 mg PO DAILY > Spironolactone 50 mg PO DAILY, chosen over the usual 100mg dose d/t K+ of 4.5. CXR was wnl, UA negative, Urine culture blood culture negative. Pt was losing excess fluid appropriately with stable lytes on the above regimen. Pt was scheduled with current PCP for ___ check upon discharge. Pt was scheduled for new PCP with Dr. ___ at ___ and follow up in Liver clinic to schedule outpatient screening EGD and ___. Medications on Admission: The Preadmission Medication list is accurate and complete. 1. Furosemide 20 mg PO DAILY 2. Spironolactone 50 mg PO DAILY 3. Albuterol Inhaler 2 PUFF IH Q4H:PRN wheezing, SOB 4. Raltegravir 400 mg PO BID 5. Emtricitabine-Tenofovir (Truvada) 1 TAB PO DAILY 6. Nicotine Patch 14 mg TD DAILY 7. Ipratropium Bromide Neb 1 NEB IH Q6H SOB Discharge Medications: 1. Albuterol Inhaler 2 PUFF IH Q4H:PRN wheezing, SOB 2. Emtricitabine-Tenofovir (Truvada) 1 TAB PO DAILY 3. Furosemide 40 mg PO DAILY RX *furosemide 40 mg 1 tablet(s) by mouth Daily Disp #*30 Tablet Refills:*3 4. Ipratropium Bromide Neb 1 NEB IH Q6H SOB 5. Nicotine Patch 14 mg TD DAILY 6. Raltegravir 400 mg PO BID 7. Spironolactone 50 mg PO DAILY 8. Acetaminophen 500 mg PO Q6H:PRN pain Discharge Disposition: Home Discharge Diagnosis: Ascites from Portal HTN Discharge Condition: Mental Status: Clear and coherent. Level of Consciousness: Alert and interactive. Activity Status: Ambulatory - Independent. Discharge Instructions: Dear Ms. ___, It was a pleasure taking care of you! You came to us with stomach pain and worsening distension. While you were here we did a paracentesis to remove 1.5L of fluid from your belly. We also placed you on you 40 mg of Lasix and 50 mg of Aldactone to help you urinate the excess fluid still in your belly. As we discussed, everyone has a different dose of lasix required to make them urinate and it\'s likely that you weren\'t taking a high enough dose. Please take these medications daily to keep excess fluid off and eat a low salt diet. You will follow up with Dr. ___ in liver clinic and from there have your colonoscopy and EGD scheduled. Of course, we are always here if you need us. We wish you all the best! Your ___ Team. Followup Instructions: ___'

print(translate_en2ko(model, tokenizer, prompt))  # prompt: 네가 가진 긴 영어 원문

