<a href="https://colab.research.google.com/github/finnchoi72/Textmind/blob/main/TextMind.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ⬛ Cell 0 (run this first, right after restarting the runtime)
# Uninstall packages the notebook treats as unnecessary. IPython substitutes the
# Python variable `pkgs` into the shell command; the message is echoed only when
# `pip uninstall` exits successfully (`&&`), and pip's own output is discarded.
pkgs="tensorflow jax jaxlib pymc fastai treescope blosc2 torchvision"
!pip uninstall -y $pkgs >/dev/null 2>&1 && echo "🧹 불필요 패키지 제거 완료"

# ⬛ Cell 1 – install the core packages (revised version of the earlier cell)
# torch is fetched from the PyTorch CUDA 12.1 wheel index.
!pip -q install "torch==2.2.2+cu121" --index-url https://download.pytorch.org/whl/cu121
!pip -q install "transformers>=4.41.0" sentencepiece
!pip -q install "numpy==1.24.4" "spacy==3.7.2" "thinc==8.2.2"
# Download the small Korean spaCy pipeline that a later cell loads via spacy.load.
!python -m spacy download ko_core_news_sm


🧹 불필요 패키지 제거 완료
Collecting ko-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/ko_core_news_sm-3.7.0/ko_core_news_sm-3.7.0-py3-none-any.whl (14.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.7/14.7 MB[0m [31m97.1 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('ko_core_news_sm')


In [2]:
# ⬛ Cell 2: symptom tag vocabulary (used as the zero-shot candidate labels)
TAGS = [
    "depressed_mood",
    "anhedonia",
    "irritability",
    "elevated_mood",
    "grandiosity",
    "rapid_switching",
    "persecutory_paranoia",
    "reference_ideas",
    "auditory_hallucination",
    "visual_hallucination",
    "thought_disorganization",
    "obsessive_thoughts",
    "aggression_violence",
    "self_harm",
    "reckless_behavior",
    "compulsive_ritual",
    "insomnia",
    "hypersomnia",
    "panic_attacks",
    "hypervigilance",
    "avoidance_behavior",
    "flashbacks",
    "emotional_numbing",
    "dissociation",
    "abandonment_fear",
    "unstable_relationships",
    "manipulative_behavior",
    "identity_disturbance",
    "somatic_complaints",
    "substance_abuse",
]

# ⬛ Cell 3: symptom -> disorder weight mapping (simple example)
# Each entry maps a symptom tag to {disorder code: weight}; tags without an
# entry contribute nothing to the diagnosis scores.
SYM2DX = {
    "depressed_mood": {
        "MDD": 2,
        "BIPOLAR": 1,
    },
    "anhedonia": {
        "MDD": 2,
    },
    "elevated_mood": {
        "BIPOLAR": 3,
    },
    "grandiosity": {
        "BIPOLAR": 2,
        "NPD": 3,
    },
    "persecutory_paranoia": {
        "SCHIZO": 3,
        "PPD": 2,
    },
    "aggression_violence": {
        "ASPD": 2,
        "BPD": 1,
    },
    # ... add more mappings as needed
}

# Disorder codes that are scored; every code starts at 0 in diagnose().
DISEASES = [
    "MDD",
    "BIPOLAR",
    "PTSD",
    "SCHIZO",
    "ASPD",
    "BPD",
    "PPD",
    "NPD",
]

In [3]:
# ⬛ Cell 4: imports, spaCy pipeline, regex rules and zero-shot tagger
import re, spacy, torch, warnings, json
from collections import Counter
from transformers import pipeline

# Korean spaCy pipeline; analyze_text() uses it to split text into sentences.
nlp = spacy.load("ko_core_news_sm")
# Suppress all Python warnings raised from this point onward.
warnings.filterwarnings("ignore")

# Symptom tag -> list of regex patterns. tags_by_rule() assigns a tag when any
# one of its patterns matches somewhere in the sentence (re.search).
RULES = {
    "depressed_mood": [r"절망", r"우울", r"슬픔"],
    "irritability":   [r"버럭", r"화를.*냈"],
    "aggression_violence": [r"폭행|주먹|부숴"],
    # ... add more rules as needed
}

# Zero-shot classifier (author's note: recognizes both English and Korean to
# some extent).
# NOTE(review): the recorded sample run attaches ten unrelated-looking tags to a
# single Korean sentence, so this checkpoint presumably handles Korean poorly —
# TODO evaluate a multilingual NLI model before trusting these tags.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=0 if torch.cuda.is_available() else -1  # GPU 0 when CUDA is available, otherwise CPU
)

def tags_by_rule(text):
    """Return the RULES tags whose regex patterns match `text`.

    A tag is included once if at least one of its patterns is found anywhere
    in the text (re.search); tags are returned in RULES iteration order.
    """
    return [
        label
        for label, regexes in RULES.items()
        if any(re.search(regex, text) for regex in regexes)
    ]

def tags_by_zeroshoot(text, threshold=0.4):
    """Tag `text` with the zero-shot classifier.

    Every label in TAGS is scored independently (multi_label=True); the labels
    whose score is at least `threshold` are returned in the order the
    classifier reports them.

    Args:
        text: Sentence to classify.
        threshold: Minimum score (inclusive) for a label to be kept.

    Returns:
        List of tag names; empty when nothing reaches the threshold or when
        `text` is blank.
    """
    # Blank input (None, "" or whitespace only) never reaches the model: the
    # pipeline rejects an empty sequence with ValueError, and whitespace has
    # nothing to classify, so any label it produced would be noise.
    if not text or not text.strip():
        return []
    out = classifier(text, TAGS, multi_label=True)
    return [
        label
        for label, score in zip(out["labels"], out["scores"])
        if score >= threshold
    ]

def extract_tags(sentence):
    """Return symptom tags for one sentence.

    Regex rules are tried first; the zero-shot classifier is consulted only
    when the rules produce no tag at all.
    """
    return tags_by_rule(sentence) or tags_by_zeroshoot(sentence)

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


In [4]:
# ⬛ Cell 5: scoring (diagnose) and the end-to-end entry point (analyze_text)
def diagnose(tag_counter:Counter, sym2dx=None, diseases=None):
    """Turn symptom-tag counts into relative disorder scores.

    Each tag contributes `weight * count` to every disorder it maps to. The
    totals are then divided by the highest total, so the top disorder scores
    1.0.

    Args:
        tag_counter: Mapping of symptom tag -> number of occurrences.
        sym2dx: Optional tag -> {disorder: weight} table; defaults to the
            notebook-level SYM2DX.
        diseases: Optional iterable of disorder codes that always appear in
            the result (starting at 0); defaults to the notebook-level
            DISEASES.

    Returns:
        {disorder: score rounded to 3 decimals}, ordered from highest to
        lowest score, or {"message": "증거 부족"} when no tag contributed any
        weight.
    """
    weights = SYM2DX if sym2dx is None else sym2dx
    names = DISEASES if diseases is None else diseases

    scores = {d: 0 for d in names}
    for tag, cnt in tag_counter.items():
        for d, w in weights.get(tag, {}).items():
            # .get() so a disorder that appears only in the weight table is
            # scored instead of raising KeyError.
            scores[d] = scores.get(d, 0) + w * cnt

    # default=0 keeps an empty disorder list from raising ValueError.
    top = max(scores.values(), default=0)
    if top == 0:
        return {"message":"증거 부족"}

    ranked = sorted(scores.items(), key=lambda item: -item[1])
    return {d: round(v / top, 3) for d, v in ranked}

def analyze_text(text:str):
    """Tag every sentence of `text` and derive a diagnosis from the tag counts.

    The text is split into sentences with the spaCy pipeline `nlp`. Each
    sentence is stripped of surrounding whitespace before tagging, so the
    stored "sent" value no longer carries the trailing newline of multi-line
    input, and whitespace-only sentences are skipped rather than sent to the
    tagger.

    Args:
        text: Raw input text; may span several lines.

    Returns:
        {"sentences": [{"sent": str, "tags": [str, ...]}, ...],
         "diagnosis": result of diagnose() on the accumulated tag counts}
        Only sentences that received at least one tag are listed.
    """
    tag_counter = Counter()
    annotated = []
    for sent in nlp(text).sents:
        sentence = sent.text.strip()
        if not sentence:
            continue
        tags = extract_tags(sentence)
        if tags:
            annotated.append({"sent": sentence, "tags": tags})
            tag_counter.update(tags)
    return {"sentences": annotated, "diagnosis": diagnose(tag_counter)}

In [7]:
# ⬛ Cell 6: run the full pipeline on a three-sentence Korean sample
sample = """그는 사소한 일에도 버럭 소리쳤다.
방을 마구 부숴 버리고 나서도 후회하는 기색이 없었다.
모두가 자신을 음해한다고 중얼거렸다."""
result = analyze_text(sample)
# Print the diagnosis first (relative scores, or the insufficient-evidence
# message), then the tagged sentences.
print("=== 진단 결과 ===")
print(result["diagnosis"])
print("\n=== 태그된 문장 ===")
# ensure_ascii=False prints the Korean text as-is instead of \u escapes.
print(json.dumps(result["sentences"], ensure_ascii=False, indent=2))

=== 진단 결과 ===
{'BIPOLAR': 1.0, 'NPD': 0.6, 'ASPD': 0.4, 'BPD': 0.2, 'MDD': 0.0, 'PTSD': 0.0, 'SCHIZO': 0.0, 'PPD': 0.0}

=== 태그된 문장 ===
[
  {
    "sent": "그는 사소한 일에도 버럭 소리쳤다.\n",
    "tags": [
      "irritability"
    ]
  },
  {
    "sent": "방을 마구 부숴 버리고 나서도 후회하는 기색이 없었다.\n",
    "tags": [
      "aggression_violence"
    ]
  },
  {
    "sent": "모두가 자신을 음해한다고 중얼거렸다.",
    "tags": [
      "reference_ideas",
      "dissociation",
      "unstable_relationships",
      "elevated_mood",
      "rapid_switching",
      "visual_hallucination",
      "identity_disturbance",
      "avoidance_behavior",
      "irritability",
      "grandiosity"
    ]
  }
]
