In [16]:
# -*- coding: utf-8 -*-
"""
emovec_autorun_cls.py
- 인자 없이 실행:  python emovec_autorun_cls.py
- DATA_PATH에 있는 JSON/JSONL(샘플과 동일 포맷) 로드
- 감정코드(예: "E18")를 **분류(LogisticRegression, multinomial, class_weight='balanced')**
  로 예측한 뒤, 미리 정의한 EMOTION_VEC 프로토타입으로 4D 벡터로 매핑
- 평가: accuracy / macro-F1 (+ 매핑 후 MAE 참고용)
- 데모 문장 예측 출력
- 모델/리포트 저장

사용 전 수정할 부분:
    DATA_PATH = "YOUR_FULL_DATASET.json"  # 전체 데이터 파일명
    OUT_DIR   = "emovec_autorun_cls_out"  # 산출물 폴더
    USE_SS    = False                     # SS(공감 문장) 포함 여부
"""

from __future__ import annotations
import json, pathlib, sys
from typing import List, Dict
import numpy as np
from joblib import dump
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error

# ------------------- SETTINGS -------------------
# DATA_PATH: dataset file read by load_json_any (JSON array or JSONL).
# OUT_DIR:   directory where the trained model and the report are written.
# USE_SS:    whether "SS*" utterances are included in addition to "HS*" ones.
DATA_PATH = "C:/Users/Admin/Documents/GitHub/emotionAI/감성대화말뭉치(최종데이터)_Training.json"     # JSON array or JSONL
OUT_DIR   = "emovec_autorun_cls_out"
USE_SS    = False                        # default: use HS only (less noise)

# Demo sentences (referenced by IPT2NTL when it builds its demo output).
TEST_TEXTS = [
    "일이 왜 이렇게 끝이 없지? 화나.",
    "요즘 회사 생활이 편하고 좋아.",
    "면접에서 갑자기 예상치 못한 질문이 나와서 당황했어.",
    "친구들은 다 취업했는데 나만 못 해서 불안해.",
]

# ------------------- Emotion code → 4D prototype -------------------
# Each emotion code maps to a hand-specified 4-component prototype vector.
# NOTE(review): components are presumably ordered as in KEYS below — confirm.
EMOTION_VEC = {
  "E10":[0.32,0.28,0.70,0.15],
  "E11":[0.55,0.45,0.40,0.35],
  "E12":[0.45,0.40,0.55,0.25],
  "E15":[0.28,0.22,0.78,0.12],
  "E16":[0.55,0.45,0.50,0.30],
  "E18":[0.30,0.20,0.80,0.10],
  "E19":[0.35,0.30,0.65,0.18],
  "E20":[0.25,0.30,0.55,0.20],
  "E21":[0.40,0.38,0.52,0.28],
  "E22":[0.22,0.28,0.60,0.18],
  "E23":[0.48,0.40,0.58,0.30],
  "E24":[0.42,0.38,0.56,0.26],
  "E25":[0.33,0.32,0.68,0.18],
  "E26":[0.30,0.34,0.58,0.22],
  "E30":[0.38,0.36,0.60,0.24],
  "E32":[0.34,0.34,0.62,0.22],
  "E33":[0.45,0.35,0.55,0.25],
  "E35":[0.40,0.30,0.60,0.20],
  "E36":[0.44,0.42,0.48,0.30],
  "E37":[0.50,0.50,0.45,0.35],
  "E39":[0.46,0.44,0.46,0.34],
  "E40":[0.28,0.30,0.58,0.22],
  "E42":[0.36,0.32,0.62,0.22],
  "E44":[0.30,0.26,0.70,0.16],
  "E47":[0.35,0.33,0.63,0.20],
  "E49":[0.26,0.28,0.57,0.20],
  "E50":[0.40,0.40,0.50,0.30],
  "E51":[0.52,0.46,0.42,0.32],
  "E52":[0.44,0.40,0.54,0.28],
  "E53":[0.40,0.38,0.52,0.30],
  "E54":[0.36,0.34,0.56,0.28],
  "E55":[0.34,0.36,0.56,0.26],
  "E56":[0.48,0.46,0.48,0.32],
  "E57":[0.42,0.40,0.52,0.30],
  "E58":[0.40,0.38,0.50,0.30],
  "E59":[0.43,0.41,0.49,0.31],
  "E60":[0.62,0.58,0.38,0.42],
  "E62":[0.35,0.35,0.65,0.20],
  "E64":[0.70,0.78,0.30,0.48],
  "E65":[0.76,0.70,0.36,0.44],
  "E66":[0.60,0.58,0.42,0.38],
  "E67":[0.68,0.64,0.40,0.42],
  "E68":[0.80,0.78,0.32,0.46],
  "E69":[0.82,0.80,0.35,0.46]
}
# Vector used by map_vec for any code that is missing from EMOTION_VEC.
EMO_DEFAULT = [0.5, 0.5, 0.5, 0.5]
# Component names emitted alongside the vectors in the report and demo output.
KEYS = ["dopamine","serotonin","norepinephrine","melatonin"]

# ------------------- Fallback sample (used when the data file is missing) -------------------
# Same record layout the loader expects: profile.emotion.type holds the label,
# talk.content maps utterance keys ("HS..", "SS..") to text.
SAMPLE_JSON = [
    {"profile":{"emotion":{"type":"E18"}},"talk":{"content":{"HS01":"일은 왜 해도 해도 끝이 없을까? 화가 난다.", "SS01":"많이 힘드시겠어요."}}},
    {"profile":{"emotion":{"type":"E66"}},"talk":{"content":{"HS01":"요즘 직장생활이 너무 편하고 좋은 것 같아!", "SS01":"복지가 좋아서 마음이 편해."}}},
    {"profile":{"emotion":{"type":"E35"}},"talk":{"content":{"HS01":"면접에서 부모님 직업 질문이 나와서 당혹스러웠어.", "SS01":"무척 놀라셨겠어요."}}},
    {"profile":{"emotion":{"type":"E37"}},"talk":{"content":{"HS01":"졸업반이라 취업 걱정은 되지만 너무 불안해하긴 싫어.", "SS01":"느긋한 태도가 낫다고도 생각해."}}},
]

def load_json_any(path: pathlib.Path):
    """Read *path* as UTF-8 and parse it as a JSON array or as JSONL.

    When the stripped text begins with ``[`` the whole text is parsed as a
    single JSON document and returned. Otherwise every non-blank line is
    parsed as its own JSON value and the values are returned as a list.

    Raises:
        json.JSONDecodeError: if the document or any line is not valid JSON.
    """
    text = path.read_text(encoding="utf-8").strip()
    if text[:1] == "[":
        return json.loads(text)
    # JSONL: one JSON value per line, blank lines ignored.
    stripped_lines = (raw.strip() for raw in text.splitlines())
    return [json.loads(entry) for entry in stripped_lines if entry]

def flatten_for_classification(items, min_char=3, use_hs=True, use_ss=False):
    """Flatten dialogue records into parallel (texts, labels) lists.

    For every record the label is ``record["profile"]["emotion"]["type"]``
    and the candidate texts are the values of ``record["talk"]["content"]``.
    A value is kept when it is a string whose stripped form has at least
    ``min_char`` characters; keys starting with "HS" are dropped unless
    ``use_hs`` is true and keys starting with "SS" are dropped unless
    ``use_ss`` is true.

    Best-effort: when a record raises while being processed, the rest of that
    record is skipped (anything already collected from it is kept).

    Returns:
        A tuple ``(texts, labels)`` of equal-length lists.
    """
    texts, labels = [], []
    for record in items:
        try:
            label = record["profile"]["emotion"]["type"]
            utterances = record["talk"]["content"]
            for key, value in utterances.items():
                if not isinstance(value, str):
                    continue
                text = value.strip()
                if not text or len(text) < min_char:
                    continue
                excluded = (key.startswith("HS") and not use_hs) or (
                    key.startswith("SS") and not use_ss
                )
                if excluded:
                    continue
                texts.append(text)
                labels.append(label)
        except Exception:
            # Malformed record: skip whatever is left of it.
            continue
    return texts, labels

def build_model():
    """Return an unfitted TF-IDF -> logistic-regression pipeline.

    The vectorizer uses character n-grams within word boundaries (3 to 5
    characters); the classifier uses balanced class weights with the lbfgs
    solver.
    """
    vectorizer = TfidfVectorizer(
        analyzer="char_wb",
        ngram_range=(3, 5),
        min_df=5,
        max_features=300_000,
        sublinear_tf=True,
    )
    # NOTE(review): recent scikit-learn releases deprecate `multi_class`;
    # confirm against the installed version before upgrading.
    classifier = LogisticRegression(
        multi_class="multinomial",
        class_weight="balanced",
        max_iter=400,
        C=2.0,
        solver="lbfgs",
    )
    return Pipeline([("tfidf", vectorizer), ("clf", classifier)])

def map_vec(labels):
    """Map emotion codes to their prototype vectors as a float array.

    Codes missing from EMOTION_VEC are mapped to EMO_DEFAULT.
    """
    rows = []
    for code in labels:
        rows.append(EMOTION_VEC.get(code, EMO_DEFAULT))
    return np.asarray(rows, dtype=float)

def IPT2NTL(IPT):
    """Predict the emotion code of ``IPT`` and return its prototype vector.

    If a saved model exists in OUT_DIR it is loaded; otherwise a model is
    trained on DATA_PATH (or on the embedded SAMPLE_JSON when that file does
    not exist), evaluated on a 20% hold-out split, and saved together with a
    JSON report. A JSON summary (paths, report, demo prediction) is printed
    to stdout.

    Args:
        IPT: the input sentence to classify.

    Returns:
        The prototype vector (list of floats, one per entry of KEYS) of the
        predicted emotion code.

    Raises:
        SystemExit: with code 2 when training is required but no samples
            could be parsed from the data.
    """
    # The file-level import only brings in `dump`; `load` is needed for the
    # cached-model path below.
    from joblib import load

    data_path = pathlib.Path(DATA_PATH)
    out_dir = pathlib.Path(OUT_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)
    model_path = out_dir / "emovec_autorun_cls.pkl"
    report_path = out_dir / "emovec_autorun_cls_report.json"

    has_data_file = data_path.exists()
    used_path = str(data_path) if has_data_file else "(embedded SAMPLE_JSON)"

    if model_path.exists():
        # NOTE: joblib.load unpickles arbitrary objects; only load model
        # files produced by this script.
        pipe = load(model_path)
        # Reuse the stored report when it is readable, otherwise fall back
        # to a minimal description of the loaded model.
        try:
            report = json.loads(report_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            report = {
                "mode": "classification->vector_map",
                "data_used": used_path,
                "use_ss": USE_SS,
                "note": "loaded pre-trained model"
            }
    else:
        # The corpus is only read when a model actually has to be trained.
        items = load_json_any(data_path) if has_data_file else SAMPLE_JSON
        X, y = flatten_for_classification(items, min_char=3, use_hs=True, use_ss=USE_SS)
        if not X:
            print("No samples parsed. Check DATA_PATH or format.", file=sys.stderr)
            sys.exit(2)

        Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
        pipe = build_model()
        pipe.fit(Xtr, ytr)
        pred_lbl = pipe.predict(Xte)

        acc = accuracy_score(yte, pred_lbl)
        f1m = f1_score(yte, pred_lbl, average="macro")
        # MAE between prototype vectors of true and predicted codes
        # (reference metric only).
        mae_vec = mean_absolute_error(map_vec(yte), map_vec(pred_lbl))

        dump(pipe, model_path)
        report = {
            "mode": "classification->vector_map",
            "data_used": used_path,
            "use_ss": USE_SS,
            "n_train": len(Xtr), "n_test": len(Xte),
            "acc": float(acc), "f1_macro": float(f1m),
            "mae_after_mapping": float(mae_vec),
            "keys": KEYS
        }
        report_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")

    # Predict for the actual input; the demo entry is labelled with IPT
    # itself so the printed text always matches the prediction.
    demo_lbl = pipe.predict([IPT])
    demo_vec = map_vec(demo_lbl).tolist()
    demo = [
        {"text": text, "label": str(label), "vector": vector, "keys": KEYS}
        for text, label, vector in zip([IPT], demo_lbl, demo_vec)
    ]

    print(json.dumps({
        "saved_model": str(model_path),
        "report_path": str(report_path),
        "report": report,
        "demo_predictions": demo
    }, ensure_ascii=False, indent=2))

    return demo_vec[0]


# Script entry point: classify one sample sentence and print the vector
# returned by IPT2NTL.
if __name__ == "__main__":
    print(IPT2NTL('일이 왜 이렇게 끝이 없지? 화나.'))


{
  "saved_model": "emovec_autorun_cls_out\\emovec_autorun_cls.pkl",
  "report_path": "emovec_autorun_cls_out\\emovec_autorun_cls_report.json",
  "report": {
    "mode": "classification->vector_map",
    "data_used": "C:\\Users\\Admin\\Documents\\GitHub\\emotionAI\\감성대화말뭉치(최종데이터)_Training.json",
    "use_ss": false,
    "n_train": 116759,
    "n_test": 29190,
    "acc": 0.21531346351490235,
    "f1_macro": 0.2148529140589591,
    "mae_after_mapping": 0.09443936279547083,
    "keys": [
      "dopamine",
      "serotonin",
      "norepinephrine",
      "melatonin"
    ]
  },
  "demo_predictions": [
    {
      "text": "일이 왜 이렇게 끝이 없지? 화나.",
      "label": "E10",
      "vector": [
        0.32,
        0.28,
        0.7,
        0.15
      ],
      "keys": [
        "dopamine",
        "serotonin",
        "norepinephrine",
        "melatonin"
      ]
    }
  ]
}
[0.32, 0.28, 0.7, 0.15]


In [18]:
# Emotion store
class emotion_storage():
    """Keeps the previous NTL vector and the history of K values.

    ``NTL2NTLandK`` blends a new NTL vector with the previously recorded one
    and derives a K value from the recorded K history; ``NTL_K_hist`` records
    the final NTL/K of a step for use by the next one.
    """

    N_DIM = 4  # number of NTL components combined per step

    def __init__(self):
        # Previous step's NTL; one zero per component so that the very first
        # call can index all N_DIM components.
        self.NTL_bef = [0.0] * self.N_DIM
        self.K = []  # K values recorded so far, oldest first

    def NTL2NTLandK(self, NTL):
        """Return ``(NTL_final, K)`` for the current step.

        Args:
            NTL: sequence with at least N_DIM numeric components.

        Returns:
            NTL_final: ``NTL[i] + 0.1 * NTL_bef[i]`` for each component.
            K: -70 plus the sum of ``i ** (1 / j)`` over the recorded K
               history, where ``i`` is the position and ``j`` the value.
        """
        K = -70  # initial potential
        w = 0.1  # weight applied to the previous NTL (10%)
        for i, j in enumerate(self.K):
            # i ** (1 / j) is undefined when j == 0 (division by zero) and
            # when i == 0 with a negative exponent (0 to a negative power);
            # such terms are skipped instead of raising.
            # NOTE(review): confirm the intended time-decay formula.
            if j == 0 or (i == 0 and j < 0):
                continue
            K += i ** (1 / j)
        # Pad with zeros in case NTL_bef was set to something shorter.
        previous = list(self.NTL_bef) + [0.0] * self.N_DIM
        NTL_final = [NTL[i] + previous[i] * w for i in range(self.N_DIM)]
        return NTL_final, K

    def NTL_K_hist(self, NTL, K):
        """Record the final NTL and K of the current step."""
        # Store a copy so later mutation of the caller's list does not alter
        # the recorded state.
        self.NTL_bef = list(NTL)
        self.K.append(K)

In [None]:
# Neuron definition
K_remember = 100  # membrane potential above which the input (IPT) is remembered


class neuron():
    """A node of the network that accumulates NTL/K outputs and forwards them.

    Attributes set in ``__init__``:
        K_critical: threshold K must exceed for the neuron to fire.
        code: unique node number.
        type: one of 'exciting', 'inhibiting', 'modulating'.
        connected_node / connected_node_code: neighbours and their codes.
        w_bef / w_curr: previous and current weight.
        K_out / NTL_out: outputs accumulated over all firings.
        K_final / NTL_final: aggregate state computed at the end of main().
    """

    def __init__(self, K_critical, code, type):  # code: unique node number
        self.K_critical = K_critical  # critical membrane potential (for activation)
        self.saved_IPT = [None]
        self.code = code
        self.connected_node = []
        self.connected_node_code = []
        self.w_bef = 0
        self.w_curr = 0
        self.K_out = []
        self.NTL_out = []
        self.IPT_curr = ''

        self.type = type  # types: exciting, inhibiting, modulating

        self.K_final = 0
        self.NTL_final = 0

    def do_work(self, NTL, K):
        """Placeholder for the neuron's computation; returns its inputs.

        When weight updates are implemented they should modify ``self.w_curr``.
        """
        # TODO: real work still needs to be implemented.
        return NTL, K

    def save(self, IPT):
        """Remember ``IPT`` and mark it as the current input."""
        self.saved_IPT.append(IPT)
        self.IPT_curr = IPT

    def make_new_connection(self, node):
        """Connect this neuron and ``node`` in both directions.

        Returns:
            The code of the newly connected node.
        """
        self.connected_node.append(node)
        node.connected_node.append(self)

        self.connected_node_code.append(node.code)
        node.connected_node_code.append(self.code)
        return node.code

    def _pick_unconnected_node(self):
        """Return the first node in the global ``nodes`` list that is neither
        this neuron nor already connected to it, or None if there is none."""
        taken = set(self.connected_node_code)
        taken.add(self.code)
        for candidate in nodes:
            if candidate.code not in taken:
                return candidate
        return None

    def main(self, NTL, K, IPT, node):
        """Process one stimulus and propagate the result to neighbours.

        Args:
            NTL: incoming NTL vector (4 components are read).
            K: incoming potential.
            IPT: the input associated with this stimulus.
            node: the sender; its ``w_bef`` (default 1.0, also used when
                ``node`` is None) scales the recorded K output.
        """
        if K > self.K_critical:
            # Outputs exist only when the neuron fires, so they are recorded
            # inside this branch.
            NTL_out, K_out = self.do_work(NTL, K)
            self.NTL_out.append(NTL_out)
            weight = 1.0 if node is None else getattr(node, "w_bef", 1.0)
            self.K_out.append(K_out * weight)

        if K > K_remember:  # condition for remembering the IPT
            self.save(IPT)
            print('[LOG] IPT Saved')

        if self.w_curr - self.w_bef > 0:
            # A larger weight adds that many new connections.
            for _ in range(int(self.w_curr - self.w_bef)):
                candidate = self._pick_unconnected_node()
                if candidate is None:
                    # Nothing left to connect to; still update the final
                    # state below.
                    break
                self.make_new_connection(candidate)

        # Aggregate over everything recorded so far: total K and the
        # component-wise mean of the NTL outputs.
        self.K_final = sum(self.K_out) if self.K_out else 0
        if self.NTL_out:
            count = len(self.NTL_out)
            self.NTL_final = [
                sum(vec[i] for vec in self.NTL_out) / count for i in range(4)
            ]
        else:
            self.NTL_final = [0] * 4

        self.update_nodes()
        return

    def update_nodes(self):
        """Forward the final state to every connected node.

        The IPT sent on is the last remembered entry after removing the
        first occurrence of the current IPT; nothing is sent when no such
        entry exists.
        """
        # NOTE(review): connections are bidirectional and main() always ends
        # by calling update_nodes(), so propagation can bounce between
        # neighbours; there is no visited-set or depth limit here.
        a_kept = self.saved_IPT.copy()
        if self.IPT_curr in a_kept:
            idx = a_kept.index(self.IPT_curr)
            IPT_out = a_kept[:idx] + a_kept[idx+1:]
        else:
            IPT_out = [x for x in a_kept if x is not None]

        if not IPT_out:
            return  # nothing to propagate

        for i in self.connected_node:
            i.main(self.NTL_final, self.K_final, IPT_out[-1], self)


In [20]:
# Network definition: split node_count nodes across the three neuron types
# according to the ratios below and create them.
w_exciting = 0.33
w_inhibiting = 0.33
w_modulating = 0.34
node_count = 100

# 1) Normalize the ratios (they may not sum to exactly 1)
weights = [
    ('exciting',  w_exciting),
    ('inhibiting', w_inhibiting),
    ('modulating', w_modulating),
]
total_w = sum(w for _, w in weights)
weights = [(t, (w / total_w) if total_w else 0.0) for t, w in weights]

# 2) Truncated allocation, then distribute the remainder by largest fractional part
raw = [node_count * w for _, w in weights]
base = [int(x) for x in raw]
remainder = node_count - sum(base)

# Add 1 to `remainder` entries, largest fractional part first
frac_idx = sorted(range(len(raw)), key=lambda i: (raw[i] - base[i]), reverse=True)
for k in range(remainder):
    base[frac_idx[k]] += 1

# 3) Create the nodes (code is incremented so every node gets a unique one)
nodes = []
node_codes = []
code = 0
for (t, _), cnt in zip(weights, base):
    for _ in range(cnt):
        n = neuron(+30, code, t)   # uses the existing class name as-is
        nodes.append(n)
        node_codes.append(code)
        code += 1

node_count = len(nodes)  # update to the number of nodes actually created


**FLOW**

In [17]:
# Instantiate the classes
# NOTE(review): this rebinds the name `emotion_storage` from the class to an
# instance, so the class can no longer be instantiated by name afterwards;
# the cell that defines the class must be run before this one.
emotion_storage = emotion_storage()

NameError: name 'emotion_storage' is not defined

In [None]:
# Set up the data box (shared dict holding the IPT, NTL and K of the current step)
data_box = dict()

In [None]:
# INPUT: the sentence processed in this step
data_box['IPT'] = 'I hate you'

In [None]:
# Emotion store: map the input sentence to an NTL vector, then combine it
# with the stored previous NTL and compute K.
data_box['NTL'] = IPT2NTL(data_box['IPT'])
data_box['NTL'], data_box['K'] = emotion_storage.NTL2NTLandK(data_box['NTL'])

In [None]:
# Initial neuron stimulation
import random

class dummy_node():
    """Stand-in sender for the first stimulus; it only carries a code."""
    def __init__(self):
        self.code = 0

dummy_node = dummy_node()

# Pick any existing node uniformly at random. The previous index expression
# (randrange(1, node_count+1)+1) ranged over 2..node_count+1, which could run
# past the end of `nodes` and never selected the first two nodes.
start_neuron = random.choice(nodes)
start_neuron.main(data_box['NTL'], data_box['K'], data_box['IPT'], dummy_node)
