In [16]:
# 설치 (가벼운 조합: openai, gradio, plotly만)
!pip -q install --upgrade --force-reinstall "httpx==0.27.2" "openai==1.51.0"
!pip -q install --upgrade "gradio==4.44.0" "plotly==5.24.1"

# 이미 로드된 모듈 제거(Colab 잔존 모듈 충돌 방지)
import sys
for m in ["httpx", "openai"]:
    if m in sys.modules:
        del sys.modules[m]

import httpx, openai
print("httpx:", httpx.__version__)
print("openai:", openai.__version__)
print("✅ install ok")


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dataproc-spark-connect 0.8.3 requires websockets>=14.0, but you have websockets 12.0 which is incompatible.
firebase-admin 6.9.0 requires httpx[http2]==0.28.1, but you have httpx 0.27.2 which is incompatible.
google-adk 1.11.0 requires websockets<16.0.0,>=15.0.1, but you have websockets 12.0 which is incompatible.
google-genai 1.30.0 requires httpx<1.0.0,>=0.28.1, but you have httpx 0.27.2 which is incompatible.
google-genai 1.30.0 requires websockets<15.1.0,>=13.0.0, but you have websockets 12.0 which is incompatible.[0m[31m
[0mhttpx: 0.28.1
openai: 1.51.0
✅ install ok


In [17]:
# 기능 중심 유틸: 공통 상수/간단 함수
import os, re, time, socket, textwrap, traceback
from dataclasses import dataclass
from typing import Optional, List

import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Parameters for stitching a long response together from several segments
# (effectively relaxes the per-call length limit).
LONG_SEGMENT_TOKENS = 1500  # default `segment_tokens` of llm_long
LONG_MAX_SEGMENTS   = 12  # default `max_segments` of llm_long

def md(s: str) -> str:
    """Strip the common leading indentation from ``s`` and trim both ends."""
    dedented = textwrap.dedent(s)
    return dedented.strip()

def find_free_port(start=7860, end=7950) -> int:
    """Return the first port in ``[start, end]`` that nothing on localhost accepts.

    Each candidate is probed with a short TCP connect to 127.0.0.1; a failed
    connect is taken to mean the port is free.

    Args:
        start: First port to probe (inclusive).
        end: Last port to probe (inclusive).

    Returns:
        The first port whose connect attempt did not succeed.

    Raises:
        OSError: If every port in the range accepted a connection, could not
            be probed, or the range is empty.
    """
    for port in range(start, end + 1):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.settimeout(0.1)
                if probe.connect_ex(("127.0.0.1", port)) != 0:
                    return port
        except (OSError, OverflowError):
            # Socket-level failure or an out-of-range port number: skip this
            # candidate. Anything else (e.g. KeyboardInterrupt) propagates.
            continue
    raise OSError("No free port")


In [18]:
# 생성은 OpenAI API만 사용. httpx Client를 직접 주입해서 proxies 충돌 경로 차단.
import httpx
from openai import OpenAI

# Single httpx client shared by every OpenAI client built in this notebook
# (it is passed as `http_client` in _openai_client).
_HTTPX = httpx.Client(timeout=httpx.Timeout(60.0, connect=30.0, read=60.0))

def _openai_client() -> OpenAI:
    """Build an OpenAI client that uses the shared ``_HTTPX`` transport.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty in the environment.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        # Key point: inject the pre-built httpx client.
        return OpenAI(http_client=_HTTPX)
    raise RuntimeError("OPENAI_API_KEY가 없습니다. 상단 UI에서 저장하세요.")

def _openai_chat(system, user, model, temperature, top_p, max_tokens):
    """Send one system+user chat completion request and return the reply text.

    The sampling arguments are coerced to ``float``/``int`` before the call.
    Returns the stripped content of the first choice, or ``""`` when the
    content is empty or ``None``.
    """
    api = _openai_client()
    conversation = [
        {"role":"system","content":system},
        {"role":"user","content":user},
    ]
    completion = api.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=float(temperature),
        top_p=float(top_p),
        max_tokens=int(max_tokens),
    )
    reply = completion.choices[0].message.content
    if not reply:
        return ""
    return reply.strip()

def llm_once(system, user, model_id, temperature, top_p, max_tokens):
    """Run a single chat completion; thin wrapper that forwards to ``_openai_chat``."""
    return _openai_chat(
        system=system,
        user=user,
        model=model_id,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )

def llm_long(system, user, model_id, temperature=0.7, top_p=0.9,
             segment_tokens=LONG_SEGMENT_TOKENS, max_segments=LONG_MAX_SEGMENTS, stop_marker="[END]"):
    """Generate a long answer by chaining several bounded completions.

    Every call made through ``llm_once`` is stateless (system + one user
    message), so each continuation prompt re-sends the original request
    together with the text produced so far; otherwise the model would have
    nothing to continue from.

    Args:
        system: System prompt used for every segment.
        user: The user request.
        model_id: Model identifier forwarded to ``llm_once``.
        temperature: Sampling temperature forwarded to ``llm_once``.
        top_p: Nucleus sampling value forwarded to ``llm_once``.
        segment_tokens: ``max_tokens`` for each individual call.
        max_segments: Upper bound on the number of calls.
        stop_marker: Text the model is asked to emit when finished; it is
            removed from the returned string.

    Returns:
        The segments joined with newlines, with ``stop_marker`` removed and
        surrounding whitespace stripped.
    """
    segments = []
    prompt = f"{user}\n\n(문서가 길면 여러 번 나눠 작성. 완전히 끝나면 마지막 줄에 {stop_marker})"
    for _ in range(max_segments):
        seg = (llm_once(system, prompt, model_id, temperature, top_p, segment_tokens) or "").strip()
        if not seg:
            # An empty reply gives nothing to continue from; stop instead of
            # spending the remaining calls.
            break
        segments.append(seg)
        if stop_marker in seg:
            break
        written = "\n".join(segments)
        prompt = (
            f"{user}\n\n"
            f"[지금까지 작성된 내용]\n{written}\n[지금까지 작성된 내용 끝]\n\n"
            f"이어서 계속. 반복 없이 자연스럽게 연결. 끝나면 {stop_marker}."
        )
    return "\n".join(segments).replace(stop_marker, "").strip()

# Cell-level confirmation that the LLM helper functions above are defined.
print("✅ LLM 엔진 준비 완료 (OpenAI + httpx 주입)")


✅ LLM 엔진 준비 완료 (OpenAI + httpx 주입)


In [19]:
# A) Plain GPT — paragraph-only answer (the comparison baseline)
BASELINE_SYSTEM = """You are a helpful assistant.
Write the answer in Korean as plain paragraphs only.
Do NOT use headings, lists, tables, math, code blocks, KPIs, or IF–THEN rules.
No hard limit on length; write comprehensively.
"""
# User-message template for the baseline; `{q}` is filled via str.format.
BASELINE_USER_TPL = "Question:\n{q}\n\n(Write only plain paragraphs. No explicit structure.)"

# B) Deleuze operator — forces six sections, including a table and a formula.
# This string is used as-is (it is not passed through str.format), so the
# literal braces in the formula line are safe.
DELEUZE_SYSTEM = md("""
You are a Deleuzian policy designer.
Always output ALL the following SECTIONS in Korean with the exact headings:

### 문제-이데아
- 목표, 제약, 결정변수, 불확실성(특이점)을 5~9줄

### 프론티어(표)
- 보수/균형/진보 3대안 표 1개 (열: 대안, 핵심수단, 기대효과, Trade-off, 재정설명)
- Markdown 표로 작성

### 대안 상세(3개)
- 각 대안을 6~10줄: 핵심 레버 3개, 기대효과, 위험/완화, 실행 로드맵

### 스위칭 룰(IF–THEN)
- 3~7개 규칙, 각 1줄. 예) IF PG>25% THEN H+=10 AND clawback-=5

### KPI/피드백
- KPI 3~6개(정의+측정), 분기별 조정 의사코드 5~9줄 (```text 블록 사용)

### 수식/제약
- 반드시 ```math 블록
- B = E_eff + ΔVAT(τ_vat) + ΔTop(τ_top) + clawback(T) - {Cash(H) + W_sub(W) + Training + Admin}
- 제약: B≥0, Admin≤2%, EMTR≤τ*
""")

# User-message template for the Deleuze operator; `{q}` and `{ctx}` are filled
# via str.format. The last instruction must agree with llm_long, which asks
# the model to finish with [END] and stops generating once it sees it; telling
# the model never to write [END] would contradict that and trigger needless
# extra segment calls.
DELEUZE_USER_TPL = md("""
질문:
{q}

근거 텍스트(선택):
{ctx}

지시:
- 위 6개 섹션을 반드시 모두 출력한다.
- 프론티어는 마크다운 표, 수식은 ```math 블록으로 작성한다.
- [END]는 모든 섹션을 끝낸 뒤 마지막 줄에만 쓴다.
""")

# Headings a structured answer must contain. They are matched as substrings,
# so "### 스위칭 룰" also matches the longer "### 스위칭 룰(IF–THEN)" heading.
REQ_HEADS = ["### 문제-이데아","### 프론티어(표)","### 대안 상세(3개)","### 스위칭 룰","### KPI/피드백","### 수식/제약"]

def is_deleuze_structured(text: str) -> bool:
    """Tell whether ``text`` has every required heading, a table and a math block.

    A table is assumed when both ``|`` and ``---`` occur anywhere in the text;
    a math block when the text contains a ```` ```math ```` fence opener.
    """
    for heading in REQ_HEADS:
        if heading not in text:
            return False
    if "|" not in text or "---" not in text:
        return False
    return "```math" in text

def repair_deleuze(text, q, ctx, model, temperature, top_p):
    """Ask the model to add whatever the structured draft is missing.

    The editor prompt requests ONLY the missing parts, so the reply is often a
    fragment rather than a full document. The reply is therefore accepted
    either when it is complete by itself or when appending it to the draft
    yields a complete document.

    Args:
        text: Draft answer to check and repair.
        q: Original question, included in the editor prompt.
        ctx: Optional grounding text; "없음" is sent when it is empty.
        model, temperature, top_p: Forwarded to ``llm_long``.

    Returns:
        ``text`` unchanged when it already passes ``is_deleuze_structured``;
        otherwise the repaired document if one passes the check, else ``text``.
    """
    if is_deleuze_structured(text):
        return text

    missing = [h for h in REQ_HEADS if h not in text]
    # All headings may be present while the table or the math block is not;
    # name those explicitly so the "missing" line is never empty.
    if not ("|" in text and "---" in text):
        missing.append("프론티어 마크다운 표")
    if "```math" not in text:
        missing.append("```math 수식 블록")

    editor_system = "You are a strict editor. Fill ONLY the missing sections exactly as specified."
    editor_user = md(f"""
누락된 섹션: {", ".join(missing)}
질문: {q}
근거: {ctx or "없음"}

[초안 시작]
{text}
[초안 끝]

규칙:
- 기존 텍스트는 수정하지 말고, 누락된 섹션만 추가.
- 프론티어는 마크다운 표, 수식은 ```math 블록.
""")
    fixed = llm_long(editor_system, editor_user, model, temperature, top_p,
                     segment_tokens=1200, max_segments=4)
    if is_deleuze_structured(fixed):
        return fixed
    # The editor obeyed "only the missing sections": stitch them onto the draft.
    merged = f"{text.rstrip()}\n\n{fixed}".strip()
    return merged if is_deleuze_structured(merged) else text


In [20]:
# Markdown table → DataFrame
_MD_SEPARATOR_CELL = re.compile(r"^:?-+:?$")


def _split_md_row(line: str) -> List[str]:
    """Split one Markdown table row into whitespace-stripped cell strings."""
    stripped = line.strip()
    if stripped.startswith("|"):
        stripped = stripped[1:]
    if stripped.endswith("|"):
        stripped = stripped[:-1]
    return [cell.strip() for cell in stripped.split("|")]


def _is_md_separator(line: str) -> bool:
    """True when every cell of ``line`` is a delimiter such as ``---`` or ``:--:``."""
    if "|" not in line:
        return False
    return all(_MD_SEPARATOR_CELL.match(cell) for cell in _split_md_row(line))


def parse_markdown_table(md_text: str) -> Optional[pd.DataFrame]:
    """Extract the first Markdown table in ``md_text`` that has usable rows.

    A table is a header line containing ``|``, followed by a delimiter row
    (alignment colons allowed, outer pipes optional), followed by consecutive
    lines containing ``|``. Body rows whose cell count differs from the header
    are skipped.

    Args:
        md_text: Text that may contain a Markdown table.

    Returns:
        A DataFrame of string cells with the header as columns, or ``None``
        when no table with at least one matching body row is found.
    """
    if not md_text:
        return None
    lines = md_text.splitlines()
    for i in range(1, len(lines)):
        if not _is_md_separator(lines[i]):
            continue
        header_line = lines[i - 1]
        if "|" not in header_line:
            # A delimiter-looking line without a header row above it is not a table.
            continue
        hdr = _split_md_row(header_line)
        body = []
        for row in lines[i + 1:]:
            if "|" not in row:
                break  # first non-table line ends the block
            cols = _split_md_row(row)
            if len(cols) == len(hdr):
                body.append(cols)
        if body:
            return pd.DataFrame(body, columns=hdr)
    return None

# 옵션 라벨 표준화
def _normalize_option_names(series: pd.Series) -> pd.Series:
    def map_name(s: str) -> str:
        t = str(s); tl = t.lower()
        if any(k in t for k in ["보수","보수형"]) or "conservative" in tl: return "보수"
        if any(k in t for k in ["균형","중도"])   or "balanced" in tl or "moderate" in tl: return "균형"
        if any(k in t for k in ["진보","공격","확대","강화"]) or "progressive" in tl or "aggressive" in tl: return "진보"
        return t
    return series.apply(map_name)

# Keyword lists for text-based scoring: positive / negative cues for the
# fiscal axis and for the effect (outcome) axis.
# NOTE(review): _keyword_score matches by substring, so overlapping entries
# count on both sides (e.g. "효율" is contained in "비효율") — confirm this is intended.
_FISCAL_POS = ["세수","세입","흑자","절감","지출 효율","클로백","재원","기금","균형재정","건전성"]
_FISCAL_NEG = ["적자","지출 확대","보조금","부채","감세","재정 악화","적자전환","재정 부담"]
_EFFECT_POS = ["효율","효과","성과","감축","달성","목표 경로","가격신호","타게팅","지표","모니터링","피드백","정확"]
_EFFECT_NEG = ["의존","지연","부작용","비효율","왜곡","과소지원","집행 지체"]
# A number (optional decimals) followed by one of the listed unit tokens.
_NUM_PAT  = re.compile(r"(\d+(?:\.\d+)?)\s*(%|톤|MW|GW|조원|억원|만원|배|p|퍼센트)")

def _keyword_score(text: str, pos_words: List[str], neg_words: List[str]) -> float:
    s = str(text)
    pos = sum(1 for w in pos_words if w in s)
    neg = sum(1 for w in neg_words if w in s)
    return float(pos - neg)

def _magnitude_boost(text: str) -> float:
    """Return 0.8 per ``_NUM_PAT`` match found in ``text``, capped at 5.0."""
    hit_count = sum(1 for _ in _NUM_PAT.finditer(str(text)))
    return min(5.0, 0.8 * hit_count)

def _normalize_minmax(arr: np.ndarray) -> np.ndarray:
    lo, hi = float(np.min(arr)), float(np.max(arr))
    if hi - lo < 1e-8:
        return np.full_like(arr, 50.0)
    return 100.0 * (arr - lo) / (hi - lo)

def frontier_to_numeric(df: pd.DataFrame) -> pd.DataFrame:
    """Derive text-based numeric indices and chart parameters from a frontier table.

    Steps: canonicalize column names, normalize the option labels, score each
    row's text on a fiscal and an effect axis (keyword score plus magnitude
    boost, min-max scaled to 0–100), derive ``param_*`` columns, and compute a
    weighted overall score.

    Args:
        df: Frontier table with string cells; the input is not modified.

    Returns:
        A copy of ``df`` with canonical column names plus the index,
        ``param_*`` and "종합점수" columns.
    """
    df2 = df.copy()

    def canonical_name(col: str):
        """Return the canonical column name for ``col``, or None if unrecognized."""
        lowered = col.lower()
        # "B" counts only as a stand-alone letter (the budget-balance symbol);
        # a bare substring test would also catch headers such as "Benefit".
        if (any(k in col for k in ("예산", "재정")) or "budget" in lowered
                or re.search(r"(?<![A-Za-z])B(?![A-Za-z])", col)):
            return "재정설명"
        if "trade" in lowered:
            return "Trade-off"
        if "기대효" in col:
            return "기대효과"
        if "핵심" in col:
            return "핵심수단"
        if "대안" in col:
            return "대안"
        return None

    # Rename positionally and keep every resulting name unique: two headers
    # mapping to the same canonical name would otherwise create duplicate
    # columns, which breaks the per-column string operations below.
    new_cols, used = [], set()
    for col in df2.columns:
        name = str(col)
        target = canonical_name(name)
        if target is not None and target not in used:
            name = target
        base, suffix = name, 2
        while name in used:
            name = f"{base}_{suffix}"
            suffix += 1
        used.add(name)
        new_cols.append(name)
    if new_cols and "대안" not in used:
        # No recognizable option column: treat the first column as the option label.
        new_cols[0] = "대안"
    df2.columns = new_cols

    df2["대안"] = _normalize_option_names(df2["대안"])

    text_cols = [k for k in ("대안","핵심수단","기대효과","Trade-off","재정설명") if k in df2.columns]

    fiscal_raw, effect_raw = [], []
    for _, row in df2.iterrows():
        txt = " ".join(str(row[k]) for k in text_cols)
        boost = _magnitude_boost(txt)  # the same boost feeds both axes
        fiscal_raw.append(_keyword_score(txt, _FISCAL_POS, _FISCAL_NEG) + boost)
        effect_raw.append(_keyword_score(txt, _EFFECT_POS, _EFFECT_NEG) + boost)

    fiscal_arr = np.array(fiscal_raw, dtype=float)
    effect_arr = np.array(effect_raw, dtype=float)
    if fiscal_arr.size:  # a table without rows has nothing to normalize
        fiscal_arr = _normalize_minmax(fiscal_arr)
        effect_arr = _normalize_minmax(effect_arr)

    df2["재정 기여 지수(텍스트 기반)"] = np.round(fiscal_arr, 1)
    df2["성과 지수(텍스트 기반)"]   = np.round(effect_arr, 1)

    # Illustrative parameters (proportional to the length of the 핵심수단 text).
    if "핵심수단" in df2.columns:
        ln = df2["핵심수단"].astype(str).str.len()
        df2["param_세율/가격신호"]   = (ln/100).clip(0, 1.0).round(2)
        df2["param_정밀환급/클로백"] = (ln/140).clip(0, 1.2).round(2)
        df2["param_훈련/전환지원"]   = (ln/60).clip(0, 6).round(2)
    else:
        n = len(df2)
        df2["param_세율/가격신호"]   = np.linspace(0.2, 0.8, n).round(2)
        df2["param_정밀환급/클로백"] = np.linspace(0.3, 0.9, n).round(2)
        df2["param_훈련/전환지원"]   = np.linspace(2, 6,   n).round(2)

    # Overall score (for ranking): effect (0.6) + fiscal (0.4).
    df2["종합점수"] = np.round(0.6*df2["성과 지수(텍스트 기반)"] + 0.4*df2["재정 기여 지수(텍스트 기반)"], 1)
    return df2

def build_plots(df_num: pd.DataFrame, seed: Optional[int] = 42):
    """Build the three Plotly figures shown next to the structured answer.

    Args:
        df_num: Output of ``frontier_to_numeric``; must contain "대안", the two
            text-based index columns and any ``param_*`` columns.
        seed: Seed for the noise in the KPI illustration. A fixed default makes
            the chart identical across runs for identical input; pass ``None``
            for fresh randomness on every call.

    Returns:
        ``(fig1, fig2, fig3)``: frontier scatter, grouped parameter bars, and
        the illustrative KPI time series.
    """
    xname, yname = "성과 지수(텍스트 기반)", "재정 기여 지수(텍스트 기반)"

    # Figure 1: one labelled marker per option on the effect/fiscal plane.
    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(
        x=df_num[xname], y=df_num[yname],
        mode="markers+text",
        text=df_num["대안"].astype(str),
        textposition="top center",
        name="정책안"
    ))
    fig1.update_layout(
        title="프론티어 — 성과 vs 재정(표 텍스트 근거 정규화)",
        xaxis_title=f"{xname} (↑좋음)", yaxis_title=f"{yname} (↑재정건전성 기여)",
        template="plotly_white", height=360
    )

    # Figure 2: one bar group per option, one bar per param_* column.
    fig2 = go.Figure()
    param_cols = [c for c in df_num.columns if c.startswith("param_")]
    for col in param_cols:
        fig2.add_trace(go.Bar(x=df_num["대안"], y=df_num[col], name=col.replace("param_","")))
    fig2.update_layout(title="핵심 파라미터 비교(텍스트 유도)", barmode="group",
                       template="plotly_white", height=360)

    # Figure 3: synthetic six-period series anchored on the mean indices,
    # with a small amount of seeded Gaussian noise.
    rng = np.random.default_rng(seed)
    t = np.arange(1, 7)
    base_eff = float(np.nanmean(df_num[xname].values)) if len(df_num) else 50.0
    base_fsc = float(np.nanmean(df_num[yname].values)) if len(df_num) else 50.0
    pg = (28 - 0.10*(base_eff-50) - 1.0*(t-1) + rng.standard_normal(len(t))*0.25).clip(12,30)
    B  = (-0.2 + 0.02*(base_fsc-50) + 0.4*np.sin(t/1.2) + rng.standard_normal(len(t))*0.1).round(2)

    fig3 = go.Figure()
    fig3.add_trace(go.Scatter(x=t, y=pg, mode="lines+markers", name="성과계열 KPI 예시"))
    fig3.add_trace(go.Scatter(x=t, y=B,  mode="lines+markers", name="재정계열 KPI 예시"))
    fig3.update_layout(title="KPI 시뮬레이션(표 텍스트 기저선)", xaxis_title="기간",
                       template="plotly_white", height=360)
    return fig1, fig2, fig3


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def structure_score(text: str) -> float:
    """Score structural completeness in [0, 1], rounded to three decimals.

    Weights: 0.6 for the share of ``REQ_HEADS`` present, 0.2 for a table
    (both ``|`` and ``---`` occur), 0.2 for a ```` ```math ```` block.
    """
    found = [heading for heading in REQ_HEADS if heading in text]
    heading_ratio = len(found) / len(REQ_HEADS)
    table_flag = 1.0 if ("|" in text and "---" in text) else 0.0
    math_flag = 1.0 if "```math" in text else 0.0
    return round(0.6*heading_ratio + 0.2*table_flag + 0.2*math_flag, 3)

def novelty_score(candidate: str, baseline: str) -> float:
    """Return 1 − TF-IDF cosine similarity between ``candidate`` and ``baseline``.

    Args:
        candidate: Text being evaluated.
        baseline: Reference text.

    Returns:
        A value in [0, 1] rounded to three decimals; 0.0 when neither text
        yields any token, since no novelty can be measured.
    """
    # max_df must stay at 1.0: with only two documents, any value below 1.0
    # drops every term that occurs in both texts, which forces the similarity
    # to 0 (novelty always 1.0) and raises on identical texts.
    vec = TfidfVectorizer(min_df=1, max_df=1.0, ngram_range=(1,2))
    try:
        X = vec.fit_transform([candidate or "", baseline or ""])
    except ValueError:
        # Empty vocabulary: both texts are empty or contain no usable tokens.
        return 0.0
    sim = float(cosine_similarity(X[0], X[1])[0,0])
    return round(min(1.0, max(0.0, 1.0 - sim)), 3)

def length_score(text: str) -> float:
    """Score the character length of ``text``.

    Under 400 characters scores 0.4 and over 8000 scores 0.7; in between the
    score rises linearly from 0.6 at 400 characters to 1.0 at 2500 and stays
    there.
    """
    size = len(text)
    if size > 8000:
        return 0.7
    if size < 400:
        return 0.4
    capped = min(2500, size)
    x = (capped - 400) / (2500-400)
    clamped = max(0.0, min(1.0, x))
    return round(0.6 + 0.4*clamped, 3)

@dataclass
class RunConfig:
    """Generation settings that ``run_pipeline`` forwards to both answer generators."""
    model_id: str  # model identifier sent to the API
    temperature: float  # sampling temperature
    top_p: float  # nucleus-sampling value

def baseline_answer(q, model, temperature, top_p) -> str:
    """Generate the plain-paragraph baseline answer for question ``q``."""
    return llm_long(
        BASELINE_SYSTEM,
        BASELINE_USER_TPL.format(q=q),
        model,
        temperature,
        top_p,
        segment_tokens=LONG_SEGMENT_TOKENS,
        max_segments=LONG_MAX_SEGMENTS,
    )

def deleuze_answer(q, ctx, model, temperature, top_p) -> str:
    """Generate the six-section structured answer; an empty ``ctx`` is sent as "없음"."""
    return llm_long(
        DELEUZE_SYSTEM,
        DELEUZE_USER_TPL.format(q=q, ctx=ctx or "없음"),
        model,
        temperature,
        top_p,
        segment_tokens=LONG_SEGMENT_TOKENS,
        max_segments=LONG_MAX_SEGMENTS,
    )

def run_pipeline(q: str, ctx: str, cfg: RunConfig):
    """Run the full A/B comparison for one question.

    Generates the baseline and the structured answer, repairs the latter if
    needed, turns its frontier table into indices and figures, ranks the
    options and computes the summary scores.

    Returns:
        ``(baseline, cand, fig1, fig2, fig3, rank_df, meta)``.
    """
    started = time.time()

    # A) Plain GPT (paragraphs)
    baseline = baseline_answer(q, cfg.model_id, cfg.temperature, cfg.top_p)

    # B) Deleuze operator (six forced sections), then fill in anything missing
    draft = deleuze_answer(q, ctx, cfg.model_id, cfg.temperature, cfg.top_p)
    cand = repair_deleuze(draft, q, ctx, cfg.model_id, cfg.temperature, cfg.top_p)

    # Parse the frontier table → text-based indices → figures
    parsed = parse_markdown_table(cand)
    auto_ok = parsed is not None
    if auto_ok:
        df_front = parsed
    else:
        # No table found: minimal fallback (text structure only, no made-up numbers)
        df_front = pd.DataFrame({
            "대안":     ["보수","균형","진보"],
            "핵심수단": ["세율 완만·정밀 환급·집행 효율", "가격신호+환급 균형·중간 강도", "강한 가격신호·대규모 전환지원"],
            "기대효과": ["점진 개선", "목표 경로 근접", "고강도 성과"],
            "Trade-off":["속도·불평등 리스크 낮음", "중간 부담/성과", "재정·수용성 부담"],
            "재정설명":["재정건전성 우선", "균형적 재원 구성", "전환지원 중심 재원"]
        })
    df_num = frontier_to_numeric(df_front)
    fig1, fig2, fig3 = build_plots(df_num)

    # Ranking table: best overall score first, with a 1-based "rank" column
    ranking_cols = ["대안","성과 지수(텍스트 기반)","재정 기여 지수(텍스트 기반)","종합점수"]
    rank_df = (
        df_num[ranking_cols]
        .copy()
        .sort_values("종합점수", ascending=False)
        .reset_index(drop=True)
    )
    rank_df.index = rank_df.index + 1
    rank_df = rank_df.rename_axis("rank").reset_index()

    # Simple scores (all computed with open-source tooling)
    s_struct = structure_score(cand)
    s_novel  = novelty_score(cand, baseline)
    s_len    = length_score(cand)
    final_score = round(0.55*s_struct + 0.35*s_novel + 0.10*s_len, 3)

    scores = {"structure": s_struct, "novelty": s_novel, "length": s_len, "final": final_score}
    meta = {
        "runtime_sec": round(time.time()-started,2),
        "frontier_auto": bool(auto_ok),
        "oss_modules": ["gradio","plotly","pandas","scikit-learn","regex"],
        "scores": scores
    }
    return baseline, cand, fig1, fig2, fig3, rank_df, meta


In [22]:
import gradio as gr

# Best-effort shutdown of Gradio apps left over from earlier runs of this
# cell. Failures are ignored, but only ordinary exceptions: a bare `except`
# would also swallow KeyboardInterrupt / SystemExit.
try:
    gr.close_all()
except Exception:
    pass

# Port the app below is launched on.
PORT = find_free_port()

# UI layout: API-key box, question/context inputs with advanced settings,
# then baseline (left) and structured answer with charts and ranking (right).
with gr.Blocks(title="A: 순정 GPT vs B: GPT+들뢰즈 — 구조·프론티어·스위칭·KPI") as demo:
    gr.Markdown(md("""
    ## 🔍 비교 데모 — **A: 순정 GPT(문단)** vs **B: GPT+들뢰즈(표·수식·그래프·랭킹)**
    - 생성은 OpenAI API. 표 파싱/지수화/그래프/랭킹/평가는 **모두 오픈소스**로 수행합니다.
    - 프론티어 축(성과/재정)은 **표의 텍스트 근거(키워드·숫자)** 를 정규화해 산출합니다(임의 수치 사용 없음).
    """))

    with gr.Accordion("🔑 OpenAI API 키", open=True):
        key = gr.Textbox(type="password", placeholder="sk-...")
        out = gr.Markdown()
        def set_key(k):
            """Store the key in the process environment; reject blank input."""
            cleaned = (k or "").strip()
            if not cleaned:
                # Do not report success (or overwrite a working key) for blank input.
                return "⚠️ 키가 비어 있습니다. 다시 입력하세요."
            os.environ["OPENAI_API_KEY"] = cleaned
            return "✅ 저장 완료"
        gr.Button("키 저장").click(set_key, inputs=[key], outputs=[out])

    with gr.Row():
        with gr.Column(scale=1):
            q   = gr.Textbox(label="질문", lines=6,
                             placeholder="예) 탄소세와 전환지원으로 2030년 –40% 감축 설계…")
            ctx = gr.Textbox(label="근거 텍스트(선택)", lines=6,
                             placeholder="프론티어 표 컬럼, KPI, 임계치(θ), 조정폭(δ) 등 힌트를 넣으면 구조가 더 정밀해집니다.")
            with gr.Accordion("고급설정", open=False):
                model = gr.Textbox(value="gpt-4o-mini", label="OpenAI Model ID")
                temp  = gr.Slider(0.0, 1.2, value=0.7, step=0.05, label="temperature")
                top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p")
            run_btn = gr.Button("실행", variant="primary")
            meta_box = gr.Markdown()

    with gr.Row():
        with gr.Column(scale=1):
            base_box = gr.Markdown()  # Markdown keeps the paragraphs readable
        with gr.Column(scale=2):
            deleuze_box = gr.Markdown()  # Markdown so the table/formula render
            with gr.Row():
                fig1 = gr.Plot(label="프론티어 — 성과 vs 재정(표 텍스트 근거)")
                fig2 = gr.Plot(label="핵심 파라미터 비교(텍스트 유도)")
            fig3 = gr.Plot(label="KPI 시뮬레이션(표 텍스트 기저선)")
            rank_tbl = gr.Dataframe(label="대안 랭킹(텍스트 기반 지수)", wrap=True, interactive=False)

    def on_run(question, context, model_id, temperature, top_p):
        """Click handler: run the pipeline and return values for the seven outputs.

        Output order: baseline markdown, structured markdown, three figures,
        ranking table, meta summary. Errors are rendered into the structured
        answer box instead of being raised.
        """
        if not (question or "").strip():
            # Nothing to ask: skip the API calls entirely.
            return "", "⚠️ 질문을 입력하세요.", go.Figure(), go.Figure(), go.Figure(), pd.DataFrame(), ""
        try:
            cfg = RunConfig(model_id=model_id, temperature=float(temperature), top_p=float(top_p))
            b, d, f1, f2, f3, rank_df, meta = run_pipeline(question, context, cfg)
            sc = meta["scores"]
            chip = (
                f"**Runtime**: {meta['runtime_sec']}s · "
                f"**Frontier**: {'표 자동파싱✅' if meta['frontier_auto'] else '표 부재→텍스트 폴백⚠️'}  \n"
                f"**축 정의**: 성과/재정 지수는 **표 텍스트 근거**로 정규화 산출  \n"
                f"**OSS**: gradio, plotly, pandas, scikit-learn, regex  \n"
                f"**(참고)** 구조 {sc['structure']}, 창의 {sc['novelty']}, 길이 {sc['length']}, 최종 {sc['final']}"
            )
            # Show A and B as Markdown (A: paragraphs, B: sections/table/formula)
            base_md    = md(b)
            deleuze_md = md(d)
            return base_md, deleuze_md, f1, f2, f3, rank_df, chip
        except Exception as e:
            tb = "```\n"+traceback.format_exc()+"\n```"
            return "❌ 오류", f"❌ 오류: {e}\n{tb}", go.Figure(), go.Figure(), go.Figure(), pd.DataFrame(), ""

    run_btn.click(on_run, inputs=[q, ctx, model, temp, top_p],
                  outputs=[base_box, deleuze_box, fig1, fig2, fig3, rank_tbl, meta_box])

# Enable request queueing (max_size=32), then launch on all interfaces at the
# chosen port with a public share link and error display turned on.
demo.queue(max_size=32).launch(share=True, server_name="0.0.0.0", server_port=PORT, show_error=True)
print("🔗 Port:", PORT)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()



IMPORTANT: You are using gradio version 4.44.0, however version 4.44.1 is available, please upgrade. 
--------



Running on public URL: https://e5ed36aa772b4aa1d8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


🔗 Port: 7862
