In [2]:
import os, json, mimetypes, argparse
import cv2, numpy as np
from PIL import Image, ImageOps
from google import genai
from google.genai import types
from typing import Dict, Tuple, Union
from copy import deepcopy


TARGET_SHORT = 768

def _load_exif_bgr(path: str):
    pil = Image.open(path)
    pil = ImageOps.exif_transpose(pil).convert("RGB")
    return cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)

def _resize_short(bgr, short=TARGET_SHORT):
    h, w = bgr.shape[:2]
    s = min(h, w)
    if s == short: return bgr
    scale = short / float(s)
    new = (int(round(w * scale)), int(round(h * scale)))
    interp = cv2.INTER_AREA if scale < 1.0 else cv2.INTER_CUBIC
    return cv2.resize(bgr, new, interpolation=interp)

def _wb_grayworld(bgr, strength=0.5):
    x = bgr.astype(np.float32)
    means = x.reshape(-1,3).mean(0) + 1e-6
    g = means.mean()
    gains = np.clip(g/means, 0.8, 1.2)
    gains = (1-strength)*1.0 + strength*gains
    x *= gains
    return np.clip(x, 0, 255).astype(np.uint8)

def _clahe_light(bgr, clip=1.8, tiles=8):
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    l = cv2.createCLAHE(clipLimit=clip, tileGridSize=(tiles, tiles)).apply(l)
    return cv2.cvtColor(cv2.merge((l,a,b)), cv2.COLOR_LAB2BGR)

def _skin_mask(bgr):
    ycrcb = cv2.cvtColor(bgr, cv2.COLOR_BGR2YCrCb)
    Y, Cr, Cb = cv2.split(ycrcb)
    m1 = (Cr >= 135) & (Cr <= 180) & (Cb >= 85) & (Cb <= 135) & (Y >= 40) & (Y <= 240)
    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv)
    m2 = (H <= 25) & (S >= 30) & (S <= 180) & (V >= 60)
    m = (m1 & m2).astype(np.uint8) * 255
    k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
    m = cv2.morphologyEx(m, cv2.MORPH_OPEN,  k, iterations=1)
    m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, k, iterations=2)
    num, labels, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8)
    if num > 1:
        largest = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        m2 = np.zeros_like(m)
        m2[labels == largest] = 255
        m = m2
    return m

def preprocess_with_mask(bgr, bg_gray=220):
    bgr = _resize_short(bgr)
    bgr = _wb_grayworld(bgr, 0.5)
    bgr = _clahe_light(bgr, 1.8, 8)
    mask = _skin_mask(bgr)
    bg = np.full_like(bgr, (bg_gray, bg_gray, bg_gray), np.uint8)
    out = np.where(mask[..., None] == 255, bgr, bg)
    return out


MODEL = "gemini-2.0-flash"
PROMPT_TEXT = (
    "전처리된 얼굴 피부 이미지를 분석하여 JSON만 반환하라.\n"
    "평가 항목: acne(여드름), redness(홍조), melasma_darkspots(잡티).\n"
    "각 항목은 다음 스키마로 제공하라:\n"
    "{acne:{score:number,reason:string}, redness:{score:number,reason:string}, "
    "melasma_darkspots:{score:number,reason:string}}\n"
    "score는 0~100 범위의 실수이며, 0은 없음·매우 양호, 100은 매우 심함을 의미한다."
)

def analyze_with_gemini(image_path, api_key):
    client = genai.Client(api_key=api_key)
    mime = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    with open(image_path, "rb") as f: img_bytes = f.read()
    content = types.Content(
        role="user",
        parts=[
            types.Part(text=PROMPT_TEXT),
            types.Part(inline_data=types.Blob(mime_type=mime, data=img_bytes)),
        ],
    )
    resp = client.models.generate_content(
        model=MODEL,
        contents=[content],
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            temperature=0.2,
            system_instruction="너는 피부 분석 전문가다. 반드시 JSON만 반환하라."
        ),
    )
    txt = (resp.text or "").strip()
    start, end = txt.find("{"), txt.rfind("}")
    return json.loads(txt[start:end+1]) if start != -1 else {}


MAP = {
    "q1": {"없어요":0,"T존 일부(이마 혹은 코)":1,"T존 전체(이마와 코)":2,"얼굴 전체":3},
    "q2": {"전혀 안 보여요":0,"지금은 없지만 가끔 보여요":1,"부분적으로 붉게 보여요":2,"전체적으로 붉게 보여요":3},
    "q3": {"없어요":0,"U존 일부(볼 혹은 턱)":1,"U존 전체(볼과 턱)":2,"얼굴 전체":3},
    "q4": {"전혀 생기지 않아요":0,"표정을 지을 때만 생겨요":1,"표정 짓지 않아도 약간 있어요":2,"표정 짓지 않아도 많이 있어요":3},
    "q5": {"주름이 없어요":0,"잔주름이에요":1,"깊은 주름이에요":2,"잔주름과 깊은 주름 다 있어요":3},
    "q6": {"전혀 생기지 않아요":0,"미소 지을 때만 약간 생겨요":1,"미소 지을 때 진하게 생겨요":2,"미소 짓지 않아도 생겨요":3},
    "q7": {"전혀 안 보여요":0,"거의 안 보여요":1,"약간 눈에 띄어요":2,"곳곳에 많이 보여요":3},
    "q8": {"주름이 없어요":0,"잔주름이에요":1,"깊은 주름이에요":2,"잔주름과 깊은 주름 다 있어요":3},
    "q9": {"외출 전보다 윤기가 없어요":0,"외출 전과 변함이 없어요":1,"약간 번들거리고 윤기가 있어요":2,"많이 번들거리고 기름져요":3}
}

def _to_score(q, v): return MAP[q].get(v, 0)
def _to_0_3(x): return round(max(0, min(100, float(x))) / 100 * 3, 2)

def assess_skin_type(**a):
    s = {k:_to_score(k,v) for k,v in a.items()}
    oil = round(0.6*s["q1"] + 0.4*s["q9"],2)
    dry = float(s["q3"]); sens=float(s["q2"])
    wrinkle=round(0.4*s["q4"] + 0.6*((s["q5"]+s["q8"])/2),2)
    pigment=float(s["q7"])
    if oil>=2 and dry<=1: skin="지성"
    elif dry>=2 and oil<=1: skin="건성"
    elif oil>=2 and dry>=2: skin="복합성"
    else: skin="중성"
    return {"skin_type":skin,"indices":{"oil":oil,"dry":dry,"sensitivity":sens,"wrinkle":wrinkle,"pigment":pigment}}

def assess_with_gemini(survey, gemini):
    base=assess_skin_type(**survey)
    idx=base["indices"]; fused=idx.copy()
    acne,red,mel=[_to_0_3(gemini[k]["score"]) for k in ["acne","redness","melasma_darkspots"]]
    fused["sensitivity"]=round(0.4*idx["sensitivity"]+0.6*red,2)
    fused["pigment"]=round(0.3*idx["pigment"]+0.7*mel,2)
    fused["oil"]=round(0.7*idx["oil"]+0.3*acne,2)
    fused["dry"]=idx["dry"]; fused["wrinkle"]=idx["wrinkle"]
    if fused["oil"]>=2 and fused["dry"]<=1: skin="지성"
    elif fused["dry"]>=2 and fused["oil"]<=1: skin="건성"
    elif fused["oil"]>=2 and fused["dry"]>=2: skin="복합성"
    else: skin="중성"
    return {"skin_type":skin,"indices":fused,"vision_raw":gemini}

def run_full_pipeline(input_path, survey_json):
    original=_load_exif_bgr(input_path)
    processed=preprocess_with_mask(original)
    pre_path=input_path.replace(".jpeg","_Pre.jpg")
    cv2.imwrite(pre_path,processed)
    print(f"[INFO] 전처리 완료: {pre_path}")
    api_key=os.getenv("GEMINI_API_KEY")
    gemini=analyze_with_gemini(pre_path, api_key)
    with open(survey_json,"r",encoding="utf-8") as f: survey=json.load(f)
    fused=assess_with_gemini(survey, gemini)
    print("\n[Gemini 이미지 분석 결과]"); print(json.dumps(gemini,ensure_ascii=False,indent=2))
    print("\n[설문+이미지 융합 결과]"); print(json.dumps(fused,ensure_ascii=False,indent=2))

if __name__=="__main__":
    run_full_pipeline("0001_01_F.jpg","survey_example.json")


[INFO] 전처리 완료: 0001_01_F.jpg

[Gemini 이미지 분석 결과]
{
  "acne": {
    "score": 15.0,
    "reason": "Slight presence of small acne spots, mostly on the forehead and cheeks."
  },
  "redness": {
    "score": 30.0,
    "reason": "Mild redness observed, particularly around the nose and cheek areas."
  },
  "melasma_darkspots": {
    "score": 55.0,
    "reason": "Moderate presence of dark spots and melasma, especially on the cheeks and forehead."
  }
}

[설문+이미지 융합 결과]
{
  "skin_type": "중성",
  "indices": {
    "oil": 1.53,
    "dry": 1.0,
    "sensitivity": 1.34,
    "wrinkle": 1.0,
    "pigment": 1.75
  },
  "vision_raw": {
    "acne": {
      "score": 15.0,
      "reason": "Slight presence of small acne spots, mostly on the forehead and cheeks."
    },
    "redness": {
      "score": 30.0,
      "reason": "Mild redness observed, particularly around the nose and cheek areas."
    },
    "melasma_darkspots": {
      "score": 55.0,
      "reason": "Moderate presence of dark spots and melasma, es

In [7]:
import json, os, hashlib, re
from typing import List, Dict, Any, Iterable
from elasticsearch import Elasticsearch, helpers

def connect_es():
    return Elasticsearch("http://localhost:9201", request_timeout=20)

INDEX_NAME = "cosmetics_demo"

def load_json(path: str) -> List[Dict[str, Any]]:
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def normalize_product(doc: Dict[str, Any], category: str) -> Dict[str, Any]:
    name = doc.get("productName") or ""
    brand = doc.get("mallName") or ""
    price = doc.get("salePrice")
    price_disc = doc.get("discountedSalePrice")
    avg_score = doc.get("averageReviewScore")
    review_cnt = doc.get("totalReviewCount")

    ing_str = (doc.get("ingredientsInfo") or {}).get("ingredients")
    if isinstance(ing_str, str):
        ingredients = [s.strip().lower() for s in ing_str.split(",") if s.strip()]
    else:
        ingredients = []

    reviews = doc.get("reviews") or []
    review_text = " ".join([str(r.get("reviewContent", "")) for r in reviews])[:5000]  # 길이 제한

    pid = hashlib.md5(f"{name}|{brand}".encode("utf-8")).hexdigest()

    return {
        "product_id": pid,
        "productName": name,
        "brand": brand,
        "category": category,               
        "ingredients": ingredients,         
        "review_text": review_text,         
        "salePrice": price,
        "discountedSalePrice": price_disc,
        "averageReviewScore": avg_score,
        "totalReviewCount": review_cnt,
    }

def load_and_normalize_all() -> List[Dict[str, Any]]:
    base = os.getcwd()
    files = [
        ("cream.json", "cream"),
        ("essense.json", "essense"),      
        ("skintoner.json", "skintoner"),  
    ]
    out = []
    for fname, cat in files:
        path = os.path.join(base, fname)
        if not os.path.exists(path):
            print(f"[WARN] not found: {path}")
            continue
        raw = load_json(path)
        for doc in raw:
            out.append(normalize_product(doc, cat))
    return out


MAPPING = {
    "settings": {
        "analysis": {
            "analyzer": {
                "ko_std": {"tokenizer": "standard", "filter": ["lowercase"]}
            }
        }
    },
    "mappings": {
        "properties": {
            "product_id": {"type": "keyword"},
            "productName": {"type": "text", "analyzer": "ko_std"},
            "brand": {"type": "keyword"},
            "category": {"type": "keyword"},
            "ingredients": {"type": "keyword"},           
            "review_text": {"type": "text", "analyzer": "ko_std"},
            "salePrice": {"type": "float"},
            "discountedSalePrice": {"type": "float"},
            "averageReviewScore": {"type": "float"},
            "totalReviewCount": {"type": "integer"},
        }
    }
}

def ensure_index(es: Elasticsearch):
    if es.indices.exists(index=INDEX_NAME):
        return
    es.indices.create(index=INDEX_NAME, **MAPPING)

def bulk_index(es: Elasticsearch, docs: List[Dict[str, Any]]):
    ops = []
    for d in docs:
        ops.append({"_op_type": "index", "_index": INDEX_NAME, "_id": d["product_id"], "_source": d})
    if ops:
        helpers.bulk(es, ops, refresh="wait_for")


POS_ING = {
    "pigment": {"niacinamide","vitamin c","ascorbic","3-o-ethyl ascorbic","arbutin","alpha arbutin","tranexamic","licorice","kojic"},
    "sensitivity": {"panthenol","madecassoside","centella","cica","allantoin","beta-glucan","bisabolol","aloe","ceramide"},
    "dry": {"hyaluronic","glycerin","squalane","ceramide","cholesterol","urea"},
    "acne": {"salicylic","bha","lipo-hydroxy","azelaic","zinc","niacinamide"},
}

NEG_ING = {
    "sensitivity": {"fragrance","parfum","linalool","limonene","citral","eugenol","essential oil","tea tree oil","peppermint oil","alcohol denat"},
    "acne": {"coconut oil","isopropyl myristate","lanolin","myristyl myristate"},
}

def build_query(fusion: Dict[str, Any], category: str, top_k: int = 15) -> Dict[str, Any]:
    idx = fusion.get("indices", {})
    oil = float(idx.get("oil", 0))
    dry = float(idx.get("dry", 0))
    sens = float(idx.get("sensitivity", 0))
    pigment = float(idx.get("pigment", 0))
    acne_flag = 1.0 if fusion.get("flags", {}).get("acne") else 0.0

    pos_ings = set()
    neg_ings = set()
    if pigment >= 2.0: pos_ings |= POS_ING["pigment"]
    if sens >= 2.0:    pos_ings |= POS_ING["sensitivity"]; neg_ings |= NEG_ING["sensitivity"]
    if dry >= 2.0:     pos_ings |= POS_ING["dry"]
    if oil >= 2.0 or acne_flag: pos_ings |= POS_ING["acne"]; neg_ings |= NEG_ING["acne"]

    functions = [
      
        *([{"filter": {"terms": {"ingredients": list(pos_ings)}}, "weight": 1.5}] if pos_ings else []),
        *([{"filter": {"terms": {"ingredients": list(neg_ings)}}, "weight": 0.7}] if neg_ings else []),
        
        {
            "script_score": {
                "script": {
                    "source": """
                        double s = 1.0;
                        if (!doc['averageReviewScore'].empty) {
                            s *= (0.9 + Math.min(Math.max(doc['averageReviewScore'].value, 0.0), 5.0) / 10.0);
                        }
                        if (!doc['totalReviewCount'].empty) {
                            double n = Math.min(doc['totalReviewCount'].value, 5000);
                            s *= (0.9 + (n/5000.0)*0.2);
                        }
                        return s;
                    """
                }
            }
        }
    ]

    body = {
        "size": top_k,
        "query": {
            "function_score": {
                "query": {
                    "bool": {
                        "must": [{"term": {"category": category}}],
                        "should": [
                            {"multi_match": {
                                "query": "brightening soothing barrier oil-free non-comedogenic",
                                "fields": ["productName^2", "review_text"]
                            }}
                        ]
                    }
                },
                "boost_mode": "multiply",
                "score_mode": "multiply",
                "functions": functions
            }
        },
        "_source": {
            "includes": [
                "product_id","productName","brand","category",
                "ingredients","salePrice","discountedSalePrice",
                "averageReviewScore","totalReviewCount"
            ]
        }
    }
    return body


CATEGORIES = ["cream","essense","skintoner"] 

def search_candidates_per_category(es: Elasticsearch, fusion: Dict[str, Any], per_cat: int = 15) -> Dict[str, List[Dict[str, Any]]]:
    out = {}
    for cat in CATEGORIES:
        q = build_query(fusion, category=cat, top_k=per_cat)
        resp = es.search(index=INDEX_NAME, body=q)
        hits = resp["hits"]["hits"]
        out[cat] = [{
            "rank_es": i+1,
            "score_es": round(h["_score"], 3),
            **h["_source"]
        } for i, h in enumerate(hits)]
    return out

if __name__ == "__main__":
    es = connect_es()

   
    ensure_index(es)
    docs = load_and_normalize_all()
    if docs:
        bulk_index(es, docs)
        print(f"[INFO] indexed: {len(docs)} docs")

    fusion_json = {
        "skin_type": "복합성",
        "indices": {"oil": 2.2, "dry": 1.0, "sensitivity": 2.4, "wrinkle": 1.2, "pigment": 2.8},
        "flags": {"sensitive": True, "aging": False, "pigment": True, "acne": False}
    }

    results = search_candidates_per_category(es, fusion_json, per_cat=15)
    for cat, arr in results.items():
        print(f"\n=== {cat.upper()} (ES candidates) ===")
        for r in arr:   
            print(f"- ({r['score_es']}) {r['brand']} | {r['productName']} | {r['discountedSalePrice'] or r['salePrice']}")



[INFO] indexed: 243 docs

=== CREAM (ES candidates) ===
- (1.5) 더마팩토리 온라인스토어 | [비건인증] 더마팩토리 EDLP 어성초 71% 수분크림(대용량) 120g /수분진정 속건조개선 민감지성피부 촘촘보습 2배용량 수분만땅 | 7000
- (1.499) 한율 | [본사직영] 한율 어린쑥 수분진정크림 55ml /한율 1등템 | 27580
- (1.499) 더마팩토리 온라인스토어 | [비건인증] 더마팩토리 비제로 컴포트 크림 80ml / 마일드크림 장벽보호 진정보습 온가족크림 자극지수 0.00 민감피부 구아이아줄렌 덱스판테놀 5% | 7800
- (1.498) 달바 공식스토어 | 달바 비건 더블 크림 단지형 70g | 37900
- (1.498) 더마팩토리 온라인스토어 | [비건인증] 더마팩토리 아쿠아포린 수분 크림 70ml /글리세릴글루코사이드 부활초 수분길활성 초저분자히알루론산 갈바닉크림 속수분크림 스쿠알란 오일프리 수분젤크림 | 7000
- (1.498) 더마팩토리 온라인스토어 | [비건인증] 더마팩토리 EDLP 어성초 71% 수분크림 60g /속건조개선 민감지성피부 열감감소 수분가득 촉촉크림 | 4500
- (1.495) 달바 공식스토어 | 달바 비건 더블 크림 튜브형 60ml | 29900
- (1.495) 더마팩토리 온라인스토어 | [비건인증] 더마팩토리 세라판테놀 8% 인텐시브 크림 (세라마이드 10,000ppm) 50ml /고보습크림 보습영양 탄력크림 속보습 장벽케어크림 판테놀크림 스쿠알란10% 판테놀7% | 9000
- (1.495) 더마팩토리 온라인스토어 | [비건인증] 더마팩토리 시카53.2%크림 30ml / 마데카소사이드 센텔라정량추출물(TECA) 1.1% 병풀잎수 트러블진정 장벽케어 민감피부진정 급속진정 | 9000
- (1.493) 더마팩토리 온라인스토어 | [비건인증] 더마팩토리 레티날 300ppm 크림 30ml /비타민 A 레티날 레티놀 리프팅 주름개선 크림 | 9000
- (1.49

In [8]:
from __future__ import annotations
from typing import Dict, Any, List, Iterable, Set, Tuple
import os, json
import numpy as np
import pandas as pd


POSITIVE_BY_CONCERN = {
    "pigment": {
        "claims": {"brightening", "dark-spot", "tone-up"},
        "ingredients": {"niacinamide", "vitamin c", "ascorbic", "3-o-ethyl ascorbic",
                        "arbutin", "alpha arbutin", "tranexamic", "licorice", "glabridin", "kojic"}
    },
    "sensitivity": {
        "claims": {"soothing", "calming", "barrier", "hypoallergenic"},
        "ingredients": {"panthenol", "madecassoside", "centella", "cica", "allantoin",
                        "beta-glucan", "bisabolol", "aloe", "ceramide"}
    },
    "acne_hint": {
        "claims": {"anti-acne", "non-comedogenic", "sebum-control"},
        "ingredients": {"salicylic", "bha", "lipo-hydroxy", "azelaic", "zinc", "niacinamide"}
    },
    "dry": {
        "claims": {"hydrating", "moisturizing", "barrier"},
        "ingredients": {"hyaluronic", "glycerin", "squalane", "ceramide", "cholesterol", "urea"}
    },
    "oil": {
        "claims": {"oil-free", "sebum-control", "matte"},
        "tags": {"oil-free", "lightweight", "gel-cream", "non-comedogenic"}
    }
}

NEGATIVE_BY_CONCERN = {
    "sensitivity": {
        "ingredients": {"fragrance", "parfum", "linalool", "limonene", "citral", "eugenol",
                        "essential oil", "tea tree oil", "peppermint oil", "alcohol denat"}
    },
    "acne_hint": {
        "ingredients": {"coconut oil", "isopropyl myristate", "lanolin", "myristyl myristate"}
    }
}


def _to_list(x) -> List[str]:
    if x is None: return []
    if isinstance(x, (list, tuple, set)): return [str(v).lower() for v in x]
    return [str(x).lower()]

def _safe_float(x, default=np.nan):
    try:
        return float(x)
    except Exception:
        return default

def _count_intersection(a: Iterable[str], b: Set[str]) -> int:
    return len(set([str(v).lower() for v in a]) & set([str(v).lower() for v in b]))

def make_ltr_features(cands: List[Dict[str, Any]], fusion_json: Dict[str, Any]) -> pd.DataFrame:
    """
    ES 후보(한 카테고리의 리스트)를 받아 LTR 입력용 피처 DataFrame 생성
    - 문서 단위 피처 + 사용자/쿼리 컨텍스트 피처(fusion indices)
    """
    idx = fusion_json.get("indices", {})
    oil = float(idx.get("oil", 0)); dry = float(idx.get("dry", 0))
    sens = float(idx.get("sensitivity", 0)); pigment = float(idx.get("pigment", 0))
    wrinkle = float(idx.get("wrinkle", 0))
    acne_flag = 1.0 if fusion_json.get("flags", {}).get("acne") else 0.0

    rows = []
    for doc in cands:
        ingredients = _to_list(doc.get("ingredients"))
        claims = _to_list(doc.get("claims"))
        tags = _to_list(doc.get("tags"))

        
        pos_ing_cnt = (
            _count_intersection(ingredients, POSITIVE_BY_CONCERN["pigment"]["ingredients"]) +
            _count_intersection(ingredients, POSITIVE_BY_CONCERN["sensitivity"]["ingredients"]) +
            _count_intersection(ingredients, POSITIVE_BY_CONCERN["dry"]["ingredients"]) +
            _count_intersection(ingredients, POSITIVE_BY_CONCERN["acne_hint"]["ingredients"])
        )
        pos_claim_cnt = (
            _count_intersection(claims, POSITIVE_BY_CONCERN["pigment"]["claims"]) +
            _count_intersection(claims, POSITIVE_BY_CONCERN["sensitivity"]["claims"]) +
            _count_intersection(claims, POSITIVE_BY_CONCERN["dry"]["claims"]) +
            _count_intersection(claims, POSITIVE_BY_CONCERN["acne_hint"]["claims"]) +
            _count_intersection(claims, POSITIVE_BY_CONCERN["oil"]["claims"])
        )
        pos_tag_cnt = _count_intersection(tags, POSITIVE_BY_CONCERN["oil"].get("tags", set()))

        neg_ing_cnt = (
            _count_intersection(ingredients, NEGATIVE_BY_CONCERN["sensitivity"]["ingredients"]) +
            _count_intersection(ingredients, NEGATIVE_BY_CONCERN["acne_hint"]["ingredients"])
        )

        
        has_oil_free = 1.0 if ("oil-free" in tags or "oil-free" in claims) else 0.0
        has_noncomedogenic = 1.0 if ("non-comedogenic" in tags or "non-comedogenic" in claims) else 0.0
        has_soothing = 1.0 if ("soothing" in claims or "calming" in claims) else 0.0
        has_brightening = 1.0 if ("brightening" in claims or "tone-up" in claims or "dark-spot" in claims) else 0.0

        
        es_score = _safe_float(doc.get("score_es"), default=np.nan)
        price = _safe_float(doc.get("price"), default=np.nan)
        comedo = _safe_float(doc.get("comedogenic_rating"), default=np.nan)
        spf = _safe_float(doc.get("spf"), default=0.0)

        
        price_log = np.log1p(price) if np.isfinite(price) else np.nan

        rows.append({
            
            "es_score": es_score,
            "price_log": price_log,
            "comedogenic": comedo,
            "spf": spf,
            "pos_ing_cnt": float(pos_ing_cnt),
            "pos_claim_cnt": float(pos_claim_cnt),
            "pos_tag_cnt": float(pos_tag_cnt),
            "neg_ing_cnt": float(neg_ing_cnt),
            "has_oil_free": has_oil_free,
            "has_noncomedogenic": has_noncomedogenic,
            "has_soothing": has_soothing,
            "has_brightening": has_brightening,

            
            "q_oil": oil,
            "q_dry": dry,
            "q_sensitivity": sens,
            "q_pigment": pigment,
            "q_wrinkle": wrinkle,
            "q_acne_flag": acne_flag,
        })

    df = pd.DataFrame(rows)
    return df


DEFAULT_FEATURE_ORDER = [
    "es_score","price_log","comedogenic","spf",
    "pos_ing_cnt","pos_claim_cnt","pos_tag_cnt","neg_ing_cnt",
    "has_oil_free","has_noncomedogenic","has_soothing","has_brightening",
    "q_oil","q_dry","q_sensitivity","q_pigment","q_wrinkle","q_acne_flag"
]

def align_features(df: pd.DataFrame, feature_order: List[str] = None) -> pd.DataFrame:
    cols = feature_order or DEFAULT_FEATURE_ORDER
   
    for c in cols:
        if c not in df.columns:
            df[c] = 0.0
    df = df[cols]
    return df


def load_xgb_model(model_path: str):
    """
    - .json/.ubj: Booster로 로드
    - .pkl/.pickle: sklearn API로 로드
    """
    ext = os.path.splitext(model_path)[1].lower()
    if ext in [".json", ".ubj"]:
        import xgboost as xgb
        bst = xgb.Booster()
        bst.load_model(model_path)
        return ("booster", bst)
    elif ext in [".pkl", ".pickle"]:
        import pickle
        with open(model_path, "rb") as f:
            model = pickle.load(f)
        return ("sklearn", model)
    else:
        raise ValueError(f"지원하지 않는 모델 확장자: {ext}")

def predict_scores(df_feat: pd.DataFrame, model_spec) -> np.ndarray:
    kind, model = model_spec
    if kind == "booster":
        import xgboost as xgb
        dmat = xgb.DMatrix(df_feat.values, feature_names=list(df_feat.columns))
        pred = model.predict(dmat)
        return np.asarray(pred).reshape(-1)
    else:  
        pred = model.predict(df_feat.values)
        return np.asarray(pred).reshape(-1)


def rank_topn_per_category(
    candidates_by_cat: Dict[str, List[Dict[str, Any]]],
    fusion_json: Dict[str, Any],
    model_path: str,
    top_n: int = 3,
    feature_order: List[str] = None
) -> Dict[str, List[Dict[str, Any]]]:
    """
    반환:
    {
      "skin":  [Top-N dict...],
      "toner": [Top-N dict...],
      ...
    }
    """
    model = load_xgb_model(model_path)
    out = {}

    for cat, cands in candidates_by_cat.items():
        if not cands:
            out[cat] = []
            continue
        df_feat = make_ltr_features(cands, fusion_json)
        df_feat = align_features(df_feat, feature_order)

        scores = predict_scores(df_feat, model)

      
        ranked = sorted(zip(cands, scores), key=lambda x: x[1], reverse=True)
        topk = []
        for item, sc in ranked[:top_n]:
            item_out = dict(item)
            item_out["score_ltr"] = float(sc)
            topk.append(item_out)
        out[cat] = topk
    return out


if __name__ == "__main__":

    fusion_json = {
        "skin_type": "복합성",
        "indices": {"oil": 2.2, "dry": 1.0, "sensitivity": 2.4, "wrinkle": 1.5, "pigment": 2.8},
        "flags": {"sensitive": True, "aging": False, "pigment": True, "acne": False}
    }

    # 3) 모델 경로
    model_path = "models/ltr_model.json"  
   
    final = rank_topn_per_category(candidates_by_cat, fusion_json, model_path, top_n=3)
    print(json.dumps(final, ensure_ascii=False, indent=2))
    pass



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\frank\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\frank\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\frank\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 701, in start
    self.io_loop.start()
  File "C:\Users\frank\anaconda3\Lib\site-pack

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.




A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\frank\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\frank\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\frank\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 701, in start
    self.io_loop.start()
  File "C:\Users\frank\anaconda3\Lib\site-pack

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.



ImportError: numpy.core.multiarray failed to import