In [10]:
import cv2
import numpy as np
from PIL import Image, ImageOps


# Default length, in pixels, that _resize_short gives to the shorter image side.
TARGET_SHORT = 768

def _load_exif_bgr(path: str):
    """Load an image file as a BGR array with its EXIF orientation applied.

    Args:
        path: Path of the image file to read.

    Returns:
        The image as a NumPy array in BGR channel order (converted from the
        RGB pixel data with ``cv2.COLOR_RGB2BGR``).
    """
    # Image.open is lazy and keeps the file open; the context manager makes
    # sure the handle is released even when decoding or transposing fails.
    with Image.open(path) as pil:
        rgb = np.array(ImageOps.exif_transpose(pil).convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

def _resize_short(bgr, short=TARGET_SHORT):
    """Scale an image so that its shorter side becomes ``short`` pixels.

    Both sides are multiplied by the same factor and rounded to the nearest
    integer. Shrinking uses ``cv2.INTER_AREA``; enlarging uses
    ``cv2.INTER_CUBIC``. If the shorter side already equals ``short`` the
    input array itself is returned (no copy is made).
    """
    height, width = bgr.shape[:2]
    shorter = min(height, width)
    if shorter != short:
        ratio = short / float(shorter)
        # cv2.resize takes the target size as (width, height).
        target_size = (int(round(width * ratio)), int(round(height * ratio)))
        method = cv2.INTER_CUBIC if ratio >= 1.0 else cv2.INTER_AREA
        return cv2.resize(bgr, target_size, interpolation=method)
    return bgr

def _wb_grayworld(bgr, strength=0.5):
    x = bgr.astype(np.float32)
    means = x.reshape(-1,3).mean(0) + 1e-6
    g = means.mean()
    gains = np.clip(g/means, 0.8, 1.2)
    gains = (1-strength)*1.0 + strength*gains
    x *= gains
    return np.clip(x, 0, 255).astype(np.uint8)

def _clahe_light(bgr, clip=1.8, tiles=8):
    """Run CLAHE on the L channel of the image in LAB space.

    The image is converted BGR -> LAB, the first channel is passed through a
    CLAHE object built with ``clipLimit=clip`` and a ``tiles`` x ``tiles``
    grid, and the result is converted back to BGR. The other two channels
    are merged back unchanged.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=clip, tileGridSize=(tiles, tiles))
    enhanced_lab = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

def _skin_mask(bgr):
    """Build a binary (0/255) mask of the largest skin-coloured region.

    A pixel is kept only if it passes both colour gates:
      * YCrCb: 135 <= Cr <= 180, 85 <= Cb <= 135, 40 <= Y <= 240
      * HSV:   H <= 25, 30 <= S <= 180, V >= 60
    The mask is then opened once and closed twice with a 5x5 elliptical
    kernel, and only the connected component (8-connectivity) with the
    largest area is retained. If there is no foreground component the
    cleaned mask is returned as is.

    Returns:
        A single-channel ``uint8`` array with values 0 or 255.
    """
    ycrcb = cv2.cvtColor(bgr, cv2.COLOR_BGR2YCrCb)
    luma, cr, cb = ycrcb[..., 0], ycrcb[..., 1], ycrcb[..., 2]
    ycrcb_ok = (
        (cr >= 135) & (cr <= 180)
        & (cb >= 85) & (cb <= 135)
        & (luma >= 40) & (luma <= 240)
    )

    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    hue, sat, val = hsv[..., 0], hsv[..., 1], hsv[..., 2]
    hsv_ok = (hue <= 25) & (sat >= 30) & (sat <= 180) & (val >= 60)

    mask = np.where(ycrcb_ok & hsv_ok, 255, 0).astype(np.uint8)

    # Opening removes small specks, closing fills small holes.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    count, labels, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
    if count <= 1:
        # Label 0 is the background, so there is no foreground component.
        return mask

    # Skip row 0 (background) when searching, then shift the index back.
    biggest = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
    return np.where(labels == biggest, 255, 0).astype(np.uint8)

def preprocess_with_mask(bgr, bg_gray=220):
    """Normalise an image and replace everything outside the skin mask.

    Steps, in order: ``_resize_short`` (default size), ``_wb_grayworld`` with
    strength 0.5, ``_clahe_light`` with clip 1.8 and 8 tiles, then
    ``_skin_mask``. Pixels where the mask equals 255 keep their processed
    colour; all others become the flat gray ``(bg_gray, bg_gray, bg_gray)``.

    Returns:
        The masked image, same shape as the resized image.
    """
    resized = _resize_short(bgr)
    balanced = _wb_grayworld(resized, 0.5)
    enhanced = _clahe_light(balanced, 1.8, 8)

    # Add a trailing axis so the 2-D mask broadcasts over the colour channels.
    keep = _skin_mask(enhanced)[..., None] == 255
    backdrop = np.full_like(enhanced, (bg_gray, bg_gray, bg_gray), np.uint8)
    return np.where(keep, enhanced, backdrop)


if __name__ == "__main__":
    # Demo: preprocess one photo, save the result, and show a side-by-side
    # before/after preview in an OpenCV window.
    input_path = "0001_03_R.jpeg"
    output_path = "0001_03_R_Pre.jpg"

    original = _load_exif_bgr(input_path)

    processed = preprocess_with_mask(original)

    # cv2.imwrite signals failure through its return value, so check it
    # before reporting success.
    if not cv2.imwrite(output_path, processed, [cv2.IMWRITE_JPEG_QUALITY, 92]):
        raise IOError(f"could not write image: {output_path}")
    print(f" 저장 완료: {output_path}")

    # Bring the processed image to the original's height so both can be
    # stacked horizontally.
    h1, h2 = original.shape[0], processed.shape[0]
    if h1 != h2:
        scale = h1 / h2
        processed = cv2.resize(processed, (int(processed.shape[1]*scale), h1))
    pad = np.full((h1, 20, 3), 220, np.uint8)
    preview = np.hstack([original, pad, processed])

    # Fit the preview inside 1280x720 while keeping its aspect ratio; forcing
    # exactly 1280x720 would stretch the images.
    fit = min(1280 / preview.shape[1], 720 / preview.shape[0])
    fit_size = (
        max(1, int(round(preview.shape[1] * fit))),
        max(1, int(round(preview.shape[0] * fit))),
    )
    preview = cv2.resize(preview, fit_size)

    # NOTE(review): cv2.imshow needs a GUI session; confirm this cell is not
    # meant to run on a headless notebook server.
    cv2.imshow("Before (Left)  vs  After (Right)", preview)
    try:
        cv2.waitKey(0)
    finally:
        # Close the window even if the wait is interrupted.
        cv2.destroyAllWindows()


 저장 완료: 0001_03_R_Pre.jpg


In [45]:
# 2.0 flash
# Settings read by main() below.

MODEL = "gemini-2.0-flash"          # model name passed to generate_content
IMAGE_PATH = "Lee_Pre.jpg"  # image file sent to the model as inline bytes

def main(image_path=None, model=None):
    """Send one image to a Gemini model and print its JSON skin assessment.

    The image bytes are sent inline together with a Korean prompt asking for
    acne, redness and melasma/dark-spot scores, with JSON requested as the
    response MIME type. The parsed JSON is pretty-printed to stdout.

    Args:
        image_path: Image file to analyse. ``None`` (default) uses the
            module-level ``IMAGE_PATH`` as it is at call time.
        model: Model name. ``None`` (default) uses the module-level ``MODEL``.

    Raises:
        OSError: If the image file cannot be read.
        RuntimeError: If the response contains no text to parse.
        json.JSONDecodeError: If the returned text is not valid JSON.
    """
    # Resolve defaults at call time so that re-assigning the notebook globals
    # takes effect without redefining this function.
    image_path = IMAGE_PATH if image_path is None else image_path
    model = MODEL if model is None else model

    # NOTE(review): if GEMINI_API_KEY is unset this passes api_key=None to the
    # client; confirm how the SDK handles that in your environment.
    api_key = os.getenv("GEMINI_API_KEY")
    client = genai.Client(api_key=api_key)

    # Fall back to JPEG when the extension gives no hint about the type.
    mime = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    with open(image_path, "rb") as f:
        img_bytes = f.read()

    prompt = (
        "전처리된 얼굴 피부 이미지를 분석하여 JSON만 반환하라.\n"
        "평가 항목: acne(여드름), redness(홍조), melasma_darkspots(잡티).\n"
        "각 항목은 다음 스키마로 제공하라:\n"
        "{acne:{score:number,reason:string}, "
        "redness:{score:number,reason:string}, "
        "melasma_darkspots:{score:number,reason:string}}\n"
        "score는 0~100 범위의 실수이며, 0은 없음·매우 양호, 100은 매우 심함을 의미한다."
    )

    content = types.Content(
        role="user",
        parts=[
            types.Part(text=prompt),
            types.Part(inline_data=types.Blob(mime_type=mime, data=img_bytes)),
        ],
    )

    resp = client.models.generate_content(
        model=model,
        contents=[content],
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            temperature=0.2,
            system_instruction="너는 피부 분석 전문가다. 반드시 JSON만 반환하라."
        ),
    )

    # json.loads(None) would fail with an unrelated TypeError, so report a
    # missing text payload explicitly.
    raw_text = resp.text
    if not raw_text:
        raise RuntimeError(f"model '{model}' returned no text for {image_path}")

    data = json.loads(raw_text)
    print(json.dumps(data, ensure_ascii=False, indent=2))

# Entry point: run the request when this code is executed directly.
if __name__ == "__main__":
    main()


{
  "acne": {
    "score": 35.0,
    "reason": "얼굴 전체에 작은 여드름과 뾰루지가 관찰됩니다."
  },
  "redness": {
    "score": 40.0,
    "reason": "뺨과 코 주변에 약간의 홍조가 보입니다."
  },
  "melasma_darkspots": {
    "score": 55.0,
    "reason": "얼굴 전체에 잡티와 검버섯이 다수 관찰됩니다."
  }
}


In [2]:
from typing import Dict, Tuple, Union

# Questionnaire answer tables: question key -> {answer text: score 0..3}.
# How assess_skin_type uses them: q1 and q9 feed the oil index, q3 the dry
# index, q2 sensitivity, q4/q5/q8 the wrinkle index and q7 pigment. q6 is
# scored but only reported in the debug output.
MAP: Dict[str, Dict[str, int]] = {
    "q1": {"없어요":0, "T존 일부(이마 혹은 코)":1, "T존 전체(이마와 코)":2, "얼굴 전체":3},
    "q2": {"전혀 안 보여요":0, "지금은 없지만 가끔 보여요":1, "부분적으로 붉게 보여요":2, "전체적으로 붉게 보여요":3},
    "q3": {"없어요":0, "U존 일부(볼 혹은 턱)":1, "U존 전체(볼과 턱)":2, "얼굴 전체":3},
    "q4": {"전혀 생기지 않아요":0, "표정을 지을 때만 생겨요":1, "표정 짓지 않아도 약간 있어요":2, "표정 짓지 않아도 많이 있어요":3},
    "q5": {"주름이 없어요":0, "잔주름이에요":1, "깊은 주름이에요":2, "잔주름과 깊은 주름 다 있어요":3},
    "q6": {"전혀 생기지 않아요":0, "미소 지을 때만 약간 생겨요":1, "미소 지을 때 진하게 생겨요":2, "미소 짓지 않아도 생겨요":3},
    "q7": {"전혀 안 보여요":0, "거의 안 보여요":1, "약간 눈에 띄어요":2, "곳곳에 많이 보여요":3},
    "q8": {"주름이 없어요":0, "잔주름이에요":1, "깊은 주름이에요":2, "잔주름과 깊은 주름 다 있어요":3},  # same choices as Q5 (duplicate)
    "q9": {"외출 전보다 윤기가 없어요":0, "외출 전과 변함이 없어요":1, "약간 번들거리고 윤기가 있어요":2, "많이 번들거리고 기름져요":3},
}

# An answer is either a ready-made integer score or one of the answer texts above.
Score = Union[int, str]

def _to_score(q_key: str, v: Score) -> int:
    if isinstance(v, int):
        return 0 if v < 0 else 3 if v > 3 else v
    m = MAP[q_key]
    if v not in m:
        raise ValueError(f"{q_key}='{v}'는 허용되지 않습니다. choices={list(m)}")
    return m[v]

def assess_skin_type(
    q1: Score, q2: Score, q3: Score, q4: Score, q5: Score, q6: Score, q7: Score, q8: Score, q9: Score,
    *,
    oil_weights: Tuple[float, float] = (0.6, 0.4),   # (q1, q9)
    wrinkle_weights: Tuple[float, float] = (0.4, 0.6),  # (presence=q4, depth=avg(q5,q8))
    thr_high: float = 2.0, thr_low: float = 1.0,
    use_q8_average: bool = True
) -> Dict[str, object]:
    """Classify skin type from nine questionnaire answers.

    Every answer is first converted with ``_to_score`` (in order q1..q9).
    Indices are then derived as follows:
      * oil      = weighted sum of q1 and q9 (``oil_weights``), rounded to 2 dp
      * dry      = q3
      * sens     = q2
      * wrinkle  = weighted sum of q4 and the wrinkle depth
                   (``wrinkle_weights``), rounded to 2 dp; depth is the mean
                   of q5 and q8, or q5 alone when ``use_q8_average`` is False
      * pigment  = q7
    q6 is scored but only reported in the debug output.

    Skin type rules, checked in this order:
      oil >= thr_high and dry <= thr_low  -> "지성"
      dry >= thr_high and oil <= thr_low  -> "건성"
      oil >= thr_high and dry >= thr_high -> "복합성"
      anything else                       -> "중성"

    Returns:
        A dict with ``skin_type``, ``indices``, ``flags`` (sensitive / aging /
        pigment, each true when its index reaches ``thr_high``) and ``debug``.
    """
    answers = (q1, q2, q3, q4, q5, q6, q7, q8, q9)
    scores = tuple(
        _to_score(f"q{number}", answer)
        for number, answer in enumerate(answers, start=1)
    )
    s1, s2, s3, s4, s5, _s6, s7, s8, s9 = scores

    if use_q8_average:
        depth = (s5 + s8)/2
    else:
        depth = s5

    oil = round(oil_weights[0]*s1 + oil_weights[1]*s9, 2)
    wrinkle = round(wrinkle_weights[0]*s4 + wrinkle_weights[1]*depth, 2)
    dry, sens, pigment = float(s3), float(s2), float(s7)

    is_oily = oil >= thr_high
    is_dry = dry >= thr_high
    if is_oily and dry <= thr_low:
        skin = "지성"
    elif is_dry and oil <= thr_low:
        skin = "건성"
    elif is_oily and is_dry:
        skin = "복합성"
    else:
        skin = "중성"

    indices = {
        "oil": oil, "dry": dry, "sensitivity": sens, "wrinkle": wrinkle, "pigment": pigment
    }
    flags = {
        "sensitive": sens >= thr_high,
        "aging": wrinkle >= thr_high,
        "pigment": pigment >= thr_high,
    }
    debug = {
        "scores": scores,
        "depth": round(depth, 2),
        "thr_high": thr_high,
        "thr_low": thr_low,
    }
    return {"skin_type": skin, "indices": indices, "flags": flags, "debug": debug}

if __name__ == "__main__":
    # Demo: score one sample questionnaire and print the resulting profile.
    res = assess_skin_type(
        q1="T존 전체(이마와 코)",
        q2="부분적으로 붉게 보여요",
        q3="U존 일부(볼 혹은 턱)",
        q4="표정을 지을 때만 생겨요",
        q5="잔주름이에요",
        q6="미소 지을 때만 약간 생겨요",
        q7="약간 눈에 띄어요",
        q8="잔주름이에요",
        q9="약간 번들거리고 윤기가 있어요",
    )
    print(res)


{'skin_type': '지성', 'indices': {'oil': 2.0, 'dry': 1.0, 'sensitivity': 2.0, 'wrinkle': 1.0, 'pigment': 2.0}, 'flags': {'sensitive': True, 'aging': False, 'pigment': True}, 'debug': {'scores': (2, 2, 1, 1, 1, 1, 2, 1, 2), 'depth': 1.0, 'thr_high': 2.0, 'thr_low': 1.0}}


In [None]:
from __future__ import annotations
from typing import Dict, List, Any, Optional
from elasticsearch import Elasticsearch, helpers


def get_es(host: str = "http://localhost:9200", api_key: Optional[str] = None) -> Elasticsearch:
    """Create an Elasticsearch client for ``host`` with a 5 second request timeout.

    ``api_key`` is passed to the client only when it is truthy.
    """
    client_options: Dict[str, Any] = {"hosts": [host], "request_timeout": 5}
    if api_key:
        client_options["api_key"] = api_key
    return Elasticsearch(**client_options)


# Default index name used by the product index, bulk-load and search helpers below.
PRODUCT_INDEX = "products"

def product_index_body() -> Dict[str, Any]:
    """Build the settings and mappings used to create the product index.

    Settings define one custom analyzer, ``ko_nori_with_syn``: a nori
    tokenizer (``decompound_mode`` "mixed") followed by the ``lowercase``
    filter and a synonym filter that maps Korean/English ingredient names to
    a single canonical token.

    Mappings are non-dynamic (``"dynamic": "false"``) and list every field
    explicitly. A new dict is built on every call.
    """
    analyzer_name = "ko_nori_with_syn"
    tokenizer_name = "nori_user_dict"
    synonym_filter_name = "synonym_ing"

    def field(kind: str, **options: Any) -> Dict[str, Any]:
        # Returns a fresh dict each time so no two fields share one object.
        return {"type": kind, **options}

    def analyzed_text() -> Dict[str, Any]:
        return field("text", analyzer=analyzer_name)

    # Format: "variant, variant => canonical_token"
    synonym_rules = [
        "니아신아마이드, niacinamide => niacinamide",
        "징크피씨에이, zinc pca, zinc_pca => zinc_pca",
        "살리실산, bha, salicylic acid => salicylic_acid",
        "센텔라, 병풀, centella => centella",
        "판테놀, panthenol => panthenol",
        "히알루론산, hyaluronic acid => hyaluronic_acid",
        "레티놀, retinol => retinol",
        "비타민 c, vitamin c, ascorbic acid => vitamin_c",
    ]

    analysis = {
        "tokenizer": {
            tokenizer_name: field("nori_tokenizer", decompound_mode="mixed"),
        },
        "filter": {
            synonym_filter_name: field("synonym", synonyms=synonym_rules),
        },
        "analyzer": {
            analyzer_name: field(
                "custom",
                tokenizer=tokenizer_name,
                filter=["lowercase", synonym_filter_name],
            ),
        },
    }

    field_specs = [
        # Basic product attributes
        ("product_id", field("keyword")),
        ("brand", field("keyword")),
        ("name_ko", analyzed_text()),
        ("category", field("keyword")),
        ("price", field("float")),
        ("is_vegan", field("boolean")),
        # Ingredients
        ("ingredients_raw", analyzed_text()),
        ("ingredients_tokens", field("keyword")),
        ("actives", field("keyword")),
        ("actives_strength", field("object", enabled=False)),
        # Reviews
        ("reviews_text", analyzed_text()),
        ("reviews_sentiment", field("float")),
        ("review_count", field("integer")),
        ("rating_avg", field("float")),
        # Ranking signals
        ("popularity", field("rank_feature")),
        ("review_count_log", field("rank_feature")),
        ("rating_rank", field("rank_feature")),
        # Suitability flags
        ("fragrance_free", field("boolean")),
        ("alcohol_free", field("boolean")),
        ("for_oily", field("boolean")),
        ("for_dry", field("boolean")),
        ("for_sensitive", field("boolean")),
        ("for_pigment", field("boolean")),
        ("for_aging", field("boolean")),
        # Sun protection
        ("spf", field("integer")),
        ("pa", field("keyword")),
        ("release_date", field("date")),
    ]

    return {
        "settings": {"analysis": analysis},
        "mappings": {"dynamic": "false", "properties": dict(field_specs)},
    }

def ensure_product_index(es: Elasticsearch, index: str = PRODUCT_INDEX) -> None:
    """Create ``index`` from ``product_index_body()`` unless it already exists."""
    already_present = es.indices.exists(index=index)
    if not already_present:
        es.indices.create(index=index, body=product_index_body())


def upsert_products_stub(es: Elasticsearch, docs: List[Dict[str, Any]], index: str = PRODUCT_INDEX) -> None:
    """Bulk-index product documents, using each doc's ``product_id`` as its id.

    One ``index`` action is sent per document through ``helpers.bulk``.
    Nothing is sent when ``docs`` is empty.

    NOTE(review): a document without ``product_id`` is submitted with
    ``_id`` set to None; confirm that this is intended.
    """
    if docs:
        def as_index_action(doc: Dict[str, Any]) -> Dict[str, Any]:
            return {
                "_op_type": "index",
                "_index": index,
                "_id": doc.get("product_id"),
                "_source": doc,
            }

        # map() is lazy, so actions are produced as the bulk helper consumes them.
        helpers.bulk(es, map(as_index_action, docs))


# Active-ingredient tokens that build_query boosts (matched against the
# "actives" field) for each skin concern.
OIL_ACTIVES = ["niacinamide", "zinc_pca", "salicylic_acid"]  # used when the oil index >= 2
DRY_ACTIVES = ["ceramide", "cholesterol", "hyaluronic_acid", "squalane"]  # used when the dry index >= 2
SENSITIVE_ACTIVES = ["centella", "panthenol", "madecassoside", "allantoin"]  # used when the sensitive flag is set
PIGMENT_ACTIVES = ["niacinamide", "arbutin", "vitamin_c", "azelaic_acid"]  # used when the pigment flag is set
AGING_ACTIVES = ["retinol", "retinal", "peptide", "bakuchiol"]  # used when the aging flag is set

def build_query(
    profile: Dict[str, Any],
    *,
    topn: int = 200,
    categories: Optional[List[str]] = None,
    user_blacklist: Optional[List[str]] = None,
    include_fields: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Build the Elasticsearch search body for a skin profile.

    Filters (must all hold): ``is_vegan`` is true; ``category`` is one of
    ``categories`` if given; no ``ingredients_tokens`` value appears in
    ``user_blacklist`` if given.

    Scoring (optional ``should`` clauses): three rank-feature boosts are
    always present. Concern-specific boosts are added when the oil or dry
    index is at least 2, or when the sensitive / pigment / aging flag is set.

    Example profile:
    {
      "skin_type": "지성",
      "indices": {"oil": 2.3, "dry": 0.0, "sensitivity": 1.0, "wrinkle": 0.8, "pigment": 2.1},
      "flags": {"sensitive": False, "aging": False, "pigment": True}
    }

    Args:
        profile: Skin profile; missing ``indices`` / ``flags`` entries count
            as 0 / False.
        topn: Number of hits to request (``size``).
        categories: Optional category whitelist.
        user_blacklist: Optional ingredient tokens to exclude.
        include_fields: ``_source`` fields to return; a default list is used
            when omitted or empty.

    Returns:
        The search request body as a dict.
    """
    if not include_fields:
        include_fields = [
            "product_id","name_ko","brand","category","price","is_vegan",
            "ingredients_tokens","rating_avg","review_count","reviews_sentiment",
            "fragrance_free","alcohol_free","for_oily","for_dry","for_sensitive",
            "for_pigment","for_aging","spf","pa","release_date"
        ]

    filter_clauses: List[Dict[str, Any]] = [{"term": {"is_vegan": True}}]
    if categories:
        filter_clauses.append({"terms": {"category": categories}})
    if user_blacklist:
        excluded = {"terms": {"ingredients_tokens": user_blacklist}}
        filter_clauses.append({"bool": {"must_not": [excluded]}})

    scoring_clauses: List[Dict[str, Any]] = [
        {"rank_feature": {"field": feature, "boost": 1.1}}
        for feature in ("popularity", "review_count_log", "rating_rank")
    ]

    def scaled_boost(val: float, base: float = 2.0, step: float = 0.5) -> float:
        # On the 0..3 scale with the defaults: 0 -> 1.0, 1 -> 1.5, 2 -> 2.0, 3 -> 2.5
        return max(1.0, base + (val - 2.0) * step)

    def prefer(actives: List[str], actives_boost: float, *flag_boosts: Any) -> None:
        # Boost products containing any of the actives, then each boolean flag.
        scoring_clauses.append({"terms": {"actives": actives, "boost": actives_boost}})
        for flag_field, weight in flag_boosts:
            scoring_clauses.append({"term": {flag_field: {"value": True, "boost": weight}}})

    indices = profile.get("indices", {})
    oil = indices.get("oil", 0)
    dry = indices.get("dry", 0)
    flags = profile.get("flags", {})

    if oil >= 2:
        prefer(OIL_ACTIVES, scaled_boost(oil, base=2.2), ("for_oily", 1.5))
    if dry >= 2:
        prefer(DRY_ACTIVES, scaled_boost(dry, base=2.2), ("for_dry", 1.5))
    if flags.get("sensitive", False):
        prefer(SENSITIVE_ACTIVES, 2.0, ("fragrance_free", 2.0), ("for_sensitive", 1.4))
    if flags.get("pigment", False):
        prefer(PIGMENT_ACTIVES, 1.8, ("for_pigment", 1.3))
    if flags.get("aging", False):
        prefer(AGING_ACTIVES, 1.8, ("for_aging", 1.3))

    return {
        "size": topn,
        "_source": {"includes": include_fields},
        "query": {
            "bool": {
                "filter": filter_clauses,
                "should": scoring_clauses,
                "minimum_should_match": 0,
            }
        },
    }


def search_topn(
    es: Elasticsearch,
    profile: Dict[str, Any],
    *,
    topn: int = 200,
    categories: Optional[List[str]] = None,
    user_blacklist: Optional[List[str]] = None,
    index: str = PRODUCT_INDEX
) -> List[Dict[str, Any]]:
    """Run the profile query and return the hits as flat result dicts.

    The query comes from ``build_query`` with its default field list. Each
    result holds the hit id and score, the main product fields (``name`` is
    taken from ``name_ko``) and a ``flags`` sub-dict whose entries default to
    False when missing from the document.
    """
    query_body = build_query(profile, topn=topn, categories=categories, user_blacklist=user_blacklist)
    response = es.search(index=index, body=query_body)

    flag_names = (
        "fragrance_free", "alcohol_free", "for_oily", "for_dry",
        "for_sensitive", "for_pigment", "for_aging",
    )

    def flatten(hit: Dict[str, Any]) -> Dict[str, Any]:
        source = hit.get("_source", {})
        return {
            "es_id": hit.get("_id"),
            "es_score": hit.get("_score"),
            "product_id": source.get("product_id"),
            "name": source.get("name_ko"),
            **{key: source.get(key) for key in ("brand", "category", "price", "is_vegan")},
            "ingredients_tokens": source.get("ingredients_tokens", []),
            **{key: source.get(key) for key in ("rating_avg", "review_count", "reviews_sentiment")},
            "flags": {name: source.get(name, False) for name in flag_names},
        }

    return [flatten(hit) for hit in response.get("hits", {}).get("hits", [])]


# Demo: connect to the default local Elasticsearch, make sure the product
# index exists, then fetch candidates for a sample profile.
if __name__ == "__main__":
    es = get_es()
    ensure_product_index(es)


    # Sample profile in the shape documented on build_query.
    user_profile = {
        "skin_type": "복합성",
        "indices": {"oil": 2.5, "dry": 2.1, "sensitivity": 0.5, "wrinkle": 1.4, "pigment": 2.2},
        "flags": {"sensitive": False, "aging": False, "pigment": True}
    }
    # Note: this result list is stored under the name `topn`, the same name
    # as the keyword argument passed to search_topn.
    topn = search_topn(
        es, user_profile,
        topn=200,
        categories=["essence","serum"],
        user_blacklist=["fragrance"]  
    )
    print(f"TopN={len(topn)}")
   
