In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from scipy.sparse import coo_matrix

# Root used for every path below: three directory levels above the current
# working directory.
BASE_DIR = Path().resolve().parent.parent.parent

# The data and model folders share one project sub-tree and differ only in
# their last component.
# (Alternative data location left by the author: BASE_DIR / "data" / "train" / "TL_csv")
base_path, save_path = (
    BASE_DIR.joinpath("AI 모델", "1.모델소스코드", "1.여행로그 장소 추천 고도화", leaf)
    for leaf in ("data", "model")
)

In [None]:
import pickle
import json
import pandas as pd
from scipy.sparse import load_npz


# NOTE(review): unpickling can execute arbitrary code, so save_path must only
# contain files from a trusted source.

# 1) Load the pickled LightFM model
with (save_path / "lightfm_model.pkl").open("rb") as f:
    model = pickle.load(f)

# 2) Load the sparse item feature matrix (.npz)
item_features_matrix = load_npz(save_path / "item_features_matrix.npz")

# 3) Load the item metadata table
item_meta = pd.read_csv(save_path / "item_meta.csv")

# 4) Load the ordered list of user-feature column names
with (save_path / "user_feature_cols.json").open("r", encoding="utf-8") as f:
    USER_FEATURE_COLS = json.load(f)

# 5) Load the VISIT_AREA_NM -> item_id mapping
with (save_path / "name_to_item_id.pkl").open("rb") as f:
    name_to_item_id = pickle.load(f)

In [7]:
import numpy as np
from scipy.sparse import csr_matrix

# Same prefix rule that was used at training time: each raw user field maps to
# a feature-name prefix, which is the field name written in lower case.
USER_PREFIX_MAP = {
    field: field.lower()
    for field in (
        "GENDER",
        "AGE_GRP",
        "MARR_STTS",
        "JOB_NM",
        "INCOME",
        "TRAVEL_NUM",
        "TRAVEL_STYL_1",
        "TRAVEL_STATUS_RESIDENCE",
        "TRAVEL_STATUS_DESTINATION",
        "TRAVEL_STATUS_ACCOMPANY",
        "TRAVEL_MOTIVE_1",
        "TRAVEL_COMPANIONS_NUM",
        "MONTH",
        "SEASON",
        "HOW_LONG",
    )
}


def build_new_user_features(
    new_user: dict,
    feature_cols=None,
    prefix_map=None,
) -> csr_matrix:
    """
    Encode one unseen user's attributes as a single-row sparse feature matrix.

    For every field in the prefix map, the column named "<prefix>_<value>" is
    set to 1.0 when that name exists in the feature column list. A field that
    is absent from ``new_user``, is missing (``pd.isna``), or has no matching
    column contributes nothing.

    A float that holds a whole number (e.g. 40.0, which pandas yields for an
    integer column containing missing values) is also tried in its integer
    spelling ("age_grp_40") when the literal spelling ("age_grp_40.0") is not a
    known column. The literal spelling always takes precedence.

    Parameters
    ----------
    new_user : dict
        Raw attributes keyed by field name, e.g.
        {"GENDER": "...", "AGE_GRP": "...", ..., "HOW_LONG": ...}.
    feature_cols : sequence of str, optional
        Ordered feature column names. Defaults to the module-level
        ``USER_FEATURE_COLS``.
    prefix_map : dict, optional
        Field name -> column-name prefix. Defaults to the module-level
        ``USER_PREFIX_MAP``.

    Returns
    -------
    csr_matrix
        float32 matrix of shape (1, len(feature_cols)).
    """
    # Globals are resolved at call time so the function can also be used (and
    # tested) with explicitly supplied columns.
    if feature_cols is None:
        feature_cols = USER_FEATURE_COLS
    if prefix_map is None:
        prefix_map = USER_PREFIX_MAP

    row = np.zeros((1, len(feature_cols)), dtype=np.float32)
    col_index = {col: idx for idx, col in enumerate(feature_cols)}

    for field, prefix in prefix_map.items():
        if field not in new_user:
            continue
        val = new_user[field]
        if pd.isna(val):
            continue

        # Literal spelling first; integer spelling as a fallback for
        # whole-number floats (NaN was already skipped above).
        candidates = [f"{prefix}_{val}"]
        if isinstance(val, (float, np.floating)) and float(val).is_integer():
            candidates.append(f"{prefix}_{int(val)}")

        for col_name in candidates:
            hit = col_index.get(col_name)
            if hit is not None:
                row[0, hit] = 1.0
                break

    return csr_matrix(row)


In [8]:
def recommend_for_new_jeju_user(
    new_user: dict,
    topn: int = 10,
    sido: str = "제주특별자치도",
    num_threads: int = 4,
):
    """
    Rank the items of one region for a user that was not seen in training.

    Parameters
    ----------
    new_user : dict
        Raw user attributes, passed to ``build_new_user_features``.
    topn : int, default 10
        Maximum number of rows to return. ``None`` returns every candidate;
        values below zero are treated as zero.
    sido : str, default "제주특별자치도"
        Value of the ``SIDO`` column of ``item_meta`` that selects candidates.
    num_threads : int, default 4
        Forwarded to ``model.predict``.

    Returns
    -------
    pandas.DataFrame
        Columns rank, item_id, score, VISIT_AREA_NM, SIDO, VISIT_AREA_TYPE_CD
        and ratings, ordered by descending score. Items with equal scores keep
        their ``item_meta`` order. The frame is empty when no item matches
        ``sido``.
    """
    output_cols = [
        "rank", "item_id", "score", "VISIT_AREA_NM", "SIDO", "VISIT_AREA_TYPE_CD", "ratings",
    ]

    # 1) Feature row for the new user. It is the only row of that matrix,
    #    hence user_ids=0 in the predict call below.
    new_user_features = build_new_user_features(new_user)

    # 2) Restrict the candidates to the requested region (item_meta based).
    candidates = item_meta[item_meta["SIDO"] == sido]
    candidate_item_ids = candidates["item_id"].to_numpy()

    # 3) Score the candidates; skip the model call when there is nothing to
    #    score instead of handing it an empty id array.
    if candidate_item_ids.size == 0:
        scores = np.empty(0, dtype=np.float32)
    else:
        scores = model.predict(
            user_ids=0,
            item_ids=candidate_item_ids,
            user_features=new_user_features,
            item_features=item_features_matrix,
            num_threads=num_threads,
        )

    # 4) Take the best `topn`. A stable sort keeps tied scores in a
    #    reproducible order (ties are common: items can share a score).
    limit = None if topn is None else max(int(topn), 0)
    order = np.argsort(-scores, kind="stable")[:limit]

    # Positional selection keeps rows and scores aligned even if item_id
    # values repeat in item_meta.
    rec = candidates.iloc[order].reset_index(drop=True)
    rec = rec.assign(score=scores[order], rank=np.arange(1, len(rec) + 1))

    return rec[output_cols]


In [6]:
import pandas as pd
from io import StringIO

# Inline CSV sample with three travellers; the first, unnamed column is only a
# row counter.
csv_text = """
,TRAVELER_ID,GENDER,AGE_GRP,MARR_STTS,JOB_NM,INCOME,TRAVEL_NUM,TRAVEL_STYL_1,TRAVEL_STATUS_RESIDENCE,TRAVEL_STATUS_DESTINATION,TRAVEL_STATUS_ACCOMPANY,TRAVEL_MOTIVE_1,TRAVEL_COMPANIONS_NUM,MONTH,SEASON,HOW_LONG
1,h006468,남,40,2,1,8,2,1,제주특별자치도,제주,자녀 동반 여행,8,2,9,autumn,1
2,h003272,여,50,1,2,4,1,3,제주특별자치도,제주,나홀로 여행,6,0,8,summer,1
3,h006175,여,20,1,2,6,2,4,대구광역시,제주,나홀로 여행,1,0,9,autumn,2
"""

# Read the string as if it were a file, then discard the leading index column
# ("Unnamed: 0") whenever pandas created one.
new_users_df = pd.read_csv(StringIO(csv_text)).drop(
    columns=["Unnamed: 0"], errors="ignore"
)

print(new_users_df)

  TRAVELER_ID GENDER  AGE_GRP  MARR_STTS  JOB_NM  INCOME  TRAVEL_NUM  \
0     h006468      남       40          2       1       8           2   
1     h003272      여       50          1       2       4           1   
2     h006175      여       20          1       2       6           2   

   TRAVEL_STYL_1 TRAVEL_STATUS_RESIDENCE TRAVEL_STATUS_DESTINATION  \
0              1                 제주특별자치도                        제주   
1              3                 제주특별자치도                        제주   
2              4                   대구광역시                        제주   

  TRAVEL_STATUS_ACCOMPANY  TRAVEL_MOTIVE_1  TRAVEL_COMPANIONS_NUM  MONTH  \
0                자녀 동반 여행                8                      2      9   
1                  나홀로 여행                6                      0      8   
2                  나홀로 여행                1                      0      9   

   SEASON  HOW_LONG  
0  autumn         1  
1  summer         1  
2  autumn         2  


In [9]:
# Alternative input left by the author: new_users_df = pd.read_csv("new_users.csv")

TOPN = 10

# Columns shown in the printed summary (a subset, to keep the output short).
DISPLAY_COLS = ["rank", "VISIT_AREA_NM", "score", "VISIT_AREA_TYPE_CD", "ratings"]

for _, row in new_users_df.iterrows():
    # Collect the fields listed in USER_PREFIX_MAP, in that order.
    new_user = {field: row[field] for field in USER_PREFIX_MAP}
    traveler_id = row["TRAVELER_ID"]

    print("\n==========================")
    print(f"TRAVELER_ID: {traveler_id}")
    print(f"입력 특성: {new_user}")

    rec_df = recommend_for_new_jeju_user(new_user, topn=TOPN)

    print("\n추천 결과 (TOP-{})".format(TOPN))
    summary_text = rec_df[DISPLAY_COLS].to_string(index=False)
    print(summary_text)


TRAVELER_ID: h006468
입력 특성: {'GENDER': '남', 'AGE_GRP': 40, 'MARR_STTS': 2, 'JOB_NM': 1, 'INCOME': 8, 'TRAVEL_NUM': 2, 'TRAVEL_STYL_1': 1, 'TRAVEL_STATUS_RESIDENCE': '제주특별자치도', 'TRAVEL_STATUS_DESTINATION': '제주', 'TRAVEL_STATUS_ACCOMPANY': '자녀 동반 여행', 'TRAVEL_MOTIVE_1': 8, 'TRAVEL_COMPANIONS_NUM': 2, 'MONTH': 9, 'SEASON': 'autumn', 'HOW_LONG': 1}

추천 결과 (TOP-10)
 rank    VISIT_AREA_NM       score  VISIT_AREA_TYPE_CD  ratings
    1       액트몬 제주 중문점 -214.324448                   6 4.333333
    2 메종글래드 제주 더 파티오 풀 -214.324448                   6 3.666667
    3           새별 프렌즈 -214.324448                   6 3.666667
    4       서프라이즈 테마파크 -214.324448                   6 5.000000
    5        호텔 리젠트 마린 -214.324448                   6 5.000000
    6             비체올린 -214.324448                   6 3.000000
    7 뽀로로 앤 타요 테마파크 제주 -214.324448                   6 5.000000
    8        피겨 뮤지엄 제주 -214.324448                   6 4.333333
    9    금호리조트 제주 아쿠아나 -214.324448                   6 2.000