In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.font_manager as fm
from sklearn.preprocessing import LabelEncoder
from catboost import Pool
from sklearn.model_selection import train_test_split
from catboost import CatBoostRegressor
import unicodedata
from itertools import combinations
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import KeyedVectors

In [23]:
# Input tables, read relative to the notebook's working directory.
# main_df: facility table (later cells read FCLTY_NM, CTGRY_TWO_NM and the barrier-free flags).
main_df = pd.read_csv('./data/main_data.csv')
# real_df: per-visit records used to train the satisfaction (DGSTFN) model.
real_df = pd.read_csv('./data/real_data.csv')
# ct_sim: similarity table; the first CSV column becomes the row index.
ct_sim = pd.read_csv('./data/ct_sim.csv', index_col=0)

In [25]:
# Normalise facility names BEFORE the feature matrix is built.
# `places` (below) is what the recommender feeds back into the model at
# prediction time, so the model must be trained on exactly the same strings;
# normalising only after fitting would turn any name changed by NFKC/strip
# into a category the model has never seen.
real_df['FCLTY_NM'] = real_df['FCLTY_NM'].apply(
    lambda x: unicodedata.normalize('NFKC', str(x)).strip()
)

X = real_df[['GENDER', 'AGE_GRP', 'TRAVEL_STYL_5',
       'TRAVEL_STYL_6', 'TRAVEL_STYL_7', 'TRAVEL_STYL_8',
       'TRAVEL_COMPANIONS_NUM','FCLTY_NM']]
y = real_df['DGSTFN']

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=.2, random_state=0)

# Every column except TRAVEL_COMPANIONS_NUM is treated as categorical.
cat_features = ['GENDER', 'AGE_GRP', 'TRAVEL_STYL_5','TRAVEL_STYL_6',
                'TRAVEL_STYL_7', 'TRAVEL_STYL_8', 'FCLTY_NM']

# Pool objects carry the categorical-feature declaration to CatBoost.
X_train_pool = Pool(X_train, y_train, cat_features = cat_features)
X_test_pool = Pool(X_test, y_test, cat_features = cat_features)

cb_reg = CatBoostRegressor(
    n_estimators = 5000,
    depth = 5,
    learning_rate = .03,
    loss_function = 'RMSE',
    eval_metric = 'RMSE',
    l2_leaf_reg = 10,
    early_stopping_rounds = 200,
    random_state = 0
)
# NOTE(review): the held-out split doubles as the early-stopping eval set, so
# the "test" RMSE printed below is also the model-selection criterion and is
# not an unbiased estimate.
cb_reg.fit(X_train_pool, eval_set = X_test_pool, verbose = 100)

# Candidate places scored by the recommender (already normalised above).
places = list(real_df['FCLTY_NM'].unique())

# NOTE(review): every column of dgstfn_pred_df receives the same training-set
# predictions, and no visible cell reads it afterwards. It is kept only so the
# notebook still defines the same names.
dgstfn_pred_df = pd.DataFrame(index=X_train.index, columns=places)
predicted_dgstfn = cb_reg.predict(X_train_pool)

for place in places:
    dgstfn_pred_df[place] = predicted_dgstfn

0:	learn: 0.7535490	test: 0.7479938	best: 0.7479938 (0)	total: 16.5ms	remaining: 1m 22s
100:	learn: 0.7173558	test: 0.7357685	best: 0.7357685 (100)	total: 1.66s	remaining: 1m 20s
200:	learn: 0.7008529	test: 0.7310686	best: 0.7309182 (188)	total: 3.11s	remaining: 1m 14s
300:	learn: 0.6894171	test: 0.7273656	best: 0.7273305 (289)	total: 4.81s	remaining: 1m 15s
400:	learn: 0.6804790	test: 0.7244893	best: 0.7244893 (400)	total: 6.35s	remaining: 1m 12s
500:	learn: 0.6713085	test: 0.7223779	best: 0.7223295 (499)	total: 8.01s	remaining: 1m 11s
600:	learn: 0.6614091	test: 0.7201955	best: 0.7201519 (598)	total: 10.1s	remaining: 1m 13s
700:	learn: 0.6500721	test: 0.7187915	best: 0.7187887 (699)	total: 12.2s	remaining: 1m 15s
800:	learn: 0.6396167	test: 0.7173690	best: 0.7167617 (738)	total: 14.2s	remaining: 1m 14s
900:	learn: 0.6310643	test: 0.7169890	best: 0.7167617 (738)	total: 16.2s	remaining: 1m 13s
1000:	learn: 0.6218387	test: 0.7170111	best: 0.7167109 (915)	total: 18.9s	remaining: 1m 15s
1

In [14]:
def recommend_user_info(gender, age_group, comp_num, styl5, styl6, styl7, styl8):
    """Predict the user's satisfaction for every known place and rank the places.

    Relies on the notebook-level ``places``, ``cat_features`` and ``cb_reg``
    defined in the training cell.

    Parameters
    ----------
    gender, age_group, styl5, styl6, styl7, styl8
        Values for the GENDER, AGE_GRP and TRAVEL_STYL_5..8 model features.
    comp_num
        Value for the TRAVEL_COMPANIONS_NUM model feature.

    Returns
    -------
    pandas.DataFrame
        Columns ``FCLTY_NM`` and ``rate_pred`` (prediction rounded to three
        decimals), sorted by ``rate_pred`` descending. Ties keep the order in
        which the places appear in ``places``.
    """
    result_columns = ['FCLTY_NM', 'rate_pred']
    if len(places) == 0:
        return pd.DataFrame([], columns=result_columns)

    # One row per candidate place. The user's features are broadcast to every
    # row, and the column order mirrors the training matrix (FCLTY_NM last).
    candidates = pd.DataFrame({
        'GENDER': gender,
        'AGE_GRP': age_group,
        'TRAVEL_STYL_5': styl5,
        'TRAVEL_STYL_6': styl6,
        'TRAVEL_STYL_7': styl7,
        'TRAVEL_STYL_8': styl8,
        'TRAVEL_COMPANIONS_NUM': comp_num,
        'FCLTY_NM': list(places),
    })

    # 'FCLTY_NM' must be declared categorical exactly once, whether or not the
    # notebook-level cat_features list already contains it.
    pool_cat_features = list(dict.fromkeys(list(cat_features) + ['FCLTY_NM']))

    # Score all places in a single batch instead of one Pool/predict per place.
    candidate_pool = Pool(candidates, cat_features=pool_cat_features)
    predictions = cb_reg.predict(candidate_pool)

    scored = [(place, round(stfn, 3)) for place, stfn in zip(places, predictions)]
    # sorted() is stable, so equal scores keep their original relative order.
    ranked = sorted(scored, key=lambda item: item[1], reverse=True)

    return pd.DataFrame(ranked, columns=result_columns)

In [15]:
def recommend_category(selected_category, ct_sim, main_df):
    """List every facility with the similarity of its category to the selected one.

    Categories are visited from most to least similar to ``selected_category``.
    A facility that appears under several categories keeps the score of the
    first (most similar) category it was seen in.

    Parameters
    ----------
    selected_category
        Column label of ``ct_sim`` to compare the other categories against.
    ct_sim : pandas.DataFrame
        Category-by-category similarity table.
    main_df : pandas.DataFrame
        Facility table with ``CTGRY_TWO_NM`` and ``FCLTY_NM`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``FCLTY_NM`` and ``similarity`` (rounded to three decimals),
        in visiting order.

    Raises
    ------
    KeyError
        If ``selected_category`` is not a column of ``ct_sim``.
    """
    # Categories ordered by similarity to the selected one, highest first.
    sim_scores = ct_sim[selected_category].sort_values(ascending=False)

    # facility name -> similarity. The dict doubles as the "already seen" set,
    # so duplicate detection is O(1) instead of scanning a list.
    category_similarity = {}

    for category, raw_similarity in sim_scores.items():
        similarity = round(raw_similarity, 3)
        # NOTE(review): a similarity that rounds to exactly 1.0 (in practice
        # the selected category itself) is replaced with 0.7; the rationale for
        # this constant is not visible in this notebook.
        if similarity == 1.000:
            similarity = 0.700

        facilities = main_df.loc[main_df['CTGRY_TWO_NM'] == category, 'FCLTY_NM']
        for fclty in facilities:
            if fclty not in category_similarity:  # skip places already recommended
                category_similarity[fclty] = similarity

    # Convert the place -> similarity mapping into a two-column frame.
    recommendations_df = pd.DataFrame(list(category_similarity.items()), columns=['FCLTY_NM', 'similarity'])
    return recommendations_df

In [17]:
def combine_recommend(gender, age_group, comp_num, styl5, styl6, styl7, styl8, selected_category, ct_sim, main_df):
    """Blend the predicted rating with category similarity into one ranking.

    Calls ``recommend_user_info`` for the per-place predicted rating and
    ``recommend_category`` for the per-place category similarity, keeps only
    places present in both, and scores them as
    ``0.8 * rate_pred_normalized + 0.2 * similarity``.

    Returns
    -------
    pandas.DataFrame
        Columns ``FCLTY_NM``, ``rate_pred``, ``rate_pred_normalized``,
        ``similarity`` and ``final_score``, sorted by ``final_score``
        descending with a fresh 0..n-1 index.
    """
    rating_scores = recommend_user_info(gender, age_group, comp_num, styl5, styl6, styl7, styl8)
    similarity_scores = recommend_category(selected_category, ct_sim, main_df)

    ranked = (
        rating_scores
        # Rescale the rating from the 1..5 range to 0..1 so it is comparable
        # with the similarity score.
        .assign(rate_pred_normalized=lambda df: (df['rate_pred'] - 1) / (5 - 1))
        .merge(similarity_scores, on='FCLTY_NM', how='inner')
        .assign(final_score=lambda df: df['rate_pred_normalized'] * 0.8 + df['similarity'] * 0.2)
        .sort_values(by='final_score', ascending=False)
        .reset_index(drop=True)
    )

    return ranked

In [34]:
def final_recommend(gender, age_group, comp_num, styl5, styl6, styl7, styl8, selected_category, ct_sim, main_df,
                            filter_dspsn_prkplce_at=False, filter_dspsn_toilet_at=False, filter_wchair_hold_at=False,
                            filter_guid_dog_acp_posbl_at=False, filter_brll_guid_at=False, filter_klang_vic_guid_at=False):
    """Return the names of the top 10 recommended places after barrier-free filtering.

    The ranking comes from ``combine_recommend``. Each ``filter_*`` flag that
    is truthy removes the rows whose matching ``main_df`` column equals 0;
    rows with any other value (including missing values) are kept.

    Parameters
    ----------
    gender, age_group, comp_num, styl5, styl6, styl7, styl8, selected_category, ct_sim, main_df
        Forwarded unchanged to ``combine_recommend``. ``main_df`` must also
        provide ``FCLTY_NM`` and the six barrier-free columns listed below.
    filter_dspsn_prkplce_at, filter_dspsn_toilet_at, filter_wchair_hold_at,
    filter_guid_dog_acp_posbl_at, filter_brll_guid_at, filter_klang_vic_guid_at
        Enable filtering on the column of the same name (without ``filter_``,
        upper-cased).

    Returns
    -------
    list
        Up to 10 unique ``FCLTY_NM`` values in ranking order.
    """
    # Barrier-free column -> whether the caller asked to filter on it.
    requested_filters = {
        'DSPSN_PRKPLCE_AT': filter_dspsn_prkplce_at,            # accessible parking
        'DSPSN_TOILET_AT': filter_dspsn_toilet_at,              # accessible toilet
        'WCHAIR_HOLD_AT': filter_wchair_hold_at,                # wheelchairs available
        'GUID_DOG_ACP_POSBL_AT': filter_guid_dog_acp_posbl_at,  # guide dogs admitted
        'BRLL_GUID_AT': filter_brll_guid_at,                    # braille guidance
        'KLANG_VIC_GUID_AT': filter_klang_vic_guid_at,          # Korean audio guidance
    }
    barrier_free_columns = list(requested_filters)

    ranked = combine_recommend(gender, age_group, comp_num, styl5, styl6, styl7, styl8, selected_category, ct_sim, main_df)
    # Attach the barrier-free flags to every ranked place.
    with_flags = pd.merge(ranked, main_df[['FCLTY_NM'] + barrier_free_columns], on='FCLTY_NM', how='left')

    # Apply only the filters that were switched on; each one drops the rows
    # where the flag is 0.
    for column, enabled in requested_filters.items():
        if enabled:
            with_flags = with_flags[~(with_flags[column] == 0)].reset_index(drop=True)

    # Collapse repeated place names and keep the ten best-ranked ones.
    top_names = with_flags.drop_duplicates(subset='FCLTY_NM')['FCLTY_NM'].head(10)
    return top_names.tolist()

In [35]:
# Example: category '관광지', keeping only places with accessible parking and
# an accessible toilet.
final_recommend(
    gender=0, age_group=20, comp_num=3,
    styl5=7, styl6=4, styl7=5, styl8=6,
    selected_category='관광지', ct_sim=ct_sim, main_df=main_df,
    filter_dspsn_prkplce_at=True, filter_dspsn_toilet_at=True,
    filter_wchair_hold_at=False, filter_guid_dog_acp_posbl_at=False,
    filter_brll_guid_at=False, filter_klang_vic_guid_at=False,
)

['김녕해수욕장',
 '에코랜드테마파크',
 '카멜리아힐',
 '섭지코지',
 '절물자연휴양림',
 '9.81파크',
 '훈데르트바서파크',
 '선녀와나무꾼테마공원',
 '서프라이즈테마파크',
 '더마파크']

In [36]:
# Example: category '전시/기념관', keeping only places that admit guide dogs
# and offer braille guidance.
final_recommend(
    gender=0, age_group=50, comp_num=1,
    styl5=3, styl6=2, styl7=6, styl8=2,
    selected_category='전시/기념관', ct_sim=ct_sim, main_df=main_df,
    filter_dspsn_prkplce_at=False, filter_dspsn_toilet_at=False,
    filter_wchair_hold_at=False, filter_guid_dog_acp_posbl_at=True,
    filter_brll_guid_at=True, filter_klang_vic_guid_at=False,
)

['김만덕기념관',
 '그리스신화박물관',
 '국립제주박물관',
 '너븐숭이4.3기념관',
 '제주항공우주박물관',
 '제주도립미술관',
 '제주현대미술관',
 'ICC제주국제컨벤션센터',
 '남원큰엉해변',
 '선녀와나무꾼테마공원']