In [1]:
import pandas as pd
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Keep rendered DataFrames compact: at most 10 rows and 10 columns are shown.
# The full option names are spelled out; abbreviated names such as
# "display.max_row" only resolve through pandas' pattern matching of option
# keys and stop working as soon as the abbreviation becomes ambiguous.
pd.set_option("display.max_rows", 10)
pd.set_option("display.max_columns", 10)

In [3]:
# Load the product catalogue and the review transactions from Excel and
# discard the "Unnamed: 0" column each sheet carries
# (presumably a saved index column - TODO confirm).
product_info = pd.read_excel("../../data/glowpick_product_info.xlsx").drop(columns="Unnamed: 0")
transaction = pd.read_excel("../../data/glowpick_transaction.xlsx").drop(columns="Unnamed: 0")

# Per-user table; the notebook later reads its "nickname" and "cluster" columns.
user_info = pd.read_csv("../../data/glowpick_user_info_clustered.csv")

## 1. 데이터 변형

### 1-1. 제품 효능 토큰 TF-IDF 벡터화

In [5]:
# Turn the tag string of every product into whitespace-separated tokens:
# "#" markers are removed and commas become spaces.
# Missing values are replaced by an empty string first; converting them with
# str() would yield the literal text "nan", which the TF-IDF vectorizer would
# then learn as a real tag shared by every untagged product.
product_info["function"] = (
    product_info["function"]
    .fillna("")
    .astype(str)
    .str.replace("#", "", regex=False)
    .str.replace(",", " ", regex=False)
)

In [6]:
# Learn the TF-IDF vocabulary and IDF weights from the cleaned tag strings.
tfidfv = TfidfVectorizer()
tfidfv.fit(product_info["function"].tolist())

In [7]:
# Dense TF-IDF matrix: one row per product, one column per vocabulary term.
dtm_array = tfidfv.transform(product_info["function"].tolist()).toarray()

# vocabulary_ maps term -> column index; ordering the terms by that index
# gives the column labels in the same order as the matrix columns.
term_labels = sorted(tfidfv.vocabulary_, key=tfidfv.vocabulary_.get)

dtm = pd.DataFrame(
    dtm_array,
    index=product_info["product_name"].tolist(),
    columns=term_labels,
)

In [16]:
# Preview the product x term TF-IDF matrix; the rendering is truncated by the
# pandas display options set in the first cell.
dtm

Unnamed: 0,12,13,1mm,2색이상,2주,...,휴대용,휴대용브러쉬,흘러내림방지,흡수력,히알루론산
글로발 체리 미러클 휘니쉬,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
스마트 치실,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
로즈 하이드라 글로리 앰플,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
자음생크림 소프트,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
마이크로 에센스,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
퓨어 래디언트 프로텍션 아쿠아 글로우 쿠션 파운데이션 [SPF23/PA++],0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
파워 10 포뮬라 엘아이 젤리 패드 감초줄렌,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
파워 10 포뮬라 엘아이 크림 감초줄렌,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0
태그 원더랜드 아이 글리터,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0


### 1-2. 제품 - 제품 유사도 행렬 계산

In [8]:
# Pairwise cosine similarity between the TF-IDF rows of all products,
# labelled with the product names on both axes.
product_labels = dtm.index.tolist()
item_based_collabor_function = pd.DataFrame(
    cosine_similarity(dtm),
    index=product_labels,
    columns=product_labels,
)

# Remove duplicated columns.
# NOTE(review): duplicates are detected by identical *values*, not by product
# name, so distinct products with identical similarity profiles are collapsed
# into one - confirm this is intended.
is_first_occurrence = ~item_based_collabor_function.T.duplicated()
item_based_collabor_function = item_based_collabor_function.loc[:, is_first_occurrence]

# Remove duplicated rows (again compared by value).
item_based_collabor_function = item_based_collabor_function.drop_duplicates()

In [9]:
# Preview the product x product cosine-similarity matrix (truncated display).
item_based_collabor_function

Unnamed: 0,글로발 체리 미러클 휘니쉬,스마트 치실,로즈 하이드라 글로리 앰플,자음생크림 소프트,마이크로 에센스,...,모링가 샤워젤,퓨어 래디언트 프로텍션 아쿠아 글로우 쿠션 파운데이션 [SPF23/PA++],파워 10 포뮬라 엘아이 젤리 패드 감초줄렌,태그 원더랜드 아이 글리터,나이트 시리즈 아이섀도우
글로발 체리 미러클 휘니쉬,1.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.0
스마트 치실,0.0,1.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.072076,0.0,0.0
로즈 하이드라 글로리 앰플,0.0,0.000000,1.000000,0.087746,0.241988,...,0.182887,0.000000,0.522514,0.0,0.0
자음생크림 소프트,0.0,0.000000,0.087746,1.000000,0.410287,...,0.097527,0.093476,0.000000,0.0,0.0
마이크로 에센스,0.0,0.000000,0.241988,0.410287,1.000000,...,0.268964,0.000000,0.303034,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
모링가 샤워젤,0.0,0.000000,0.182887,0.097527,0.268964,...,1.000000,0.000000,0.093141,0.0,0.0
퓨어 래디언트 프로텍션 아쿠아 글로우 쿠션 파운데이션 [SPF23/PA++],0.0,0.000000,0.000000,0.093476,0.000000,...,0.000000,1.000000,0.000000,0.0,0.0
파워 10 포뮬라 엘아이 젤리 패드 감초줄렌,0.0,0.072076,0.522514,0.000000,0.303034,...,0.093141,0.000000,1.000000,0.0,0.0
태그 원더랜드 아이 글리터,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,1.0,0.0


## 2. 효능 기반 추천 알고리즘 데모

### 2-1. 제품 추천 함수 정의

In [10]:
# Attach each reviewer's cluster label to their transactions (inner join on
# "nickname", so transactions of users missing from user_info are dropped).
# Any "cluster" column left over from a previous run of this cell is removed
# first: merging a second time would otherwise produce "cluster_x"/"cluster_y"
# and break every later `transaction.cluster` lookup.
transaction = transaction.drop(columns="cluster", errors="ignore").merge(
    user_info[["nickname", "cluster"]], on="nickname"
)

In [12]:
def get_latest_product_df(cluster):
    """Return the most recent review row of every user in one cluster.

    Reads the module-level ``transaction`` frame, which must already contain
    the ``cluster``, ``nickname`` and ``review_date`` columns.

    Parameters
    ----------
    cluster
        Cluster label; rows whose ``transaction.cluster`` equals it are kept.

    Returns
    -------
    pandas.DataFrame
        One row per ``nickname``: the row with the greatest ``review_date``.
    """
    # NOTE(review): the previous filter additionally required
    # `transaction.cluster > 3`, which contradicts the equality test for every
    # cluster <= 3 and made e.g. get_latest_product_df(2) always empty.
    # It was presumably meant to be a "positive review" (rating) filter; the
    # rating column is not visible here - TODO confirm and re-add if needed.
    dataframe = transaction.loc[transaction.cluster == cluster]

    # Sort newest first, then keep the first row of each user.
    # Assumes review_date sorts chronologically (e.g. zero-padded
    # "YYYY.MM.DD" strings) - TODO confirm.
    latest_purchase = (
        dataframe.sort_values(by="review_date", ascending=False)
        .groupby("nickname")
        .head(1)
    )

    return latest_purchase


def get_recommendation_by_nickname(latest_purchase, user_name):
    """Print the five products most similar to a user's latest reviewed product.

    Similarities are read from the module-level ``item_based_collabor_function``
    matrix (product x product).

    Parameters
    ----------
    latest_purchase : pandas.DataFrame
        Latest review per user, as returned by ``get_latest_product_df``; needs
        the ``nickname``, ``product_name`` and ``review_date`` columns.
    user_name
        Nickname to look up in ``latest_purchase``.

    Returns
    -------
    None
        The report is printed, nothing is returned.

    Raises
    ------
    IndexError
        If ``user_name`` has no row in ``latest_purchase``.
    KeyError
        If the user's product is not a row of the similarity matrix.
    """
    top_n = 5  # the report template below has exactly five slots

    # Use the frame supplied by the caller. It used to be overwritten with a
    # frame recomputed from the global `transaction`, which silently discarded
    # the caller's cluster filter.
    user_rows = latest_purchase.loc[latest_purchase.nickname == user_name]
    if user_rows.empty:
        raise IndexError("no latest review found for user %r" % (user_name,))

    product = user_rows["product_name"].iloc[0]
    purchase_date = user_rows["review_date"].iloc[0]

    if product not in item_based_collabor_function.index:
        raise KeyError("product %r is not in the similarity matrix" % (product,))

    scores = item_based_collabor_function.loc[product]
    if isinstance(scores, pd.DataFrame):
        # The same product name labels several rows: use the first one.
        scores = scores.iloc[0]

    # Rank by similarity and remove the product itself by label rather than
    # assuming it is the first entry (ties or an all-zero vector break that).
    ranked = scores.sort_values(ascending=False)
    ranked = ranked[ranked.index != product]
    recommended_products = ranked.index.tolist()[:top_n]

    # Pad so the fixed five-slot template never fails on a tiny catalogue.
    recommended_products += ["-"] * (top_n - len(recommended_products))

    print("""
    마지막 긍정 리뷰 작성 상품 : %s

    마지막 리뷰 일자 : %s

    추천 유사 상품 Top 5: 
    
    1. %s
    
    2. %s
    
    3. %s
    
    4. %s
    
    5. %s

    """%(product, purchase_date,
         recommended_products[0],
         recommended_products[1],
         recommended_products[2],
         recommended_products[3],
         recommended_products[4]))
    

### 2-2. 함수 적용

In [13]:
# Latest review per user for the two clusters used in the demo (labels 2 and 5).
users_group_1, users_group_2 = map(get_latest_product_df, (2, 5))

In [14]:
## Recommendation example for tier-1 reviewer 기린91

get_recommendation_by_nickname(users_group_1, "기린91")


    마지막 긍정 리뷰 작성 상품 : 글로발 체리 미러클 휘니쉬

    마지막 리뷰 일자 : 2022.05.14

    추천 유사 상품 Top 5: 
    
    1. 그린 테라피 샴푸
    
    2. (빅스톤 컬렉션) 매직프레스 슈퍼 슬림핏
    
    3. 블레미쉬 트리트먼트
    
    4. 부스티모 부스팅 샴푸
    
    5. 수드 앤 릴렉스 모이스처라이징 바디 워시

    


In [15]:
## Recommendation example for tier-2 reviewer maeng89

get_recommendation_by_nickname(users_group_2, "maeng89")


    마지막 긍정 리뷰 작성 상품 : 화이트닝 드레스 미백 톤업크림

    마지막 리뷰 일자 : 2022.05.14

    추천 유사 상품 Top 5: 
    
    1. 어린쑥 수분진정 크림
    
    2. 리얼 하트리프 수딩 크림
    
    3. 아쿠아 피토플렉스 크림 인 토너 미스트
    
    4. 바디 데오 스틱 [무향]
    
    5. 비트 리프레쉬 패드

    
