In [1]:
import numpy as np
import pandas as pd
import GoEat
from GoEatWebCrawler import recipe_finder
from collections import Counter
from konlpy.tag import Hannanum
from konlpy.tag import Kkma
from konlpy.utils import concordance, pprint
import scipy
import sklearn
import konlpy
import re

In [84]:
# Load the persisted tables from data/info.
# Forward slashes are used instead of '\\' so the paths resolve on Windows,
# Linux and macOS alike (a backslash is only a separator on Windows).
df_user_info = pd.read_csv('data/info/df_user_info.csv')
df_user_detail = pd.read_csv('data/info/df_user_detail.csv')
food_df = pd.read_csv('data/info/food_df.csv')
food_detail_df = pd.read_csv('data/info/food_detail_df.csv')
interactions_df = pd.read_csv('data/info/interactions_df.csv')

In [10]:
# Collapse repeated (userId, foodId) interactions to their mean eventStrength,
# pass each mean through GoEat.smooth_user_preference, and flatten the
# grouped result back into ordinary columns.
interactions_full_df = (
    interactions_df
    .groupby(['userId', 'foodId'])['eventStrength']
    .mean()
    .apply(GoEat.smooth_user_preference)
    .reset_index()
)

In [11]:
# Split the aggregated interactions, holding out 20% as the test set.
# NOTE(review): no seed is passed here; check whether the GoEat helper fixes
# one internally, otherwise the split is not reproducible.
interactions_train_df, interactions_test_df = GoEat.interactions_train_test_split(
    interactions_full_df,
    test_size=0.20,
)

# interactions on Train set: 8280
# interactions on Test set: 2070


In [12]:
# Derive the three "indexed" frames (full / train / test) from the GoEat helper.
# presumably these are indexed by userId for fast per-user lookup; verify in GoEat.
(
    interactions_full_indexed_df,
    interactions_train_indexed_df,
    interactions_test_indexed_df,
) = GoEat.indexed_df(
    interactions_full_df,
    interactions_train_df,
    interactions_test_df,
)

In [13]:
# Build the collaborative-filtering predictions via GoEat's SVD helper with 20 factors.
# NOTE(review): this is fed the raw `interactions_df`, not the aggregated
# `interactions_full_df` nor the train split built above; confirm this is intended.
cf_preds_df = GoEat.users_items_svd(interactions_df, nfactors = 20)

In [14]:
# Wrap the SVD predictions and the food table in GoEat's collaborative-filtering recommender.
cf_model = GoEat.CFRecommender(cf_preds_df, food_df)


In [11]:
# Interactive step: per the recorded output, this prompts for a userId and
# displays recommendations with recStrength, foodId and foodName.
GoEat.item_recommenation(cf_model,food_df)

userId를 입력해주세요 :  1


Unnamed: 0,recStrength,foodId,foodName
1,5.649437,15,게장
2,5.580856,12,콩국수
3,5.089097,10,잔치국수
4,5.085647,8,제육볶음
5,4.880808,18,돈부리
6,4.803932,0,볶음밥
7,4.742806,38,오므라이스
8,4.705226,13,비빔밥
9,4.577618,2,된장찌개
10,4.489017,5,불고기


In [19]:
# Start the detail table as an independent copy of food_df.
# NOTE(review): this rebinds `food_detail_df`, discarding the frame read from
# food_detail_df.csv in the loading cell; confirm which one downstream cells expect.
food_detail_df = food_df.copy()

In [85]:
def crawl(food_detail_df, min_count=3):
    """Crawl a recipe for every food and extract its frequent nouns, in place.

    Two passes are made over ``food_detail_df``:

    1. For each row, ``recipe_finder(foodName, 2)`` is called and the
       stringified result is stored in the ``foodRecipe`` column.
    2. For each row, nouns are extracted from ``foodRecipe`` with
       ``Hannanum().nouns``. Nouns that occur fewer than ``min_count`` times
       in that recipe, or that contain a digit, are dropped. The stringified
       list of the surviving nouns (repetitions kept) is stored in the
       ``foodRecipeNoun`` column.

    Parameters
    ----------
    food_detail_df : pandas.DataFrame
        Frame with a ``foodName`` column. It is mutated in place.
    min_count : int, default 3
        Minimum number of occurrences a noun needs within one recipe to be kept.

    Returns
    -------
    None
    """
    total = len(food_detail_df)
    # Walk the frame's real index labels so this also works when the index
    # is not the default 0..n-1 range.
    labels = list(food_detail_df.index)

    print('[Recipe Web Crawling Start]')
    for position, label in enumerate(labels, start=1):
        food = food_detail_df.loc[label, 'foodName']
        # The meaning of the second argument (2) is defined by recipe_finder
        # in GoEatWebCrawler. TODO confirm.
        recipe = recipe_finder(food, 2)
        food_detail_df.loc[label, 'foodRecipe'] = str(recipe)

        if position % 5 == 0:
            # Progress is relative to the frame being processed, not a global.
            print(round(position / total * 100, 2), 'percent Done')

    print('Complete!!')
    print('')
    print('[noun extract start]')

    food_detail_df['foodRecipeNoun'] = ''
    # Build the tagger and the pattern once instead of once per row.
    tagger = Hannanum()
    # Matches a non-empty word made only of non-digit characters.
    digit_free = re.compile(r'^\D*\D$')

    for position, label in enumerate(labels):
        doc = food_detail_df.loc[label, 'foodRecipe']
        nouns = tagger.nouns(doc)
        counts = Counter(nouns)
        # Filter by count directly: list.remove() would delete only the first
        # occurrence and let a noun seen twice slip through.
        only_word = [
            word for word in nouns
            if counts[word] >= min_count and digit_free.match(word)
        ]

        food_detail_df.loc[label, 'foodRecipeNoun'] = str(only_word)
        if position % 5 == 0:
            print(round(position / total * 100, 2), ' percent done')
    print('Complete')

In [28]:
# Read the Korean stop-word list, one entry per line, dropping the trailing newline.
# A forward-slash path is used so the file resolves on non-Windows hosts as well.
with open('data/stopwords-ko.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
    stopwords_ko = [line.rstrip('\n') for line in lines]

In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [63]:
# Vectorise the extracted recipe nouns with GoEat's TF-IDF helper, using the
# Korean stop-word list.
tfidf_matrix, tfidf_feature_names = GoEat.tfidf_vectorizer(food_detail_df, 'foodRecipeNoun', stopwords_ko)
# Preview only: densify just the first five rows rather than the whole sparse
# matrix, since only the head is displayed.
pd.DataFrame(tfidf_matrix[:5].toarray(), columns=tfidf_feature_names).head()

Unnamed: 0,n갓김치,n갓김치 재료,n겨자장,n그런,n그런 게장무침,n남,n남 요리,n냄비,n냄비 닭한마리,n다시마,...,후춧가루 통조림,훈제오리,훈제오리 식용유,훈제오리고기,훈제오리고기 또띠아,훈제오리고기 부추,훈제오리고기 양파,훈제오리고기 칠리소스,훠궈,훠궈 마라탕
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052184,0.052184,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [64]:
# Food ids in row order of food_detail_df; get_item_profile uses the position
# in this list as the row number in tfidf_matrix.
item_ids = food_detail_df['foodId'].tolist()

In [45]:
def get_item_profile(item_id):
    """Return the TF-IDF row for ``item_id`` as a one-row slice of ``tfidf_matrix``.

    The row number is the position of ``item_id`` in the notebook-level
    ``item_ids`` list; ``list.index`` raises ``ValueError`` for an unknown id.
    """
    row = item_ids.index(item_id)
    return tfidf_matrix[row:row + 1]

def get_item_profiles(ids):
    """Stack the TF-IDF rows of every id in ``ids`` into one sparse matrix.

    Rows appear in the same order as ``ids``.
    """
    rows = []
    for item_id in ids:
        rows.append(get_item_profile(item_id))
    return scipy.sparse.vstack(rows)

def build_users_profile(person_id, interactions_indexed_df):
    """Build one user's L2-normalised content profile.

    The profile is the average of the TF-IDF rows of the foods the user
    interacted with, weighted by ``eventStrength``, then normalised with
    scikit-learn's ``normalize``.

    Parameters
    ----------
    person_id
        Index label of the user in ``interactions_indexed_df``.
    interactions_indexed_df : pandas.DataFrame
        Interactions indexed by user id, with ``foodId`` and
        ``eventStrength`` columns.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_features)``.
    """
    # Imported here so the function does not rely on `sklearn.preprocessing`
    # having been loaded as a side effect of another import.
    from sklearn.preprocessing import normalize

    # List-based .loc always yields a DataFrame. A scalar label returns a
    # Series when the user has exactly one interaction, which breaks the
    # column lookups below.
    interactions_person_df = interactions_indexed_df.loc[[person_id]]
    user_item_profiles = get_item_profiles(interactions_person_df['foodId'])

    user_item_strengths = np.array(interactions_person_df['eventStrength']).reshape(-1, 1)
    # Weighted average of item profiles by the interaction strength.
    weighted_sum = np.sum(user_item_profiles.multiply(user_item_strengths), axis=0)
    # Summing a sparse matrix yields np.matrix, which recent scikit-learn
    # versions reject. Convert to a plain 2-D ndarray first.
    user_item_strengths_weighted_avg = (
        np.asarray(weighted_sum).reshape(1, -1) / np.sum(user_item_strengths)
    )
    user_profile_norm = normalize(user_item_strengths_weighted_avg)
    return user_profile_norm

def build_users_profiles():
    """Build a ``{userId: profile}`` dict for every user in ``interactions_full_df``.

    Only interactions whose ``foodId`` appears in ``food_df`` are considered.
    Both frames are read from the notebook-level namespace.
    """
    known_food_mask = interactions_full_df['foodId'].isin(food_df['foodId'])
    interactions_indexed_df = interactions_full_df[known_food_mask].set_index('userId')
    return {
        person_id: build_users_profile(person_id, interactions_indexed_df)
        for person_id in interactions_indexed_df.index.unique()
    }

In [72]:
# Build every user's content profile once; the recommender class below reads
# this dict from the notebook namespace. The cell displays how many users got a profile.
user_profiles = build_users_profiles()
len(user_profiles)

218

In [74]:
# Ask for a user id and look up that user's profile vector.
myuserid = int(input('perosnId를 입력해주세요 : '))
myprofile = user_profiles[myuserid]
print(myprofile.shape)

# Pair each TF-IDF token with its weight in the profile, strongest first.
token_relevance_pairs = zip(tfidf_feature_names, myprofile.flatten().tolist())
ranked_pairs = sorted(token_relevance_pairs, key=lambda pair: -pair[1])
df_profile = pd.DataFrame(ranked_pairs, columns=['token', 'relevance'])
df_profile.head(10)

perosnId를 입력해주세요 :  5


(1, 4351)


Unnamed: 0,token,relevance
0,삼겹살,0.178694
1,소스,0.150896
2,소금,0.146369
3,계란,0.139798
4,사리곰탕,0.136383
5,불고기,0.131245
6,준비,0.127106
7,돼지고기,0.125274
8,야채,0.118912
9,대파,0.118478


In [71]:
# Instantiate the packaged GoEat content-based recommender.
# NOTE(review): the same name is rebound to the notebook-local
# ContentBasedRecommender class further down; confirm which one is meant to be used.
content_based_recommender_model = GoEat.ContentBasedRecommender(food_df,item_ids)

In [73]:
class ContentBasedRecommender:
    """Recommend foods whose TF-IDF profile is closest to a user's profile.

    The similarity lookup reads ``user_profiles``, ``tfidf_matrix`` and
    ``item_ids`` from the notebook namespace at call time, so those must be
    defined before ``recommend_items`` is called.
    """

    MODEL_NAME = 'Content-Based'

    def __init__(self, items_df=None):
        """Store the item table used to attach food names in verbose mode.

        Parameters
        ----------
        items_df : pandas.DataFrame, optional
            Frame with ``foodId`` and ``foodName`` columns. Only required
            when ``recommend_items`` is called with ``verbose=True``.
        """
        # Snapshot of the notebook-level id list taken at construction time.
        self.item_ids = item_ids
        self.items_df = items_df

    def get_model_name(self):
        """Return the display name of this model."""
        return self.MODEL_NAME

    def _get_similar_items_to_user_profile(self, person_id, topn=1000):
        """Return up to ``topn`` ``(foodId, similarity)`` pairs, most similar first."""
        # Imported locally so the class does not depend on a later notebook
        # cell having imported cosine_similarity.
        from sklearn.metrics.pairwise import cosine_similarity

        # Cosine similarity between the user profile and every item profile.
        cosine_similarities = cosine_similarity(user_profiles[person_id], tfidf_matrix)
        # Column indices of the top similar items (ascending order).
        similar_indices = cosine_similarities.argsort().flatten()[-topn:]
        # Order the selected items by decreasing similarity.
        similar_items = sorted(
            [(item_ids[i], cosine_similarities[0, i]) for i in similar_indices],
            key=lambda pair: -pair[1],
        )
        return similar_items

    def recommend_items(self, user_id, items_to_ignore=None, topn=10, verbose=False):
        """Return the ``topn`` most similar foods for ``user_id``.

        Parameters
        ----------
        user_id
            Key of the user in ``user_profiles``.
        items_to_ignore : iterable, optional
            Food ids to exclude, e.g. foods the user already interacted with.
            Defaults to excluding nothing.
        topn : int, default 10
            Maximum number of rows returned.
        verbose : bool, default False
            When true, the result also carries ``foodName`` from ``items_df``.

        Returns
        -------
        pandas.DataFrame
            Columns ``foodId`` and ``recStrength``; in verbose mode
            ``recStrength``, ``foodId`` and ``foodName``.

        Raises
        ------
        ValueError
            If ``verbose`` is true but no ``items_df`` was supplied.
        """
        # A None default avoids sharing one mutable list between calls.
        # A set makes the membership test below O(1).
        ignored = set(items_to_ignore) if items_to_ignore is not None else set()

        similar_items = self._get_similar_items_to_user_profile(user_id)
        # Drop the items the caller asked to ignore.
        similar_items_filtered = [item for item in similar_items if item[0] not in ignored]

        recommendations_df = pd.DataFrame(
            similar_items_filtered, columns=['foodId', 'recStrength']
        ).head(topn)

        if verbose:
            if self.items_df is None:
                raise ValueError('"items_df" is required in verbose mode')

            recommendations_df = recommendations_df.merge(
                self.items_df, how='left', left_on='foodId', right_on='foodId'
            )[['recStrength', 'foodId', 'foodName']]

        return recommendations_df
    
# Instantiate the notebook-local recommender; food_df supplies the food names for verbose output.
content_based_recommender_model = ContentBasedRecommender(food_df)

In [50]:
from sklearn.metrics.pairwise import cosine_similarity

In [75]:
# Ask for a user id and fetch that user's content-based recommendations.
personNum = int(input('userId를 입력해주세요 : '))

cb_model = content_based_recommender_model.recommend_items(personNum, verbose=True)

# Write each food's name onto the recommendation rows carrying its id.
for food_id, food_name in zip(food_df.foodId, food_df.foodName):
    cb_model.loc[cb_model.foodId == food_id, 'foodName'] = food_name

# Number the recommendations from 1 for display.
cb_model.index = pd.RangeIndex(start=1, stop=len(cb_model) + 1)

cb_model

userId를 입력해주세요 :  5


Unnamed: 0,recStrength,foodId,foodName
1,0.292358,21,라멘
2,0.292358,37,라면
3,0.273069,6,삼겹살
4,0.265583,29,짜장면
5,0.244126,5,불고기
6,0.243941,44,타코
7,0.242486,24,스테이크
8,0.235259,40,브리또
9,0.234404,27,함박스테이크
10,0.233688,8,제육볶음


In [83]:
# Persist the working tables back to data/info without the index column.
# Forward slashes are used instead of '\\' so the paths resolve on Windows,
# Linux and macOS alike.
df_user_info.to_csv('data/info/df_user_info.csv', index=False)
df_user_detail.to_csv('data/info/df_user_detail.csv', index=False)
food_df.to_csv('data/info/food_df.csv', index=False)
food_detail_df.to_csv('data/info/food_detail_df.csv', index=False)
interactions_df.to_csv('data/info/interactions_df.csv', index=False)


In [213]:
# Interactive step: per the recorded output, this prompts for a userId and
# displays that user's collaborative-filtering recommendations.
GoEat.item_recommenation(cf_model,food_df)

userId를 입력해주세요 :  165


Unnamed: 0,recStrength,foodId,foodName
1,5.657943,15,게장
2,4.943323,40,브리또
3,4.943323,44,타코
4,4.781042,25,리조또
5,4.726357,10,잔치국수
6,4.602865,4,닭도리탕
7,4.537646,9,족발
8,4.436405,49,막창
9,4.394851,56,소고기
10,4.319438,31,샤브샤브


#### 