In [1]:
import numpy as np
import scipy
import pandas as pd
import math
import random
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.utils import shuffle
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt

Import Data
---

In [2]:
# Load the food-rating survey responses.
# A forward-slash path is accepted by pandas on Windows, macOS and Linux alike,
# whereas the previous backslash-separated path only resolved on Windows.
df_food = pd.read_csv('data/food_rating.csv')
df_food.head()

Unnamed: 0,타임스탬프,이름 ex) 홍길동,성별,만 나이 ex) 24,찜 (갈비찜/찜닭 등),갈비탕/설렁탕,곱창/막창,볶음밥,김치찌개,된장찌개,...,오므라이스,컵밥,브리또&타코,햄버거,샌드위치,치킨,혼밥은 주로 어떻게 하나요?,혼밥할 때 주로 먹는 음식 메뉴는 무엇인가요? 한 가지만 적어주세요.,(선택사항) 데이트를 하는 상황에서는 주로 무슨 음식을 즐겨 먹나요? 한 가지만 적어주세요.,친한 친구들 여럿이서 만나는 자리에서는 무슨 음식을 즐겨 먹나요? 한 가지만 적어 주세요.
0,10-6-2018 14:32:49,이영건,남자,24,2,4,4,3,2,4,...,3,2,4,5,5,4,집에서 음식을 해 먹는다.,볶음밥,파스타,
1,10-6-2018 15:18:04,성창민,남자,20,5,5,0,5,5,5,...,5,4,5,1,1,3,밖에서 사 먹는다.,덥밥,데이트안함,고기꾸어먹음
2,10-6-2018 15:19:56,윤혜진,여자,20,4,4,3,4,3,5,...,4,3,2,4,3,4,밖에서 사 먹는다.,알밥,파스타,떡볶이
3,10-6-2018 15:21:10,한상욱,남자,21,4,4,5,4,4,2,...,4,2,3,3,4,4,밖에서 사 먹는다.,제육덮밥,스테이크,막창구이
4,10-6-2018 15:21:55,황준원,남자,20,3,5,3,5,4,4,...,5,4,5,5,5,5,배달을 시킨다.,햄버거,양식(파스타),양 많은 것(닭갈비)


In [3]:
# Turn the row index into an explicit 'userid' column so each respondent
# carries a stable identifier.
df_food_reset_index = df_food.reset_index().rename(columns={'index': 'userid'})
df_food_reset_index.head()

Unnamed: 0,userid,타임스탬프,이름 ex) 홍길동,성별,만 나이 ex) 24,찜 (갈비찜/찜닭 등),갈비탕/설렁탕,곱창/막창,볶음밥,김치찌개,...,오므라이스,컵밥,브리또&타코,햄버거,샌드위치,치킨,혼밥은 주로 어떻게 하나요?,혼밥할 때 주로 먹는 음식 메뉴는 무엇인가요? 한 가지만 적어주세요.,(선택사항) 데이트를 하는 상황에서는 주로 무슨 음식을 즐겨 먹나요? 한 가지만 적어주세요.,친한 친구들 여럿이서 만나는 자리에서는 무슨 음식을 즐겨 먹나요? 한 가지만 적어 주세요.
0,0,10-6-2018 14:32:49,이영건,남자,24,2,4,4,3,2,...,3,2,4,5,5,4,집에서 음식을 해 먹는다.,볶음밥,파스타,
1,1,10-6-2018 15:18:04,성창민,남자,20,5,5,0,5,5,...,5,4,5,1,1,3,밖에서 사 먹는다.,덥밥,데이트안함,고기꾸어먹음
2,2,10-6-2018 15:19:56,윤혜진,여자,20,4,4,3,4,3,...,4,3,2,4,3,4,밖에서 사 먹는다.,알밥,파스타,떡볶이
3,3,10-6-2018 15:21:10,한상욱,남자,21,4,4,5,4,4,...,4,2,3,3,4,4,밖에서 사 먹는다.,제육덮밥,스테이크,막창구이
4,4,10-6-2018 15:21:55,황준원,남자,20,3,5,3,5,4,...,5,4,5,5,5,5,배달을 시킨다.,햄버거,양식(파스타),양 많은 것(닭갈비)


Divide Data into User Information and Ratings
--

In [5]:
# Column positions that are NOT food ratings: the five leading respondent
# columns (0-4) and the four trailing survey-question columns (54-57).
user_info_index = [*range(5), *range(54, 58)]
food_drop_index = [*range(0, 5), *range(54, 58)]

In [6]:
# Split the survey column-wise: respondent metadata on one side, the
# per-food rating columns on the other.
df_user_info = df_food_reset_index.iloc[:, user_info_index]
non_rating_columns = df_food_reset_index.columns[food_drop_index]
df_food_rating = df_food_reset_index.drop(columns=non_rating_columns)
df_food_rating.head()

Unnamed: 0,찜 (갈비찜/찜닭 등),갈비탕/설렁탕,곱창/막창,볶음밥,김치찌개,된장찌개,닭갈비,닭도리탕,불고기,냉면(물/비빔),...,카레/커리,김밥,분식(떡볶이/튀김/순대),라면,오므라이스,컵밥,브리또&타코,햄버거,샌드위치,치킨
0,2,4,4,3,2,4,3,1,3,4,...,3,3,3,4,3,2,4,5,5,4
1,5,5,0,5,5,5,2,2,5,2,...,3,3,1,1,5,4,5,1,1,3
2,4,4,3,4,3,5,3,2,4,3,...,3,2,4,4,4,3,2,4,3,4
3,4,4,5,4,4,2,4,5,4,4,...,3,3,5,3,4,2,3,3,4,4
4,3,5,3,5,4,4,5,3,5,5,...,5,5,4,5,5,4,5,5,5,5


In [7]:
# Preview the respondent metadata (userid, timestamp, name, gender, age and
# the survey-question answers selected by user_info_index).
df_user_info.head()

Unnamed: 0,userid,타임스탬프,이름 ex) 홍길동,성별,만 나이 ex) 24,혼밥은 주로 어떻게 하나요?,혼밥할 때 주로 먹는 음식 메뉴는 무엇인가요? 한 가지만 적어주세요.,(선택사항) 데이트를 하는 상황에서는 주로 무슨 음식을 즐겨 먹나요? 한 가지만 적어주세요.,친한 친구들 여럿이서 만나는 자리에서는 무슨 음식을 즐겨 먹나요? 한 가지만 적어 주세요.
0,0,10-6-2018 14:32:49,이영건,남자,24,집에서 음식을 해 먹는다.,볶음밥,파스타,
1,1,10-6-2018 15:18:04,성창민,남자,20,밖에서 사 먹는다.,덥밥,데이트안함,고기꾸어먹음
2,2,10-6-2018 15:19:56,윤혜진,여자,20,밖에서 사 먹는다.,알밥,파스타,떡볶이
3,3,10-6-2018 15:21:10,한상욱,남자,21,밖에서 사 먹는다.,제육덮밥,스테이크,막창구이
4,4,10-6-2018 15:21:55,황준원,남자,20,배달을 시킨다.,햄버거,양식(파스타),양 많은 것(닭갈비)


In [8]:
# Reshape the wide ratings table into long records. stack() moves the food
# columns into the row index, so after reset_index() the three columns are:
# original row label, food column name, rating value.
df_food_rating_stack = pd.DataFrame(df_food_rating.stack()).reset_index()
# Shuffle with a fixed random_state so the row order is reproducible. The
# numeric food ids built later follow the order in which food names first
# appear in this frame, so an unseeded shuffle would renumber the foods on
# every run.
df_food_rating_stack = shuffle(df_food_rating_stack, random_state=42)
df_food_rating_stack.columns = ['personId', 'contentId', 'eventStrength']

추천 시스템을 위한 변수 새롭게 설정
--

In [9]:
# articles_df
# contentId : numeric id assigned to a food
# foodName  : name of the food that id stands for

# Ids are simply the positions of the food names in order of first appearance.
articles_df = (
    pd.Series(df_food_rating_stack['contentId'].unique(), name='foodName')
    .rename_axis('contentId')
    .reset_index()
)

In [10]:
# interactions_full_with_zeros_df
# personId      : numeric id of a respondent
# contentId     : numeric id of a food
# eventStrength : the respondent's rating of that food

interactions_full_with_zeros_df = df_food_rating_stack.copy().reset_index(drop=True)

# Translate every food name to its numeric id with one vectorised lookup.
# This replaces a Python loop that rewrote the column once per food through a
# boolean mask, and it leaves the column with an integer dtype rather than an
# object column holding a mix of values.
food_id_by_name = dict(zip(articles_df['foodName'], articles_df['contentId']))
interactions_full_with_zeros_df['contentId'] = (
    interactions_full_with_zeros_df['contentId'].map(food_id_by_name)
)

In [11]:
# Keep only rows that carry a rating; rows whose eventStrength is 0 are dropped.
has_rating = interactions_full_with_zeros_df['eventStrength'] != 0
interactions_full_df = interactions_full_with_zeros_df[has_rating]

In [12]:
# Hold out 20% of the interactions for testing. Stratifying on personId asks
# scikit-learn to keep each person's proportion of rows equal in both parts.
interactions_train_df, interactions_test_df = train_test_split(
    interactions_full_df,
    test_size=0.20,
    stratify=interactions_full_df['personId'],
    random_state=42,
)

n_train_interactions = len(interactions_train_df)
n_test_interactions = len(interactions_test_df)
print('# interactions on Train set: %d' % n_train_interactions)
print('# interactions on Test set: %d' % n_test_interactions)

# interactions on Train set: 6154
# interactions on Test set: 1539


In [18]:
# Preview the training interactions (personId, contentId, eventStrength).
interactions_train_df.head()

Unnamed: 0,personId,contentId,eventStrength
5976,107,4,4
7724,128,30,3
3382,15,0,5
4254,119,46,5
939,26,33,3


Popularity-Based Recommender
==

In [13]:
# Popularity of a food = mean rating over everyone who rated it.
df_food_grouped = (interactions_full_df
                   .groupby('contentId')['eventStrength']
                   .mean()
                   .reset_index())
# Number of ratings per food. The mean above no longer needs it; the name is
# kept in case other cells reference it.
grouped_rating = interactions_full_df.groupby(['contentId']).agg({'personId': 'count'}).reset_index()

# Best-rated foods first; ties are broken by ascending contentId.
df_food_ranking = df_food_grouped.sort_values(['eventStrength', 'contentId'],
                                              ascending=[False, True])

# Attach the food names with a single lookup instead of one masked assignment
# per food. Ids without a known name fall back to '' as before.
food_name_by_id = dict(zip(articles_df['contentId'], articles_df['foodName']))
df_food_ranking['foodName'] = df_food_ranking['contentId'].map(
    lambda content_id: food_name_by_id.get(content_id, ''))

# Use a 1-based index so the index doubles as the rank.
df_food_ranking.index = range(1, len(df_food_ranking) + 1)

Most Favored 10 Foods
--

In [15]:
# The ranking is sorted by descending mean rating, so the first rows are the
# ten best-rated foods.
df_food_ranking.head(10)

Unnamed: 0,contentId,eventStrength,foodName
1,35,4.388535,초밥
2,32,4.379747,삼겹살(구이)
3,21,4.360759,스테이크
4,29,4.295597,치킨
5,37,4.194969,파스타
6,12,4.189873,수육/보쌈
7,9,4.167742,회(사시미)
8,23,4.037736,피자
9,31,4.006289,분식(떡볶이/튀김/순대)
10,13,4.0,곱창/막창


Least Favored 10 Foods
--

In [16]:
# The last rows of the descending ranking are the ten lowest-rated foods.
df_food_ranking.tail(10)

Unnamed: 0,contentId,eventStrength,foodName
40,17,3.512658,베트남 쌀국수
41,19,3.496855,삼계탕
42,26,3.459119,김밥
43,41,3.383648,샌드위치
44,5,3.360759,비빔밥
45,30,3.327044,수제비
46,38,3.322581,월남쌈
47,44,3.312102,함박스테이크
48,36,3.09396,컵밥
49,8,2.743243,콩국수


Food Recommender
==

Collaborative Filtering Model
==

Model Evaluation
--

In [17]:
# Wide person x food matrix of the training ratings; a food a person has no
# training rating for is filled with 0.
users_items_pivot_matrix_df = (
    interactions_train_df
    .pivot(index='personId', columns='contentId', values='eventStrength')
    .fillna(0)
)

users_items_pivot_matrix_df.head(10)

contentId,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
personId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,3.0,3.0,0.0,4.0,5.0,3.0,4.0,4.0,0.0,0.0,...,1.0,4.0,5.0,4.0,0.0,3.0,5.0,2.0,0.0,3.0
1,0.0,3.0,5.0,2.0,1.0,3.0,5.0,1.0,0.0,0.0,...,2.0,5.0,1.0,2.0,0.0,1.0,0.0,2.0,5.0,5.0
2,3.0,3.0,4.0,2.0,4.0,4.0,3.0,4.0,4.0,4.0,...,2.0,0.0,3.0,3.0,3.0,0.0,2.0,4.0,3.0,0.0
3,5.0,3.0,4.0,0.0,3.0,0.0,4.0,3.0,4.0,0.0,...,5.0,2.0,4.0,4.0,3.0,0.0,2.0,3.0,4.0,0.0
4,4.0,5.0,5.0,4.0,5.0,4.0,5.0,0.0,2.0,0.0,...,3.0,4.0,5.0,5.0,0.0,5.0,0.0,0.0,4.0,0.0
5,4.0,0.0,5.0,0.0,5.0,3.0,5.0,5.0,2.0,5.0,...,2.0,5.0,0.0,4.0,0.0,3.0,5.0,5.0,4.0,5.0
6,3.0,0.0,3.0,4.0,0.0,5.0,3.0,5.0,0.0,3.0,...,5.0,4.0,2.0,5.0,5.0,3.0,0.0,0.0,5.0,4.0
7,4.0,4.0,0.0,4.0,4.0,4.0,4.0,4.0,0.0,4.0,...,4.0,4.0,3.0,0.0,5.0,4.0,4.0,5.0,0.0,5.0
8,5.0,0.0,4.0,4.0,2.0,3.0,3.0,0.0,3.0,2.0,...,4.0,5.0,4.0,3.0,2.0,1.0,4.0,3.0,0.0,5.0
9,0.0,3.0,2.0,0.0,4.0,2.0,3.0,0.0,2.0,0.0,...,4.0,4.0,3.0,0.0,0.0,3.0,3.0,0.0,4.0,0.0


In [19]:
# DataFrame.as_matrix() was deprecated and then removed in pandas 1.0;
# .values yields the same NumPy array on old and new pandas releases.
users_items_pivot_matrix = users_items_pivot_matrix_df.values
users_items_pivot_matrix[:10]

  """Entry point for launching an IPython kernel.


array([[3., 3., 0., 4., 5., 3., 4., 4., 0., 0., 3., 3., 5., 4., 2., 0.,
        3., 2., 3., 3., 4., 5., 2., 5., 5., 4., 3., 2., 4., 4., 1., 3.,
        4., 0., 2., 5., 0., 0., 0., 1., 4., 5., 4., 0., 3., 5., 2., 0.,
        3.],
       [0., 3., 5., 2., 1., 3., 5., 1., 0., 0., 5., 5., 0., 0., 2., 0.,
        5., 1., 2., 0., 0., 2., 5., 2., 4., 5., 3., 0., 5., 3., 2., 1.,
        5., 1., 5., 0., 4., 0., 2., 2., 5., 1., 2., 0., 1., 0., 2., 5.,
        5.],
       [3., 3., 4., 2., 4., 4., 3., 4., 4., 4., 4., 5., 4., 3., 3., 3.,
        4., 2., 0., 4., 2., 5., 0., 2., 4., 4., 2., 0., 4., 4., 4., 0.,
        4., 0., 0., 0., 3., 4., 5., 2., 0., 3., 3., 3., 0., 2., 4., 3.,
        0.],
       [5., 3., 4., 0., 3., 0., 4., 3., 4., 0., 4., 4., 4., 0., 3., 0.,
        4., 1., 4., 0., 0., 5., 0., 4., 5., 4., 3., 5., 3., 4., 4., 5.,
        5., 4., 4., 4., 2., 3., 3., 5., 2., 4., 4., 3., 0., 2., 3., 4.,
        0.],
       [4., 5., 5., 4., 5., 4., 5., 0., 2., 0., 5., 5., 5., 3., 4., 4.,
        5., 

In [20]:
# Person ids in the same order as the rows of the pivot matrix.
users_ids = users_items_pivot_matrix_df.index.tolist()
users_ids[:10]

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [21]:
# Number of latent factors (singular values/vectors) to keep.
NUMBER_OF_FACTORS_MF = 15
# Truncated SVD of the person x food matrix: U holds the person factors,
# sigma the k singular values as a 1-D vector, Vt the food factors.
U, sigma, Vt = svds(users_items_pivot_matrix, k = NUMBER_OF_FACTORS_MF)

In [22]:
# (number of persons, number of foods) in the training matrix.
users_items_pivot_matrix.shape

(159, 49)

In [23]:
# (number of persons, NUMBER_OF_FACTORS_MF)
U.shape

(159, 15)

In [24]:
# (NUMBER_OF_FACTORS_MF, number of foods)
Vt.shape

(15, 49)

In [25]:
# svds returns the singular values as a 1-D vector; expand it to a diagonal
# matrix for the reconstruction product. The ndim guard keeps this cell
# idempotent: np.diag applied to an already-diagonal matrix would extract the
# diagonal back into a vector and break the later matrix product.
if np.ndim(sigma) == 1:
    sigma = np.diag(sigma)
sigma.shape

(15, 15)

In [26]:
# Low-rank reconstruction U . sigma . Vt: one predicted score for every
# (person, food) pair.
all_user_predicted_ratings = U.dot(sigma).dot(Vt)
all_user_predicted_ratings[:1]

array([[3.50772200e+00, 2.49575823e+00, 1.82770471e+00, 3.65104366e+00,
        3.65628856e+00, 1.98639801e+00, 1.64140041e+00, 3.37080953e+00,
        1.44029825e+00, 6.23923900e-01, 2.88080943e+00, 1.66114231e+00,
        5.13691012e+00, 3.32587713e+00, 3.00011003e+00, 1.49381159e+00,
        3.55731479e+00, 2.95058334e+00, 2.49606571e+00, 2.40368994e+00,
        3.06096176e+00, 3.79884514e+00, 3.27311965e+00, 5.21818774e+00,
        3.56089175e+00, 3.61315602e+00, 1.03137976e+00, 2.60990541e+00,
        3.11051197e+00, 4.28436315e+00, 6.52012225e-01, 4.44084086e+00,
        3.45509164e+00, 3.58678020e+00, 2.19919748e+00, 5.69379265e+00,
        5.33797962e-01, 8.15996519e-01, 7.64176915e-01, 2.51920195e+00,
        3.12868543e+00, 3.37919316e+00, 3.84132497e+00, 1.94680154e+00,
        3.02514165e+00, 3.25582488e+00, 2.04527328e+00, 4.93575804e-03,
        2.57867767e+00]])

In [148]:
# Wrap the reconstructed matrix in a DataFrame (persons as rows, foods as
# columns), then transpose so that foods index the rows and every person is
# a column.
person_by_food_preds_df = pd.DataFrame(all_user_predicted_ratings,
                                       index=users_ids,
                                       columns=users_items_pivot_matrix_df.columns)
cf_preds_df = person_by_food_preds_df.transpose()
cf_preds_df.head(10)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,149,150,151,152,153,154,155,156,157,158
contentId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,3.134933,2.289185,2.540851,4.321943,3.67458,2.19477,3.741615,3.38077,3.508169,3.318815,...,3.723676,4.518698,1.803894,0.916887,4.384756,2.772024,3.505297,3.410371,4.388166,2.990535
1,2.177865,2.57738,2.040034,3.417702,2.264264,1.621826,2.82216,3.244848,2.802993,1.439252,...,0.995578,2.856867,2.567452,3.205781,3.26608,2.494313,2.704332,1.197655,0.678411,1.574097
2,3.216498,1.191397,4.624755,1.744922,5.331028,5.565033,3.595349,4.624524,1.343233,2.867855,...,1.756907,4.192471,3.374231,0.890645,4.673062,2.56551,3.422145,5.137333,3.913624,0.673601
3,3.794514,5.164858,2.288752,3.914988,2.384117,4.828152,3.560317,4.021629,2.546811,2.90611,...,2.876434,4.988642,3.382902,3.928258,1.322659,3.881605,4.596853,3.631161,2.422564,3.004501
4,0.829535,3.60908,3.768368,1.629138,3.406857,3.236636,1.384084,1.083,3.262224,3.89242,...,1.158629,0.73001,2.22453,3.964927,3.456134,4.446681,2.136296,1.897022,4.461153,4.360912
5,1.497681,0.825802,2.810158,0.957401,4.303608,5.265031,1.652824,3.842985,1.290783,3.282245,...,3.395573,4.38122,4.592708,2.668908,4.560114,2.234447,2.952539,2.803964,1.39839,3.706861
6,2.444424,0.681473,3.431133,3.689698,1.142012,1.421158,3.944433,1.927181,3.712344,2.029978,...,0.560729,2.085012,2.953014,3.446022,5.983078,3.288656,1.287714,0.937442,2.387572,2.253797
7,2.966792,1.611336,2.90036,4.026096,3.170081,0.879318,2.456123,1.445794,2.685456,1.953087,...,2.398349,1.985004,1.41945,2.11442,5.087053,2.741815,1.280473,2.662979,4.92286,1.244355
8,1.887334,1.999167,3.38963,3.763851,2.01003,3.086933,4.60592,2.584017,3.071324,3.105261,...,1.720387,5.233974,5.737408,3.432989,5.263437,2.746036,1.700089,1.40388,3.471089,3.279743
9,2.325719,3.844022,3.268061,4.434087,3.139309,1.881927,4.978865,3.566104,3.44301,2.48923,...,2.709248,3.616562,4.613854,2.489231,3.165795,3.29699,3.013065,2.721523,5.139883,4.295092


In [27]:
# Index each interaction frame by personId so one person's rows can be
# fetched with .loc[person_id].
interactions_full_indexed_df, interactions_train_indexed_df, interactions_test_indexed_df = (
    frame.set_index('personId')
    for frame in (interactions_full_df, interactions_train_df, interactions_test_df)
)

In [150]:
# Plain Python list of every known food id.
item_ids = articles_df.contentId.tolist()

In [28]:
def get_items_interacted(person_id, interactions_df):
    """Return the set of contentIds that ``person_id`` has interacted with.

    Parameters
    ----------
    person_id
        Label of the person in the index of ``interactions_df``.
    interactions_df : pandas.DataFrame
        Interactions indexed by person id, with a 'contentId' column.

    Returns
    -------
    set
        The distinct 'contentId' values found on that person's rows.

    Raises
    ------
    KeyError
        If ``person_id`` does not occur in the index.
    """
    if person_id not in interactions_df.index:
        raise KeyError(person_id)
    # Selecting with a one-element list always yields a Series of the
    # 'contentId' column, even when the person has a single row. Selecting the
    # row first would return that row as a Series, whose values are upcast to
    # one common dtype (e.g. float when another column holds floats).
    interacted_items = interactions_df.loc[[person_id], 'contentId']
    return set(interacted_items)

In [29]:
#Top-N accuracy metrics consts
# Number of not-interacted items mixed with each held-out item before ranking.
# NOTE(review): with 0 distractors every held-out item is ranked on its own,
# so whenever the model ranks it at all it sits at position 0 and recall@5 /
# recall@10 come out as 1.0 by construction. Raise this value to obtain an
# informative metric -- TODO confirm how many unrated foods each person
# actually has available to sample from.
EVAL_RANDOM_SAMPLE_NON_INTERACTED_ITEMS = 0


class ModelEvaluator:
    """Top-N recall evaluation of a recommender on the held-out interactions.

    The methods read the notebook-level frames ``interactions_full_indexed_df``,
    ``interactions_train_indexed_df``, ``interactions_test_indexed_df`` and
    ``articles_df`` as well as the helper ``get_items_interacted``.
    """

    # Passed as ``topn`` so that the model returns its complete ranking.
    _RANK_ALL_ITEMS = 10000000000

    def get_not_interacted_items_sample(self, person_id, sample_size, seed=42):
        """Return a reproducible random set of items the person never interacted with.

        At most ``sample_size`` items are returned; when fewer non-interacted
        items exist, all of them are returned instead of raising ValueError.
        """
        interacted_items = get_items_interacted(person_id, interactions_full_indexed_df)
        all_items = set(articles_df['contentId'])
        # random.sample() no longer accepts a set (TypeError on Python 3.11+),
        # and sorting makes the draw independent of set iteration order.
        non_interacted_items = sorted(all_items - interacted_items)

        # A private generator keeps the draw reproducible without reseeding the
        # module-level random state. int() turns NumPy integers into a seed
        # type that random accepts on every Python version.
        rng = random.Random(int(seed))
        sample_size = min(sample_size, len(non_interacted_items))
        return set(rng.sample(non_interacted_items, sample_size))

    def _verify_hit_top_n(self, item_id, recommended_items, topn):
        """Return ``(hit, index)`` for ``item_id`` within ``recommended_items``.

        ``index`` is the position of the first match, or -1 when the item is
        absent; ``hit`` is 1 when that position is among the first ``topn``
        entries and 0 otherwise.
        """
        try:
            index = next(i for i, c in enumerate(recommended_items) if c == item_id)
        except StopIteration:
            # Only "item not found" is expected here; any other error surfaces.
            index = -1
        hit = int(0 <= index < topn)
        return hit, index

    def evaluate_model_for_user(self, model, person_id):
        """Compute hits and recall at 5 and 10 for one person's held-out items.

        ``model`` must provide ``recommend_items(person_id, items_to_ignore, topn)``
        returning a DataFrame with a 'contentId' column in ranked order.
        Returns a dict with the hit counts, the number of held-out items and
        the two recall values.
        """
        #Getting the items in test set
        # List-based selection always returns a Series, so a person with a
        # single held-out row needs no special case.
        test_items = interactions_test_indexed_df.loc[[person_id], 'contentId']
        person_interacted_items_testset = set(int(item_id) for item_id in test_items)
        interacted_items_count_testset = len(person_interacted_items_testset)

        #Getting a ranked recommendation list from a model for a given user
        person_recs_df = model.recommend_items(
            person_id,
            items_to_ignore=get_items_interacted(person_id, interactions_train_indexed_df),
            topn=self._RANK_ALL_ITEMS)

        hits_at_5_count = 0
        hits_at_10_count = 0
        #For each item the user has interacted in test set
        for item_id in person_interacted_items_testset:
            #Getting a random sample of items the user has not interacted with
            #(to represent items that are assumed to be not relevant to the user)
            non_interacted_items_sample = self.get_not_interacted_items_sample(
                person_id,
                sample_size=EVAL_RANDOM_SAMPLE_NON_INTERACTED_ITEMS,
                seed=item_id % (2**32))
            #Combining the current interacted item with the sampled items
            items_to_filter_recs = non_interacted_items_sample.union(set([item_id]))

            #Keeping only recommendations that are either the interacted item
            #or one of the sampled non-interacted items
            valid_recs_df = person_recs_df[person_recs_df['contentId'].isin(items_to_filter_recs)]
            valid_recs = valid_recs_df['contentId'].values
            #Verifying if the current interacted item is among the Top-N recommended items
            hit_at_5, index_at_5 = self._verify_hit_top_n(item_id, valid_recs, 5)
            hits_at_5_count += hit_at_5
            hit_at_10, index_at_10 = self._verify_hit_top_n(item_id, valid_recs, 10)
            hits_at_10_count += hit_at_10

        #Recall is the rate of the interacted items that are ranked among the
        #Top-N recommended items, when mixed with a set of non-relevant items
        recall_at_5 = hits_at_5_count / float(interacted_items_count_testset)
        recall_at_10 = hits_at_10_count / float(interacted_items_count_testset)

        person_metrics = {'hits@5_count': hits_at_5_count,
                          'hits@10_count': hits_at_10_count,
                          'interacted_count': interacted_items_count_testset,
                          'recall@5': recall_at_5,
                          'recall@10': recall_at_10}
        return person_metrics

    def evaluate_model(self, model):
        """Evaluate ``model`` for every person present in the test set.

        Returns ``(global_metrics, detailed_results_df)``: a dict holding the
        model name and the pooled recall@5 / recall@10, and a per-person
        DataFrame sorted by the number of held-out items (descending).
        """
        people_metrics = []
        for person_id in interactions_test_indexed_df.index.unique().values:
            person_metrics = self.evaluate_model_for_user(model, person_id)
            person_metrics['_person_id'] = person_id
            people_metrics.append(person_metrics)
        # Report the real number of evaluated people; the loop index used
        # before was one short and undefined for an empty test set.
        print('%d users processed' % len(people_metrics))

        detailed_results_df = pd.DataFrame(people_metrics) \
                            .sort_values('interacted_count', ascending=False)

        total_interacted = float(detailed_results_df['interacted_count'].sum())
        global_recall_at_5 = detailed_results_df['hits@5_count'].sum() / total_interacted
        global_recall_at_10 = detailed_results_df['hits@10_count'].sum() / total_interacted

        global_metrics = {'modelName': model.get_model_name(),
                          'recall@5': global_recall_at_5,
                          'recall@10': global_recall_at_10}
        return global_metrics, detailed_results_df


model_evaluator = ModelEvaluator()

In [30]:
# Consolidated version of the matrix-factorisation steps: pivot -> SVD ->
# reconstruction -> prediction frame.

# Dense person x food table of training ratings; missing pairs are filled with 0.
users_items_pivot_matrix_df = interactions_train_df.pivot(index='personId',
                                                          columns='contentId',
                                                          values='eventStrength').fillna(0)

# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# NumPy array on old and new pandas releases.
users_items_pivot_matrix = users_items_pivot_matrix_df.values
users_ids = list(users_items_pivot_matrix_df.index)

#The number of factors to factor the user-item matrix.
NUMBER_OF_FACTORS_MF = 15
#Performs matrix factorization of the original user item matrix
U, sigma, Vt = svds(users_items_pivot_matrix, k = NUMBER_OF_FACTORS_MF)

# svds returns the singular values as a vector; the product needs a diagonal matrix.
sigma = np.diag(sigma)

# Low-rank reconstruction: a predicted score for every (person, food) pair.
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt)

#Converting the reconstructed matrix back to a Pandas dataframe
# (transposed: foods index the rows, every person is a column)
cf_preds_df = pd.DataFrame(all_user_predicted_ratings, columns = users_items_pivot_matrix_df.columns, index=users_ids).transpose()

  


In [31]:
class CFRecommender:
    """Recommender that ranks items by precomputed collaborative-filtering scores.

    Parameters
    ----------
    cf_predictions_df : pandas.DataFrame
        Predicted scores with one row per item and one column per user.
    items_df : pandas.DataFrame, optional
        Item metadata with a 'contentId' column; only needed for verbose output.
    """

    MODEL_NAME = 'Collaborative Filtering'

    def __init__(self, cf_predictions_df, items_df=None):
        self.cf_predictions_df = cf_predictions_df
        self.items_df = items_df

    def get_model_name(self):
        """Return the display name of this model."""
        return self.MODEL_NAME

    def recommend_items(self, user_id, items_to_ignore=None, topn=10, verbose=False):
        """Return the ``topn`` best-scored items for ``user_id``.

        Parameters
        ----------
        user_id
            Column label of the user in the prediction frame.
        items_to_ignore : iterable, optional
            contentIds to leave out of the result. ``None`` (the default)
            ignores nothing.
        topn : int
            Maximum number of rows to return.
        verbose : bool
            When true, the metadata columns of ``items_df`` are joined in.

        Returns
        -------
        pandas.DataFrame
            Columns 'contentId' and 'recStrength', best score first. In
            verbose mode: 'recStrength', 'contentId' and then every other
            column of ``items_df``.

        Raises
        ------
        ValueError
            If ``verbose`` is requested without ``items_df``.
        """
        ignored_items = [] if items_to_ignore is None else list(items_to_ignore)

        # Scores of this user, best first. rename_axis guarantees that the
        # item id column is called 'contentId' after reset_index().
        sorted_user_predictions = (self.cf_predictions_df[user_id]
                                   .rename('recStrength')
                                   .rename_axis('contentId')
                                   .sort_values(ascending=False)
                                   .reset_index())

        # The frame is already sorted, so filtering and truncating is enough.
        keep_mask = ~sorted_user_predictions['contentId'].isin(ignored_items)
        recommendations_df = sorted_user_predictions[keep_mask].head(topn)

        if verbose:
            if self.items_df is None:
                raise ValueError('"items_df" is required in verbose mode')

            # Join whatever metadata columns items_df provides rather than a
            # fixed list of column names it may not contain.
            metadata_columns = [column for column in self.items_df.columns
                                if column != 'contentId']
            recommendations_df = recommendations_df.merge(
                self.items_df, how='left', on='contentId'
            )[['recStrength', 'contentId'] + metadata_columns]

        return recommendations_df
    


In [32]:
# Recommender backed by the SVD predictions, with the food table as item metadata.
cf_recommender_model = CFRecommender(cf_predictions_df=cf_preds_df,
                                     items_df=articles_df)

In [35]:
# NOTE: input() blocks "Run All"; replace it with a constant for unattended runs.
personNum = int(input('PersonId를 입력해주세요 : '))

# Ten best-scored foods for the requested person.
recommended_foods = cf_recommender_model.recommend_items(personNum)

# Attach the food names with one vectorised lookup. assign() builds a new
# frame, so the frame returned by the recommender is not mutated in place.
food_name_by_id = dict(zip(articles_df['contentId'], articles_df['foodName']))
cf_model = recommended_foods.assign(
    foodName=recommended_foods['contentId'].map(food_name_by_id.get))

# 1-based index so that the index reads as the rank.
cf_model.index = range(1, len(cf_model) + 1)

cf_model

PersonId를 입력해주세요 :  0


Unnamed: 0,contentId,recStrength,foodName
1,35,5.693793,초밥
2,23,5.218188,피자
3,12,5.13691,수육/보쌈
4,31,4.440841,분식(떡볶이/튀김/순대)
5,29,4.284363,치킨
6,42,3.841325,냉면(물/비빔)
7,21,3.798845,스테이크
8,4,3.656289,햄버거
9,3,3.651044,족발
10,25,3.613156,갈비탕/설렁탕


In [190]:
# Run the Top-N recall evaluation and show the pooled metrics followed by the
# per-person breakdown.
print('Evaluating Collaborative Filtering (SVD Matrix Factorization) model...')
evaluation_result = model_evaluator.evaluate_model(cf_recommender_model)
cf_global_metrics, cf_detailed_results_df = evaluation_result
print('\nGlobal metrics:\n%s' % cf_global_metrics)
cf_detailed_results_df.head(10)

Evaluating Collaborative Filtering (SVD Matrix Factorization) model...
158 users processed

Global metrics:
{'modelName': 'Collaborative Filtering', 'recall@5': 1.0, 'recall@10': 1.0}


Unnamed: 0,_person_id,hits@10_count,hits@5_count,interacted_count,recall@10,recall@5
0,119,10,10,10,1.0,1.0
108,58,10,10,10,1.0,1.0
104,21,10,10,10,1.0,1.0
102,140,10,10,10,1.0,1.0
101,20,10,10,10,1.0,1.0
100,100,10,10,10,1.0,1.0
99,16,10,10,10,1.0,1.0
98,66,10,10,10,1.0,1.0
97,64,10,10,10,1.0,1.0
95,86,10,10,10,1.0,1.0
