# To Do List

#### step1. dictionary 생성
> step1-1. makers별 foodid들이 들어있는 dict

> step1-2. group별 user들이 있는 dict

> step1-3. group별 최근 주문한 makers가 있는 dict

> step1-4. user별 최근 주문한 foodid가 있는 dict

---
Cosine Similarity : foodid-foodid, makersid-makersid

---

#### step2. CB추천 완성
> group이 들어왔을 때, 가장 최근에 시킨 메이커스 기반 메이커스 추천

> group의 user들에게 해당 메이커스에 있는 음식추천

#### input & output
>input: groupid

>output: 그룹에게 TopN개 추천 recomm_makers, makers_score

>output: 유저에게 TopN개 추천 recomm_food, food_score

In [7]:
# 필요 패키지 import
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import Normalizer
from sklearn.metrics.pairwise import cosine_similarity

### step1-1. maker별 foodid {makersid : {foodid set}}

In [8]:
makers_food = pd.read_csv('data/back/food_makers.csv')

makers_foods_dic = {}
for makersid in makers_food['MakersId'].unique():
    makers_foods_dic[makersid] = set({})
    
for makersid, foodid in zip(makers_food['MakersId'], makers_food['FoodId'].values):
    makers_foods_dic[makersid].add(int(foodid))
makers_foods_dic

{28: {18,
  71,
  73,
  74,
  115,
  116,
  134,
  252,
  409,
  502,
  503,
  864,
  865,
  866,
  867,
  909,
  1044,
  1201,
  1294,
  1365,
  1695,
  1932,
  1947,
  2599,
  2601,
  2602,
  2603,
  2997,
  3069,
  3150,
  3404,
  3405,
  3406},
 35: {27, 28, 51, 52, 234},
 42: {48,
  49,
  50,
  93,
  151,
  196,
  197,
  885,
  886,
  944,
  945,
  946,
  989,
  1617,
  1678,
  1680},
 41: {42,
  43,
  44,
  45,
  46,
  47,
  94,
  117,
  118,
  119,
  169,
  170,
  171,
  178,
  179,
  205,
  206,
  235,
  323,
  348,
  363,
  371,
  373,
  404,
  405,
  410,
  412,
  416,
  417,
  418,
  482,
  483,
  561,
  563,
  564,
  565,
  566,
  586,
  587,
  590,
  591,
  912,
  964,
  1027,
  1163,
  1164,
  1196,
  1197,
  1275,
  1396,
  1397,
  1401,
  1402,
  1404,
  1616,
  1618,
  1682,
  2076,
  2249,
  2373,
  2374,
  2375,
  2376,
  2377,
  2397,
  2435,
  2436,
  2437,
  2492,
  2493,
  2494,
  2495,
  2496,
  2497,
  2498,
  2499,
  2500,
  2501,
  2506,
  2510,
  2518,
  251

### step1-2. group별 유저들 {groupid : {user set}}

In [9]:
group_users = pd.read_csv('data/order_food_data.csv')

group_users_dic = {}
for groupid in group_users['GroupId'].unique():
    group_users_dic[groupid] = set({})
    
for makersid, userid in zip(group_users['GroupId'], group_users['UserId'].values):
   group_users_dic[makersid].add(int(userid))
group_users_dic

{0: {3,
  7,
  17,
  21,
  23,
  39,
  136,
  198,
  317,
  319,
  324,
  331,
  333,
  343,
  344,
  366,
  375,
  426,
  449,
  451,
  591,
  592,
  593,
  599,
  605,
  615,
  720,
  778,
  794,
  795,
  803,
  812,
  874,
  932,
  939,
  942,
  967,
  977,
  995,
  1004,
  1017,
  1095,
  1122,
  1171,
  1173,
  1174,
  1183,
  1187,
  1190,
  1191,
  1210,
  1232,
  1274,
  1329,
  1359,
  1361,
  1365,
  1366,
  1382,
  1417,
  1420,
  1462,
  1484,
  1514,
  1524,
  1546,
  1568,
  1571,
  1575,
  1577,
  1593,
  1600,
  1602,
  1653,
  1662,
  1683,
  1835,
  1869,
  1880,
  1892,
  1893,
  1973,
  1974,
  1980,
  1983,
  1984,
  2006,
  2010,
  2116,
  2151,
  2152,
  2165,
  2191,
  2194,
  2202,
  2204,
  2207,
  2220,
  2236,
  2237,
  2241,
  2246,
  2257,
  2271,
  2288,
  2305,
  2309,
  2327,
  2329,
  2337,
  2386,
  2430,
  2431,
  2450,
  2467,
  2493,
  2495,
  2519,
  2525,
  2526,
  2588,
  2591,
  2618,
  2619,
  2623,
  2642,
  2644,
  2664,
  2690,
  2693,
  27

### step1-3. group별 최근 주문한 makers가 있는 dict

In [10]:
# 가장 최근에 주문한 group별 최근 주문makers dict 만들기
group_makers_df = pd.read_csv('data/order_food_data.csv')

recently_makers_dic = {}
x = group_makers_df[['GroupId', 'MakersId']]
x = x.drop_duplicates(subset='GroupId', keep='last', ignore_index=True)
x.sort_values('GroupId', inplace= True)
x.reset_index(inplace=True)
x.drop('index', axis=1, inplace = True)

for group, makers in zip(x['GroupId'].values, x['MakersId'].values):
    recently_makers_dic.update({group : makers})
recently_makers_dic

{0: 230,
 1: 136,
 2: 249,
 3: 116,
 4: 29,
 5: 62,
 6: 51,
 7: 171,
 8: 249,
 9: 22,
 10: 245,
 11: 52,
 12: 15,
 13: 15,
 14: 81,
 15: 55,
 16: 25,
 17: 18,
 19: 155,
 20: 208,
 21: 94,
 22: 110,
 23: 16,
 24: 69,
 25: 51,
 26: 93,
 27: 63,
 28: 52,
 29: 186,
 30: 92,
 31: 103,
 32: 139,
 34: 64,
 35: 94,
 36: 69,
 38: 64,
 39: 198,
 40: 51,
 42: 93,
 43: 180,
 44: 201,
 45: 51,
 46: 51,
 47: 230,
 49: 107,
 50: 240,
 51: 117,
 54: 115,
 55: 184,
 56: 110,
 57: 206,
 58: 158,
 59: 220,
 60: 228,
 61: 110,
 62: 69,
 67: 115,
 72: 115,
 74: 115,
 81: 115,
 91: 115,
 92: 115,
 93: 115,
 95: 115,
 96: 115,
 97: 115,
 98: 69,
 100: 115,
 109: 115,
 110: 115,
 111: 115,
 117: 115,
 118: 115,
 122: 115,
 125: 141,
 126: 143,
 127: 136,
 128: 143,
 129: 158,
 132: 200,
 133: 147,
 137: 155,
 138: 158,
 139: 174,
 140: 119,
 141: 197,
 142: 173,
 143: 246,
 144: 197,
 147: 37,
 148: 174,
 150: 117,
 151: 174,
 152: 158,
 153: 201,
 154: 180,
 155: 187,
 156: 246,
 157: 184,
 158: 166,
 160: 1

### step1-4. user별 최근 주문한 foodid가 있는 dict

In [11]:
food_df = pd.read_csv('data/order_food_data.csv')

recently_eaten_dic = {}
x = food_df.drop_duplicates(subset='UserId', keep = 'last', ignore_index= True)
x = x.sort_values('UserId')
x.reset_index(inplace=True)

for user, food in zip(x.loc[:,'UserId'].values, x.loc[:,'FoodId'].values):
    recently_eaten_dic.update({user : food})
recently_eaten_dic

{3: 86,
 7: 1452,
 17: 2348,
 21: 8098,
 23: 40,
 39: 206,
 136: 10023,
 198: 205,
 305: 2563,
 311: 2348,
 315: 9831,
 317: 2095,
 319: 848,
 322: 18,
 324: 49,
 329: 91,
 330: 5793,
 331: 512,
 332: 10583,
 333: 5930,
 341: 10458,
 342: 2003,
 343: 364,
 344: 41,
 346: 353,
 347: 245,
 348: 2662,
 350: 3317,
 351: 246,
 352: 3412,
 353: 283,
 357: 3047,
 358: 91,
 361: 10548,
 362: 8815,
 363: 10297,
 364: 7767,
 365: 10539,
 366: 3340,
 367: 9013,
 368: 24,
 369: 8319,
 370: 8348,
 372: 10576,
 373: 10683,
 375: 117,
 376: 10130,
 377: 10153,
 379: 9904,
 380: 9013,
 381: 3174,
 382: 91,
 383: 8268,
 384: 10204,
 385: 7567,
 386: 3404,
 387: 3006,
 388: 278,
 389: 3404,
 391: 3404,
 393: 10445,
 395: 6655,
 398: 3173,
 399: 241,
 400: 2563,
 401: 2662,
 402: 1479,
 403: 8319,
 404: 10576,
 405: 10673,
 406: 2911,
 407: 8789,
 408: 6315,
 409: 10451,
 410: 10289,
 411: 10123,
 412: 10458,
 413: 1279,
 414: 4649,
 415: 10683,
 416: 10451,
 417: 10448,
 418: 10429,
 419: 10397,
 420: 1

### Makers Cosine Similarity

In [12]:
# makers cosine smilarity 구하기
mk_df = pd.read_csv('data/back/mk_df.csv')

mk_df.set_index('MakersId',inplace=True)
makers_cs_df = cosine_similarity(mk_df, mk_df)
makers_cs_df = pd.DataFrame(makers_cs_df, index = mk_df.index, columns= mk_df.index)
makers_cs_df

MakersId,28,35,42,41,43,29,17,1,38,37,...,245,247,238,249,250,251,252,257,253,254
MakersId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
28,1.000000,0.0,0.043482,0.697984,0.0,0.784856,0.663621,0.295918,0.771452,0.439039,...,0.583210,0.530937,0.228964,0.523919,0.332266,0.067351,0.801825,0.336940,0.572808,0.569275
35,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
42,0.043482,0.0,1.000000,0.047454,0.0,0.043533,0.048034,0.026496,0.047502,0.024360,...,0.027868,0.053659,0.010967,0.050181,0.032393,0.003681,0.045944,0.043006,0.043510,0.014073
41,0.697984,0.0,0.047454,1.000000,0.0,0.660110,0.767185,0.342195,0.638307,0.459853,...,0.430810,0.612665,0.254619,0.512535,0.369189,0.073972,0.732154,0.681555,0.842964,0.367079
43,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,0.067351,0.0,0.003681,0.073972,0.0,0.236863,0.371590,0.427659,0.067502,0.455728,...,0.077198,0.441726,0.489667,0.041310,0.416667,1.000000,0.082675,0.064906,0.085872,0.035400
252,0.801825,0.0,0.045944,0.732154,0.0,0.861263,0.794449,0.348470,0.867867,0.496370,...,0.745002,0.685625,0.350939,0.598221,0.467778,0.082675,1.000000,0.465808,0.706277,0.542657
257,0.336940,0.0,0.043006,0.681555,0.0,0.416288,0.560401,0.223862,0.349442,0.270688,...,0.194375,0.430058,0.265896,0.434359,0.314792,0.064906,0.465808,1.000000,0.759316,0.248145
253,0.572808,0.0,0.043510,0.842964,0.0,0.547573,0.676451,0.298953,0.582988,0.420629,...,0.408556,0.647072,0.351159,0.588439,0.468759,0.085872,0.706277,0.759316,1.000000,0.349759


### Food Cosine Similarity

In [13]:
food_cs_df = pd.read_csv('data/similarity/food_cs_df.csv', index_col=0)
food_cs_df

Unnamed: 0_level_0,18,51,50,43,42,44,47,39,41,49,...,10068,9812,10794,10772,7729,9941,7731,10453,10773,10633
FoodId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
51,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
50,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
43,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.276026,0.298142,0.389249,0.389249,1.000000,1.000000,0.866667,0.000000,0.389249,0.155700
7731,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.276026,0.447214,0.389249,0.544949,0.866667,0.866667,1.000000,0.000000,0.544949,0.155700
10453,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.144338,0.000000,0.150756,0.000000,0.000000,0.000000,1.000000,0.150756,0.000000
10773,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.161165,0.783349,0.454545,0.909091,0.389249,0.389249,0.544949,0.150756,1.000000,0.181818


### step2. CB추천 완성

## 그룹 177은 유저가 1명이라 출력이 잘 되는 것 확인 가능,
## 그룹 132도 유저수가 적당해서 보기 적당함

In [229]:
# 사용자가 가장 최근에 먹은 음식 기반 추천된 메이커스에서 판매하는 음식 추천 #
def by_recently_ordered(userid, n_of_recomm, makers):
    num = 1
    recom_food_score_list = []
    foodid = recently_eaten_dic[userid]
    cs_include_food = set({})
    
    # 주어진 음식과 다른 음식의 similarity를 가져온다
    sim_scores = food_cs_df.loc[foodid]
    del sim_scores[f'{foodid}']
    makers_own_food_cs = sim_scores
    
    for f in [str(food) for food in makers_foods_dic[makers]]:
        if f in makers_own_food_cs.index:
            cs_include_food.add(f)
    makers_own_food_cs = makers_own_food_cs.loc[[f for f in cs_include_food]].sort_values(ascending=False)[:n_of_recomm]
    
    recom_food_score_list.append(makers_own_food_cs.index)
    recom_food_score_list.append(makers_own_food_cs.values)
    
    print(f"사용자{userid} - 최근에 먹은 음식{foodid} 바탕으로 추천된 음식 Top{n_of_recomm}\n")
    
    for i in range(len(recom_food_score_list[0])):
        print(f'Top{num} 음식: {recom_food_score_list[0][i]}, 점수: {recom_food_score_list[1][i]}\n')
        num += 1

# 그룹이 가장 최근 이용한 메이커스 기반 메이커스 추천 #
def by_recently_used(makersid, n_of_recomm):
    recom_makers_score_list = []
    num = 0
    # 주어진 메이커스와 다른 메이커스의 similarity를 가져온다
    sim_scores = makers_cs_df.loc[makersid]
    sim_scores = sim_scores.sort_values(ascending = False)[1:n_of_recomm + 1]
    
    recom_makers_score_list.append(sim_scores.index)
    recom_makers_score_list.append(sim_scores.values)
    
    for i in range(len(recom_makers_score_list[0])):
        num += 1
        print(f"\n\n그룹{groupid}이(가) 최근에 이용한 메이커스{makersid} 바탕으로 추천된 메이커스 Top{num}")
        print(f'Top{num} 메이커스: {recom_makers_score_list[0][i]}, 점수: {recom_makers_score_list[1][i]}\n')
        makers = recom_makers_score_list[0][i]
    
        for userid in group_users_dic[groupid]:
            num_recomm_food = 3 # 추천 받을 음식 갯수
            # num_recomm_food = int(input()) # 추천 받을 음식 갯수
            by_recently_ordered(userid, num_recomm_food, makers)
            
groupid = int(input("그룹 아이디(GroupId)를 입력 : "))
top = int(input("추천받을 메이커스 개수 입력 : "))
by_recently_used(recently_makers_dic[groupid], top)

  sim_scores = sim_scores.sort_values(ascending = False)[1:n_of_recomm + 1]




그룹132이(가) 최근에 이용한 메이커스200 바탕으로 추천된 메이커스 Top1
Top1 메이커스: 143, 점수: 0.8333333333333335

사용자4612 - 최근에 먹은 음식6867 바탕으로 추천된 음식 Top3

Top1 음식: 3810, 점수: 0.816496580927726

Top2 음식: 5365, 점수: 0.816496580927726

Top3 음식: 4821, 점수: 0.816496580927726

사용자4615 - 최근에 먹은 음식6277 바탕으로 추천된 음식 Top3

Top1 음식: 3810, 점수: 0.7071067811865476

Top2 음식: 5365, 점수: 0.7071067811865476

Top3 음식: 4821, 점수: 0.7071067811865476

사용자4616 - 최근에 먹은 음식4329 바탕으로 추천된 음식 Top3

Top1 음식: 3810, 점수: 0.0

Top2 음식: 5365, 점수: 0.0

Top3 음식: 4821, 점수: 0.0

사용자4618 - 최근에 먹은 음식4269 바탕으로 추천된 음식 Top3

Top1 음식: 3810, 점수: 0.0

Top2 음식: 5365, 점수: 0.0

Top3 음식: 4821, 점수: 0.0

사용자4621 - 최근에 먹은 음식4257 바탕으로 추천된 음식 Top3

Top1 음식: 3810, 점수: 0.816496580927726

Top2 음식: 5365, 점수: 0.816496580927726

Top3 음식: 4821, 점수: 0.816496580927726

사용자4622 - 최근에 먹은 음식4259 바탕으로 추천된 음식 Top3

Top1 음식: 3810, 점수: 0.816496580927726

Top2 음식: 5365, 점수: 0.816496580927726

Top3 음식: 4821, 점수: 0.816496580927726

사용자4623 - 최근에 먹은 음식6907 바탕으로 추천된 음식 Top3

Top1 음식: 3810, 점

In [185]:
# 그룹 177이 가장 최근에 이용한 메이커스 확인
recently_makers_dic[177] # 가장 최근에 이용한 메이커스는 136

136

In [187]:
# 메이커스 간 유사도 확인 : 메이커스 추천에 사용
makers_cs_df.loc[136].sort_values(ascending=False) # 136을 이용해서 211이 추천됨

MakersId
136    1.000000
211    0.701982
144    0.677204
179    0.672092
132    0.666129
         ...   
85     0.000000
73     0.000000
82     0.000000
83     0.000000
44     0.000000
Name: 136, Length: 199, dtype: float64

In [190]:
# 177그룹의 유저 확인
group_users_dic[177] # 유저 1명 userid 7648

{7648}

In [188]:
# 177 그룹의 유저가 가장 최근에 먹은 음식id 확인
recently_eaten_dic[7648] # 유저 7648이 먹은 가장 최근 음식id는 8990

8990

In [226]:
# 추천된 makers에서 판매하는 음식 확인
print(8247 in makers_foods_dic[211]) # 추천된 메이커스 211에서 8247아이템을 판매함
print(8240 in makers_foods_dic[211]) # 추천된 메이커스 211에서 8240아이템을 판매함
print(8231 in makers_foods_dic[211]) # 추천된 메이커스 211에서 8231아이템을 판매함

True
True
True


In [227]:
# 추천된 메이커스에서 판매하는 음식들에 대한 순위 확인

recom_food_score_list = [8247, 8240, 8231]
cs_include_food = set({})

# 주어진 음식과 다른 음식의 similarity를 가져온다
sim_scores = food_cs_df.loc[8990]
del sim_scores[f'{8990}']
makers_own_food_cs = sim_scores

for f in [str(food) for food in makers_foods_dic[211]]:
    if f in makers_own_food_cs.index:
        cs_include_food.add(f)
makers_own_food_cs = makers_own_food_cs.loc[[f for f in cs_include_food]].sort_values(ascending=False)

# 추천된 음식들 점수
makers_own_food_cs

8247    0.365148
8240    0.344265
8231    0.344265
8235    0.326599
8213    0.326599
9611    0.326599
9613    0.326599
8212    0.326599
8237    0.326599
9612    0.326599
7862    0.326599
9614    0.298142
8215    0.258199
Name: 8990, dtype: float64

크롤링 한 음식이 저희가 받은 데이터에 존재하지 않았어서 cs 매트릭스에서 해당 음식과 다른 유사도를 참조하는데 오류 발생
0으로 해줘야할지..
> 일단 없는 거는 제외해서 처리를 해두었음