In [30]:
import pandas as pd, numpy as np, datetime
import pymysql,  os, pickle
from scipy import sparse
from haversine import haversine

from sklearn.preprocessing import MinMaxScaler

from scipy.sparse import csr_matrix
from scipy.stats import pearsonr
from sklearn.feature_extraction.text import CountVectorizer

from implicit.als import AlternatingLeastSquares

# implicit 라이브러리에서 권장사항입니다.
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
os.environ['MKL_NUM_THREADS'] = '1'

def dev_db_conn(database) :
    conn = pymysql.connect(host='host', user='user', password='password',autocommit=True,cursorclass=pymysql.cursors.DictCursor, db = "{}".format(database))
    return conn

def live_db_conn():
    conn = pymysql.connect(host='host', user='user', password='password',autocommit=True,cursorclass=pymysql.cursors.DictCursor, db = "database")
    return conn

pd.set_option('display.max_columns', None)

import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

pd.set_option('mode.chained_assignment',  None) # Setting With Copy Warning

def get_store_id(tgt):
    
    conn = live_db_conn()
    curs = conn.cursor()

    sql =  """
        SELECT store_id
        FROM product
        WHERE id = {}
        """.format(tgt)

    curs.execute(sql)
    store_id_df = pd.DataFrame(curs.fetchall())
    
    curs.close()
    conn.close()

    tgt_store_id = str(store_id_df['store_id'][0])
    
    return tgt_store_id

def get_location_id(tgt):

    tgt_store_id = get_store_id(tgt)
    
    conn = live_db_conn()
    curs = conn.cursor()

    sql =  """
        SELECT sl.store_id, location_master_id
        FROM store_location sl
        WHERE sl.store_id = {}
        """.format(tgt_store_id)

    curs.execute(sql)
    location_master_df = pd.DataFrame(curs.fetchall())
    
    curs.close()
    conn.close()

    tgt_location_id = str(location_master_df['location_master_id'].max())
    
    return tgt_location_id

def get_store_location_id(store_id):

    
    conn = live_db_conn()
    curs = conn.cursor()

    sql =  """
        SELECT sl.store_id, location_master_id
        FROM store_location sl
        WHERE sl.store_id = {}
        """.format(store_id)

    curs.execute(sql)
    location_master_df = pd.DataFrame(curs.fetchall())
    
    curs.close()
    conn.close()

    tgt_location_id = str(location_master_df['location_master_id'].max())
    
    return tgt_location_id


def get_category_id(tgt):
    
    tgt_store_id = get_store_id(tgt)
    conn = live_db_conn()
    curs = conn.cursor()

    sql =  """
    SELECT store_id, store_category_master_id
    FROM store_category sc
    WHERE sc.store_id = {}
    """.format(tgt_store_id)

    curs.execute(sql)
    category_master_df = pd.DataFrame(curs.fetchall())

    curs.close()
    conn.close()


    if len(category_master_df) != 0:
        tgt_category_id = category_master_df['store_category_master_id'][0]
        
    
        return tgt_category_id


def no_tag_product(tgt):

    tgt_store_id = get_store_id(tgt)

    conn = live_db_conn()
    curs = conn.cursor()

    sql = """
    SELECT p.id as product_id, pt.product_tag_master_id as product_tag_master_id, pt.type as tag_type
    FROM product p
    INNER JOIN product_tag pt
    INNER JOIN product_tag_master ptm
    on pt.product_id=p.id
    and pt.product_tag_master_id = ptm.id
    WHERE p.store_id={} and p.is_main = 1
    """.format(tgt_store_id)

    curs.execute(sql)
    tgt_df = pd.DataFrame(curs.fetchall())
    
    curs.close()    
    conn.close()

    return tgt_df

def no_tag_store(tgt):

    tgt_category_id = get_category_id(tgt)
    tgt_location_id = get_location_id(tgt)
    
    conn = live_db_conn()
    curs = conn.cursor()
    if tgt_category_id is None:

        sql = """
        SELECT s.id as store_id, s.name as store_name
        FROM store s
        INNER JOIN store_location sl
        on s.id = sl.store_id
        where s.status = 'normal'
        and sl.location_master_id = {}
        """.format(tgt_location_id)
    else:
        sql = """
        SELECT s.id as store_id, s.name as store_name
        FROM store s
        INNER JOIN store_category sc
        INNER JOIN store_category_master scm
        INNER JOIN store_location sl
        ON s.id = sc.store_id
        AND sc.store_category_master_id = scm.id
        AND s.id = sl.store_id
        where s.status = 'normal'
        and sl.location_master_id = {}
        and sc.store_category_master_id = {}
        """.format(tgt_location_id, tgt_category_id)
    curs.execute(sql)

    same_catloc_store = pd.DataFrame(curs.fetchall())

    curs.close()
    conn.close()
    
    return same_catloc_store


def get_main_product(tgt_store_id):

    conn = live_db_conn()
    curs = conn.cursor()

    sql = """
    SELECT p.store_id, s.name as store_name, p.id as product_id, p.name as product_name
    from product p
    inner join store s
    on p.store_id = s.id
    where p.is_main = 1
    and p.store_id = {} 
    and p.status = 'normal'
    order by p.price desc
    limit 2;
    """.format(tgt_store_id)

    curs.execute(sql)
    main_product = pd.DataFrame(curs.fetchall())
    curs.close()
    conn.close()
    
    return main_product

def get_tgt(tgt:str):

    conn = live_db_conn()
    curs = conn.cursor()

    sql = """
    SELECT p.id as product_id, pt.product_tag_master_id as product_tag_master_id, pt.type as tag_type, pt.flag as tag_flag
    FROM product p
    INNER JOIN product_tag pt
    INNER JOIN product_tag_master ptm
    on pt.product_id=p.id
    and pt.product_tag_master_id = ptm.id
    WHERE p.id={}
    """.format(tgt)

    curs.execute(sql)
    tgt_df = pd.DataFrame(curs.fetchall())

    curs.close()
    conn.close()

    if len(tgt_df) != 0:
        
        t_ids = tgt_df['product_tag_master_id'].tolist()
        tag_dic = {}

        for vals in tgt_df.values:
            tag_dic[vals[1]] = vals[2] 
        t_ids = str(t_ids)[1:-1]

        tf = 1
        
        return tf, t_ids, tag_dic
    
    else:
        
        tgt_df = no_tag_product(tgt)
        
        if len(tgt_df) != 0:
            
            tgt_df = tgt_df[tgt_df['product_id'] == tgt_df['product_id'].value_counts().index[0]]
            new_tgt = tgt_df['product_id'].unique()[0]
            t_ids = tgt_df['product_tag_master_id'].tolist()
            tag_dic = {}

            for vals in tgt_df.values:

                tag_dic[vals[1]] = vals[2] 
                
            t_ids = str(t_ids)[1:-1]
            
            tf = 2

            return tf, t_ids, tag_dic, new_tgt
        
        else:

            tgt_df = no_tag_store(tgt)
            
            # 10개 매장 메뉴 2개씩 (있는만큼)
            tgt_store_id_list = tgt_df['store_id'].value_counts().index.tolist()[:10]
            
            final_rec_df = pd.DataFrame()
        
            for tgt_store_id in tgt_store_id_list:
                
                
                temp = get_main_product(tgt_store_id)
                
                if len(temp) != 0:
                    
                    final_rec_df = pd.concat([final_rec_df,temp])
                
            tf = 3
            
            return tf, final_rec_df

def get_product(tgt:str):

    location_id = get_location_id(tgt)
    tgt_output = get_tgt(tgt)

    tf = tgt_output[0]

    if tf != 3:

        t_ids = tgt_output[1]

        conn = live_db_conn()
        curs = conn.cursor()

        sql = """
        SELECT p.id as product_id, p.status
        FROM product p
        INNER JOIN product_tag pt
        INNER JOIN store_location sl
        on pt.product_id=p.id
        and p.store_id = sl.store_id
        WHERE pt.product_tag_master_id in ({}) and p.status = 'normal'
        and sl.location_master_id in ({})
        group by p.id
        """.format(t_ids,location_id)

        curs.execute(sql)
        
        p_df = pd.DataFrame(curs.fetchall())
        
        curs.close()
        conn.close()

        p_ids = str(p_df['product_id'].unique().tolist())[1:-1]
        
        tag_dic = tgt_output[2]

        if tf == 1 :

            
            return tf, tag_dic, p_ids


        elif tf == 2:

            new_tgt = tgt_output[3]
            p_ids += ', {}'.format(str(new_tgt))
            
            return tf, tag_dic, p_ids, new_tgt
        
    elif tf == 3:
        
        final_rec_df = tgt_output[1]

        return tf, final_rec_df

def get_product_store(product_list):

    conn = live_db_conn()
    curs = conn.cursor()

    sql = """
    SELECT p.store_id as store_id, s.name as store_name, p.id as product_id, p.name as product_name
    FROM product p 
    inner join store s
    on p.store_id = s.id
    WHERE p.id in ({}) and p.is_main = 1 and s.status = 'normal'
    group by p.store_id
    """.format(str(product_list)[1:-1])

    curs.execute(sql)
    
    rec_df = pd.DataFrame(curs.fetchall())
    
    curs.close()
    conn.close()

    return rec_df

def make_dic(something_list):

    name2idx = {}
    idx2name = {}

    for i in range(len(something_list)):

        name2idx[something_list[i]] = i
        idx2name[i] = something_list[i]

    return name2idx, idx2name
    
def CB_rec(tgt:str):

    product_output = get_product(tgt)
    
    tf = product_output[0]

    if tf != 3:
       
        tag_dic = product_output[1]
        p_ids = product_output[2]

        weighted_dic = {'ingredient': 1.5, 'sauce':1, 'cooking':0.8, 'option':0.5}
        
        conn = live_db_conn()
        curs = conn.cursor()

        sql = """
        SELECT p.store_id as store_id, p.id as product_id, group_concat(ptm.id separator ' ') as ptm_ids
        FROM product p
        INNER JOIN product_tag pt
        INNER JOIN product_tag_master ptm
        on pt.product_id=p.id
        and pt.product_tag_master_id = ptm.id
        WHERE p.id in ({})
        group by p.id
        """.format(p_ids)
        
        curs.execute(sql)
        # del p_ids
        sim_df = pd.DataFrame(curs.fetchall())
        
        curs.close()
        conn.close()

            
        # product_tag
        vectorizer = CountVectorizer(min_df = 1)
        full_text = sim_df['ptm_ids'].tolist()
        X = vectorizer.fit_transform(full_text)
        
        sim_df = pd.DataFrame(
            data=X.todense(),
            index = sim_df['product_id'],
            columns = vectorizer.get_feature_names_out()
        )

        if tf == 2:
           
            # new_tgt
            tgt = product_output[3]
            target = sim_df[sim_df.index==tgt]

        elif tf == 1:
           
            target = sim_df[sim_df.index==int(tgt)]
            
        for k,v in tag_dic.items():
        
            target[str(k)]  *= weighted_dic[tag_dic[k]] 
            
        sim_value = []

        for i in range(len(sim_df)):
            v = np.linalg.norm(target-sim_df.iloc[i])
            sim_value.append(round(v,2))
        
        
        sim_df['sim_value'] =  sim_value
        result = sim_df[['sim_value']]

        rec_list = result.sort_values(by='sim_value').index.tolist()
        
        rec_df = get_product_store(rec_list)
        
        rec_df = rec_df[rec_df['store_id'] != int(get_store_id(tgt))]

        final_rec_df = pd.merge(rec_df, result.reset_index(), how='left', left_on = 'product_id', right_on = 'product_id')

        final_rec_df['scaled_simv'] = 5 * (1 - (final_rec_df['sim_value'] - min(final_rec_df['sim_value']))/(max(final_rec_df['sim_value'])-min(final_rec_df['sim_value'])))
        
        final_rec_df.sort_values(by='scaled_simv',inplace=True)
        final_rec_df = final_rec_df[final_rec_df['scaled_simv']>=0.1].head(100)
        final_rec_df = final_rec_df[['store_id', 'store_name', 'product_id', 'product_name','scaled_simv']]

        return final_rec_df

    else:
        
        final_rec_df = product_output[1]
        final_rec_df['scaled_simv'] = 2.5
        final_rec_df = final_rec_df[['store_id', 'store_name', 'product_id', 'product_name','scaled_simv']]

    final_rec_df.drop_duplicates('store_id',inplace=True,keep='first')

    return final_rec_df


def get_store(final_input_store, store_id_list):
    location_id = get_store_location_id(final_input_store)
    
    store_id_list = [int(x) for x in store_id_list]
    
    conn = live_db_conn()
    curs = conn.cursor()

    if type(store_id_list) != list:

        sql = """
        SELECT s.id, s.name, s.address
        FROM store s
        INNER JOIN store_location sl
        on s.id = sl.store_id
        where s.id = {} and sl.location_master_id = {}
        """.format(store_id_list, location_id)

    else:
       
        sql = """
        SELECT s.id, s.name
        FROM store s
        INNER JOIN store_location sl
        on s.id = sl.store_id
        where s.id in {} and sl.location_master_id = {}
        """.format(tuple(store_id_list), location_id)

    curs.execute(sql)
    df = pd.DataFrame(curs.fetchall())

    curs.close()
    conn.close()

    return df

def get_store_products(store_id):

    conn = live_db_conn()
    curs = conn.cursor()

    if type(store_id) == list:


        sql = """
        SELECT s.id store_id, s.name as store_name, p.id product_id, p.name product_name
        from store s
        inner join product p
        on s.id = p.store_id
        where s.id in {}
        """.format(tuple(store_id))

    else:

        sql = """
        SELECT s.id store_id, s.name as store_name, p.id product_id, p.name product_name
        from store s
        inner join product p
        on s.id = p.store_id
        where s.id = {}
        """.format(store_id)

    curs.execute(sql)
    df = pd.DataFrame(curs.fetchall())

    curs.close()
    conn.close()

    return df

def same_hour_loc(hour, loc):
    
    conn = live_db_conn()
    curs = conn.cursor()

    sql = """
    SELECT o.user_id, v.order_id, o.status, o.store_id, s.name, sl.location_master_id, p.name, hour(v.reserved_at) as hour
    FROM voucher v 
    INNER JOIN product p
    INNER JOIN `order` o
    INNER JOIN store s
    inner join store_location sl
    ON v.order_id = o.id
    and p.store_id = s.id
    AND s.id = o.store_id
    and s.id = sl.store_id
    where p.status = 'normal'
    and hour(v.reserved_at) = {}
    and sl.location_master_id = {}
    """.format(hour, loc)
    curs.execute(sql)

    df = pd.DataFrame(curs.fetchall())
    hot_list = df['store_id'].value_counts().index[:15].tolist()

    curs.close()
    conn.close()

    result = get_store_products(hot_list)

    return result

def cal_dist(location_id):

    conn = live_db_conn()
    curs = conn.cursor()

    sql = """
    SELECT s.id, s.name, s.latitude, s.longitude
    FROM store s
    inner join store_location sl
    on s.id = sl.store_id
    where sl.location_master_id = {}
    """.format(location_id)
    curs.execute(sql)

    dist = pd.DataFrame(curs.fetchall())
    curs.close()
    conn.close()
    
    dist.dropna(inplace=True)
    dist.reset_index(inplace=True)
    dist.drop('index',axis=1,inplace=True)
    
    return dist

def except_do(final_input, df):
    
    main_product_id = get_main_product(df['live_store_id'][0])['product_id'][0]
    hour = str(df.iloc[0]['reserved_at'].hour)
    loc_id = get_location_id(main_product_id)

    try:    
        
        if hour == '0':
            
            result = no_tag_store(main_product_id)
        
        else:
           
            # 시간대별 인기 매장
            result = same_hour_loc(hour, loc_id)
            result.drop_duplicates(['store_id'], keep='first', inplace=True)
            result['scaeled_simv'] = 2.5

    except:

        # 거리순
        print('except')
        dist = cal_dist(loc_id)
        input_loc = (dist[dist['id'] == final_input]['latitude'].values, dist[dist['id'] == final_input]['longitude'].values)

        dist['distance'] = [haversine(input_loc, (dist.loc[i,'latitude'], dist.loc[i,'longitude']), unit='km') for i in range(len(dist))]

        result = dist[dist['distance(km)']<=0.5]

    return result


def store_voucher(store_id_list):

    conn = live_db_conn()
    curs = conn.cursor()

    if type(store_id_list) == list:
        sql = """
        SELECT o.user_id, v.order_id, o.store_id, s.name, p.name, s.address, v.reserved_at
        FROM voucher v 
        INNER JOIN product p
        INNER JOIN `order` o
        INNER JOIN store s
        ON v.order_id = o.id
        AND s.id = o.store_id
        and s.id = p.store_id
        where o.status = 'paid'
        and p.status = 'normal'
        and o.store_id in {}
        """.format(tuple(store_id_list))
    else:
        sql = """
        SELECT o.user_id, v.order_id, o.store_id, s.name, p.name, s.address, v.reserved_at
        FROM voucher v 
        INNER JOIN product p
        INNER JOIN `order` o
        INNER JOIN store s
        ON v.order_id = o.id
        AND s.id = o.store_id
        and s.id = p.store_id
        where o.status = 'paid'
        and p.status = 'normal'
        and o.store_id = {}
        """.format(store_id_list)
    curs.execute(sql)

    voucher_df = pd.DataFrame(curs.fetchall())

    curs.close()
    conn.close()

    return voucher_df


def time_sim_cal(df):

    df_dum_class = pd.get_dummies(df, columns = ['time'])

    time_store = pd.DataFrame(df_dum_class.groupby('store_id')[df_dum_class.columns[7:]].sum().transpose())

    ts_trsp = time_store.transpose()

    scaler = MinMaxScaler()

    ts_trsp_mm = scaler.fit_transform(ts_trsp)
    mMscaled_ts = pd.DataFrame(ts_trsp_mm)

    mMscaled_ts_trsp = mMscaled_ts.transpose()

    mMscaled_ts_trsp.columns = time_store.columns

    time_df = mMscaled_ts_trsp.transpose()

    return time_df

def get_all_voucher():

    conn = live_db_conn()
    curs = conn.cursor()

    sql = """
    SELECT o.user_id as reviewer_name, v.order_id, o.status, o.store_id as live_store_id, s.name, op.product_id, p.name, v.reserved_at
    FROM voucher v 
    INNER JOIN product p
    INNER JOIN `order` o
    INNER JOIN order_product op
    INNER JOIN store s
    ON v.order_id = o.id
    AND op.product_id = p.id
    AND o.id = op.order_id
    AND s.id = o.store_id
    where p.status = 'normal' and p.sale_status = 'sale'
    order by v.reserved_at desc
    """
    curs.execute(sql)

    df = pd.DataFrame(curs.fetchall())


    curs.close()
    conn.close()

    status_dic = {'cancel':1,'ready':3,'partial_cancel':2,'deny':2,'paid':5}

    df['reviewer_stars'] = df['status'].apply(lambda x:status_dic[x])

    df = df[['live_store_id','reviewer_name','reviewer_stars','reserved_at']]

    return df

def get_google_review():

    conn = dev_db_conn("database")
    curs = conn.cursor()

    sql = """
    select live_store_id, reviewer_name, reviewer_stars
    from ethan_google_review_detail
    """
    curs.execute(sql)

    google_review1 = pd.DataFrame(curs.fetchall())


    sql = """
    SELECT STORE_ID as live_store_id, reviewer_name, reviewer_star as reviewer_stars
    FROM emile_google_review_detail
    where reviewer_star is not null
    """
    curs.execute(sql)
    google_review2 = pd.DataFrame(curs.fetchall())
    google_review2['reviewer_stars'] = google_review2['reviewer_stars'].apply(lambda x:x.split('/')[0])
    
    curs.close()
    conn.close()

    google_review = pd.concat([google_review1, google_review2])

    return google_review


def get_naver_review():

    conn = dev_db_conn("database")
    curs = conn.cursor()

    sql = """
    SELECT *
    FROM naver_review
    where rating is not null
    """
    curs.execute(sql)
    naver_review = pd.DataFrame(curs.fetchall())
    naver_review.drop_duplicates(['writer_id','restaurant_id','visited'], inplace=True)

    sql = """
    SELECT A.RSTR_ID, A.SRC_ID as store_id, B.SRC_ID as naver_src_id FROM (SELECT * FROM redtable01.RSTR_CONN where SRC='live_db') A
    join (SELECT * FROM redtable01.RSTR_CONN where SRC='naver') B on A.RSTR_id=B.RSTR_id
    WHERE B.SRC_ID in {};
    """.format(tuple(naver_review['restaurant_id'].unique().tolist()))
    curs.execute(sql)
    store_conn = pd.DataFrame(curs.fetchall())

    curs.close()
    conn.close()
    
    naver_review = pd.merge(naver_review, store_conn, how='left', left_on = 'restaurant_id', right_on = 'naver_src_id').drop(['RSTR_ID','naver_src_id'],axis=1)
    naver_review = naver_review[['store_id','writer_id','rating']].rename(columns={'store_id':'live_store_id','writer_id':'reviewer_name','rating':'reviewer_stars'})

    return naver_review

def make_total_review(review1, review2, review3):

    total_review = pd.concat([review1, review2, review3])
    total_review.dropna(inplace=True)

    return total_review

### 학습

1. Review 통합

In [19]:
google_review = get_google_review()

naver_review = get_naver_review()

df = get_all_voucher()

voucher_review = df.groupby(['live_store_id','reviewer_name']).mean('reviewer_stars').reset_index()

total_review = make_total_review(google_review, naver_review, voucher_review)

print('리뷰 개수:',len(total_review))
print('리뷰 작성자 이름 수:',len(total_review['reviewer_name'].unique().tolist()))
print('리뷰 매장 수:',len(total_review['live_store_id'].unique().tolist()))

리뷰 개수: 414814
리뷰 작성자 이름 수: 318831
리뷰 매장 수: 5031


2. Reviewer-Store Dictionary 생성 및 저장

In [20]:
reviewer_list = total_review['reviewer_name'].unique().tolist()
store_list = total_review['live_store_id'].unique().tolist()

reviewer_name2idx, reviewer_idx2name = make_dic(reviewer_list)
store_name2idx, store_idx2name = make_dic(store_list)

In [21]:
# 저장
pickle.dump(reviewer_name2idx, open('reviewer_name2idx.pkl', 'wb'))
pickle.dump(reviewer_idx2name, open('reviewer_idx2name.pkl', 'wb'))
pickle.dump(store_name2idx, open('store_name2idx.pkl', 'wb'))
pickle.dump(store_idx2name, open('store_idx2name.pkl', 'wb'))

# 불러오기
reviewer_name2idx = pickle.load(open('reviewer_name2idx.pkl', 'rb'))
reviewer_idx2name = pickle.load(open('reviewer_idx2name.pkl', 'rb'))
store_name2idx = pickle.load(open('store_name2idx.pkl', 'rb'))
store_idx2name = pickle.load(open('store_idx2name.pkl', 'rb'))

3. 학습을 위한 전처리

In [22]:
total_review['reviewer_name'] = total_review['reviewer_name'].apply(lambda x:reviewer_name2idx[x])
total_review['live_store_id'] = total_review['live_store_id'].apply(lambda x:store_name2idx[x])

In [24]:
total_review = total_review.astype(float)
total_review = total_review.astype(int)
total_review.reset_index(inplace=True)
total_review.drop('index',axis=1,inplace=True)
num_reviewer = total_review['reviewer_name'].nunique()
num_store = total_review['live_store_id'].nunique()

4. User-Based Als_Model 학습 및 저장

In [25]:
reviewer_store = csr_matrix((total_review['reviewer_stars'].values, (total_review.reviewer_name, total_review.live_store_id)), shape= (num_reviewer, num_store))
sparse.save_npz('reviewer_store.npz',reviewer_store)
# 1. User-Based

# Implicit AlternatingLeastSquares 모델의 선언
als_model = AlternatingLeastSquares(factors=256, regularization=0.01, use_gpu=False, iterations=5, dtype=np.float32)

# 모델 훈련
als_model.fit(reviewer_store)

pickle.dump(als_model, open('ALS_USER_BASED', 'wb'))

  0%|          | 0/5 [00:00<?, ?it/s]

5. Item-Based Als_Model 학습 및 저장

In [26]:
store_reviewer = csr_matrix((total_review['reviewer_stars'].values, (total_review.live_store_id, total_review.reviewer_name)), shape= (num_store,num_reviewer))

# 2. Item-Based

# Implicit AlternatingLeastSquares 모델의 선언
als_model = AlternatingLeastSquares(factors=256, regularization=0.01, use_gpu=False, iterations=5, dtype=np.float32)

# 모델 훈련
als_model.fit(store_reviewer)

pickle.dump(als_model, open('ALS_ITEM_BASED', 'wb'))

  0%|          | 0/5 [00:00<?, ?it/s]

### 서비스

In [31]:
def service(input_df):

    final_input = input_df['live_store_id'][0]
    main_product = get_main_product(str(final_input))['product_id'][0]

    df = get_all_voucher()

    reviewer_name = input_df['reviewer_name'][0]

    reviewer_name2idx = pickle.load(open('reviewer_name2idx.pkl', 'rb'))
    
    
    if reviewer_name in list(reviewer_name2idx.keys()):
        
        reviewer_idx2name = pickle.load(open('reviewer_idx2name.pkl', 'rb'))
        store_name2idx = pickle.load(open('store_name2idx.pkl', 'rb'))
        store_idx2name = pickle.load(open('store_idx2name.pkl', 'rb'))
        
        test_user_idx = reviewer_name2idx[reviewer_name]
        
        reviewer_store = sparse.load_npz("reviewer_store.npz")

        als_ub = pickle.load(open('ALS_USER_BASED', 'rb'))
        als_ib = pickle.load(open('ALS_ITEM_BASED', 'rb'))

        store_rate = pd.DataFrame(columns=['user_idx','user_name','store_idx','store_id','rec_name','predict_rate'])        
        
        try:  # 기존 학습 구조와 다르면 Except로 넘어감 
            
            store_recommended = als_ub.recommend(test_user_idx, reviewer_store[test_user_idx], N=100, filter_already_liked_items=True)
            
            rec_list = [store_idx2name[i] for i in store_recommended[0]]

            rec_livestore = get_store(final_input, rec_list) # Input 위치값과 동일한 매장만 추천
            
            if len(rec_livestore) != 0:
                idx = 0
                for vals in rec_livestore.values:

                    reviewer_idxname = reviewer_idx2name[test_user_idx]
                    store_id = int(vals[0])
                    store_idx = store_name2idx[store_id]
                    rec_name = vals[1]
                    predict_rate = store_recommended[1][idx]

                    s_v, r_v = als_ib.user_factors[store_idx], als_ib.item_factors[test_user_idx]
                    
                    weight = np.dot(s_v,r_v)
                    
                    scaled_pr = predict_rate * weight

                    temp = pd.DataFrame([[reviewer_name, reviewer_idxname, store_idx, store_id, rec_name, predict_rate, scaled_pr]], columns = ['user_idx','user_name','store_idx','store_id','rec_name','predict_rate', 'scaled_pr'])
                    
                    store_rate = pd.concat([store_rate,temp],axis=0)
                    idx += 1
            
            
                store_rate.rename(columns={'user_idx':'타겟 고객 INDEX','user_name':'타겟 고객 이름','store_idx':'추천 매장 INDEX','store_id':'추천 매장 ID','rec_name':'추천 매장명','predict_rate':'예상 선호도', 'scaled_pr':'예상 가중 선호도'}, inplace=True)
                
                store_rate['예상 가중 선호도'] = 5 * (store_rate['예상 가중 선호도'] - min(store_rate['예상 가중 선호도'])) / (max(store_rate['예상 가중 선호도']) - min(store_rate['예상 가중 선호도']))
                store_rate = store_rate[store_rate['예상 가중 선호도'] >= 1]

                store_rate = store_rate[['추천 매장 ID','추천 매장명','예상 가중 선호도']]
            
                try: # CF랑 merge할 CB
                
                    result = CB_rec(main_product)
                
                except Exception as e1:
                    print('e1:',e1)
                    result = except_do(final_input,df)
            
                tmp_df = pd.merge(store_rate, result, how='outer', left_on='추천 매장 ID',right_on='store_id')
                tmp_df.loc[tmp_df['추천 매장 ID'].isnull(),'추천 매장 ID'] = tmp_df['store_id']
                tmp_df.loc[tmp_df['추천 매장명'].isnull(),'추천 매장명'] = tmp_df['store_name']
                tmp_df['추천 매장 ID'] = tmp_df['추천 매장 ID'].astype('int') 
                tmp_df.fillna(0,inplace=True)
                tmp_df.drop(['store_id','store_name'],axis=1,inplace=True)
                tmp_df['time_sim'] = 0
                
                total_list = tmp_df['추천 매장 ID'].unique().tolist() + [int(final_input)]
            
                voucher_df = store_voucher(total_list)
                
                # 예약 시간 오류 제거
                voucher_df = voucher_df[voucher_df['reserved_at']!='0000-00-00 00:00:00']
                voucher_df = voucher_df[voucher_df['reserved_at']!=0]

                voucher_df.dropna(inplace=True)
                voucher_df.rename(columns={'p.name':'product_name', 'name':'store_name'},inplace=True)
                voucher_df = voucher_df.astype({'reserved_at':'datetime64[ns]'})
                voucher_df['time'] = [i.time().hour for i in voucher_df['reserved_at']]

                time_df = time_sim_cal(voucher_df)
                
                input_store_id = int(final_input)
            
                if len(time_df[time_df.index == input_store_id]) == 0:
                    
                    input_product = [0 for i in range(len(time_df.columns))]
                    tmp_df['time_sim'] = 0

                else:
                    
                    input_product = time_df[time_df.index == input_store_id].iloc[0,0:].values.tolist()
                    compare_product_df = time_df[time_df.index != input_store_id].iloc[:,0:]

                    for product in compare_product_df.reset_index().values.tolist():

                        if len(set(product)) != 0:
                            pearson_sim = pearsonr(input_product,product[1:])
                            tmp_df.loc[tmp_df['추천 매장 ID']==product[0],'time_sim'] = pearson_sim[0]
                try:
                    
                    tmp_df['최종 점수'] = 0.3 * tmp_df['예상 가중 선호도'] + 0.6 * tmp_df['scaled_simv'] + 0.1 * tmp_df['time_sim']
                    
                except Exception as e3:
                    
                    print('e3:',e3)
                    tmp_df['scaled_simv'] = 2.5
                    tmp_df['최종 점수'] = 0.3 * tmp_df['예상 가중 선호도'] + 0.6 * tmp_df['scaled_simv'] + 0.1 * tmp_df['time_sim']

                result = tmp_df.sort_values(by='최종 점수', ascending=False).head(15)

            else:
                # CF 추천 매장들 중 지역이 동일한게 없어서 CB로만 
                result = CB_rec(main_product)
            
        except Exception as e2:
            print('e2:',e2)
            result = except_do(final_input, df)
            
            

        
    # 첫 상호작용의 경우 CB
    
    else:
       
        result = CB_rec(main_product)
        
    result.rename(columns={'product_name':'상품명','product_id':'상품 ID','store_id':'추천 매장 ID','store_name':'추천 매장명','scaled_simv':'CB 상품 유사도'}, inplace=True)
    result = result[result['추천 매장 ID'] != final_input].head(10)

    return result

In [32]:
# Sample Input

target_list_df = pd.DataFrame(columns=['live_store_id','reviewer_name','reviewer_stars'])
target_list_df['live_store_id'] = [25530, 28479, 948205, 10035]
target_list_df['reviewer_name'] = ['Yongshi Jung', 'Ryan', 96180, 67844]
target_list_df['reviewer_stars'] = [3,3,4,4]

In [34]:
inpdf  = pd.DataFrame([target_list_df.iloc[0]]).reset_index().drop('index',axis=1)
result = service(inpdf)

result[['추천 매장 ID','추천 매장명','최종 점수']]

Unnamed: 0,추천 매장 ID,추천 매장명,최종 점수
8,40175,원조한치,1.5
0,10016,명동한우방,1.453132
6,14408,미성옥,1.242247
11,40855,원조 곱창떡볶이 화떡,1.21886
86,40668,김가네,0.950413
93,10988,유가네닭갈비,0.950413
100,14955,월매네남원추어탕,0.950413
99,42225,사위식당,0.950413
98,13355,강남면옥,0.950413
97,42443,렛츠 케이-기프트(Let's K-gift) 고기야,0.950413
