## test-time evaluation test

#### basic settings

In [1]:
import os

import pandas as pd
import numpy as np
import random
from google.cloud import storage


import pandas as pd
import numpy as np
import sqlalchemy
import ast
from core.config import DATABASE_URL

In [2]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="./core/storage.json"
storage_client = storage.Client()
bucket = storage_client.bucket('foodcom_als_model')

In [3]:
def get_db_engine():
    '''Returns a connection and a metadata object'''
    engine = sqlalchemy.create_engine(DATABASE_URL, echo=True)
    #meta = sqlalchemy.MetaData(bind=engine, reflect=True)
    return engine  # , meta

def update_sql(df:pd.DataFrame) -> None:
    df.to_sql(name='meta_data',
                con=engine,
                schema='public',
                if_exists='replace',
                index=False,
                dtype={
                    'user_count': sqlalchemy.types.INTEGER(),
                    'recipe_count': sqlalchemy.types.INTEGER(),
                    'interaction_count': sqlalchemy.types.INTEGER(),
                    'best_model': sqlalchemy.types.Text(),
                    'batch_tag': sqlalchemy.types.INTEGER(),
                    'inference_traffic': sqlalchemy.types.Text()
                    }
                )
    
engine = get_db_engine()
meta_data = pd.read_sql(f"select * from public.meta_data", engine)

2022-06-09 02:13:32,264 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2022-06-09 02:13:32,265 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-06-09 02:13:32,269 INFO sqlalchemy.engine.Engine select current_schema()
2022-06-09 02:13:32,269 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-06-09 02:13:32,274 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2022-06-09 02:13:32,274 INFO sqlalchemy.engine.Engine [raw sql] {}
2022-06-09 02:13:32,279 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2022-06-09 02:13:32,279 INFO sqlalchemy.engine.Engine [generated in 0.00063s] {'name': 'select * from public.meta_data'}
2022-06-09 02:13:32,286 INFO sqlalchemy.engine.Engine select * from public.meta_data
2022-06-09 02:13:32,286 INFO sqlalchemy.engine.Engine [raw sql] {}


In [4]:
temp = pd.read_sql(f"select * from public.meta_data", engine)
#update_sql(temp)

2022-06-09 02:13:32,386 INFO sqlalchemy.engine.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
2022-06-09 02:13:32,387 INFO sqlalchemy.engine.Engine [cached since 0.1079s ago] {'name': 'select * from public.meta_data'}
2022-06-09 02:13:32,393 INFO sqlalchemy.engine.Engine select * from public.meta_data
2022-06-09 02:13:32,394 INFO sqlalchemy.engine.Engine [raw sql] {}


In [5]:
temp

Unnamed: 0,user_count,recipe_count,interaction_count,best_model,batch_tag,inference_traffic
0,9726,147302,580526,"['MultiDAE', 'MultiVAE', 'RecVAE']",2,"[10.1, 8.7, 8.5, 27.3, 27.3, 27.3]"


#### preference matrix downloader (by user)

In [6]:
from io import BytesIO
def download_preference_matrix(meta_data:pd.DataFrame, user_id:int) -> list:
    '''
    아이템 id가 들어있는 matrix를 반환합니다.
    (아이템을 뽑기 쉽게 하기 위해 제일 뒤에 있는 아이템의 점수가 가장 높게 reverse)
    '''
    model_preferences = list()
    model_list = ast.literal_eval(meta_data['best_model'].item())
    for model in model_list:
        downloaded_model = bucket.blob(f'{model}.npy').download_as_string()
        downloaded_model = BytesIO(downloaded_model)
        preference_matrix = np.load(downloaded_model, allow_pickle=True).item()[user_id]
        model_preferences.append(list(reversed(preference_matrix)))
    
    return model_preferences


In [7]:
preference_matrix = download_preference_matrix(meta_data, user_id=777)
a, b, c = preference_matrix
len(a), len(b), len(c)

(100, 100, 100)

#### recommend top 10 recipes

In [8]:
meta_data

Unnamed: 0,user_count,recipe_count,interaction_count,best_model,batch_tag,inference_traffic
0,9726,147302,580526,"['MultiDAE', 'MultiVAE', 'RecVAE']",2,"[10.1, 8.7, 8.5, 27.3, 27.3, 27.3]"


In [9]:
def recommend_top_k(meta_data:pd.DataFrame, preference_matrix:list, top_k:int = 10) -> list:
    recommended_list = list()
    recommender_list = list()
    a1, a2, a3, b1, b2, b3 = ast.literal_eval(meta_data['inference_traffic'].item())
    while len(recommended_list) <= 10:
        sampling_list = [np.random.beta(a1, b1), np.random.beta(a2, b2), np.random.beta(a3, b3)]
        best_model = np.argsort(sampling_list)[-1]
        while True:
            rec_item = preference_matrix[best_model].pop()
            if rec_item not in recommended_list:
                recommended_list.append(rec_item)
                recommender_list.append(best_model)
                break
    return recommended_list, recommender_list
    

In [10]:
recommended_list, recommender_list = recommend_top_k(meta_data, preference_matrix, top_k = 10)
print(recommended_list)
print(recommender_list)

[1828, 5841, 19404, 30387, 4081, 641, 3685, 50761, 12822, 5838, 12311]
[0, 2, 1, 1, 0, 0, 1, 0, 2, 1, 0]
