In [1]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch

import warnings

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings(action='ignore')
# Print tensors in scientific notation.
torch.set_printoptions(sci_mode=True)

# Directory the raw experiment data (user.csv) is read from.
# NOTE(review): these are absolute, machine-specific paths.
DATA_PATH = '/opt/ml/final-project-level3-recsys-05/Model/Model-Experiment/data'
# Directory the fitted model pickle (ease.pickle) is written to and read from.
MODEL_PATH = '/opt/ml/final-project-level3-recsys-05/Model/Model-Experiment/model'
# Directory holding the problem-id <-> index JSON mappings.
VAL_TO_IDX_DATA_PATH = '/opt/ml/final-project-level3-recsys-05/Model/Model-Server/data'

In [4]:
import pickle

# Read the previously saved model back from MODEL_PATH/ease.pickle.
# NOTE(review): the pickle refers to the class as __main__.EASE (see the repr
# printed by the next cell), so the cell defining EASE must already have been
# executed; in file order this cell comes first and fails on a fresh kernel.
# pickle.load can execute arbitrary code -- only load files you produced yourself.
with open(os.path.join(MODEL_PATH, 'ease.pickle'), mode='rb') as model_file:
    load_model = pickle.load(model_file)

In [5]:
# Show the repr of the unpickled object.
load_model

<__main__.EASE at 0x7f99a46c4f10>

In [2]:
import gc

# Run a garbage-collection pass, then ask PyTorch to release its cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [3]:
import json

# Forward mapping: problem id -> column index.
# NOTE(review): JSON object keys are always strings after json.load -- confirm
# that lookups into both mappings use string keys.
with open(os.path.join(VAL_TO_IDX_DATA_PATH, 'problemId_to_idx.json'), encoding='utf-8') as forward_file:
    problemId_to_idx = json.load(forward_file)

# Reverse mapping: column index -> problem id.
with open(os.path.join(VAL_TO_IDX_DATA_PATH, 'idx_to_problemId.json'), encoding='utf-8') as reverse_file:
    idx_to_problemId = json.load(reverse_file)

In [4]:
# Load the user table; later cells read its 'user_name' and 'problems' columns.
user_df = pd.read_csv(os.path.join(DATA_PATH, 'user.csv'))

In [5]:
def get_problems_to_idx(problems, mapping=None):
    """Translate a stringified list of problem ids into their indices.

    Args:
        problems: String containing a Python list literal of problem ids,
            as stored in the 'problems' column of the user table.
        mapping: Optional dict from problem id to index. When omitted, the
            notebook-level ``problemId_to_idx`` is used.

    Returns:
        List of indices, in input order, for the ids found in the mapping.
        Ids that are missing from the mapping are skipped.

    Raises:
        ValueError, SyntaxError: if ``problems`` is not a valid Python literal.
    """
    import ast  # local import keeps the function usable without another cell

    if mapping is None:
        mapping = problemId_to_idx

    # literal_eval only parses literals; the previous eval() would have
    # executed any expression that happened to be stored in the CSV.
    problem_ids = ast.literal_eval(problems)

    ret = []
    for problemId in problem_ids:
        try:
            ret.append(mapping[problemId])
        except (KeyError, TypeError):
            # Unknown id (KeyError) or unhashable entry (TypeError): skip it.
            # Anything else is a real error and is allowed to propagate.
            continue
    return ret

# Add a column with each row's 'problems' entry translated to indices.
user_df['problems_to_idx'] = user_df['problems'].apply(get_problems_to_idx)

In [6]:
# Keep only rows with at least 15 mapped problems; reset_index(drop=False)
# preserves the original row labels in an 'index' column.
new_user_df = user_df[
    user_df['problems_to_idx'].apply(lambda mapped: len(mapped) >= 15)
].reset_index(drop=False)

In [7]:
# Give every distinct user name a contiguous row index, in order of first
# appearance. De-duplicating first keeps every index below
# len(user_name_to_idx), which is the number of rows allocated for the
# interaction matrix; enumerating all rows would hand out indices past that
# bound as soon as a user name repeats.
unique_user_names = new_user_df['user_name'].drop_duplicates().tolist()
user_name_to_idx = {user_name: idx for idx, user_name in enumerate(unique_user_names)}

In [8]:
# Binary interaction matrix: one row per entry of user_name_to_idx, one column
# per entry of problemId_to_idx.
mat = torch.zeros(size=(len(user_name_to_idx), len(problemId_to_idx)))
# Held-out problem indices per user row.
# NOTE(review): nothing fills this dict while the hold-out split below stays
# commented out, so any cell that reads valid_dict[row] needs it re-enabled.
valid_dict = {}

group_df = new_user_df.groupby('user_name')

for user_name, user_rows in group_df:
    # The generator is re-seeded on every iteration.
    random.seed(22)
    row = user_name_to_idx[user_name]
    # Only the first row of each group is used.
    total = user_rows['problems_to_idx'].values[0]
    # Hold-out split, currently disabled, so mat receives every interaction:
    # valid = random.sample(total, 10)
    # train = list(set(total) - set(valid))

    mat[row, total] = 1

    # valid_dict[row] = valid

In [3]:
class EASE():
    """Closed-form item-item linear recommender (EASE-style).

    ``fit`` derives an item-item weight matrix ``B`` from the regularised Gram
    matrix of the interactions; ``predict`` scores items as ``X @ B``.
    """

    def __init__(self, reg):
        """Store the regularisation strength and pick the preferred device.

        Args:
            reg: Value added to the diagonal of the Gram matrix before it is
                inverted.
        """
        self.reg = reg
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

    def _runtime_device(self):
        """Return the device to compute on in the current process.

        ``self.device`` is decided in ``__init__`` and travels with the pickle,
        so a model saved on a GPU machine still says 'cuda' when it is loaded
        on a CPU-only one. Fall back to the CPU in that case.
        """
        if str(self.device).startswith('cuda') and not torch.cuda.is_available():
            return 'cpu'
        return self.device

    def clear_memory(self):
        """Run the garbage collector and release PyTorch's cached CUDA memory."""
        import gc  # local import: do not rely on another cell having imported it

        gc.collect()
        torch.cuda.empty_cache()

    def fit(self, X):
        """Compute the weight matrix and store it on the CPU as ``self.B``.

        Args:
            X: 2-D float tensor of interactions (rows are users, columns are
                items in the way this notebook calls it).
        """
        device = self._runtime_device()
        X = X.to(device)
        G = X.t() @ X
        # Add the ridge term through the diagonal view; this avoids building a
        # dense items x items mask just to address the diagonal.
        G.diagonal().add_(self.reg)

        P = G.inverse()
        # Broadcasting divides column j by -P[j, j].
        B = P / (-1 * P.diag())
        # An item must not be used to predict itself.
        B.fill_diagonal_(0)

        self.B = B.cpu()

    def predict(self, X):
        """Score every item for every row of ``X``.

        Args:
            X: 2-D tensor with as many columns as the matrix passed to ``fit``.

        Returns:
            CPU tensor ``X @ B`` with the same number of rows as ``X``.
        """
        device = self._runtime_device()
        output = (X.to(device) @ self.B.to(device)).cpu()
        return output

In [10]:
def get_hit(pred_list, true_list):
    """Fraction of the true items that also appear among the predictions.

    Args:
        pred_list: Iterable of predicted item indices.
        true_list: Sized iterable of ground-truth item indices.

    Returns:
        ``len(set(true_list) & set(pred_list)) / len(true_list)``, or 0.0 when
        ``true_list`` is empty (there is nothing to hit).
    """
    if len(true_list) == 0:
        # Avoid ZeroDivisionError for a user without any held-out item.
        return 0.0
    hit_list = set(true_list) & set(pred_list)
    return len(hit_list) / len(true_list)

In [11]:
# Regularisation strength handed to EASE.
reg = 1000
model = EASE(reg = reg)
# Fit on the interaction matrix built above, then free cached memory.
model.fit(mat)
model.clear_memory()

In [12]:
import pickle

# Serialise the fitted model to MODEL_PATH/ease.pickle.
# NOTE(review): the pickle stores a reference to the EASE class, not its code,
# so whoever loads the file needs the class definition available.
with open(os.path.join(MODEL_PATH, 'ease.pickle'), mode='wb') as model_file:
    pickle.dump(model, model_file)

In [22]:
import pickle

# Read the saved model back from MODEL_PATH/ease.pickle.
# pickle.load can execute arbitrary code -- only load files you produced yourself.
with open(os.path.join(MODEL_PATH, 'ease.pickle'), mode='rb') as model_file:
    load_model = pickle.load(model_file)

In [11]:
# Hold-out evaluation: for every candidate reg, fit on `mat`, rank the problems
# each user has no interaction with, and report the mean hit ratio of the
# top 10 against the held-out problems stored in valid_dict.
if not valid_dict:
    # Fail before the expensive fit instead of with a KeyError after it.
    raise ValueError('valid_dict is empty: enable the hold-out split before running the evaluation')

for reg in tqdm([1000]):
    model = EASE(reg = reg)
    model.fit(mat)
    model.clear_memory()

    output = model.predict(mat)
    model.clear_memory()

    # Push already-seen problems to the bottom of the ranking. float('-inf')
    # replaces np.Inf, an alias that was removed in NumPy 2.0.
    output[mat == 1] = float('-inf')
    # Only the best 10 columns per user are needed, so select them directly
    # instead of fully sorting every row and converting it to a Python list.
    # (Which of several exactly tied scores is picked may differ from a full sort.)
    top_k = min(10, output.shape[1])
    rec_list = output.topk(k=top_k, dim=1).indices.tolist()

    hit = 0
    for idx, pred in enumerate(rec_list):
        true = valid_dict[idx]
        hit += get_hit(pred, true)

    hit /= len(output)
    print(f'reg: {reg} | hit : {hit:.5f}')

100%|██████████| 1/1 [02:11<00:00, 131.53s/it]

reg: 1000 | hit : 0.54859



