In [3]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch

import warnings

# Silence every warning category for the rest of the session and print
# tensors in scientific notation.
warnings.filterwarnings(action='ignore')
torch.set_printoptions(sci_mode=True)

# Absolute, machine-specific locations used by the cells below.
# NOTE(review): these only resolve on the original host; consider deriving
# them from a single configurable project root.
DATA_PATH = '/opt/ml/final-project-level3-recsys-05/Model/Model-Experiment/data'
MODEL_PATH = '/opt/ml/final-project-level3-recsys-05/Model/Model-Experiment/model'
VAL_TO_IDX_DATA_PATH = '/opt/ml/final-project-level3-recsys-05/Model/Model-Server/data'

# Pickle protocol passed to pickle.dump when the model is saved.
PICKLE_PROTOCOL = 4

In [4]:
import gc

# Run a garbage-collection pass, then ask PyTorch to release the unused
# memory held by its CUDA caching allocator.
gc.collect()
torch.cuda.empty_cache()

In [5]:
import json

# Load the two lookup tables stored as JSON under VAL_TO_IDX_DATA_PATH.
# JSON object keys are always strings, so both dicts are keyed by `str`
# after loading (including idx_to_problemId).
with open(os.path.join(VAL_TO_IDX_DATA_PATH, 'problemId_to_idx.json'), 'r', encoding = 'utf-8') as f:
    problemId_to_idx = json.load(f)

with open(os.path.join(VAL_TO_IDX_DATA_PATH, 'idx_to_problemId.json'), 'r', encoding = 'utf-8') as f:
    idx_to_problemId = json.load(f)

In [6]:
# Load the per-user table; later cells read its `problems` and `user_name` columns.
user_df = pd.read_csv(os.path.join(DATA_PATH, 'user.csv'))

In [7]:
def get_problems_to_idx(problems):
    """Convert a stringified list of problem ids into a list of item indices.

    Parameters
    ----------
    problems : str
        Python-literal text of a sequence of problem ids,
        e.g. "['1000', '1001']".

    Returns
    -------
    list
        `problemId_to_idx[problem_id]` for every id that has a mapping, in
        the original order. Ids without a mapping are skipped.

    Raises
    ------
    ValueError, SyntaxError
        If `problems` is not a valid Python literal.
    """
    import ast  # local import keeps this cell runnable on its own

    # literal_eval only parses literals; unlike eval() it cannot execute an
    # arbitrary expression that happens to be stored in the CSV.
    problem_ids = ast.literal_eval(problems)

    ret = []
    for problemId in problem_ids:
        try:
            ret.append(problemId_to_idx[problemId])
        except (KeyError, TypeError):
            # KeyError: the id has no index; TypeError: unhashable entry.
            # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
            continue
    return ret

# Add a column holding, for every user, the item indices of their problems.
user_df['problems_to_idx'] = user_df['problems'].apply(get_problems_to_idx)

In [8]:
# Keep only users with at least 15 mapped problems. reset_index(drop=False)
# renumbers the rows and keeps the previous row labels as a regular column.
has_enough_problems = user_df['problems_to_idx'].apply(lambda solved: len(solved) >= 15)
new_user_df = user_df[has_enough_problems].reset_index(drop = False)

In [9]:
# Map each user name to its row position in new_user_df
# (if a name repeats, the last position wins).
user_name_to_idx = {
    name: position
    for position, name in enumerate(new_user_df['user_name'].tolist())
}

In [10]:
# Dense user x item matrix: one row per entry of user_name_to_idx, one column
# per entry of problemId_to_idx; a 1 marks a problem listed for that user.
mat = torch.zeros(size = (len(user_name_to_idx), len(problemId_to_idx)))
# Intended to hold the held-out items per user row.
# NOTE(review): it stays empty while the sampling lines below are commented
# out, so any cell that indexes valid_dict raises KeyError until the split is
# re-enabled.
valid_dict = {}

group_df = new_user_df.groupby('user_name')

for user_name, df in group_df:
    # Re-seeded on every iteration, so each user's sample (when enabled) does
    # not depend on the order in which users are visited.
    random.seed(22)
    # Takes the first row of the group; assumes one row per user_name — TODO confirm.
    total = df['problems_to_idx'].values[0]
    # valid = random.sample(total, 10)
    # train = list(set(total) - set(valid))
    
    # Every interaction is written to the matrix (`total`, not `train`):
    # nothing is held out in the current state of this cell.
    mat[user_name_to_idx[user_name], total] = 1

    # valid_dict[user_name_to_idx[user_name]] = valid

In [11]:
class EASE():
    """Closed-form EASE item-item model.

    `fit` computes the item-item weight matrix

        P = (X^T X + reg * I)^-1
        B = P / (-diag(P)),  with diag(B) forced to 0

    and `predict` scores every item for every input row as `X @ B`.

    Parameters
    ----------
    reg : float
        Value added to the diagonal of the Gram matrix `X^T X` before it is
        inverted (L2 regularisation strength).

    Attributes
    ----------
    B : torch.Tensor or None
        Item-item weights on the CPU; `None` until `fit` has been called.
    device : str
        Preferred compute device, chosen when the object is created.
    """

    def __init__(self, reg):
        self.reg = reg
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.B = None

    def _runtime_device(self):
        """Return the device to compute on in the current process.

        `self.device` is stored inside the pickle, so a model saved on a CUDA
        machine would otherwise try to use CUDA on a CPU-only host.
        """
        device = getattr(self, 'device', 'cpu')
        if str(device).startswith('cuda') and not torch.cuda.is_available():
            return 'cpu'
        return device

    def clear_memory(self):
        """Run the garbage collector and release PyTorch's cached CUDA memory."""
        gc.collect()
        torch.cuda.empty_cache()

    def fit(self, X):
        """Compute the weight matrix from the interaction matrix `X`.

        Parameters
        ----------
        X : torch.Tensor
            2-D floating-point tensor of shape (rows, items).

        The result is stored on the CPU in `self.B`; nothing is returned.
        """
        X = X.to(self._runtime_device())
        G = X.t() @ X
        # Index the diagonal directly instead of materialising a dense
        # items x items boolean mask with torch.eye().
        diag = torch.arange(G.shape[0], device=G.device)
        G[diag, diag] += self.reg

        P = G.inverse()
        # Broadcasting divides column j of P by -P[j, j].
        B = P / (-1 * P.diag())
        B[diag, diag] = 0

        self.B = B.cpu()

    def predict(self, X):
        """Return the item scores `X @ B` as a CPU tensor.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        B = getattr(self, 'B', None)
        if B is None:
            raise RuntimeError('EASE.predict() was called before fit()')
        device = self._runtime_device()
        output = (X.to(device) @ B.to(device)).cpu()
        return output

In [12]:
def get_hit(pred_list, true_list):
    """Return the fraction of `true_list` entries that appear in `pred_list`.

    Parameters
    ----------
    pred_list : iterable
        Recommended item ids.
    true_list : sequence
        Held-out item ids; its length is the denominator.

    Returns
    -------
    float
        `len(set(true_list) & set(pred_list)) / len(true_list)`, or 0.0 when
        `true_list` is empty (instead of raising ZeroDivisionError).
    """
    if len(true_list) == 0:
        return 0.0
    hit_list = set(true_list) & set(pred_list)
    hit = len(hit_list) / len(true_list)
    return hit

In [13]:
# Fit an EASE model on `mat` with a fixed regularisation value, then free
# the temporary memory used during fitting.
reg = 1000
model = EASE(reg = reg)
model.fit(mat)
model.clear_memory()

In [14]:
import pickle

# Persist the fitted model object to MODEL_PATH/ease.pickle.
# NOTE(review): pickle stores the class by reference, so whatever loads this
# file needs an importable `EASE` definition — confirm against the consumer.
with open(os.path.join(MODEL_PATH, 'ease.pickle'), 'wb') as file:
    pickle.dump(model, file, protocol = PICKLE_PROTOCOL)

In [15]:
import pickle

# Read the pickle written to MODEL_PATH/ease.pickle back into `load_model`.
# pickle.load can execute arbitrary code: only load files you produced yourself.
with open(os.path.join(MODEL_PATH, 'ease.pickle'), 'rb') as file: 
    load_model = pickle.load(file)

In [11]:
# Regularisation sweep: fit EASE for each candidate `reg` and print the mean
# hit ratio of the top-K recommendations against each user's held-out items.
# NOTE(review): items that are 1 in `mat` are masked out before ranking, so a
# meaningful score needs `mat` to contain only the training interactions.
TOP_K = 10

# Fail early with an actionable message instead of a bare KeyError inside
# the scoring loop.
if not valid_dict:
    raise RuntimeError(
        'valid_dict is empty: re-enable the hold-out split when building '
        '`mat` before running this evaluation cell.'
    )

for reg in tqdm([1000]):
    model = EASE(reg = reg)
    model.fit(mat)
    model.clear_memory()

    output = model.predict(mat)
    model.clear_memory()

    # Never recommend items the user already has. `np.inf` replaces the
    # `np.Inf` alias, which was removed in NumPy 2.0.
    output[mat == 1] = -np.inf
    # Ascending argsort: the best-scoring items sit at the end of each row.
    rec_list = output.argsort(dim = 1)

    hit = 0
    for idx, rec in enumerate(rec_list):
        # Last TOP_K indices reversed == first TOP_K of the reversed row, but
        # without converting the whole row to a Python list.
        pred = rec[-TOP_K:].flip(0).tolist()
        true = valid_dict[idx]
        hit += get_hit(pred, true)

    hit /= len(output)
    print(f'reg: {reg} | hit : {hit:.5f}')

100%|██████████| 1/1 [02:11<00:00, 131.53s/it]

reg: 1000 | hit : 0.54859





In [26]:
import requests

# Smoke-test the deployed recommendation endpoint for a single user.
# NOTE(review): the server address and `key` are hard-coded; move them to
# environment variables or a config file before sharing this notebook.
url = 'http://101.101.218.250:30005/test/'

data = {
    'key' : 123456,
    'username' : '2712qwer',
}

# The timeout keeps the cell from hanging forever if the server is
# unreachable; raise_for_status surfaces HTTP errors instead of trying to
# parse an error body as JSON.
res = requests.post(url, json = data, timeout = 60)
res.raise_for_status()
res = res.json()

In [27]:
res

{'model': {'multi_modal_user_seq_and_ease': ['14238',
   '1759',
   '12996',
   '11058',
   '2660',
   '10026',
   '1446',
   '4811',
   '2669',
   '12872'],
  'pretrained_user_seq_and_ease': ['13549',
   '13913',
   '1806',
   '2252',
   '2638',
   '1062',
   '2589',
   '1766',
   '2668',
   '5557'],
  'user_seq': ['5931',
   '16928',
   '13424',
   '4650',
   '5921',
   '6156',
   '4386',
   '1457',
   '2146',
   '17129'],
  'pretrained_user_seq': ['13424',
   '1038',
   '2623',
   '2026',
   '1469',
   '12851',
   '3967',
   '12969',
   '1068',
   '2636'],
  'multi_modal_user_seq': ['23631',
   '2916',
   '12869',
   '5972',
   '13700',
   '12969',
   '2479',
   '1245',
   '21738',
   '17259'],
  'ease': ['15652',
   '2447',
   '1012',
   '18870',
   '1966',
   '11866',
   '1149',
   '2156',
   '10824',
   '2609']},
 'tag': {'lately_preference_tags': ['그래프 이론', '그래프 탐색', '너비 우선 탐색'],
  'total_preference_tags': ['구현', '수학', '사칙연산']},
 'rank': 'Silver II'}

In [22]:
import requests
from bs4 import BeautifulSoup

# Fetch a user's acmicpc.net profile page and collect the text of every link
# matched by an explicit CSS path.
headers = {'User-Agent': "Mediapartners-Google"}
user_id = 'koosaga'
url = f'https://www.acmicpc.net/user/{user_id}'
# Bound the wait and fail loudly on HTTP errors rather than scraping an
# error page.
response = requests.request("GET", url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
user_seq = [problem.text for problem in soup.select('body > div.wrapper > div.container.content > div.row > div:nth-child(2) > div > div.col-md-9 > div:nth-child(2) > div.panel-body > div > a')]

In [23]:
import requests
from bs4 import BeautifulSoup

# Fetch a user's acmicpc.net profile page and read the whitespace-separated
# entries of the first <div class="problem-list"> block.
headers = {'User-Agent': "Mediapartners-Google"}
user_id = 'koosaga'
url = f'https://www.acmicpc.net/user/{user_id}'
# Bound the wait and fail loudly on HTTP errors rather than scraping an
# error page.
response = requests.request("GET", url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

problem_list = soup.find('div', {'class':'problem-list'})
if problem_list is None:
    # soup.find returns None when nothing matches; report that explicitly
    # instead of failing with "'NoneType' object has no attribute 'text'".
    raise ValueError(f'no problem-list block found on the profile page of {user_id!r}')
user_seq = problem_list.text.split()

In [19]:
# NOTE(review): `problems` is not assigned at top level in any visible cell
# (it only appears as a parameter of get_problems_to_idx), so this cell may
# fail on a fresh Restart & Run All — confirm where it is defined.
problems

['1000',
 '1001',
 '1002',
 '1003',
 '1004',
 '1005',
 '1006',
 '1007',
 '1008',
 '1009',
 '1010',
 '1011',
 '1012',
 '1013',
 '1014',
 '1015',
 '1016',
 '1017',
 '1018',
 '1019',
 '1020',
 '1021',
 '1022',
 '1023',
 '1024',
 '1025',
 '1026',
 '1027',
 '1028',
 '1029',
 '1030',
 '1031',
 '1032',
 '1033',
 '1034',
 '1037',
 '1038',
 '1039',
 '1041',
 '1043',
 '1044',
 '1045',
 '1047',
 '1049',
 '1051',
 '1052',
 '1053',
 '1056',
 '1057',
 '1058',
 '1059',
 '1062',
 '1063',
 '1064',
 '1065',
 '1067',
 '1068',
 '1069',
 '1070',
 '1071',
 '1072',
 '1073',
 '1074',
 '1075',
 '1076',
 '1077',
 '1080',
 '1081',
 '1082',
 '1083',
 '1084',
 '1085',
 '1086',
 '1087',
 '1088',
 '1089',
 '1090',
 '1092',
 '1093',
 '1094',
 '1100',
 '1101',
 '1102',
 '1103',
 '1105',
 '1106',
 '1107',
 '1110',
 '1113',
 '1114',
 '1115',
 '1116',
 '1119',
 '1120',
 '1121',
 '1124',
 '1126',
 '1127',
 '1128',
 '1129',
 '1132',
 '1135',
 '1138',
 '1139',
 '1140',
 '1141',
 '1143',
 '1144',
 '1145',
 '1146',
 '1149',
 