In [1]:
import numpy as np
import pandas as pd
import time
from collections import deque

import tensorflow as tf
from six import next
from sklearn import preprocessing
import sys
from scipy.sparse import lil_matrix
from scipy.sparse import coo_matrix

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def get_UserData100k():
    """Load the MovieLens-100k user demographics as a one-hot feature matrix.

    Reads ``./data/ml100k/u.user`` (pipe separated, no header row), ignores the
    user id and the postal code, and one-hot encodes ``age``, ``gender`` and
    ``occupation`` with ``pd.get_dummies``.  ``age`` is encoded as a
    categorical value, i.e. every distinct age gets its own indicator column.

    Returns:
        numpy.ndarray: one row per line of the file (file order), one column
        per distinct age / gender / occupation value.
    """
    col_names = ["user", "age", "gender", "occupation", "PostCode"]
    df = pd.read_csv('./data/ml100k/u.user', sep='|', header=None, names=col_names, engine='python')
    # Only the demographic columns become features.  The previous version
    # decremented "user" immediately before deleting it; that dead step is gone.
    demographic_cols = ["age", "gender", "occupation"]
    features = pd.get_dummies(df[demographic_cols], columns=demographic_cols)
    return features.values

def get_ItemData100k():
    """Load the MovieLens-100k item features (genres + normalised release year).

    Reads ``./data/ml100k/u.item`` (pipe separated, no header row).  The
    result keeps the 19 genre indicator columns (``unknown`` .. ``Western``)
    in file order and appends a ``year`` column: the release year min-max
    scaled to ``[0, 1]``.  Rows without a parseable release date, and the
    degenerate case where every movie shares one year (0 / 0), get ``0.0``.

    Returns:
        numpy.ndarray: one row per line of the file, 20 columns
        (19 genres followed by the scaled year).
    """
    col_names = ["movieid", "movietitle", "releasedate", "videoreleasedate","IMDbURL"
                ,"unknown","Action","Adventure","Animation","Childrens","Comedy","Crime","Documentary"
                ,"Drama","Fantasy","FilmNoir","Horror","Musical","Mystery","Romance","SciFi","Thriller"
                ,"War","Western"]
    df = pd.read_csv('./data/ml100k/u.item', sep='|', header=None, names=col_names, engine='python')

    # Extract the year with the vectorised .dt accessor.  The previous version
    # also computed a month per row (and decremented movieid) only to delete
    # both columns again.
    release_year = pd.to_datetime(df['releasedate']).dt.year.astype(np.float64)
    release_year -= release_year.min()
    release_year /= release_year.max()

    genre_cols = col_names[5:]
    features = df[genre_cols].copy()
    features['year'] = release_year.fillna(0.0)
    return features.values

def read_process(filname, sep="\t"):
    """Read a headerless ratings file into a DataFrame with zero-based ids.

    Args:
        filname: path of the ratings file; each line holds
            user, item, rating and a fourth value stored as ``st``.
        sep: field separator passed to ``pd.read_csv``.

    Returns:
        pandas.DataFrame with columns ``user`` and ``item`` (int32, shifted
        down by one), ``rate`` (float32) and ``st`` (as parsed).
    """
    ratings = pd.read_csv(filname, sep=sep, header=None,
                          names=["user", "item", "rate", "st"], engine='python')
    # The ids in the file start at 1; shift them so they can index arrays.
    ratings["user"] = (ratings["user"] - 1).astype(np.int32)
    ratings["item"] = (ratings["item"] - 1).astype(np.int32)
    ratings["rate"] = ratings["rate"].astype(np.float32)
    return ratings


def get_data100k():
    """Load ``./data/ml100k/u.data``, shuffle it and split it into train/test.

    The first ``int(rows * PERC)`` shuffled rows form the training set and the
    remainder the test set.  ``PERC`` is read from the module-level global.
    The shuffle uses NumPy's global random state, so the split differs between
    runs unless that state is seeded by the caller.

    Returns:
        (df_train, df_test): two DataFrames, both indexed from 0.
    """
    ratings = read_process("./data/ml100k/u.data", sep="\t")
    n_rows = len(ratings)
    shuffled = ratings.iloc[np.random.permutation(n_rows)].reset_index(drop=True)
    cut = int(n_rows * PERC)
    train_part = shuffled[0:cut]
    test_part = shuffled[cut:].reset_index(drop=True)
    return train_part, test_part

def optimization(infer, regularizer, rate_batch, learning_rate=0.0005, reg=0.1):
    """Build the loss and the Adam training op on the device named by ``DEVICE``.

    Args:
        infer: tensor of predicted ratings.
        regularizer: scalar regularisation term added to the data loss.
        rate_batch: tensor of target ratings.
        learning_rate: learning rate given to ``tf.train.AdamOptimizer``.
        reg: accepted for interface compatibility; not used in the body.

    Returns:
        (cost, train_op): the total loss tensor and the op that minimises it
        while incrementing the global step.

    Raises:
        AssertionError: if no global step exists in the current graph.
    """
    with tf.device(DEVICE):
        # The global step must already have been created by the caller.
        step = tf.train.get_global_step()
        assert step is not None
        # tf.nn.l2_loss(t) is sum(t ** 2) / 2 over the residual infer - rate_batch.
        squared_error = tf.nn.l2_loss(tf.subtract(infer, rate_batch))
        cost = tf.add(squared_error, regularizer)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = optimizer.minimize(cost, global_step=step)
    return cost, train_op

def optimizationSparse(infer, regularizer, rate_batch, learning_rate=0.0005, reg=0.1):
    """Build the loss and the Adam training op without pinning a device.

    Same computation as ``optimization`` but without the ``tf.device`` scope.

    Args:
        infer: tensor of predicted ratings.
        regularizer: scalar regularisation term added to the data loss.
        rate_batch: tensor of target ratings.
        learning_rate: learning rate given to ``tf.train.AdamOptimizer``.
        reg: accepted for interface compatibility; not used in the body.

    Returns:
        (cost, train_op): the total loss tensor and the op that minimises it.

    Raises:
        AssertionError: if no global step exists in the current graph.
    """
    step = tf.train.get_global_step()
    assert step is not None
    squared_error = tf.nn.l2_loss(tf.subtract(infer, rate_batch))
    cost = tf.add(squared_error, regularizer)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(cost, global_step=step)
    return cost, train_op

def clip(x):
    """Clamp values to the closed interval [1.0, 5.0]; out-of-range values move to the nearest bound."""
    lowest_rating, highest_rating = 1.0, 5.0
    return np.clip(x, a_min=lowest_rating, a_max=highest_rating)


In [273]:
# ======================= ML 100k dataset configuration =======================

# --- data / split ---
PERC = 0.9            # fraction of the ratings used for training (read by get_data100k)
USER_NUM = 943
ITEM_NUM = 1682
BATCH_SIZE = 1000

df_train, df_test = get_data100k()

# --- model hyper-parameters (run with graph features) ---
MFSIZE = 50           # units of each dense projection
UW = 0.05             # weight of the user-side L2 term
IW = 0.02             # weight of the item-side L2 term
LR = 0.00003          # Adam learning rate
EPOCH_MAX = 200
DEVICE = "/cpu:0"

# Start from an empty default graph before any model ops are created.
tf.reset_default_graph()

# Reference call kept from the original notebook (GraphRec is not defined here):
# GraphRec(df_train, df_test,ItemData=False,UserData=False,Graph=True,Dataset='100k')

# =============================================================================

In [208]:
# Display the user x item adjacency matrix.
# NOTE(review): AdjacencyUsers is first assigned in a later cell of this
# notebook, so this cell only works after that cell has been executed.
AdjacencyUsers

array([[1. , 0.6, 0.8, ..., 0. , 0. , 0. ],
       [0.8, 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [1. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 1. , 0. , ..., 0. , 0. , 0. ]], dtype=float32)

In [18]:
class ShuffleIterator(object):
    """Endless iterator that yields random mini-batches.

    ``inputs`` is a sequence of equally long columns.  The columns are stacked
    into one 2-D array (one row per sample, one column per input), so all
    columns share a common dtype.  Every call to ``next`` draws ``batch_size``
    row indices uniformly at random, with replacement, from NumPy's global
    random state.
    """

    def __init__(self, inputs, batch_size=10):
        self.batch_size = batch_size
        self.num_cols = len(inputs)
        self.len = len(inputs[0])
        stacked = np.vstack([np.array(inputs[col]) for col in range(self.num_cols)])
        # Rows = samples, columns = the individual inputs.
        self.inputs = stacked.T

    def __len__(self):
        """Number of samples (rows) available."""
        return self.len

    def __iter__(self):
        return self

    def __next__(self):
        # Delegates to ``next`` so subclasses only need to override one method.
        return self.next()

    def next(self):
        """Return one random batch as a list with one 1-D array per input column."""
        picks = np.random.randint(0, self.len, (self.batch_size,))
        batch = self.inputs[picks, :]
        return [batch[:, col] for col in range(self.num_cols)]


class OneEpochIterator(ShuffleIterator):
    """Iterator that walks over all samples once, in order, then stops.

    The row indices are divided into ``ceil(len / batch_size)`` nearly equal
    contiguous groups with ``np.array_split``, so groups may be smaller than
    ``batch_size``.  A non-positive ``batch_size`` yields everything as one
    batch.  After ``StopIteration`` the iterator rewinds, so it can be used in
    another ``for`` loop.
    """

    def __init__(self, inputs, batch_size=10):
        super(OneEpochIterator, self).__init__(inputs, batch_size=batch_size)
        all_rows = np.arange(self.len)
        if batch_size > 0:
            # Integer ceil-division: an int section count for array_split that
            # does not depend on true division.
            n_groups = int(-(-self.len // batch_size))
            # array_split rejects 0 sections; an empty dataset has no batches.
            self.idx_group = np.array_split(all_rows, n_groups) if n_groups else []
        else:
            self.idx_group = [all_rows]
        self.group_id = 0

    def next(self):
        """Return the next batch as a list of 1-D arrays, one per input column.

        Raises:
            StopIteration: once every group has been returned; the position
            is reset to the first group before raising.
        """
        if self.group_id >= len(self.idx_group):
            self.group_id = 0
            raise StopIteration
        out = self.inputs[self.idx_group[self.group_id], :]
        self.group_id += 1
        return [out[:, i] for i in range(self.num_cols)]
    

def inferenceDense(phase,user_batch, item_batch,idx_user,idx_item, user_num, item_num,UReg=0.05,IReg=0.1):
    """Build the rating-prediction graph: feature lookup, dense projection, dot product.

    NOTE: a later cell in this notebook defines ``inferenceDense`` again with
    four return values; once that cell has run, this two-value version is
    shadowed.

    Args:
        phase: accepted but not used in the body.
        user_batch: tensor of user indices.
        item_batch: tensor of item indices.
        idx_user: user feature matrix the indices are looked up in.
        idx_item: item feature matrix the indices are looked up in.
        user_num, item_num: accepted but not used in the body.
        UReg, IReg: accepted but not used; the regulariser weights come from
            the module-level ``UW`` and ``IW``.

    Returns:
        (infer, regularizer): predicted rating per sample and the weighted
        sum of the L2 losses of both projections.
    """
    def _crelu_projection(features):
        # Dense layer with MFSIZE units followed by a CReLU activation.
        return tf.layers.dense(inputs=features, units=MFSIZE, activation=tf.nn.crelu,
                               kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    with tf.device(DEVICE):
        user_features = tf.nn.embedding_lookup(idx_user, user_batch, name="embedding_user")
        item_features = tf.nn.embedding_lookup(idx_item, item_batch, name="embedding_item")

        ul1mf = _crelu_projection(user_features)
        il1mf = _crelu_projection(item_features)

        # Element-wise product summed over axis 1 = per-sample dot product.
        infer = tf.reduce_sum(tf.multiply(ul1mf, il1mf), 1, name="inference")

        regularizer = tf.add(UW*tf.nn.l2_loss(ul1mf), IW*tf.nn.l2_loss(il1mf), name="regularizer")

    return infer, regularizer


## Embedding + dot.product

In [220]:
def inferenceDense(phase,user_batch, item_batch,idx_user,idx_item, user_num, item_num,UReg=0.05,IReg=0.1):
    """Build the rating-prediction graph and also expose both projected vectors.

    Args:
        phase: accepted but not used in the body.
        user_batch: tensor of user indices.
        item_batch: tensor of item indices.
        idx_user: user feature matrix; the rows named by ``user_batch`` are selected.
        idx_item: item feature matrix; the rows named by ``item_batch`` are selected.
        user_num, item_num: accepted but not used in the body.
        UReg, IReg: accepted but not used; the regulariser weights come from
            the module-level ``UW`` and ``IW``.

    Returns:
        (infer, regularizer, ul1mf, il1mf): predicted rating per sample, the
        weighted sum of both L2 losses, and the projected user / item vectors.
    """
    def _crelu_projection(features):
        # Dense layer with MFSIZE units followed by a CReLU activation.
        return tf.layers.dense(inputs=features, units=MFSIZE, activation=tf.nn.crelu,
                               kernel_initializer=tf.random_normal_initializer(stddev=0.01))

    with tf.device(DEVICE):
        user_features = tf.nn.embedding_lookup(idx_user, user_batch, name="embedding_user")
        item_features = tf.nn.embedding_lookup(idx_item, item_batch, name="embedding_item")

        # The static shapes are printed while the graph is being built.
        ul1mf = _crelu_projection(user_features)
        print(ul1mf.shape)
        il1mf = _crelu_projection(item_features)
        print(il1mf.shape)
        InferInputMF = tf.multiply(ul1mf, il1mf)
        print(InferInputMF.shape)

        # Summing the element-wise product over axis 1 gives one value per sample.
        infer = tf.reduce_sum(InferInputMF, 1, name="inference")

        # Weighted L2 penalty on both projections.
        regularizer = tf.add(UW*tf.nn.l2_loss(ul1mf), IW*tf.nn.l2_loss(il1mf), name="regularizer")
    return infer, regularizer, ul1mf, il1mf


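- InferInputMF의 shape[1](= 100)이 의미하는 것: crelu가 출력 차원을 두 배로 만들기 때문에 2 × MFSIZE(50) = 100
- MF(matrix factorization)를 왜 쓰는가?
- MF의 output이 어떤 방식으로 prediction의 형태를 띠는가? (user 벡터와 item 벡터의 element-wise 곱을 axis 1로 합산한 값, 즉 dot product가 예측 rating이 된다)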

## GraphRec

## 뜯어보기

In [274]:
# --- Build the graph-based side features from the training ratings ----------
# Vectorised replacement for the former per-row ``iterrows`` loop; it fills
# the same matrices with the same values, without a Python-level loop.
train_users = df_train["user"].values.astype(np.int64)
train_items = df_train["item"].values.astype(np.int64)
# Ratings scaled by the maximum rating (5.0); computed in float64 and stored
# into the float32 matrices, exactly as the row-wise version did.
train_rates = df_train["rate"].values.astype(np.float64) / 5.0

# User x item adjacency (N x M) and its item x user counterpart (M x N).
AdjacencyUsers = np.zeros((USER_NUM, ITEM_NUM), dtype=np.float32)
AdjacencyUsers[train_users, train_items] = train_rates
AdjacencyItems = np.zeros((ITEM_NUM, USER_NUM), dtype=np.float32)
AdjacencyItems[train_items, train_users] = train_rates

# Degree = number of training ratings per user / per item, as column vectors.
DegreeUsers = np.bincount(train_users, minlength=USER_NUM).astype(np.float32).reshape(-1, 1)
DegreeItems = np.bincount(train_items, minlength=ITEM_NUM).astype(np.float32).reshape(-1, 1)

# Scale the degrees by their maximum.
DUserMax = np.amax(DegreeUsers)
DItemMax = np.amax(DegreeItems)
DegreeUsers = np.true_divide(DegreeUsers, DUserMax)
DegreeItems = np.true_divide(DegreeItems, DItemMax)

# Feature row = [one-hot id | adjacency row | scaled degree].
UserFeatures = np.concatenate((np.identity(USER_NUM, dtype=np.bool_), AdjacencyUsers, DegreeUsers), axis=1)
print(UserFeatures.shape)
ItemFeatures = np.concatenate((np.identity(ITEM_NUM, dtype=np.bool_), AdjacencyItems, DegreeItems), axis=1)

# Append the side information loaded from u.user / u.item.
UsrDat = get_UserData100k()
UserFeatures = np.concatenate((UserFeatures, UsrDat), axis=1)

ItmDat = get_ItemData100k()
ItemFeatures = np.concatenate((ItemFeatures, ItmDat), axis=1)

UserFeaturesLength = UserFeatures.shape[1]
ItemFeaturesLength = ItemFeatures.shape[1]

print(UserFeatures.shape)
print(ItemFeatures.shape)


# Number of mini-batches that make up one epoch (original note: 90000 / 1000 = 90).
samples_per_batch = len(df_train) // BATCH_SIZE

# Training batches: random samples of BATCH_SIZE rows, drawn endlessly.
iter_train = ShuffleIterator([df_train["user"],df_train["item"],df_train["rate"]],batch_size=BATCH_SIZE)

# Test batches: one ordered pass over df_test in groups of at most 10000 rows.
iter_test = OneEpochIterator([df_test["user"],df_test["item"],df_test["rate"]],batch_size=10000)

# Placeholders that receive the values of each batch.
user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float64, shape=[None])
phase = tf.placeholder(tf.bool, name='phase')

# Feature matrices stored in the graph as float64 constants
# (the recorded run printed shapes 943x2710 for users and 1682x2646 for items).
w_user = tf.constant(UserFeatures,name="userids", shape=[USER_NUM,UserFeatures.shape[1]],dtype=tf.float64)
w_item = tf.constant(ItemFeatures,name="itemids", shape=[ITEM_NUM, ItemFeatures.shape[1]],dtype=tf.float64)


# Needs the four-value version of inferenceDense; p / s are the projected user / item vectors.
infer, regularizer, p,s = inferenceDense(phase,user_batch, item_batch,w_user,w_item, user_num=USER_NUM, item_num=ITEM_NUM)
# The global step has to exist before optimization() is called (it asserts on it).
global_step = tf.contrib.framework.get_or_create_global_step()
_, train_op = optimization(infer, regularizer, rate_batch, learning_rate=LR, reg=0.09)

init_op = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
finalerror=-1
# Clipped predictions of every training / evaluation batch.
train_ls = []
test_ls= []
# Projected user / item vectors collected after training.
p_ls = []
s_ls = []

# First collected vector per user id / item id.
p_dict = dict()
s_dict = dict()


# Train the model, report the RMSE once per epoch, then collect the projected
# user / item vectors for every test row.
with tf.Session(config=config) as sess:
    sess.run(init_op)
    print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
    # Squared errors of the most recent epoch's worth of training batches.
    errors = deque(maxlen=samples_per_batch)
    start = time.time()
    for i in range(EPOCH_MAX * samples_per_batch):  # EPOCH_MAX epochs of samples_per_batch steps
        users, items, rates = next(iter_train)
        # Only the prediction is fetched; the projected vectors are not
        # needed during training.
        _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                               item_batch: items,
                                                               rate_batch: rates,
                                                               phase: True})
        pred_batch = clip(pred_batch)
        train_ls.append(pred_batch)
        errors.append(np.power(pred_batch - rates, 2))
        if i % samples_per_batch == 0:  # once per epoch (including step 0)
            train_err = np.sqrt(np.mean(errors))
            test_err2 = np.array([])
            for users, items, rates in iter_test:  # predictions on the test split
                pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                        item_batch: items,
                                                        phase: False})

                pred_batch = clip(pred_batch)
                test_ls.append(pred_batch)
                test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
            end = time.time()
            test_err = np.sqrt(np.mean(test_err2))
            finalerror = test_err
            print("{:3d},{:f},{:f},{:f}(s)".format(i // samples_per_batch, train_err, test_err, end - start))
            start = end

    # Final ordered pass over the test split to collect the projected vectors.
    for users, items, rates in iter_test:
        pred_batch, p_mat, s_mat = sess.run([infer, p, s], feed_dict={user_batch: users,
                                                                      item_batch: items,
                                                                      phase: False})
        p_ls.append(p_mat)
        s_ls.append(s_mat)

# Build the lookup tables once, after ALL batches have been collected.  This
# used to run inside the batch loop, which repeated the work per batch and
# indexed past the end of concat_p / concat_s when there was more than one batch.
if p_ls:
    concat_p = np.vstack(p_ls)
    concat_s = np.vstack(s_ls)

    # Row idx of concat_p / concat_s corresponds to row idx of df_test
    # (the iterator walks the rows in order); keep the first vector per id.
    user_arr = df_test.user.values
    for idx, user in enumerate(user_arr):
        if user not in p_dict:
            p_dict[user] = concat_p[idx]

    item_arr = df_test.item.values
    for idx, item in enumerate(item_arr):
        if item not in s_dict:
            s_dict[item] = concat_s[idx]

KeyboardInterrupt: 

In [243]:
# Display the collected user vectors (user id -> projected vector).
p_dict

{497: array([0.24521923, 0.34529819, 0.20732286, 0.        , 0.        ,
        0.        , 0.14826147, 0.        , 0.13111128, 0.0461241 ,
        0.08179421, 0.09327078, 0.19774278, 0.        , 0.        ,
        0.        , 0.18676064, 0.04274155, 0.        , 0.05151122,
        0.0019209 , 0.        , 0.        , 0.        , 0.14815984,
        0.15020242, 0.        , 0.        , 0.16109034, 0.        ,
        0.12779294, 0.        , 0.        , 0.31626703, 0.        ,
        0.25111608, 0.        , 0.16967835, 0.        , 0.        ,
        0.        , 0.        , 0.21937002, 0.30722205, 0.        ,
        0.10564778, 0.        , 0.        , 0.20791312, 0.        ,
        0.        , 0.        , 0.        , 0.175218  , 0.07821176,
        0.1388813 , 0.        , 0.12504783, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.11662055, 0.24935906,
        0.21273065, 0.        , 0.        , 0.16957221, 0.        ,
        0.        , 0.25334151, 0.2150143 ,

In [275]:
# --- Build the graph-based side features from the training ratings ----------
# Vectorised replacement for the former per-row ``iterrows`` loop; it fills
# the same matrices with the same values, without a Python-level loop.
train_users = df_train["user"].values.astype(np.int64)
train_items = df_train["item"].values.astype(np.int64)
# Ratings scaled by the maximum rating (5.0); computed in float64 and stored
# into the float32 matrices, exactly as the row-wise version did.
train_rates = df_train["rate"].values.astype(np.float64) / 5.0

# User x item adjacency (N x M) and its item x user counterpart (M x N).
AdjacencyUsers = np.zeros((USER_NUM, ITEM_NUM), dtype=np.float32)
AdjacencyUsers[train_users, train_items] = train_rates
AdjacencyItems = np.zeros((ITEM_NUM, USER_NUM), dtype=np.float32)
AdjacencyItems[train_items, train_users] = train_rates

# Degree = number of training ratings per user / per item, as column vectors.
DegreeUsers = np.bincount(train_users, minlength=USER_NUM).astype(np.float32).reshape(-1, 1)
DegreeItems = np.bincount(train_items, minlength=ITEM_NUM).astype(np.float32).reshape(-1, 1)

# Scale the degrees by their maximum.
DUserMax = np.amax(DegreeUsers)
DItemMax = np.amax(DegreeItems)
DegreeUsers = np.true_divide(DegreeUsers, DUserMax)
DegreeItems = np.true_divide(DegreeItems, DItemMax)

# Feature row = [one-hot id | adjacency row | scaled degree].
UserFeatures = np.concatenate((np.identity(USER_NUM, dtype=np.bool_), AdjacencyUsers, DegreeUsers), axis=1)
print(UserFeatures.shape)
ItemFeatures = np.concatenate((np.identity(ITEM_NUM, dtype=np.bool_), AdjacencyItems, DegreeItems), axis=1)

# Append the side information loaded from u.user / u.item.
UsrDat = get_UserData100k()
UserFeatures = np.concatenate((UserFeatures, UsrDat), axis=1)

ItmDat = get_ItemData100k()
ItemFeatures = np.concatenate((ItemFeatures, ItmDat), axis=1)

UserFeaturesLength = UserFeatures.shape[1]
ItemFeaturesLength = ItemFeatures.shape[1]

print(UserFeatures.shape)
print(ItemFeatures.shape)


# Number of mini-batches that make up one epoch (original note: 90000 / 1000 = 90).
samples_per_batch = len(df_train) // BATCH_SIZE

# Training batches: random samples of BATCH_SIZE rows, drawn endlessly.
iter_train = ShuffleIterator([df_train["user"],df_train["item"],df_train["rate"]],batch_size=BATCH_SIZE)

# Test batches: one ordered pass over df_test in groups of at most 10000 rows.
iter_test = OneEpochIterator([df_test["user"],df_test["item"],df_test["rate"]],batch_size=10000)

# Placeholders that receive the values of each batch.
user_batch = tf.placeholder(tf.int32, shape=[None], name="id_user")
item_batch = tf.placeholder(tf.int32, shape=[None], name="id_item")
rate_batch = tf.placeholder(tf.float64, shape=[None])
phase = tf.placeholder(tf.bool, name='phase')

# Feature matrices stored in the graph as float64 constants
# (the recorded run printed shapes 943x2710 for users and 1682x2646 for items).
w_user = tf.constant(UserFeatures,name="userids", shape=[USER_NUM,UserFeatures.shape[1]],dtype=tf.float64)
w_item = tf.constant(ItemFeatures,name="itemids", shape=[ITEM_NUM, ItemFeatures.shape[1]],dtype=tf.float64)


# Needs the four-value version of inferenceDense; p / s are the projected user / item vectors.
infer, regularizer, p,s = inferenceDense(phase,user_batch, item_batch,w_user,w_item, user_num=USER_NUM, item_num=ITEM_NUM)
# The global step has to exist before optimization() is called (it asserts on it).
global_step = tf.contrib.framework.get_or_create_global_step()
_, train_op = optimization(infer, regularizer, rate_batch, learning_rate=LR, reg=0.09)

init_op = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
finalerror=-1
# Clipped predictions of every training / evaluation batch.
train_ls = []
test_ls= []
# Projected user / item vectors collected after training.
p_ls = []
s_ls = []

# First collected vector per user id / item id.
p_dict = dict()
s_dict = dict()

# Train and test rows together, iterated in order in groups of at most 100000
# rows, so that vectors can be collected for ids from both splits.
total_df = pd.concat([df_train,df_test])
iter_final = OneEpochIterator([total_df["user"],total_df["item"],total_df["rate"]],batch_size=100000)

# Train the model, report the RMSE once per epoch, then collect the projected
# user / item vectors for every row of total_df (train + test).
with tf.Session(config=config) as sess:
    sess.run(init_op)
    print("{} {} {} {}".format("epoch", "train_error", "val_error", "elapsed_time"))
    # Squared errors of the most recent epoch's worth of training batches.
    errors = deque(maxlen=samples_per_batch)
    start = time.time()
    for i in range(EPOCH_MAX * samples_per_batch):  # EPOCH_MAX epochs of samples_per_batch steps
        users, items, rates = next(iter_train)
        # Only the prediction is fetched; the projected vectors are not
        # needed during training.
        _, pred_batch = sess.run([train_op, infer], feed_dict={user_batch: users,
                                                               item_batch: items,
                                                               rate_batch: rates,
                                                               phase: True})
        pred_batch = clip(pred_batch)
        train_ls.append(pred_batch)
        errors.append(np.power(pred_batch - rates, 2))
        if i % samples_per_batch == 0:  # once per epoch (including step 0)
            train_err = np.sqrt(np.mean(errors))
            test_err2 = np.array([])
            for users, items, rates in iter_test:  # predictions on the test split
                pred_batch = sess.run(infer, feed_dict={user_batch: users,
                                                        item_batch: items,
                                                        phase: False})

                pred_batch = clip(pred_batch)
                test_ls.append(pred_batch)
                test_err2 = np.append(test_err2, np.power(pred_batch - rates, 2))
            end = time.time()
            test_err = np.sqrt(np.mean(test_err2))
            finalerror = test_err
            print("{:3d},{:f},{:f},{:f}(s)".format(i // samples_per_batch, train_err, test_err, end - start))
            start = end

    # Final ordered pass over train + test to collect the projected vectors.
    for users, items, rates in iter_final:
        pred_batch, p_mat, s_mat = sess.run([infer, p, s], feed_dict={user_batch: users,
                                                                      item_batch: items,
                                                                      phase: False})
        p_ls.append(p_mat)
        s_ls.append(s_mat)

# Build the lookup tables once, after ALL batches have been collected.  This
# used to run inside the batch loop, which repeated the work per batch and
# indexed past the end of concat_p / concat_s when there was more than one batch.
if p_ls:
    concat_p = np.vstack(p_ls)
    concat_s = np.vstack(s_ls)

    # Row idx of concat_p / concat_s corresponds to row idx of total_df
    # (the iterator walks the rows in order); keep the first vector per id.
    user_arr = total_df.user.values
    for idx, user in enumerate(user_arr):
        if user not in p_dict:
            p_dict[user] = concat_p[idx]

    item_arr = total_df.item.values
    for idx, item in enumerate(item_arr):
        if item not in s_dict:
            s_dict[item] = concat_s[idx]

(943, 2626)
(943, 2710)
(1682, 2646)
(?, 100)
(?, 100)
(?, 100)
epoch train_error val_error elapsed_time
  0,2.771642,2.770830,5.765494(s)
  1,2.383160,2.086649,7.274378(s)
  2,1.944071,1.849416,7.125306(s)
  3,1.733188,1.658056,7.079903(s)
  4,1.572143,1.527133,7.042967(s)
  5,1.455037,1.434316,7.041154(s)
  6,1.387246,1.370963,7.055392(s)
  7,1.324758,1.320997,7.129567(s)
  8,1.278790,1.277502,7.180947(s)
  9,1.236808,1.244630,7.351565(s)
 10,1.206185,1.214087,7.190862(s)
 11,1.178179,1.183615,7.286395(s)
 12,1.152580,1.163493,7.949858(s)
 13,1.131606,1.144066,7.310003(s)
 14,1.112657,1.125029,7.230132(s)
 15,1.089901,1.107610,7.622893(s)
 16,1.073208,1.093317,7.881595(s)
 17,1.058345,1.083572,8.184178(s)
 18,1.044145,1.070470,8.237676(s)
 19,1.039363,1.057311,8.187098(s)
 20,1.025974,1.047160,7.914568(s)
 21,1.007762,1.039887,7.678673(s)
 22,1.004019,1.027506,7.464377(s)
 23,0.991164,1.019244,7.257315(s)
 24,0.984327,1.012605,7.178988(s)
 25,0.977751,1.005725,7.194723(s)
 26,0.97243

In [271]:
# Look up the training row for user 547 and item 689 (ids are zero-based after read_process).
df_train[(df_train['user'] == 547) & (df_train['item'] == 689)]

Unnamed: 0,user,item,rate,st
61181,547,689,3.0,891042475


 user, item, rate

294, 736 -> 5

547, 689 -> 3

In [262]:
# Result recorded after 1 epoch of training (label from the original note;
# presumably the training cell was re-run with a different EPOCH_MAX — TODO confirm).
# Each line prints the dot product of a user vector and an item vector;
# the notes above list the actual ratings as 5 for (294, 736) and 3 for (547, 689).
print(sum(p_dict[294]*s_dict[736]))
print(sum(p_dict[547]*s_dict[689]))

1.140994522037851
1.884972591886624


In [269]:
# Result recorded after 10 epochs of training (label from the original note;
# presumably the training cell was re-run with a different EPOCH_MAX — TODO confirm).
# Each line prints the dot product of a user vector and an item vector.
print(sum(p_dict[294]*s_dict[736]))
print(sum(p_dict[547]*s_dict[689]))

2.771330827017519
4.224728656036783


In [276]:
# Result recorded after 200 epochs of training (label from the original note; EPOCH_MAX is 200 in the config cell).
# Each line prints the dot product of a user vector and an item vector.
print(sum(p_dict[294]*s_dict[736]))
print(sum(p_dict[547]*s_dict[689]))

3.7873552716037886
3.1124059492027465


In [None]:
ㅓㅗ