In [55]:
import pandas as pd
import numpy as np

In [56]:
df = pd.read_csv('dataset/train_dataset.txt', delimiter='\t')
df.head()

Unnamed: 0,user_id,item_id,rate,review_text
0,A2YKWYC3WQJX5J,B00106AC06,1,I usually love the Motions conditioners and ma...
1,A2LXC5ZHHP0WXP,B00AE07BMQ,1,Axe messy look styling gum is a product that w...
2,A3HLTHHLPKLRQA,B00AIQOKDY,1,I have always found liquid soap to be as much ...
3,A6N1DC5AMPLSK,B000F6RFX4,1,I've tried plenty of products that claim to he...
4,ALNFHVS3SC4FV,B0020122ZS,1,Suave Kids is one of my favorite brands of sha...


In [57]:
users = set(df['user_id'])
items = set(df['item_id'])

grouping = df.groupby(by=['user_id', 'item_id'])
user_to_id = { x:i for i,x in enumerate(users) }
item_to_id = { x:i for i,x in enumerate(items) }

R = np.zeros((len(users), len(items)))

for user, item in grouping.groups.keys():
    user_id = user_to_id[user]
    item_id = item_to_id[item]
    R[user_id, item_id] = 1

In [58]:
rng = np.random.default_rng(seed=1234)

def choice(size, sample_size):
    n, m = size, sample_size
    return rng.choice(n, m, replace=False)

def random_choice(x, y, n, n_sample):
    i = choice(n, n_sample)
    return x[i], y[i]

def random_points(n, n_sample, m=-1):
    I = choice(n, n_sample)
    if m < 0:
        m = n
    J = choice(m, n_sample)
    return I, J

In [200]:
def error(R, Q, P, reg):
    return (R - Q@P.T).sum() + reg * (Q@Q.T).sum() + (P@P.T).sum()

In [259]:
# learning rate
LR = 0.2
END_LR = 0.01
# regularization (lambda)
r = 0.001

# maximum iterations
n_epochs = 100
# n_epochs = 100

n_latent = 64

n_users = len(users)
n_items = len(items)

# error check steps
erc_step = 100

# min error threshold
eth = 1e-4

# sample size
# s = min(n_users, n_items) // 3
s = 1

w = 1000

def norm(M):
    n = (M*M).sum(axis=1, keepdims=True)
    return np.sqrt(n)

def normalize(M):
    nn = norm(M)
    if nn.sum() > 0:
        return M / nn
    return M

norm_step = 1

Q0 = rng.random((n_users, n_latent))
P0 = rng.random((n_items, n_latent))


In [264]:

Q = Q0[:,:]
P = P0[:,:]
Q = normalize(Q)
P = normalize(P.T).T

for epoch in range(n_epochs):
    lr = LR - (LR-END_LR) * np.exp(0.01*(epoch - n_epochs)) 

    # I, J = 
    for i, j in zip(*random_points(n_users, s, n_items)):
    # update P and Q
        i0, i1 = max(0, i-w), min(i+w, n_users-1)
        j0, j1 = max(0, i-w), min(j+w, n_items-1)

        q = Q[i0:i1, :]
        p = P[j0:j1, :]

        qp = q @ p.T

        e = R[i0:i1, j0:j1] - qp
        
        # print(f'i, j, e: {i} {j} {e}')

        # print("updating Q with", (-e * P[j, :] + 2*r*Q[i, :]))
        
        q = q - lr * (-e @ p + 2*r*q)
        p = p - lr * (-e.T @ q + 2*r*p)

        # q = np.where(q > 0, q, 0.)
        # p = np.where(p > 0, p, 0.)
        
        if epoch % norm_step == 0:
            q = normalize(q)
            p = normalize(p)


        Q[i0:i1, :] = q
        P[j0:j1, :] = p

    if epoch % erc_step == 0 and abs(error(R, Q, P, reg=r)) < eth:
        print('breaking at', epoch)
        break

  p = p - lr * (-e.T @ q + 2*r*p)
  p = p - lr * (-e.T @ q + 2*r*p)


In [265]:
error(R, Q, P, reg=r)

nan

In [262]:
P1, Q1 = P, Q
(Q @ P.T - R).sum()

493839.2221619478

In [263]:
# learning rate
LR = 0.2
END_LR = 0.01
# regularization (lambda)
r = 0.001

# maximum iterations
n_epochs = 100
# n_epochs = 100

n_latent = 64

n_users = len(users)
n_items = len(items)

# error check steps
erc_step = 100

# min error threshold
eth = 1e-4


def norm(M):
    n = (M*M).sum(axis=1, keepdims=True)
    return np.sqrt(n)

def normalize(M):
    nn = norm(M)
    if nn.sum() > 0:
        return M / nn
    return M

norm_step = 20

n_cv_users = n_users // 10
n_cv_items = n_items // 10

def normal(v):
    return v / np.sqrt(np.dot(v, v))


def MF(R, Q, P, LR=LR, END_LR=END_LR, n_users=n_users, n_items=n_items):
    # Q = rng.random((n_users, n_latent))
    # P = rng.random((n_items, n_latent))
    Q = normalize(Q)
    P = normalize(P.T).T
    R = R[:n_users, :n_items]

    for epoch in range(n_epochs):
        lr = LR - (LR-END_LR) * np.exp(0.01*(epoch - n_epochs)) 

        # I, J = 
        for i, j in zip(range(n_users), range(n_items)):
        # update P and Q

            q = Q[i, :]
            p = P[j, :]

            qp = q @ p.T
            e = R[i, j] - qp
            
            q = q - lr * (-e * p + 2*r*q)
            p = p - lr * (-e * q + 2*r*p)

            # q = np.where(q > 0, q, 0.)
            # p = np.where(p > 0, p, 0.)
            
            if epoch % norm_step == 0:
                q = normal(q)
                p = normal(p)
                
            Q[i, :] = q
            P[j, :] = p

        if epoch % erc_step == 0 and abs(error(R, Q, P, reg=r)) < eth:
            print('breaking at', epoch)
            break
    
    return error(R, Q, P, reg=r)

# bLS, bLE = None, None
# x = np.Inf
# for LS in [0.1, 0.2, 0.4]:
#     for LE in [0.1, 0.01, 0.001, 0.0001, 0.00001]:
#         e = MF(R, LS, LE, n_cv_users, n_cv_items)
#         if e < x:
#             x = e
#             bLS, bLE = LS, LE

# print("best")
# bLS, bLE, x

MF(R, Q, P)

399515.9597256015

In [226]:
print((Q@P.T).sum())
print()
print(Q @ P.T - R)

482682.05965397257

[[-1.28926429e-17  3.53553695e-01  2.68799723e-01 ...  5.40656499e-01
   4.07660150e-01  5.15921985e-01]
 [ 4.69270085e-01 -8.40094313e-18  4.12739258e-01 ...  5.52155389e-01
   3.55100879e-01  4.44494085e-01]
 [ 4.68349786e-01  5.52842537e-01 -1.35026451e-17 ...  5.28878952e-01
   5.16009532e-01  3.88881496e-01]
 ...
 [ 5.97290576e-01  5.39367267e-01  4.37187110e-01 ...  5.71218287e-01
   5.69738935e-01  5.90248138e-01]
 [ 5.39938185e-01  5.25967362e-01  4.98240605e-01 ...  5.76094599e-01
   5.94864364e-01  5.04644554e-01]
 [ 5.94038212e-01  5.53118957e-01  4.66514268e-01 ...  6.04724978e-01
   6.11972999e-01  3.90003794e-01]]


In [227]:
error(R, Q, P, 0.1)

-131088.50507372533

In [213]:
P.T

array([[0.12353138, 0.12353138, 0.12353138, ..., 0.12353138, 0.12353138,
        0.05667184],
       [0.12331475, 0.12331475, 0.12331475, ..., 0.12331475, 0.12331475,
        0.04795947],
       [0.12545248, 0.12545248, 0.12545248, ..., 0.12545248, 0.12545248,
        0.02747783],
       ...,
       [0.12550174, 0.12550174, 0.12550174, ..., 0.12550174, 0.12550174,
        0.04304229],
       [0.12617285, 0.12617285, 0.12617285, ..., 0.12617285, 0.12617285,
        0.01979859],
       [0.1235909 , 0.1235909 , 0.1235909 , ..., 0.1235909 , 0.1235909 ,
        0.04390774]])

In [179]:
a = np.array([1,2,-3,1])
np.where(a > 0, a, 0)

array([1, 2, 0, 1])

In [114]:
Q

array([[0.0396746 , 0.17673503, 0.11262975, ..., 0.09197452, 0.06747847,
        0.0855569 ],
       [0.0249935 , 0.05907143, 0.00788802, ..., 0.01138497, 0.1906193 ,
        0.07765782],
       [0.04108991, 0.0109894 , 0.19732257, ..., 0.13621631, 0.06658739,
        0.08384918],
       ...,
       [0.17372221, 0.1506166 , 0.01665649, ..., 0.0259795 , 0.16754996,
        0.13501963],
       [0.13569126, 0.        , 0.00882256, ..., 0.20652043, 0.10464155,
        0.05052449],
       [0.12820072, 0.05514588, 0.01293362, ..., 0.00106556, 0.00702635,
        0.18920364]])

array([[0.01294956, 0.05354392, 0.01876144, ..., 0.0148397 , 0.03272223,
        0.02203582],
       [0.01193093, 0.04818973, 0.0191468 , ..., 0.01657741, 0.0275575 ,
        0.01745377],
       [0.01791556, 0.04142934, 0.01830682, ..., 0.01069668, 0.02790092,
        0.01213667],
       ...,
       [0.01216479, 0.04362893, 0.02152118, ..., 0.02112804, 0.02141489,
        0.01316631],
       [0.01601868, 0.04488458, 0.01821144, ..., 0.01718663, 0.02761061,
        0.01837809],
       [0.0143585 , 0.04560999, 0.01746998, ..., 0.01508824, 0.02434002,
        0.01615099]])