In [None]:
# -*- coding: utf-8 -*-
# Matrix Factorization -- SGD
"""
data understanding
train.txt size      4*80000  
test.txt size       4*20000 
user                943
item                1682
 ____________________________________
|userid | itemid | preference | time |
|-------|--------|------------|------|
|_______|________|____________|______|

bui = mu + bu + bi

"""
# Hyper-parameters read as globals by the SGD updates in train().
# Regularization weight: multiplies the current parameter value inside every
# update (user bias, item bias, and both latent-factor rows).
lamda4 = 0.05
# Learning rate: step size applied to every SGD update.
gamma = 0.01

In [None]:
import os
import numpy as np

In [None]:
# Scan the rating files to find the largest user id and the largest item id.
# Exactly 80000 lines are read from train.txt and 20000 from test.txt; every
# line is split on tabs, with the user id in column 0 and the item id in column 1.
f1 = open("train.txt")
f2 = open("test.txt")
unum = 0
inum = 0
for handle, n_lines in ((f1, 80000), (f2, 20000)):
    for line_no in range(n_lines):
        fields = handle.readline().split('\t')
        unum = max(unum, int(fields[0]))
        inum = max(inum, int(fields[1]))
print("%d %d" % (unum, inum))

In [None]:
""" Data load in, 943 users and 1682 items """
# Seed the global numpy RNG so the random factor initialisation below (and
# therefore every score printed later) is reproducible across kernel restarts.
np.random.seed(0)

# Rm / Tm: user-by-item rating matrices for train.txt / test.txt.
# A cell value of 0 means "no rating"; train() and test() skip such cells.
Rm = np.zeros((943, 1682))
Tm = np.zeros((943, 1682))
# Pm / Qm: latent factors (10 per user / per item), small random initial values.
Pm = np.random.randn(943, 10) * 0.5
Qm = np.random.randn(1682, 10) * 0.5


def _fill_ratings(path, target):
    """Copy every record of a tab-separated rating file into ``target``.

    Each line is expected to hold ``userid<TAB>itemid<TAB>preference<TAB>time``.
    User and item ids are 1-based in the file and are shifted to 0-based
    row/column indices. The preference is stored unchanged: it must NOT be
    shifted down by one, otherwise the lowest rating becomes 0 and is
    indistinguishable from an unrated cell.
    NOTE(review): assumes every preference in the files is >= 1 -- confirm.

    The file is opened and closed here, so the cell can be re-run on its own.
    """
    with open(path) as handle:
        for line in handle:
            fields = line.split('\t')
            if len(fields) < 3:
                # Blank or truncated line (e.g. trailing newline at EOF).
                continue
            target[int(fields[0]) - 1, int(fields[1]) - 1] = int(fields[2])


_fill_ratings("train.txt", Rm)
_fill_ratings("test.txt", Tm)

In [None]:
def mu_overall(M):
    """mu -- mean value over every cell of the rating matrix ``M``.

    Parameters
    ----------
    M : 2-D array-like
        User-by-item rating matrix. Any shape is accepted; the dimensions are
        taken from ``M`` itself rather than assumed.

    Returns
    -------
    float
        ``sum of all cells / number of cells``, or ``0.0`` when ``M`` has no
        cells at all.

    Notes
    -----
    Every cell counts toward the denominator, including cells equal to 0.
    NOTE(review): elsewhere in this notebook 0 marks an unrated cell, so this
    is not the mean of the observed ratings. bias_item()/bias_user() divide by
    the full dimension in the same way, so the three must be changed together
    if that is ever revisited.
    """
    M = np.asarray(M, dtype=float)
    if M.size == 0:
        return 0.0
    return M.sum() / M.size

In [None]:
def bias_item(M, mu):
    """bi -- per-item deviation from the overall mean ``mu``.

    Parameters
    ----------
    M : 2-D array-like
        User-by-item rating matrix (rows = users, columns = items).
    mu : float
        Overall mean, as returned by ``mu_overall``.

    Returns
    -------
    numpy.ndarray
        1-D array with one entry per column of ``M``:
        ``column sum / number of rows - mu``.

    Notes
    -----
    The column sum is divided by the total number of rows, so cells equal to 0
    are included in the average.
    The division is always floating point, even for an integer matrix.
    """
    M = np.asarray(M, dtype=float)
    n_users = M.shape[0]
    return M.sum(axis=0) / float(n_users) - mu

# print bias_item(Rm, mu_overall(Rm))

def bias_user(M, mu):
    """bu -- per-user deviation from the overall mean ``mu``.

    Parameters
    ----------
    M : 2-D array-like
        User-by-item rating matrix (rows = users, columns = items).
    mu : float
        Overall mean, as returned by ``mu_overall``.

    Returns
    -------
    numpy.ndarray
        1-D array with one entry per row of ``M``:
        ``row sum / number of columns - mu``.

    Notes
    -----
    The row sum is divided by the total number of columns, so cells equal to 0
    are included in the average.
    The division is always floating point, even for an integer matrix.
    """
    M = np.asarray(M, dtype=float)
    n_items = M.shape[1]
    return M.sum(axis=1) / float(n_items) - mu

# print bias_user(Rm, mu_overall(Rm))

In [None]:
def train():
    """Training process: one pass of stochastic gradient descent over ``Rm``.

    Reads the globals ``Rm``, ``unum``, ``inum``, ``gamma`` and ``lamda4`` and
    updates the rows of the global factor matrices ``Pm`` (users) and ``Qm``
    (items) in place. Returns nothing.

    For every non-zero cell ``Rm[u][i]`` inside the first ``unum`` rows and
    ``inum`` columns, visited in row-major order, the prediction is
    ``mu + bum[u] + bim[i] + Pm[u] . Qm[i]`` and each parameter is moved by
    ``gamma * (error * gradient_term - lamda4 * parameter)``.

    NOTE(review): ``bum`` and ``bim`` are rebuilt from ``Rm`` at the start of
    every call and are local to it, so the bias updates made below only affect
    later samples of the same call; they are not carried over to the next call
    or to test().
    """
    global Pm
    global Qm
    mu = mu_overall(Rm)
    bum = bias_user(Rm, mu)
    bim = bias_item(Rm, mu)
    # Rm is not modified during training, so the rated cells can be listed
    # once up front; argwhere yields them in row-major order.
    for u, i in np.argwhere(Rm[:unum, :inum]):
        p_u = Pm[u]
        q_i = Qm[i]
        eui = Rm[u][i] - (mu + bum[u] + bim[i] + p_u.dot(q_i))
        # p_u and q_i are views into Pm/Qm. Compute both new rows from the
        # current values before writing either one back; otherwise the user
        # update would silently read the already-updated item row.
        new_q = q_i + gamma * (eui * p_u - lamda4 * q_i)
        new_p = p_u + gamma * (eui * q_i - lamda4 * p_u)
        bum[u] = bum[u] + gamma * (eui - lamda4 * bum[u])
        bim[i] = bim[i] + gamma * (eui - lamda4 * bim[i])
        Qm[i] = new_q
        Pm[u] = new_p
In [None]:
def test(M, up, iq):
    """Root-mean-square prediction error over the non-zero cells of ``M``.

    Parameters
    ----------
    M : 2-D array-like
        User-by-item rating matrix to evaluate on; cells equal to 0 are skipped.
    up : numpy.ndarray
        User factor matrix, one row per user.
    iq : numpy.ndarray
        Item factor matrix, one row per item.

    Returns
    -------
    float
        ``sqrt(mean((rating - prediction) ** 2))`` over the non-zero cells in
        the first ``unum`` rows and ``inum`` columns (both read from globals),
        or ``nan`` when there is no such cell.

    Notes
    -----
    The prediction is ``mu + bum[u] + bim[i] + up[u] . iq[i]``. The factor
    matrices passed in are the ones used; the globals ``Pm``/``Qm`` are not
    read here.
    NOTE(review): ``mu``, ``bum`` and ``bim`` are computed from ``M`` itself,
    i.e. from the very matrix being evaluated -- confirm this is intended.
    """
    M = np.asarray(M)
    mu = mu_overall(M)
    bum = bias_user(M, mu)
    bim = bias_item(M, mu)
    rated = np.argwhere(M[:unum, :inum])
    active = len(rated)
    if active == 0:
        # No rated cell: the mean error is undefined, avoid dividing by zero.
        return float("nan")
    loss = 0
    for u, i in rated:
        rui_hat = mu + bum[u] + bim[i] + np.dot(up[u], iq[i])
        eui = M[u][i] - rui_hat
        loss += eui * eui
    return np.sqrt(loss / active)

In [None]:
# Run 50 training epochs. Each iteration prints the epoch index, calls train()
# once, then prints the score test() returns for the test.txt matrix Tm with
# the current factor matrices.
for i in xrange(50):
    # Trailing comma: keep the epoch index and its score on the same output line.
    print i, 
    train()
    final = test(Tm, Pm, Qm)
    print final

###Result
0 2.71621852146
1 2.66202359461
2 2.46879928892
3 1.97609090194
4 1.48417942887
5 1.23412409035
6 1.1231249733
7 1.07025902377
8 1.04183826714
9 1.02450113388
10 1.01282936692
11 1.0044357988
12 0.998136746198
13 0.993272221952
14 0.989437934952
15 0.986368904223
16 0.983882983894
17 0.98185051682
18 0.980176659035
19 0.978790458074
20 0.97763780775
21 0.976676752445
22 0.975874274028
23 0.975204043969
24 0.97464482004
25 0.974179282997
26 0.973793179389
27 0.973474681054
28 0.973213900353
29 0.973002518866
30 0.972833499788
31 0.972700862703
32 0.972599505327
33 0.972525060908
34 0.972473782956
35 0.972442451055
36 0.972428293137
37 0.972428920702
38 0.972442274349
39 0.972466577616
40 0.972500297606
41 0.972542111231
42 0.972590876149
43 0.972645605697
44 0.972705447228
45 0.972769663394
46 0.972837615979
47 0.972908751942
48 0.9729825914
49 0.973058717291