In [1]:
import numpy as np

# Toy user-item rating matrix (4 rows x 5 columns); np.nan marks unrated cells.
# np.nan is used rather than the np.NaN alias, which was removed in NumPy 2.0.
R = np.array([[4, np.nan, np.nan, 2, np.nan],
              [np.nan, 5, np.nan, 3, 1],
              [np.nan, np.nan, 3, 4, 4],
              [5, 2, 1, 2, np.nan]])
num_users, num_items = R.shape
K = 3  # number of latent factors per user / item

# Seed the global NumPy generator so the random initial factors are reproducible.
np.random.seed(1)
P = np.random.normal(scale=1./K, size=(num_users, K))
Q = np.random.normal(scale=1./K, size=(num_items, K))

In [2]:
from sklearn.metrics import mean_squared_error

def get_rmse(R,P,Q,non_zeros):
    """Root-mean-square error between observed ratings and their predictions.

    The prediction for entry (i, j) is the dot product of row ``i`` of ``P``
    and row ``j`` of ``Q``. Only the entries listed in ``non_zeros`` are
    evaluated, so the full prediction matrix is never materialised.

    Parameters
    ----------
    R : numpy array of ratings; only the positions in ``non_zeros`` are read.
    P : numpy array of row factors, indexed by the first element of each entry.
    Q : numpy array of column factors, indexed by the second element of each entry.
    non_zeros : sequence of tuples whose first two elements are the row and
        column index of an observed rating (further elements are ignored).

    Returns
    -------
    The RMSE over the listed entries, as a NumPy float.

    Raises
    ------
    ValueError
        If ``non_zeros`` is empty (there is nothing to average over).
    """
    if len(non_zeros) == 0:
        raise ValueError("non_zeros must contain at least one observed entry")

    row_idx = [entry[0] for entry in non_zeros]
    col_idx = [entry[1] for entry in non_zeros]

    observed = R[row_idx, col_idx]
    # Row-wise dot products P[i] . Q[j] for the observed (i, j) pairs only.
    predicted = (P[row_idx, :] * Q[col_idx, :]).sum(axis=1)

    return np.sqrt(np.mean((observed - predicted) ** 2))


In [3]:
# Collect (row, col, rating) for every observed entry. A NaN compares False
# against 0, so unrated cells are skipped.
non_zeros = [(i, j, R[i, j]) for i in range(num_users) for j in range(num_items) if R[i, j] > 0]
steps = 1000
learning_rate = 0.01
r_lambda = 0.01  # L2 regularisation strength

for step in range(steps):
    # One SGD sweep over all observed ratings.
    for i, j, r in non_zeros:
        # Prediction error for this single rating.
        eij = r - np.dot(P[i, :], Q[j, :].T)

        # Gradient step with L2 regularisation. Note that the Q update reads
        # the P[i, :] that was just updated on the previous line.
        P[i, :] = P[i, :] + learning_rate * (eij * Q[j, :] - r_lambda * P[i, :])
        Q[j, :] = Q[j, :] + learning_rate * (eij * P[i, :] - r_lambda * Q[j, :])

    # Evaluate once per sweep instead of after every single rating update;
    # only the value after the last update of the sweep was ever used.
    rmse = get_rmse(R, P, Q, non_zeros)
    if (step % 50) == 0:
        print('### iteration step :', step, "rmse :", rmse)
    

### iteration step : 0 rmse : 3.2388050277987723
### iteration step : 50 rmse : 0.4876723101369648
### iteration step : 100 rmse : 0.1564340384819247
### iteration step : 150 rmse : 0.07455141311978046
### iteration step : 200 rmse : 0.04325226798579314
### iteration step : 250 rmse : 0.029248328780878973
### iteration step : 300 rmse : 0.022621116143829466
### iteration step : 350 rmse : 0.019493636196525135
### iteration step : 400 rmse : 0.018022719092132704
### iteration step : 450 rmse : 0.01731968595344266
### iteration step : 500 rmse : 0.016973657887570753
### iteration step : 550 rmse : 0.016796804595895633
### iteration step : 600 rmse : 0.01670132290188466
### iteration step : 650 rmse : 0.01664473691247669
### iteration step : 700 rmse : 0.016605910068210026
### iteration step : 750 rmse : 0.016574200475705
### iteration step : 800 rmse : 0.01654431582921597
### iteration step : 850 rmse : 0.01651375177473524
### iteration step : 900 rmse : 0.01648146573819501
### iteration

In [4]:
# Reconstruct the full rating matrix from the learned factors and show it.
pred_matrix = P @ Q.T
print(pred_matrix)

[[3.99062329 0.89653623 1.30649077 2.00210666 1.66340846]
 [6.69571106 4.97792757 0.97850229 2.98066034 1.0028451 ]
 [6.67689303 0.39076095 2.98728588 3.9769208  3.98610743]
 [4.96790858 2.00517956 1.00634763 2.01691675 1.14044567]]


In [18]:
def matrix_factorization(R,K,steps=200,learning_rate=0.01,r_lambda = 0.01):
    """Factorise a rating matrix into two K-column factor matrices with SGD.

    Only entries of ``R`` that are greater than 0 are treated as observed;
    NaN entries compare False and are therefore ignored.

    Parameters
    ----------
    R : 2-D numpy array of ratings.
    K : number of latent factors.
    steps : number of full SGD sweeps over the observed ratings.
    learning_rate : SGD step size.
    r_lambda : L2 regularisation strength.

    Returns
    -------
    (P, Q) : arrays of shape (R.shape[0], K) and (R.shape[1], K); the
        prediction for entry (i, j) is the dot product of P[i] and Q[j].

    Notes
    -----
    Calls ``np.random.seed(1)``, which resets NumPy's global random state, so
    every call starts from the same initial factors. Progress (RMSE on the
    observed entries) is printed every 10 steps.
    """
    num_users, num_items = R.shape
    np.random.seed(1)
    P = np.random.normal(scale=1./K, size=(num_users, K))
    Q = np.random.normal(scale=1./K, size=(num_items, K))

    # Store the row index, column index and value of every entry with R > 0.
    # np.argwhere yields the positions in row-major order, matching a nested
    # row/column scan, without a Python-level loop over every cell of R.
    non_zeros = [(i, j, R[i, j]) for i, j in np.argwhere(R > 0).tolist()]

    for step in range(steps):
        for i, j, r in non_zeros:
            # Prediction error for this single rating.
            eij = r - np.dot(P[i, :], Q[j, :].T)
            # Gradient step with L2 regularisation. The Q update reads the
            # P[i, :] that was just updated on the previous line.
            P[i, :] = P[i, :] + learning_rate * (eij * Q[j, :] - r_lambda * P[i, :])
            Q[j, :] = Q[j, :] + learning_rate * (eij * P[i, :] - r_lambda * Q[j, :])

        # The RMSE is only reported, so compute it only when it is printed.
        if (step % 10) == 0:
            rmse = get_rmse(R, P, Q, non_zeros)
            print('### iteration step :', step, "rmse :", rmse)

    return P, Q

In [9]:
import pandas as pd
import numpy as np

# Read the movie metadata and the ratings from the ml-latest-small CSV files.
# NOTE(review): these are absolute, machine-specific paths.
movies = pd.read_csv(r'C:\Users\집\data\ml-latest-small\ml-latest-small\movies.csv')
ratings = pd.read_csv(r'C:\Users\집\data\ml-latest-small\ml-latest-small\ratings.csv')

# Keep only the three columns needed to build the rating matrix.
ratings = ratings.loc[:, ['userId', 'movieId', 'rating']]

# pivot_table takes the value column, the row index and the column index:
# one row per userId, one column per movieId, each cell holding the rating.
ratings_matrix = ratings.pivot_table(values='rating', index='userId', columns='movieId')

In [12]:
# Join each rating with its movie row so the title becomes available.
ratings_movies = ratings.merge(movies, on='movieId')

# Rebuild the user x item matrix keyed by title; columns of ratings_movies that
# are not named in the pivot are dropped from the result.
ratings_matrix = ratings_movies.pivot_table(
    values='rating',
    index='userId',
    columns='title',
)

In [19]:
# Factorise the user x title rating matrix with 50 latent factors, then
# rebuild the dense prediction matrix from the two factor matrices.
P, Q = matrix_factorization(
    ratings_matrix.values,
    K=50,
    steps=200,
    learning_rate=0.01,
    r_lambda=0.01,
)
pred_matrix = P @ Q.T

### iteration step : 0 rmse : 2.9023619751336867
### iteration step : 10 rmse : 0.7335768591017927
### iteration step : 20 rmse : 0.5115539026853442
### iteration step : 30 rmse : 0.37261628282537446
### iteration step : 40 rmse : 0.2960818299181014
### iteration step : 50 rmse : 0.2520353192341642
### iteration step : 60 rmse : 0.22487503275269854
### iteration step : 70 rmse : 0.2068545530233154
### iteration step : 80 rmse : 0.19413418783028688
### iteration step : 90 rmse : 0.18470082002720406
### iteration step : 100 rmse : 0.17742927527209104
### iteration step : 110 rmse : 0.1716522696470749
### iteration step : 120 rmse : 0.16695181946871723
### iteration step : 130 rmse : 0.16305292191997542
### iteration step : 140 rmse : 0.15976691929679646
### iteration step : 150 rmse : 0.1569598699945732
### iteration step : 160 rmse : 0.1545339818671543
### iteration step : 170 rmse : 0.15241618551077643
### iteration step : 180 rmse : 0.1505508073962831
### iteration step : 190 rmse : 0

In [20]:
# Wrap the predictions in a DataFrame that reuses the user index and the
# title columns of the rating matrix, then preview the first three users.
ratings_pred_matrix = pd.DataFrame(
    pred_matrix,
    index=ratings_matrix.index,
    columns=ratings_matrix.columns,
)
ratings_pred_matrix.head(3)

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.055084,4.092018,3.56413,4.502167,3.981215,1.271694,3.603274,2.333266,5.091749,3.972454,...,1.402608,4.208382,3.705957,2.720514,2.787331,3.475076,3.253458,2.161087,4.010495,0.859474
2,3.170119,3.657992,3.308707,4.166521,4.31189,1.275469,4.237972,1.900366,3.392859,3.647421,...,0.973811,3.528264,3.361532,2.672535,2.404456,4.232789,2.911602,1.634576,4.135735,0.725684
3,2.307073,1.658853,1.443538,2.208859,2.229486,0.78076,1.997043,0.924908,2.9707,2.551446,...,0.520354,1.709494,2.281596,1.782833,1.635173,1.323276,2.88758,1.042618,2.29389,0.396941
