In [69]:
import pandas as pd
import numpy as np

df = pd.read_csv("https://files.grouplens.org/datasets/movielens/ml-100k/u.data", delimiter = r'\t', 
                 names = ['user_id', 'item_id', 'rating', 'timestamp'])

r = df.pivot(index = 'user_id', columns = 'item_id', values = 'rating').values
r

  df = pd.read_csv("https://files.grouplens.org/datasets/movielens/ml-100k/u.data", delimiter = r'\t',


array([[ 5.,  3.,  4., ..., nan, nan, nan],
       [ 4., nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [ 5., nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan,  5., nan, ..., nan, nan, nan]])

In [70]:
# Train-test split

irow, jcol = np.where(~np.isnan(r)) # Burası matriste değere sahip olan entrylerin row ve column indexlerini veriyor.
# 100_000 dolu entry mevcut

idx = np.random.choice(np.arange(100_000), 1000, replace=False) # 1 ile 100k arası 1000 adet tekrarı olmadan random sayı seçiyor
test_irow = irow[idx] # 100_000 entry içerisinden random seçilen idx listesindeki indexlere sahip entryler test row ve col 
test_jcol = jcol[idx] # olarak seçilir.

r_copy = r.copy() # r bizim orijinal matrisimiz, r_copy ise test matrisimiz (tahmini gerçekleştirdiğimiz) olacak.

for i, j in zip(test_irow, test_jcol):
    r_copy[i][j] = np.nan
    

#r[test_irow, test_jcol], r_copy[test_irow, test_jcol]

In [71]:
# Train-Validation split (Masking validation entries)

# Artık r_copy üzerinden çalışacağım için validation set'i de buradan ayıracağım
irow2, jcol2 = np.where(~np.isnan(r_copy))

idx2 = np.random.choice(np.arange(99_000), 300, replace=False)

test_irow2 = irow2[idx2]
test_jcol2 = jcol2[idx2]

r_copy2 = r_copy.copy() # r_copy2 hem test hem validation set'in maskelenmiş hali (öğrendiğimiz model) olacak.

for i, j in zip(test_irow2, test_jcol2):
    r_copy2[i][j] = np.nan
        
# Buranın sonunda artık b_user ve b_item optimizasyonunu sağlayacak dataları belirlemiş oluyorum.

In [72]:
# Train-Validation Split (Masking train entries)

A = np.arange(99_000)
idx3 = np.array(list(set(A) - set(idx2))) # Burada idx2 setinde olan indisleri A'dan çıkarıyoruz.

r_copy3 = r_copy.copy() # r bizim orijinal matrisimiz, r_copy ise test matrisimiz (tahmini gerçekleştirdiğimiz) olacak.

test_irow3 = irow2[idx3]
test_jcol3 = jcol2[idx3]

for i, j in zip(test_irow3, test_jcol3):
    r_copy3[i][j] = np.nan

#r_copy3[test_irow3, test_jcol3], r_copy[test_irow3, test_jcol3]

In [152]:
# Prediction and gradient descent without regularization.

def gradientDescent(r: np.ndarray, b_user: np.ndarray, b_item: np.ndarray, alpha: float = 0.001, 
                    ite: int = 1000) -> np.ndarray:
    
    error_in_each_ite = []
    m, n = r.shape

    for it in range(ite):
        total_e = 0

        for i in range(m):
            for j in range(n):
                if np.isnan(r[i, j]):
                    continue        
                else:
                    # Prediction of r_ij
                    y_pred = b_user[i][0] + b_item[j][0]

                    e = r[i][j] - y_pred
                    
                    Loss = (((r[i][j] - y_pred) ** 2) / 2)

                    b_user[i][0] += e * alpha 
                    b_item[j][0] += e * alpha

                    total_e += e
        
        print(it, total_e)
        error_in_each_ite.append(total_e) 

        if it > 2:
            if (error_in_each_ite[it - 2] - error_in_each_ite[it - 1] < 0.001):
                break 
    
    return b_user, b_item, error_in_each_ite

In [155]:
# Prediction and gradient descent with l2 regularization.

def l2regularized(r: np.ndarray, b_user: np.ndarray, b_item: np.ndarray, lamb: float = 0., 
                  alpha: float = 0.001, ite: int = 1000) -> np.ndarray:
    error_in_each_ite = []
    m, n = r.shape
    for it in range(ite):
        total_e = 0

        for i in range(m):
            for j in range(n):
                if np.isnan(r[i, j]):
                    continue        
                else:
                    # Prediction of r_ij
                    y_pred = b_user[i][0] + b_item[j][0]

                    Loss = (((r[i][j] - y_pred) ** 2) / 2) + (lamb / 2 * (b_user[i][0] ** 2 + b_item[j][0] ** 2))
                    
                    e = r[i][j] - y_pred

                    b_user[i][0] += (e - lamb * (b_user[i][0])) * alpha
                    b_item[j][0] += (e - lamb * (b_item[j][0])) * alpha

                    total_e += e
                    
        print(it, total_e)
        error_in_each_ite.append(total_e) 

        if it > 2:
            if (error_in_each_ite[it - 2] - error_in_each_ite[it - 1] < 0.001):
                break 
    
    return b_user, b_item, error_in_each_ite

In [154]:
# Initializing the random bias values for each user and item
b_user = np.random.rand(r.shape[0], 1)
b_item = np.random.rand(r.shape[1], 1)

b_user_gd, b_item_gd, errors_gd = gradientDescent(r_copy2, b_user, b_item, ite = 100)

0 208704.6769963375
1 147865.73080480893
2 107586.97891570322
3 80502.4124296429
4 61979.04944677193
5 49076.112056158345
6 39909.31792571837
7 33259.503734732185
8 28329.774007161013
9 24593.632591799596
10 21699.268816489835
11 19408.760243816952
12 17559.146256100496
13 16037.301522121077
14 14763.580969228646
15 13681.081648978652
16 12748.52869298426
17 11935.517740970276
18 11219.301865808508
19 10582.599187539045
20 10012.080815240022
21 9497.3163103629
22 9030.029700082385
23 8603.568324212623
24 8212.519013551191
25 7852.4273176222405
26 7519.5895809279245
27 7210.897081903717
28 6923.7177935525115
29 6655.805635255135
30 6405.230038366219
31 6170.32068890725
32 5949.623733428102
33 5741.86673511083
34 5545.930378004149
35 5360.825426807559
36 5185.673818332554
37 5019.693030139901
38 4862.183070496544
39 4712.515581687422
40 4570.124659815728
41 4434.499078453009
42 4305.175667911821
43 4181.733651581329
44 4063.7897793851494
45 3950.9941286739886
46 3843.02646674457
47 3739.

In [164]:
# Initializing the random bias values for each user and item
b_user = np.random.rand(r.shape[0], 1)
b_item = np.random.rand(r.shape[1], 1)

b_user_l2, b_item_l2, errors_l2 = l2regularized(r_copy2, b_user, b_item, lamb = 0.1, ite = 100)

0 210931.78727734098
1 151992.5833387941
2 113994.85527031099
3 89010.25999244789
4 72229.73992335981
5 60700.15972931502
6 52585.360568000004
7 46729.06489194957
8 42393.50087940504
9 39101.43720495006
10 36539.713857791816
11 34499.715283283025
12 32840.25571583852
13 31464.194014172714
14 30303.54033167515
15 29309.865871227685
16 28448.05671368381
17 27692.197337858066
18 27022.824245036685
19 26425.070247239386
20 25887.39400663335
21 25400.698437257968
22 24957.71046385328
23 24552.53853151426
24 24180.352482872582
25 23837.1487279506
26 23519.575618364415
27 23224.801859661045
28 22950.416082491192
29 22694.34925688594
30 22454.814060520057
31 22230.25698149874
32 22019.32009727277
33 21820.810287337194
34 21633.67421715356
35 21456.97784709433
36 21289.889522410434
37 21131.665921901502
38 20981.640307243986
39 20839.212637899043
40 20703.841209485072
41 20575.035544378687
42 20452.350317904275
43 20335.380145826966
44 20223.75509200278
45 20117.136781173172
46 20015.2150226507

In [165]:
def pred(r: np.ndarray, b_user: np.ndarray, b_item: np.ndarray):
    m, n = r.shape
    r_hat = np.zeros((m, n))
    
    for i in range(m):
        for j in range(n):
            y_pred = b_user[i][0] + b_item[j][0]
            r_hat[i][j] = y_pred
            
    return r_hat

In [166]:
r_hat_gd = pred(r_copy2, b_user_gd, b_item_gd)
r_hat_l2 = pred(r_copy2, b_user_l2, b_item_l2)

In [167]:
# Sum of squares errors without and with regularization, respectively. Before hyperparameter optimization.
sq_error_gd = np.nansum((r[test_irow, test_jcol] - r_hat_gd[test_irow, test_jcol]) ** 2) 
sq_error_l2 = np.nansum((r[test_irow, test_jcol] - r_hat_l2[test_irow, test_jcol]) ** 2) 

sq_error_gd, sq_error_l2

(893.2067042236139, 925.802434710798)

In [133]:
# Loss fonksiyonu lambda'ya göre convex olduğu için aynı şekilde iterasyonla hyperparameter optimization yapacağım.

def lam_opt(r: np.ndarray, b_user: np.ndarray, b_item: np.ndarray, alpha: float = 0.001, ite: int = 1000) -> np.ndarray:
    
    lambd = (np.random.rand(1)*100)[0]
    error_in_each_ite = []
    m, n = r.shape
    for it in range(ite):
        total_e = 0

        for i in range(m):
            for j in range(n):
                if np.isnan(r[i, j]):
                    continue        
                else:
                    # Prediction of r_ij
                    y_pred = b_user[i][0] + b_item[j][0]
                    
                    e = r[i][j] - y_pred
                    
                    Loss = ((e ** 2) / 2) + (lambd / 2 * (b_user[i][0] ** 2 + b_item[j][0] ** 2))

                    lambd -= ((b_user[i][0] ** 2 + b_item[j][0] ** 2) / 2) * alpha
                    
                    total_e += e
                    
        print(it, total_e)
        error_in_each_ite.append(total_e) 

        if it > 2:
            if (error_in_each_ite[it - 2] - error_in_each_ite[it - 1] < 0.001):
                break 
    
    return lambd, error_in_each_ite

In [136]:
optimal_lamb, lam_errors_gd = lam_opt(r_copy3, b_user_l2, b_item_l2, ite = 100)

0 -10.57715738197612
1 -10.57715738197612
2 -10.57715738197612
3 -10.57715738197612


In [None]:
# Hyperparameter optimization için yine convexity kullanmak aklıma geldi fakat ben lambd değiştirdiğimde ve artık b_user 
# ve b_item'ı sabit tuttuğumda bunun y_pred'e bir etkisi olmuyor. Farklı bir yöntem olarak yalnızca lambda değerlerini 
# değiştirerek en düşük loss'u verecek olan lambdayı seçebilirdim fakat bu da nedense kullanabileceğim en doğr yöntemmiş
# gibi gelmiyor. Buradan nasıl devam edebileceğim konusunda bir feedback verebilir misiniz? 

# Artık buradan devam etmeyeceğim çünkü benzer bir problemle capstone projede de karşılaşacağım, en azından vaktimi o tarafa
# harcamak daha mantıklı geliyor.