In [None]:
import numpy as np
from scipy import linalg   # Linear Algebra Library

In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that the data
# files referenced by the path constants in this notebook can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Folder on the mounted Drive that holds both input files; change it here
# once if the data lives somewhere else.
DATA_DIR = "/content/drive/MyDrive/HW2-3,4/q4/data"

# NOTE: despite the `_dir` suffix, both names hold *file* paths.
# shows.txt is read line by line as show titles; user-shows.txt is loaded
# with np.loadtxt as the matrix R.
shows_dir = DATA_DIR + "/shows.txt"
user_shows_dir = DATA_DIR + "/user-shows.txt"

In [None]:
def init_P_Q(R):
  """
  Build the diagonal matrices P and Q from the row and column sums of R.

  Args:
    R: 2-D array-like of numbers with shape (m, n).

  Returns:
    A tuple (P, Q):
      P -- (m, m) diagonal matrix whose i-th diagonal entry is the sum of
           row i of R.
      Q -- (n, n) diagonal matrix whose j-th diagonal entry is the sum of
           column j of R.

  Raises:
    ValueError: if R is not 2-dimensional.
  """
  R = np.asarray(R)
  if R.ndim != 2:
    raise ValueError("R must be a 2-D matrix, got {0} dimension(s)".format(R.ndim))
  # Vectorized axis sums instead of looping over rows/columns in Python.
  return np.diag(R.sum(axis=1)), np.diag(R.sum(axis=0))

In [None]:
def factorize(x):
  """
  Element-wise inverse square root that maps zeros to zero.

  Args:
    x: scalar or array-like of numbers.

  Returns:
    Array with the shape of x holding x**-0.5 wherever x is non-zero and 0
    wherever x is zero. Applied to a diagonal matrix this yields the
    diagonal matrix of inverse square roots of its non-zero diagonal.
  """
  x = np.asarray(x)
  # 0 ** -0.5 evaluates to inf and raises a divide-by-zero RuntimeWarning;
  # those entries are replaced by 0 below, so silence only that warning.
  with np.errstate(divide="ignore"):
    inv_sqrt = x ** -0.5
  return np.where(x == 0, 0, inv_sqrt)

In [None]:
def i_i_gamma(R, Q):
  """
  Matrix used for the item-item recommendations:

      R * Q^(-1/2) * R^T * R * Q^(-1/2)      --> Refer to (a) and (b).

  Q^(-1/2) comes from factorize(Q) (element-wise x**-0.5, zeros kept at 0).
  The five factors are multiplied in one np.linalg.multi_dot call.
  """
  R_t = np.transpose(R)
  inv_sqrt_Q = factorize(Q)
  chain = [R, inv_sqrt_Q, R_t, R, inv_sqrt_Q]
  return np.linalg.multi_dot(chain)

In [None]:
def u_u_gamma(R, P):
  """
  Matrix used for the user-user recommendations:

      (R^T * P^(-1/2) * R * R^T * P^(-1/2))^T      --> Refer to (a) and (b).

  P^(-1/2) comes from factorize(P) (element-wise x**-0.5, zeros kept at 0).
  When that matrix is symmetric (e.g. P diagonal) the result equals
  P^(-1/2) * R * R^T * P^(-1/2) * R.
  The five factors are multiplied in one np.linalg.multi_dot call and the
  product is transposed before being returned.
  """
  R_t = np.transpose(R)
  inv_sqrt_P = factorize(P)
  product = np.linalg.multi_dot([R_t, inv_sqrt_P, R, R_t, inv_sqrt_P])
  return np.transpose(product)

In [None]:
def get_top_indexes(arr, n):
  """
  Returns the indexes of the n largest elements of arr, largest first.
  Equal values are ordered by ascending index.
  """
  # Sort the positions themselves; the key reproduces (descending value, index).
  ranked = sorted(range(len(arr)), key=lambda idx: (-arr[idx], idx))
  return ranked[:n]

In [None]:
def get_show_titles(shows_dir):
  """
  Reads the text file at path shows_dir and returns one title per line,
  with any leading/trailing double quotes and newline characters removed.
  The file is opened with the platform's default text encoding.
  """
  with open(shows_dir) as titles_file:
    return [raw_line.strip("\"\n") for raw_line in titles_file]

In [None]:
# Row of the user to recommend for: user 500 in 1-based numbering,
# i.e. row 499 with Python's 0-based indexing.
TARGET_USER_ROW = 499
# Only the first NUM_CANDIDATE_SHOWS columns are ranked.
NUM_CANDIDATE_SHOWS = 100
# Number of recommendations to report (the printed messages below say "top 5").
TOP_N = 5

# R is indexed below with rows as users and columns as shows.
R = np.loadtxt(user_shows_dir)
P, Q = init_P_Q(R)

u_gamma = u_u_gamma(R, P)
i_gamma = i_i_gamma(R, Q)

u_top_indexes = get_top_indexes(u_gamma[TARGET_USER_ROW][:NUM_CANDIDATE_SHOWS], TOP_N)
i_top_indexes = get_top_indexes(i_gamma[TARGET_USER_ROW][:NUM_CANDIDATE_SHOWS], TOP_N)
show_titles = get_show_titles(shows_dir)

print(u_top_indexes)
print(i_top_indexes)
print("top 5 shows by user-user collaborative filtering: {0}".format([show_titles[idx] for idx in u_top_indexes]))
print("top 5 shows by item-item collaborative filtering: {0}".format([show_titles[idx] for idx in i_top_indexes]))

  


[96, 74, 45, 60, 9]
[96, 74, 60, 45, 82]
top 5 shows by user-user collaborative filtering: ['FOX 28 News at 10pm', 'Family Guy', '2009 NCAA Basketball Tournament', 'NBC 4 at Eleven', 'Two and a Half Men']
top 5 shows by item-item collaborative filtering: ['FOX 28 News at 10pm', 'Family Guy', 'NBC 4 at Eleven', '2009 NCAA Basketball Tournament', 'Access Hollywood']
