In [3]:
import numpy as np
import scipy.sparse as sp
import csv
import time
import pandas as pd

## Read Data

In [26]:
class CR_Data():
    def __init__(self, csv_file="small.csv"):
        self.csv_file = csv_file
        df = pd.read_csv(csv_file)
        users = set(df["user_id"])
        tracks = set(df["trackname"])
        self.user2idx, self.idx2user, self.track2idx, self.idx2track = {}, {}, {}, {}
        #print(users, tracks)
        for idx, user in enumerate(users):
            self.user2idx[user] = idx
            self.idx2user[idx] = user
        for idx, track in enumerate(tracks):
            self.track2idx[track] = idx
            self.idx2track[idx] = track

    def split_train_test(self, train_portion=0.8):
        datas = []
        with open(self.csv_file, 'rt', newline="", encoding='utf-8') as f:
            reader = csv.reader(f)
            assert tuple(next(reader)) == ("user_id","artistname","trackname","playlistname")
            datas = [row for row in reader if len(row) == 4] # Filter invalid data
        
        train_num = int(len(datas) * train_portion)
        train_datas, test_datas = datas[:train_num], datas[train_num:]
        train_data_sparse, test_data_sparse = self.build_sparse_matrix(train_datas), self.build_sparse_matrix(test_datas)
        
        return train_data_sparse, test_data_sparse
    
    def build_sparse_matrix(self, inputs):
        rows, cols, data = [], [], []
        for item in inputs:
            user, _, track, _ = item
            rows.append(self.user2idx[user])
            cols.append(self.track2idx[track])
            data.append(1)
        X = sp.coo_matrix((data, (rows, cols)), shape=(len(self.user2idx), len(self.track2idx)))
        
        return X
    
cr_data = CR_Data()
train_data, test_data = cr_data.split_train_test()
#print(train_data, test_data)

In [28]:
def train(X_train, X_test, k, niters=12, lam=10., verbose=True):
    """ Train a collaborative filtering model. 
        Args: 
            X_train : np.array[num_users, num_movies] -- the training ratings matrix, assumed dense
            X_test : np.array[num_users, num_movies] -- the test ratings matrix, assumed dense
            k : int -- the number of features in the CF model
            niters : int -- number of iterations to run
            lam : float -- regularization parameter, shown as lambda
            verbose : boolean -- if true, print the error on train and test sets every few iterations 

        return : Tuple[U, V]
            U : np.array[num_users,  num_features] -- the user-feature matrix
            V : np.array[num_movies, num_features] -- the movie-feature matrix
    """
    # MODIFY THIS FUNCTION
    
    m, n = X_train.shape
    W = np.zeros([m, n])
    W[np.where(X_train != 0)] = 1
#     i_indices, j_indices = np.where(X_train != 0)
#     print(i_indices, j_indices)
#     print(m, n, len(i_indices), len(j_indices))
    U = np.random.normal(scale=1.0, size=(m, k))
    V = np.random.normal(scale=1.0, size=(k, n))
    I = np.identity(k)

    if verbose:
        print("| Time    | Iter  | Train Err | Test Err |")
        print("| ------- | ----- | --------- | -------- |")

    start_time = time.perf_counter()
    for e in range(niters):
        
        for j in range(n):
            i_indices = np.where(W[:, j] == 1)[0]
            V[:, j] = la.solve(U[i_indices, :].T.dot(U[i_indices, :]) + lam * I, U[i_indices, :].T.dot(X_train[i_indices, j]))

        for i in range(m):
            j_indices = np.where(W[i] == 1)[0]
            U[i] = la.solve(V[:, j_indices].dot(V[:, j_indices].T) + lam * I, V[:, j_indices].dot(X_train[i, j_indices]))
        
        if verbose: 
            print(f"| {time.perf_counter() - start_time: 7.3f} |{e+1: 6d} |{error(X_train, U, V.T):10.4f} |{error(X_test, U, V.T):9.4f} |")
    
    if verbose: 
        print("")
    return U, V.T

In [29]:
def recommend(X, U, V):
    """Recommend a new movie for every user.

        args: 
            X : np.array[num_users, num_movies] -- the ratings matrix
            U : np.array[num_users, num_features] -- a matrix of features for each user
            V : np.array[num_movies,num_features] -- a matrix of features for each movie

        return: List[int] -- a list of movie Ids for each user
    """
    
    res = []
    pred = U @ V.T
    pred[np.where(X != 0)] = np.NINF
    
    return list(np.argmax(pred, axis=1))

In [30]:
train(train_data, test_data, 1, 1)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all().