In [1]:
import numpy as np


class MatrixFactorization:
    def __init__(self, num_items, num_users, num_factors, learning_rate, regularization_rate, num_iterations):
        """
        Initialize the Matrix Factorization model.

        Args:
            num_items (int): Number of items.
            num_users (int): Number of users.
            num_factors (int): Number of latent factors.
            learning_rate (float): Learning rate for gradient descent.
            regularization_rate (float): Regularization rate for L2 regularization.
            num_iterations (int): Number of iterations for training.
        """
        self.num_items = num_items
        self.num_users = num_users
        self.num_factors = num_factors
        self.learning_rate = learning_rate
        self.regularization_rate = regularization_rate
        self.num_iterations = num_iterations

        # Initialize Q and P matrices with random values
        # Start your code
        self.Q = np.random.rand(self.num_items, self.num_factors)
        self.P = np.random.rand(self.num_users, self.num_factors)
        # End your code

    def sigmoid(self, x):
        """
        Compute the sigmoid function.

        Args:
            x (float): Input value.

        Returns:
            float: Sigmoid value.
        """
        return 1 / (1 + np.exp(-x))

    def update_parameters(self, R):
        """
        Update the parameters Q and P using Stochastic Gradient Descent.

        Args:
            R (ndarray): Rating matrix.
        """
        # Start your code
        for i in range(self.num_items):
            for u in range(self.num_users):
                if R[i, u] == 0:
                    continue
                error =  np.log(self.sigmoid(R[i, u] - np.dot(self.Q[i], self.P[u])))
                self.Q[i] += self.learning_rate * (error * self.P[u] - self.regularization_rate * self.Q[i])
                self.P[u] += self.learning_rate * (error * self.Q[i] - self.regularization_rate * self.P[u])
        # End your code

    def train(self, R):
        """
        Train the Matrix Factorization model.

        Args:
            R (ndarray): Rating matrix.
        """
        self.update_parameters(R)

    def predict_rating(self, i, u):
        """
        Predict the rating for item i and user u.

        Args:
            i (int): Item index.
            u (int): User index.

        Returns:
            float: Predicted rating.
        """
        # Start your code
        return self.sigmoid(np.dot(self.Q[i], self.P[u]))
        # End your code

    def evaluate(self, users_list, groundTruth_list, topk):
            """
            Evaluate the trained model for a list of users and calculate the accuracy.

            Args:
                users_list (list): List of user indexes.
                groundTruth_list (list): List of ground truth items in the user's test set.
                topk (int): Threshold for top-k item selection.

            Returns:
                float: Accuracy of the model.
            """
            accuracies = []

            for u, groundTruth in zip(users_list, groundTruth_list):
                predicted_ratings = [self.predict_rating(i, u) for i in range(self.num_items)]
                topk_indices = np.argsort(predicted_ratings)[-topk:]
                intersection = len(set(topk_indices) & set(groundTruth))
                accuracy = intersection / len(groundTruth)
                accuracies.append(accuracy)

            return np.mean(accuracies)





In [2]:
import numpy as np

file_path = "/content/data.txt"

with open(file_path, "r") as file:
    lines = file.readlines()

lines = [line.strip() for line in lines if line.strip() != '']
num_users = len(lines)

max_item = 0
user_items = []

for line in lines:
    words = line.split(" ")
    user_id = int(words[0])
    items = [int(word) for word in words[1:]]
    max_item = max(max_item, max(items))
    user_items.append((user_id, items))

num_items = max_item + 1
R = np.zeros((num_users, num_items), dtype=np.int8)

for user_id, items in user_items:
    num_items_to_remove = int(0.2 * len(items))
    items_to_remove = np.random.choice(items, num_items_to_remove, replace=False)
    items = list(set(items) - set(items_to_remove))
    R[user_id, items] = 1

# Splitting into training set and test set
test_size = int(0.4 * num_users)
test_indices = np.random.choice(range(num_users), test_size, replace=False)
test_set = R[test_indices]
train_set = np.delete(R, test_indices, axis=0)

print("Training set:")
print(train_set)

print("Test set:")
print(test_set)


Training set:
[[0 1 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Test set:
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [5]:
test_set.shape

(11943, 40981)

In [6]:
train_set.shape

(17915, 40981)

In [4]:
# Example usage
num_items = 100
num_users = 100
num_factors = 10
learning_rates = [0.001, 0.1, 0.01]
regularization_rate = 0.1
num_iterations = 100

R_train = train_set  # Rating matrix for training
R_test = test_set  # Rating matrix for testing

for learning_rate in learning_rates:
    model = MatrixFactorization(num_items, num_users, num_factors, learning_rate, regularization_rate, num_iterations)
    model.train(R_train)

    # Test prediction for item 0 and user 0
    item_index = 0
    user_index = 0
    prediction = model.predict_rating(item_index, user_index)
    print(f"Predicted rating for item {item_index} and user {user_index}: {prediction}")

    # Evaluate model on test data
    user_indexes = list(range(num_users))  # List of all user indexes in the test set
    groundTruths = []  # List of ground truth items for each user in the test set

    for user_index in user_indexes:
        groundTruth = np.where(R_test[user_index] > 0)[0]  # Ground truth items for the current user
        groundTruths.append(groundTruth)

    topk = 10  # Top-k threshold

    accuracy = model.evaluate(user_indexes, groundTruths, topk)
    print(f"Accuracy for model with learning rate {learning_rate}: {accuracy}\n")


Predicted rating for item 0 and user 0: 0.8862937470958735
Accuracy for model with learning rate 0.001: 0.0014215686274509803

Predicted rating for item 0 and user 0: 0.21461710624541544
Accuracy for model with learning rate 0.1: 0.000588235294117647

Predicted rating for item 0 and user 0: 0.6851547681666874
Accuracy for model with learning rate 0.01: 0.001

