In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import lenskit.crossfold as xf
from lenskit.algorithms.als import BiasedMF
from lenskit.batch import predict
from lenskit.metrics.predict import rmse

In [None]:
# Seed NumPy's global RNG so every np.random draw in this notebook is repeatable
# under Restart & Run All.
# NOTE(review): LensKit's splitter and ALS solver may use their own random state;
# seed those separately if fully reproducible metrics are required.
SEED = 42
np.random.seed(SEED)

# Problem size: number of users, number of items, and latent dimension shared by
# both factor matrices.
N_USERS, N_ITEMS, N_FACTORS = 10000, 1000, 30

# Uniform [0, 1) factors; their product is the complete user-by-item rating matrix.
user_factors = np.random.rand(N_USERS, N_FACTORS)
item_factors = np.random.rand(N_FACTORS, N_ITEMS)
dense = np.matmul(user_factors, item_factors)

In [None]:
# Binary observation mask with the same shape as `dense`: an entry is 1.0 when its
# uniform draw is at least 0.95 and 0.0 otherwise.
mask = (np.random.rand(*dense.shape) >= 0.95).astype(float)
mask

In [None]:
# Zero out every entry of `dense` that the mask did not select (element-wise product).
sparse = dense * mask

In [None]:
# Convert the sparse matrix to a long-format (user, item, rating) table, keeping
# only strictly positive entries.
# np.nonzero scans the boolean matrix in row-major order, so the rows come out in
# the same (user, then item) order as a nested loop over i and j would produce,
# without iterating over every cell in Python.
user_col, item_col = np.nonzero(sparse > 0)
rating_col = sparse[user_col, item_col]
ratings = pd.DataFrame.from_dict({"user": user_col, "item": item_col, "rating": rating_col})

In [None]:
# Display (rows, columns) of the long-format table; rows = number of retained ratings.
ratings.shape

In [None]:
# Take the first (and only requested) split produced by LensKit's user partitioner.
# NOTE(review): SampleFrac(0.2) presumably holds out 20% of each test user's
# ratings — confirm against the lenskit.crossfold documentation.
train_test = next(xf.partition_users(ratings, 1, xf.SampleFrac(0.2)))
train, test = train_test.train, train_test.test

In [None]:
# Display (rows, columns) of the held-out test split.
test.shape

In [None]:
# Configure LensKit's BiasedMF (imported from lenskit.algorithms.als).
# The first argument, 30, equals the inner dimension used to build `dense` from
# its two factor matrices — presumably the number of latent features; confirm.
# NOTE(review): `bias=False` presumably turns off the bias terms — confirm against
# the LensKit BiasedMF documentation.
model = BiasedMF(30, iterations=100, bias=False)

In [None]:
# Baseline run: fit on the clean training split, predict the held-out test rows,
# then score the predictions against the true ratings with RMSE.
model_fit = model.fit(train)
# `predict` receives `model`, not `model_fit` — presumably fit() trains the model
# in place; confirm against the LensKit documentation.
preds = predict(model, test)
error = rmse(preds["prediction"], preds["rating"])

In [None]:
# Display the RMSE computed by the preceding fit/predict cell.
error

In [None]:
# Histogram of the "rating" column, normalised to a probability density.
fig = px.histogram(ratings, x="rating", histnorm="probability density")
fig.show()

In [None]:
# Histogram of the values in `noise_vals`, normalised to a probability density.
# Wrapping the list in a DataFrame yields a single column labelled 0, hence x=0.
# NOTE(review): `noise_vals` is only assigned in a later cell, so on a fresh kernel
# (Restart & Run All) this cell raises NameError; it should run after that cell.
fig = px.histogram(pd.DataFrame(noise_vals), x=0, histnorm="probability density")
fig.show()

In [None]:
# Per-row selector for the training ratings: 1.0 where the uniform draw is at
# least 0.9, otherwise 0.0.
noise_for_train_mask = (np.random.rand(len(train)) >= 0.9).astype(float)
# Standard-normal noise, kept only on the selected rows and zero elsewhere.
# (Alternative tried earlier: uniform noise on [-10, 10) instead of Gaussian.)
noise_for_train = np.random.randn(len(train)) * noise_for_train_mask

In [None]:
#noise_for_train = np.random.randn(train.shape[0])

In [None]:
# Number of training rows that actually received non-zero noise.
np.count_nonzero(noise_for_train)

In [None]:
# Work on a copy so the clean `train` frame is left untouched.
noisy_train = train.copy()
# The noise vector is added positionally, so it must have one entry per training row.
assert len(noisy_train) == len(noise_for_train)
noisy_train["rating"] = noisy_train["rating"] + noise_for_train

In [None]:
# Noise experiment: fit on the noise-perturbed training split built in the
# previous cell, then evaluate on the same clean test split as the baseline.
# (Fitting on `train` here would simply repeat the baseline and ignore the noise.)
model_fit = model.fit(noisy_train)
# `predict` receives `model`, not `model_fit` — presumably fit() trains the model
# in place; confirm against the LensKit documentation.
preds = predict(model, test)
error = rmse(preds["prediction"], preds["rating"])

In [None]:
# Display the RMSE computed by the preceding fit/predict cell.
error

In [None]:
# Candidate noise for every cell of the full matrix, uniform on [-3, 3).
# (Alternative tried earlier: standard-normal noise of the same shape.)
noise_dense = np.random.rand(*dense.shape) * 6 - 3
# Binary selector for which cells receive noise: 1.0 where the uniform draw is at
# least 0.90, otherwise 0.0.
noise_mask = (np.random.rand(*dense.shape) >= 0.90).astype(float)

In [None]:
# Keep noise only on cells selected by `noise_mask` that are also observed
# according to `mask`; everything else becomes zero.
# (Alternative tried earlier: apply `mask` alone, adding noise to every observed cell.)
noise = noise_dense * noise_mask * mask

In [None]:
# Collect every non-zero noise value into a flat list.
# Boolean indexing on the transpose walks `noise` column by column, which is the
# same order as looping over j (columns) outermost and i (rows) innermost, but
# without visiting each cell in a Python-level loop.
noise_vals = noise.T[noise.T != 0].tolist()

In [None]:
# Display how many matrix cells carry non-zero noise.
len(noise_vals)

In [None]:
noisy_train_test = next(xf.partition_users(noisy_ratings, 1, xf.SampleFrac(0.2)))
noisy_train = noisy_train_test.train
noisy_test = noisy_train_test.test

In [None]:
noisy_sparse = sparse + noise
user_col = []
item_col = []
rating_col = []
for i in range(noisy_sparse.shape[0]):
  for j in range(noisy_sparse.shape[1]):
    rating = noisy_sparse[i][j]
    if rating > 0:
      user_col.append(i)
      item_col.append(j)
      rating_col.append(rating)
noisy_ratings = pd.DataFrame.from_dict({"user": user_col, "item": item_col, "rating": rating_col})