# GreConDRecommender running over Belohlavek Dataset Demo

Copyright 2022 Bernardo C. Rodrigues

See COPYING file for license details

In [None]:
# Get the trainset
from tests.ToyDatasets import belohlavek_dataset_raw_rating, convert_raw_rating_list_into_trainset

# Convert the raw Belohlavek toy ratings into the trainset object used by the cells below.
# shuffle=False is passed explicitly; presumably this keeps the ratings in their
# declared order so that runs are reproducible — confirm in tests.ToyDatasets.
trainset = convert_raw_rating_list_into_trainset(belohlavek_dataset_raw_rating, shuffle=False)

In [None]:
from recommenders.grecond_recommender import GreConDRecommender
from recommenders.common import cosine_distance

# Build the recommender with cosine distance as its KNN distance strategy and fit it
# on the toy trainset. Later cells read formal_context, A, B, sim and binary_dataset
# from this fitted object.
algo = GreConDRecommender(knn_distance_strategy=cosine_distance)
algo.fit(trainset)

In [None]:
# Access to formal context
from rich.jupyter import print

# Note that these concepts use the internal Trainset representation.
# `print` here is rich.jupyter's print (imported just above), not the builtin.
print(algo.number_of_factors)
print(algo.formal_context)

In [None]:
# Collect per-concept size statistics in a single pass over the formal context:
# one (intent length, extent length) pair for every concept.
concept_shapes = [
    (len(concept.intent), len(concept.extent))
    for concept in algo.formal_context
]

intent_sizes = [intent_len for intent_len, _ in concept_shapes]
extent_sizes = [extent_len for _, extent_len in concept_shapes]

# Size of the submatrix each concept spans: intent length times extent length.
concept_sizes = [intent_len * extent_len for intent_len, extent_len in concept_shapes]

In [None]:
import matplotlib.pyplot as plt

# One point per formal concept: intent size on x, extent size on y.
plt.scatter(intent_sizes, extent_sizes, alpha=1)

# Label both axes with a single call on the current axes.
plt.gca().set(xlabel="Concept Intent Size", ylabel="Concept Extent Size")
plt.show()

In [None]:
fig, axs = plt.subplots(1, 3, figsize=(20, 5))
fig.suptitle("Concept Histograms")

# One histogram per size series, drawn left to right in this order.
histogram_specs = [
    (intent_sizes, "Concept's intent size"),
    (extent_sizes, "Concept's extent size"),
    (concept_sizes, "Concept's submatrix size"),
]
for ax, (sizes, x_label) in zip(axs, histogram_specs):
    ax.hist(sizes)
    ax.set(xlabel=x_label, ylabel="Occurrences")

fig.show()

In [None]:
# Map each tick label to the series it describes; dict insertion order is the plotting order.
size_distributions = {
    "Concept's intent size": intent_sizes,
    "Concept's extent size": extent_sizes,
    "Concept's submatrix size": concept_sizes,
}

# NOTE(review): Matplotlib 3.9 deprecates `labels` in favour of `tick_labels`;
# switch once the minimum supported Matplotlib version allows it.
plt.boxplot(
    list(size_distributions.values()),
    labels=list(size_distributions.keys()),
)
plt.show()

In [None]:
# Access to factored matrices: show each factor (A, then B) followed by its shape.
for factor_matrix in (algo.A, algo.B):
    print(factor_matrix)
    print(factor_matrix.shape)

In [None]:
# Access to similarity matrix
# The fitted recommender exposes it as `sim`; the value and then its shape are shown.
print(algo.sim)
print(algo.sim.shape)

In [None]:
# Generate similarity matrix from a vanilla KNN for comparison
from surprise.prediction_algorithms import KNNBasic

# Fit Surprise's basic KNN with cosine similarity on the same trainset, so that its
# similarity matrix can be compared with the one held by GreConDRecommender.
# sim_options does not set user_based, so Surprise's default applies
# (user-user similarities per the Surprise docs — confirm).
knn_algo = KNNBasic(sim_options={"name": "cosine"})
knn_algo.fit(trainset)

# Show the resulting similarity matrix and its shape.
print(knn_algo.sim)
print(knn_algo.sim.shape)

In [None]:
from recommenders.common import get_similarity_matrix

# Baseline: similarity matrix computed from the recommender's binary dataset.
similarity_matrix_on_original_dataset = get_similarity_matrix(algo.binary_dataset)
# Element-wise difference; positive values mean the recommender's similarity is
# higher than the baseline for that pair.
similarity_delta = algo.sim - similarity_matrix_on_original_dataset

# Four panels side by side: baseline, recommender, their delta, and vanilla KNN.
fig, axs = plt.subplots(1, 4, figsize=(20, 5))
fig.suptitle('Similarity Matrices')

# Every colorbar is attached to the axes that holds its own image, so each panel
# keeps its own scale next to it.
subfig = axs[0].imshow(similarity_matrix_on_original_dataset, vmin=0, vmax=1, cmap='Greys')
axs[0].set(xlabel='User', ylabel='User', title='Original Dataset Similarity Matrix')
fig.colorbar(subfig, ax=axs[0]).set_label('Similarity')

subfig = axs[1].imshow(algo.sim, vmin=0, vmax=1, cmap='Greys')
axs[1].set(xlabel='User', ylabel='User', title='Latent Space based Similarity Matrix')
fig.colorbar(subfig, ax=axs[1]).set_label('Similarity')

# Diverging colormap with symmetric limits so that a zero delta renders as white.
subfig = axs[2].imshow(similarity_delta, cmap='bwr', vmin=-0.5, vmax=0.5)
axs[2].set(xlabel='User', ylabel='User', title='Similarity Delta (Latent - Original)')
fig.colorbar(subfig, ax=axs[2]).set_label('Similarity delta')

# This panel shows plain similarities (not a delta), so it shares the 'Similarity' label.
subfig = axs[3].imshow(knn_algo.sim, cmap='Greys', vmin=0, vmax=1)
axs[3].set(xlabel='User', ylabel='User', title='Vanilla KNN Similarity Matrix')
fig.colorbar(subfig, ax=axs[3]).set_label('Similarity')

fig.show()

In [None]:
def flatten_similarity_matrix(matrix):
    """Return the entries strictly below the main diagonal of *matrix* as a flat list.

    Entries are collected row by row: (1, 0), (2, 0), (2, 1), (3, 0), ...
    The diagonal and everything above it are skipped, so each unordered pair of
    a symmetric matrix contributes exactly one value.

    *matrix* must support ``matrix[row, col]`` indexing (e.g. a 2-D numpy array).
    """
    return [
        matrix[row, col]
        for row, _ in enumerate(matrix)
        for col in range(row)
    ]

# Flatten both similarity matrices into plain lists of pairwise values.
original_dataset_similarities = flatten_similarity_matrix(similarity_matrix_on_original_dataset)
latent_dataset_similarities = flatten_similarity_matrix(algo.sim)

# Tick label -> series; dict insertion order is the plotting order.
similarity_samples = {
    'Original Dataset': original_dataset_similarities,
    'Latent Dataset': latent_dataset_similarities,
}
plt.boxplot(list(similarity_samples.values()), labels=list(similarity_samples.keys()))
plt.show()


In [None]:
# Assert that factorization covers 100% of the original matrix

import numpy as np

# Multiply the two factor matrices back together and require an element-wise match
# with the binary matrix stored on algo.binary_dataset.
# NOTE(review): np.matmul only behaves as a boolean (OR-of-ANDs) product when A and B
# are bool arrays; with integer dtypes overlapping factors would sum above 1 — confirm dtype.
I = np.matmul(algo.A, algo.B)
assert (I == algo.binary_dataset.binary_dataset).all()

In [None]:
# Generate some predictions
# The testset is built from the trainset itself, so the predictions below are for
# ratings the model was fitted on, not held-out data.
testset = trainset.build_testset()
predictions = algo.test(testset)

In [None]:
# Overall quality of the predictions
from surprise.accuracy import mae, rmse

# Mean absolute error and root-mean-square error over the predictions generated above.
# Because those predictions come from a testset built out of the trainset, these are
# training-set errors.
mae(predictions=predictions)
rmse(predictions=predictions)

In [None]:
# Show the first ten predictions for a quick visual sanity check.
for prediction in predictions[:10]:
    print(prediction)