# TensorRec recommender system:
## The input data that it uses: user features, item features, and interactions.


### Loading the raw ratings: each row represents a single rating, i.e. one user and one item. We’ll be using these ratings (purchase frequency) as the interactions between users and products.

In [None]:
1

In [None]:
import csv

# Open and read in the ratings file.
# newline='' is what the csv module documentation asks for when opening a
# file for csv.reader, so newlines embedded in quoted fields parse correctly.
print('Loading ratings')
with open('transaccional_sample.csv', 'r', newline='') as ratings_file:
    ratings_file_reader = csv.reader(ratings_file)
    # Consume the header row first instead of popping it off the front of the
    # fully built list; an empty file yields a None header and no ratings
    # rather than an IndexError.
    raw_ratings_header = next(ratings_file_reader, None)
    raw_ratings = list(ratings_file_reader)

len(raw_ratings)

In [None]:
import collections

# Re-index the raw user and item IDs as dense, zero-based internal IDs.
# Each defaultdict hands out its current size as the ID of a key it has not
# seen before, so internal IDs are assigned in order of first appearance.
idcte_to_internal_user_ids = collections.defaultdict(
    lambda: len(idcte_to_internal_user_ids))
idfam1_to_internal_item_ids = collections.defaultdict(
    lambda: len(idfam1_to_internal_item_ids))

for rating_row in raw_ratings:
    # Columns 0, 1 and 2 are overwritten in place with the internal user ID,
    # the internal item ID and the rating converted to float.
    rating_row[0] = idcte_to_internal_user_ids[int(rating_row[0])]
    rating_row[1] = idfam1_to_internal_item_ids[int(rating_row[1])]
    rating_row[2] = float(rating_row[2])

n_users = len(idcte_to_internal_user_ids)
n_items = len(idfam1_to_internal_item_ids)

# One value per line: user-map size, n_users, n_items
print(len(idcte_to_internal_user_ids), n_users, n_items, sep='\n')

In [None]:
idfam1_to_internal_item_ids
idcte_to_internal_user_ids

In [None]:
from collections import defaultdict
import csv
import numpy 
import random
from scipy import sparse
from sklearn.preprocessing import MultiLabelBinarizer

In [None]:
# Randomly reorder the ratings (random.shuffle works in place), then use the
# first 80% as the training set and the remaining 20% as the test set.
random.shuffle(raw_ratings)
cutoff = int(len(raw_ratings) * .8)
train_ratings, test_ratings = raw_ratings[:cutoff], raw_ratings[cutoff:]


### Next, we reorganize these ratings into a SciPy sparse matrix. In this matrix, every row represents a user and every column represents an item. The [i, j]th value in this matrix is User i’s interaction with Item j.

In [None]:

# This method converts a list of (user, item, rating) to a sparse matrix
def interactions_list_to_sparse_matrix(interactions):
    """Build an (n_users, n_items) sparse matrix from interaction triples.

    Args:
        interactions: Iterable of 3-element rows ``(user, item, rating)``,
            where ``user`` and ``item`` are the row and column indices of the
            entry and ``rating`` is its value.

    Returns:
        A ``scipy.sparse.coo_matrix`` of shape ``(n_users, n_items)`` (both
        read from the module-level globals). An empty input yields a matrix
        with no stored entries instead of failing on the tuple unpacking.
    """
    interactions = list(interactions)
    if not interactions:
        # zip(*[]) produces nothing to unpack, so handle this case explicitly.
        return sparse.coo_matrix((n_users, n_items))
    users_column, items_column, ratings_column = zip(*interactions)
    return sparse.coo_matrix((ratings_column, (users_column, items_column)),
                             shape=(n_users, n_items))


# Create sparse matrices of interaction data.
# The training matrix is built from train_ratings only: building it from
# raw_ratings would also include every test rating, so the test-set recall
# would be measured on interactions the model was fitted on. The internal ID
# encoding is not affected, because interactions_list_to_sparse_matrix always
# passes shape=(n_users, n_items) explicitly.
sparse_train_ratings = interactions_list_to_sparse_matrix(train_ratings)
sparse_test_ratings = interactions_list_to_sparse_matrix(test_ratings)



In [None]:
sparse_train_ratings

In [None]:
#!pip uninstall tensorflow -y
#!pip install tensorflow==1.13.1

In [None]:
import tensorflow as tf
print(tf.__version__)

In [None]:
#!pip install tensorrec --ignore-installed

In [None]:
import tensorrec

### TensorRec will perform matrix factorization by default if it is given only identity matrices as user/item features. These identity matrices are often called “indicator features.”

In [None]:
# Matrix factorization collaborative filter with 5 components
cf_model = tensorrec.TensorRec(n_components=5)

# Indicator features: one identity matrix for the users, one for the items
item_indicator_features = sparse.identity(n_items)
user_indicator_features = sparse.identity(n_users)

# Fit the collaborative filter on the training interactions
print("Training collaborative filter")
cf_model.fit(
    interactions=sparse_train_ratings,
    user_features=user_indicator_features,
    item_features=item_indicator_features,
)

In [None]:
# Keep only the train/test interactions whose frequency is >= 1 (every other
# entry is multiplied by False and so becomes 0), since these represent the
# products that have been purchased
sparse_train_ratings_1plus, sparse_test_ratings_1plus = (
    ratings.multiply(ratings >= 1)
    for ratings in (sparse_train_ratings, sparse_test_ratings)
)


# This method consumes item ranks for each user and prints out recall@10 train/test metrics
def check_results(ranks):
    """Print the mean recall@10 of ``ranks`` on the train and test sets.

    Args:
        ranks: Predicted item ranks, passed to ``tensorrec.eval.recall_at_k``
            as ``predicted_ranks``.

    The reference interactions are the module-level
    ``sparse_train_ratings_1plus`` and ``sparse_test_ratings_1plus`` matrices.
    Nothing is returned; the two averaged recalls are printed on one line.
    """
    mean_recalls = [
        tensorrec.eval.recall_at_k(
            test_interactions=reference_interactions,
            predicted_ranks=ranks,
            k=10,
        ).mean()
        for reference_interactions in (sparse_train_ratings_1plus,
                                       sparse_test_ratings_1plus)
    ]
    print("Recall at 10: Train: {:.4f} Test: {:.4f}".format(*mean_recalls))


# Check the results of the MF CF model: compute item ranks with the fitted
# collaborative filter, using the same indicator features it was fitted with,
# and report recall@10 on the train and test interactions via check_results.
print("Matrix factorization collaborative filter:")
predicted_ranks = cf_model.predict_rank(user_features=user_indicator_features,
                                        item_features=item_indicator_features)
check_results(predicted_ranks)

In [None]:
#NEXT PHASE: TO IMPROVE AND KEEP ADDING DIFFERENT DIMENSIONS TO THE CLASSIC COLLABORATIVE FILTERING MODEL AND SEE IF THERE ARE IMPROVEMENTS

### TensorRec will perform matrix factorization by default if it is given only identity matrices as user/item features. These identity matrices are often called “indicator features.”

In [None]:
#//STILL WORKING ON THIS
# Let's try a new loss function: WMRB 
#print("Training collaborative filter with WMRB loss")
#ranking_cf_model = tensorrec.TensorRec(n_components=5,
#                                       loss_graph=tensorrec.loss_graphs.WMRBLossGraph())
#ranking_cf_model.fit(interactions=sparse_train_ratings_1plus,
 #                    user_features=user_indicator_features,
  #                   item_features=item_indicator_features,
   #                  n_sampled_items=int(n_items *1))

# Check the results of the WMRB MF CF model
#print("WMRB matrix factorization collaborative filter:")
#predicted_ranks = ranking_cf_model.predict_rank(user_features=user_indicator_features,
 #                                               item_features=item_indicator_features)

# Adding Metadata Features
## To continue experimenting, we should try to make use of other data available to us. We will try using User Demographic data

In [None]:
1

In [None]:
import csv

# To improve the recommendations, let's read in the user demographic data.
# newline='' is what the csv module documentation asks for when opening a
# file for csv.reader, so newlines embedded in quoted fields parse correctly.
print('Loading user metadata')
with open('transaccional_sample_features_combined.csv', 'r', newline='') as users_file:
    users_file_reader = csv.reader(users_file)
    # Consume the header row first instead of popping it off the front of the
    # fully built list; an empty file yields a None header and no rows rather
    # than an IndexError.
    raw_user_metadata_header = next(users_file_reader, None)
    raw_user_metadata = list(users_file_reader)
raw_user_metadata

In [None]:
# Map the raw user IDs found in the metadata to our internal user IDs and
# keep track of each user's feature list.
# NOTE(review): assumes column 0 of the metadata file is the customer ID
# (idcte) used in the ratings file and column 1 is a comma-separated feature
# string - confirm against the CSV header.
user_id_by_internal_id = {}
user_features_by_internal_id = {}
for row in raw_user_metadata:
    raw_user_id = int(row[0])
    # Look the ID up in the *user* mapping. The item mapping
    # (idfam1_to_internal_item_ids) would return unrelated internal IDs and
    # would also grow with spurious entries, since it is a defaultdict.
    # NOTE(review): a user that appears only in the metadata still receives a
    # fresh internal ID >= n_users here, because the mapping is a defaultdict.
    internal_user_id = idcte_to_internal_user_ids[raw_user_id]
    row[0] = internal_user_id  # Map to IDs
    row[1] = row[1].split(',')  # Split up
    # Remember the original ID so that internal IDs can be translated back.
    user_id_by_internal_id[internal_user_id] = raw_user_id
    user_features_by_internal_id[internal_user_id] = row[1]

# Look at an example user metadata row
print("Raw metadata example:\n{}\n{}".format(raw_user_metadata_header,
                                             raw_user_metadata[0]))



In [None]:
user_features_by_internal_id

In [None]:
# Build a list of features where the index is the internal user ID and
# the value is a list of features.
# Iterating over range(n_users), rather than over the dict's insertion order,
# guarantees that position i holds the features of internal user i and that
# the list has exactly one entry per user, matching the rows of the
# interaction matrices. Users without metadata get an empty feature list.
user_feat = [user_features_by_internal_id.get(internal_id, [])
             for internal_id in range(n_users)]

In [None]:
user_feat

In [None]:
29 #perdido 

In [None]:
# Binarize the per-user feature lists with scikit-learn's MultiLabelBinarizer
user_feature_binarizer = MultiLabelBinarizer()
user_features = user_feature_binarizer.fit_transform(user_feat)

# Number of columns in the binarized feature matrix
n_features = user_features.shape[1]
# print("Binarized features example for user {}:\n{}".format(user_id_by_internal_id[0],
#                                                           user_features[0]))

In [None]:
user_features

In [None]:
n_features #perdido 

In [None]:
# Coerce the user features to a sparse matrix, which TensorRec expects.
# The binarized feature array is wrapped in SciPy's COO format; the bare
# expression on the last line displays the result in the notebook.
user_features_mat = sparse.coo_matrix(user_features)
user_features_mat

## Content-based Recommendation
### Now that we have metadata about our user, one thing we can try is to recommend based solely on the user metadata.
### Ideally we would also have item metadata, because that would have a greater impact on recommendation quality and would also help with the cold-start problem. There is a major weakness to this system: these features alone are not very descriptive and do not provide enough information to make an informed recommendation.


In [None]:
# Set up a content-based model driven by the user features.
# n_components is set to the number of user feature columns.
# NOTE(review): presumably required so the pass-through user representation
# has the same width as the item representation - verify against TensorRec.
print("Training content-based recommender")
user_passthrough_graph = (
    tensorrec.representation_graphs.FeaturePassThroughRepresentationGraph()
)
content_model = tensorrec.TensorRec(n_components=n_features,
                                    user_repr_graph=user_passthrough_graph)

In [None]:
help(content_model.fit)

In [None]:
user_features_mat

In [None]:
# Fit the content-based model on the frequency >= 1 interactions, using the
# binarized user metadata as user features and identity features for items.
# NOTE(review): int(n_items * .01) truncates to 0 whenever n_items < 100.
# NOTE(review): n_sampled_items is presumably only used by sample-based loss
# graphs such as WMRB, and no loss_graph is set on this model - confirm
# whether the argument has any effect here.
content_model.fit(interactions=sparse_train_ratings_1plus,
                  user_features=user_features_mat,
                  item_features=item_indicator_features,
                  n_sampled_items=int(n_items * .01))

In [None]:
# Check the results of the content-based model: compute item ranks from the
# same user metadata and item indicator features the model was fitted with,
# then report recall@10 on the train and test interactions via check_results.
print("Content-based recommender:")
predicted_ranks = content_model.predict_rank(user_features=user_features_mat,
                                             item_features=item_indicator_features)
check_results(predicted_ranks)

# Hybrid recommender
### Let’s combine these two: we’ll use indicator features to get the strengths of a collaborative filter, and we’ll also use the content features to take advantage of the metadata. This combination of collaborative filtering and content-based recommendation is the hybrid model.


#### We do this by stacking the two sets of features together:

In [None]:
# Try concatenating the user features on to the indicator features for a hybrid recommender system.
# hstack joins the two matrices column-wise: the identity (indicator) columns
# come first and the metadata columns follow, so both inputs must have the
# same number of rows. The bare expression displays the result.
full_user_features = sparse.hstack([  user_indicator_features, user_features_mat])
full_user_features

In [None]:
user_indicator_features

In [None]:
user_features_mat

In [None]:
# Hybrid model: 5 components, fitted on the frequency >= 1 interactions with
# the stacked (indicator + metadata) user features and item indicator features.
# NOTE(review): int(.01 * n_items) truncates to 0 whenever n_items < 100.
print("Training hybrid recommender")
hybrid_model = tensorrec.TensorRec(n_components=5)
hybrid_model.fit(
    interactions=sparse_train_ratings_1plus,
    user_features=full_user_features,
    item_features=item_indicator_features,
    n_sampled_items=int(.01 * n_items),
)


In [None]:
# Check the results of the hybrid model: compute item ranks from the stacked
# user features and the item indicator features it was fitted with, then
# report recall@10 on the train and test interactions via check_results.
print("Hybrid recommender:")
predicted_ranks = hybrid_model.predict_rank(user_features=full_user_features,
                                            item_features=item_indicator_features)
check_results(predicted_ranks)

In [None]:
# Pull one user's row out of the user features matrix and predict for just
# that user. The row is taken from full_user_features (indicator + metadata
# columns) because that is the matrix hybrid_model was fitted on; a row of
# user_indicator_features alone has a different number of columns.
target_internal_user_id = 2001  # internal ID of the user to recommend for
u_features = sparse.csr_matrix(full_user_features)[target_internal_user_id]
u_rankings = hybrid_model.predict_rank(user_features=u_features,
                                       item_features=item_indicator_features)[0]

# Get internal IDs of the user's top 10 recommendations
# These are sorted by item ID, not by rank
# This may contain items with which the user has already interacted
u_top_ten_recs = numpy.where(u_rankings <= 10)[0]
print("User x: Item recommendations:")
u_top_ten_recs

In [None]:
# Invert the idfam1 -> internal ID mapping so that each recommended internal
# item ID can be printed as its original idfam1 value.
item_by_internal_id = {internal_id: idfam1
                       for idfam1, internal_id in idfam1_to_internal_item_ids.items()}
for m in u_top_ten_recs:
    # int() turns the NumPy integer from numpy.where into a plain dict key.
    print(item_by_internal_id[int(m)])