# TensorRec recommender engine:
## Prototyping of a recommender system in Python using TensorRec including input data manipulation, algorithm design, and usage for prediction.

#### TensorRec is a Python package for building recommender systems. A TensorRec recommender system consumes three pieces of input data: user features, item features, and interactions. Based on the user/item features, the system will predict which items to recommend. The interactions are used when fitting the model: predictions are compared to the interactions and a loss/penalty is calculated, which the system learns to decrease. As we prototype our system, we tackle three major situations: how we handle interactions, how we handle features, and how we structure the recommender itself.


<img src="https://miro.medium.com/max/1400/1*YotDpHjvGL8xK91ZggthbA.png" />


https://towardsdatascience.com/getting-started-with-recommender-systems-and-tensorrec-8f50a9943eef

### Raw ratings load :Each row represents a single rating: one user and one item. We’ll be using these ratings(frequency of purchase of each item) as our interactions between the user and the product.

In [None]:
#!conda install -c conda-forge google-cloud-sdk -y
#!pip install --upgrade six==1.13.0

In [1]:
from google.cloud import bigquery
limite = 310588 #310 588 606  # no corre en mi local y pesa 56MB %1 del total de la muestra

In [2]:
def get_data_BQ(sql):
    client = bigquery.Client()
    df = client.query(sql).to_dataframe()
    return(df)

In [390]:
sql =  '''SELECT ID_CTE as ID_CTE, ID_FAM as ID_CLAS1, FREQUENCY as FREQUENCY
FROM `rmf2gcp.RawData.Workflow_aggregado`
limit ''' + str(limite) 
print(sql)
raw_ratings = get_data_BQ(sql)

SELECT ID_CTE as ID_CTE, ID_FAM as ID_CLAS1, FREQUENCY as FREQUENCY
FROM `rmf2gcp.RawData.Workflow_aggregado`
limit 310588


In [391]:
raw_ratings

Unnamed: 0,ID_CTE,ID_CLAS1,FREQUENCY
0,37636515,224041,6
1,32799753,540059,6
2,2484396,106010,8
3,20547677,290059,6
4,22061135,314063,6
...,...,...,...
310583,28556843,295007,1
310584,5438885,861007,1
310585,24378139,121001,1
310586,11240074,387301,1


In [392]:
print(raw_ratings.columns)
raw_ratings.dtypes

Index(['ID_CTE', 'ID_CLAS1', 'FREQUENCY'], dtype='object')


ID_CTE       int64
ID_CLAS1     int64
FREQUENCY    int64
dtype: object

In [393]:
raw_ratings = raw_ratings.values.tolist() # pues vamos a seguir su mala practica de hacer una lista de listas 
raw_ratings[0:7]

[[37636515, 224041, 6],
 [32799753, 540059, 6],
 [2484396, 106010, 8],
 [20547677, 290059, 6],
 [22061135, 314063, 6],
 [28119603, 861020, 7],
 [35249526, 105244, 6]]

### Iterate through the input to map Item and User IDs to new internal IDs
### The new internal IDs will be created by the defaultdict on insertion

In [394]:
import collections
idcte_to_internal_user_ids = collections.defaultdict(lambda: len(idcte_to_internal_user_ids))
idfam1_to_internal_item_ids = collections.defaultdict(lambda: len(idfam1_to_internal_item_ids))
for row in raw_ratings:
    row[0] = idcte_to_internal_user_ids[int(row[0])]
    row[1] = idfam1_to_internal_item_ids[int(row[1])]
    row[2] = float(row[2])    # esta operacion esta de más 
n_users = len(idcte_to_internal_user_ids)
n_items = len(idfam1_to_internal_item_ids)
print(n_users)
print(n_items)

303685
1951


In [395]:
idfam1_to_internal_item_ids

defaultdict(<function __main__.<lambda>()>,
            {224041: 0,
             540059: 1,
             106010: 2,
             290059: 3,
             314063: 4,
             861020: 5,
             105244: 6,
             596056: 7,
             798009: 8,
             701305: 9,
             224057: 10,
             775260: 11,
             413238: 12,
             538092: 13,
             380161: 14,
             847301: 15,
             106003: 16,
             314156: 17,
             224065: 18,
             594417: 19,
             864212: 20,
             311315: 21,
             313155: 22,
             319162: 23,
             224033: 24,
             418278: 25,
             596242: 26,
             857020: 27,
             318203: 28,
             423132: 29,
             855022: 30,
             295027: 31,
             102016: 32,
             105007: 33,
             106059: 34,
             102017: 35,
             381009: 36,
             105074: 37,
             314

In [396]:
idcte_to_internal_user_ids

defaultdict(<function __main__.<lambda>()>,
            {37636515: 0,
             32799753: 1,
             2484396: 2,
             20547677: 3,
             22061135: 4,
             28119603: 5,
             35249526: 6,
             23870859: 7,
             13620577: 8,
             13385107: 9,
             33859178: 10,
             34500487: 11,
             12951397: 12,
             22618448: 13,
             12227995: 14,
             24229664: 15,
             35267359: 16,
             4981944: 17,
             37718480: 18,
             20366878: 19,
             35479340: 20,
             2161893: 21,
             26071608: 22,
             7547174: 23,
             22092149: 24,
             11831510: 25,
             22861052: 26,
             27377638: 27,
             6415992: 28,
             1536903: 29,
             21575161: 30,
             228803: 31,
             37121063: 32,
             26020040: 33,
             29047650: 34,
             30149160: 35,
  

In [397]:
from collections import defaultdict
import csv
import numpy 
import random
from scipy import sparse
from sklearn.preprocessing import MultiLabelBinarizer

### At this point, we’ll break the ratings in to a training and test set by shuffling and splitting the ratings. Our prototypes will be trained on the training set, and we’ll evaluate their success using the test set. Splitting the train/test sets at random like this is crude, and there are more rigorous techniques for model evaluation, but it is quick and clear for the purposes of this example. 

In [398]:
# Shuffle the ratings and split them in to train/test sets 80%/20%
random.shuffle(raw_ratings)  # Shuffles the list in-place
cutoff = int(.8 * len(raw_ratings))
train_ratings = raw_ratings[:cutoff]
test_ratings = raw_ratings[cutoff:]


### Next, we reorganize these ratings in to a Scipy sparse matrix. In this matrix, every row represents a user and every column is an item. The [i, j]th value in this matrix is User i’s interaction with Item j.

In [399]:

# This method converts a list of (user, item, rating) to a sparse matrix
def interactions_list_to_sparse_matrix(interactions):
    users_column, items_column, ratings_column, = zip(*interactions)
    return sparse.coo_matrix((ratings_column, (users_column, items_column)),
                             shape=(n_users, n_items))


# Create sparse matrices of interaction data
sparse_train_ratings = interactions_list_to_sparse_matrix(raw_ratings)
sparse_test_ratings = interactions_list_to_sparse_matrix(test_ratings)



In [400]:
sparse_train_ratings

<303685x1951 sparse matrix of type '<class 'numpy.float64'>'
	with 310588 stored elements in COOrdinate format>

In [401]:
n_users

303685

### TensorRec library runs on TensorFlow so we install a compatible version of TensorFlow 
### Both TensorFlow and TensorRec can be installed using !pip

In [402]:
#!pip install "tensorflow==1.13.1"

In [403]:
import tensorflow as tf
print(tf.__version__)

1.15.3-dlenv_tfe


In [404]:
#!pip install tensorrec --ignore-installed

In [405]:
import tensorrec

## Collaborative Filter Prototype
### A collaborative filter is an algorithm that learns which users have similar tastes and recommends items to a user based on what similar users have liked. A common way to do this is through matrix factorization. In matrix factorization, we have to learn two matrices (user representations and item representations) that, when multiplied together, approximate the interactions:
#### TensorRec will perform matrix factorization by default if it is given only identity matrices as user/item features. These identity matrices are often called “indicator features.”

In [406]:
# Construct indicator features for users and items
user_indicator_features = sparse.identity(n_users)
item_indicator_features = sparse.identity(n_items)

# Build a matrix factorization collaborative filter model
cf_model = tensorrec.TensorRec(n_components=5)

# Fit the collaborative filter model
print("Training collaborative filter")
cf_model.fit(interactions=sparse_train_ratings,
             user_features=user_indicator_features,
             item_features=item_indicator_features)

Training collaborative filter


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


### Performance of the model: first prototype
#### To do this, we’ll look at a metric called “recall at K.” Recall@K says, for the average user, what percentage of their test items made it in to the top K in the predicted rankings.
#### Recall@K is a nice metric for many recommender systems because it emulates the behavior of a recommendation product. Before calculating the recall, we’ll want to decide which interactions should count as a “purchased item.” In this case, choose to use all ratings of at least 1.0 as “liked products” and ignore the rest. 

In [407]:
# Create sets of train/test interactions that are only frequency > 1 since these represent the products that have been purchased 
'''sparse_train_ratings_1plus = sparse_train_ratings.multiply(sparse_train_ratings >= 1)
sparse_test_ratings_1plus = sparse_test_ratings.multiply(sparse_test_ratings >= 1)


# This method consumes item ranks for each user and prints out train/test metrics
def check_results(ranks):
    train_recall_at_10 = tensorrec.eval.recall_at_k(
        test_interactions=sparse_train_ratings_1plus,
        predicted_ranks=ranks,
        k=10
    ).mean()
    test_recall_at_10 = tensorrec.eval.recall_at_k(
        test_interactions=sparse_test_ratings_1plus,
        predicted_ranks=ranks,
        k=10
    ).mean()
    print("Performance metrics: Train: {:.4f} Test: {:.4f}".format(train_recall_at_10,
                                                            test_recall_at_10))


# Check the results of the MF CF model
print("Matrix factorization collaborative filter:")
predicted_ranks = cf_model.predict_rank(user_features=user_indicator_features,
                                        item_features=item_indicator_features)
check_results(predicted_ranks)
'''

'sparse_train_ratings_1plus = sparse_train_ratings.multiply(sparse_train_ratings >= 1)\nsparse_test_ratings_1plus = sparse_test_ratings.multiply(sparse_test_ratings >= 1)\n\n\n# This method consumes item ranks for each user and prints out train/test metrics\ndef check_results(ranks):\n    train_recall_at_10 = tensorrec.eval.recall_at_k(\n        test_interactions=sparse_train_ratings_1plus,\n        predicted_ranks=ranks,\n        k=10\n    ).mean()\n    test_recall_at_10 = tensorrec.eval.recall_at_k(\n        test_interactions=sparse_test_ratings_1plus,\n        predicted_ranks=ranks,\n        k=10\n    ).mean()\n    print("Performance metrics: Train: {:.4f} Test: {:.4f}".format(train_recall_at_10,\n                                                            test_recall_at_10))\n\n\n# Check the results of the MF CF model\nprint("Matrix factorization collaborative filter:")\npredicted_ranks = cf_model.predict_rank(user_features=user_indicator_features,\n                                

## Loss Graphs
### One way we can configure our TensorRec system is by changing the loss graph. The loss graph takes in predictions and interactions and calculates a penalty (loss) that the system will try to decrease as it learns.
#### WMRB, which stands for “weighted margin-rank batch,” works by taking a random sample of items the user hasn’t interacted with and comparing their predictions to items the user likes. Over time, this pushes items a user likes to the top of the rankings. We can try using different loss graphs like WARP

In [408]:
# Let's try a new loss function: WMRB 
'''
print("Training collaborative filter with WMRB loss")
ranking_cf_model = tensorrec.TensorRec(n_components=5,
                                       loss_graph=tensorrec.loss_graphs.WMRBLossGraph())
ranking_cf_model.fit(interactions=sparse_train_ratings_1plus,
                     user_features=user_indicator_features,
                     item_features=item_indicator_features,
                     n_sampled_items=int(n_items *1))

# Check the results of the WMRB MF CF model
print("WMRB matrix factorization collaborative filter:")
predicted_ranks = ranking_cf_model.predict_rank(user_features=user_indicator_features,
                                                item_features=item_indicator_features)
                                                '''

'\nprint("Training collaborative filter with WMRB loss")\nranking_cf_model = tensorrec.TensorRec(n_components=5,\n                                       loss_graph=tensorrec.loss_graphs.WMRBLossGraph())\nranking_cf_model.fit(interactions=sparse_train_ratings_1plus,\n                     user_features=user_indicator_features,\n                     item_features=item_indicator_features,\n                     n_sampled_items=int(n_items *1))\n\n# Check the results of the WMRB MF CF model\nprint("WMRB matrix factorization collaborative filter:")\npredicted_ranks = ranking_cf_model.predict_rank(user_features=user_indicator_features,\n                                                item_features=item_indicator_features)\n                                                '

# Adding Metadata Features
## To continue experimenting, we should try to make use of other data available to us. We will try using User Demographic data

In [409]:
# To improve the recommendations, lets read in the user demographic data
sql = """
select A.* 
from (
  select ID_CTE as USERID, CONCAT( CAST( DATE_DIFF( CURRENT_DATE(), cast( FECHA_NAC as date), YEAR)  as string) , ',', CAST( EDO_CIVIL as string), ',', CAST( GENERO as string) ) as FEATURES, D_EDO as STATE
    from `rmf2gcp.RawData.demographics`
    where ID_CTE in
      (SELECT  ID_CTE
        FROM `rmf2gcp.RawData.Workflow_aggregado`
        order by ID_CTE
        limit """ + str(limite) + """) ) as A
      WHERE A.FEATURES IS NOT NULL
"""
print(sql)


select A.* 
from (
  select ID_CTE as USERID, CONCAT( CAST( DATE_DIFF( CURRENT_DATE(), cast( FECHA_NAC as date), YEAR)  as string) , ',', CAST( EDO_CIVIL as string), ',', CAST( GENERO as string) ) as FEATURES, D_EDO as STATE
    from `rmf2gcp.RawData.demographics`
    where ID_CTE in
      (SELECT  ID_CTE
        FROM `rmf2gcp.RawData.Workflow_aggregado`
        order by ID_CTE
        limit 310588) ) as A
      WHERE A.FEATURES IS NOT NULL



In [410]:
raw_user_metadata = get_data_BQ(sql)
print(raw_user_metadata.head(800))
print(raw_user_metadata.dtypes)
print(raw_user_metadata.columns)
raw_user_metadata_header = ['USERID', 'FEATURES', 'STATE']
raw_user_metadata_header

     USERID FEATURES                      STATE
0     79619   73,S,F  BAJA CALIFORNIA NORTE    
1     80983   56,C,F  BAJA CALIFORNIA NORTE    
2    104786   74,S,M  BAJA CALIFORNIA NORTE    
3    104899   55,C,F  BAJA CALIFORNIA NORTE    
4     62964   50,U,F  BAJA CALIFORNIA NORTE    
..      ...      ...                        ...
795   91945   65,S,F  SINALOA                  
796   89453   70,D,F  SINALOA                  
797   42302   67,S,M  SINALOA                  
798   12066   50,S,M  SINALOA                  
799   49202   74,V,M  SINALOA                  

[800 rows x 3 columns]
USERID       int64
FEATURES    object
STATE       object
dtype: object
Index(['USERID', 'FEATURES', 'STATE'], dtype='object')


['USERID', 'FEATURES', 'STATE']

In [411]:
raw_user_metadata = raw_user_metadata.values.tolist()
raw_user_metadata[0:7]

[[79619, '73,S,F', 'BAJA CALIFORNIA NORTE    '],
 [80983, '56,C,F', 'BAJA CALIFORNIA NORTE    '],
 [104786, '74,S,M', 'BAJA CALIFORNIA NORTE    '],
 [104899, '55,C,F', 'BAJA CALIFORNIA NORTE    '],
 [62964, '50,U,F', 'BAJA CALIFORNIA NORTE    '],
 [61493, '71,D,F', 'BAJA CALIFORNIA NORTE    '],
 [63987, '70,S,M', 'BAJA CALIFORNIA NORTE    ']]

### First, we’ll want to read this data, map the movies to our internal IDs, and keep track of the features for each user. Then we’ll binarize the feature  labels using Scikit’s MultiLabelBinarizer. The binarized output will be our features for our new recommender system.

In [412]:
# Map the features IDs to our internal IDs and keep track of the gender and age
temp_string = ''
temp_list = []
count = 1
user_id_by_internal_id = {}
user_features_by_internal_id = {}
for row in raw_user_metadata:
    temp_string = ''
    temp_list = []
    

    temp_string = str(row[0])
    temp_list = row[1].split(',')
    #print(count)
    #print(temp_string)
    #print(temp_list)
    row[0] = idfam1_to_internal_item_ids[int(temp_string)]  # Map to IDs
    row[1] = temp_list  # Split up
    user_id_by_internal_id[temp_string] = temp_string
    user_features_by_internal_id[int(temp_string)] = row[1]
    count+=1
# Look at an example user metadata row
print("Raw metadata example:\n{}\n{}".format(raw_user_metadata_header, 
                                             raw_user_metadata[0]))



Raw metadata example:
['USERID', 'FEATURES', 'STATE']
[1951, ['73', 'S', 'F'], 'BAJA CALIFORNIA NORTE    ']


In [413]:
user_features_by_internal_id

{79619: ['73', 'S', 'F'],
 80983: ['56', 'C', 'F'],
 104786: ['74', 'S', 'M'],
 104899: ['55', 'C', 'F'],
 62964: ['50', 'U', 'F'],
 61493: ['71', 'D', 'F'],
 63987: ['70', 'S', 'M'],
 66726: ['76', 'S', 'F'],
 91044: ['73', 'S', 'F'],
 25530: ['78', 'S', 'F'],
 107804: ['62', 'S', 'F'],
 41180: ['65', 'S', 'M'],
 37561: ['73', 'S', 'F'],
 88436: ['62', 'S', 'F'],
 71127: ['69', 'S', 'F'],
 57044: ['77', 'V', 'F'],
 13832: ['71', 'S', 'M'],
 99385: ['63', 'S', 'F'],
 21173: ['61', 'U', 'F'],
 111370: ['58', 'C', 'M'],
 30659: ['83', 'D', 'M'],
 46598: ['50', 'V', 'M'],
 13467: ['75', 'S', 'F'],
 101038: ['66', 'V', 'M'],
 36707: ['82', 'S', 'M'],
 56679: ['70', 'U', 'M'],
 98798: ['72', 'S', 'F'],
 111708: ['64', 'S', 'F'],
 81585: ['71', 'V', 'F'],
 17945: ['66', 'U', 'F'],
 90087: ['85', 'C', 'M'],
 55656: ['87', 'C', 'F'],
 43450: ['87', 'C', 'F'],
 76360: ['84', 'V', 'F'],
 50750: ['92', 'C', 'M'],
 24953: ['67', 'S', 'F'],
 83622: ['87', 'C', 'M'],
 52575: ['66', 'S', 'F'],
 66537

###  Build a list of features where the index is the internal user ID and the value is a list of features

In [416]:
user_feat = [user_features_by_internal_id[internal_id]
                for internal_id in user_features_by_internal_id]

In [417]:
user_feat

[['73', 'S', 'F'],
 ['56', 'C', 'F'],
 ['74', 'S', 'M'],
 ['55', 'C', 'F'],
 ['50', 'U', 'F'],
 ['71', 'D', 'F'],
 ['70', 'S', 'M'],
 ['76', 'S', 'F'],
 ['73', 'S', 'F'],
 ['78', 'S', 'F'],
 ['62', 'S', 'F'],
 ['65', 'S', 'M'],
 ['73', 'S', 'F'],
 ['62', 'S', 'F'],
 ['69', 'S', 'F'],
 ['77', 'V', 'F'],
 ['71', 'S', 'M'],
 ['63', 'S', 'F'],
 ['61', 'U', 'F'],
 ['58', 'C', 'M'],
 ['83', 'D', 'M'],
 ['50', 'V', 'M'],
 ['75', 'S', 'F'],
 ['66', 'V', 'M'],
 ['82', 'S', 'M'],
 ['70', 'U', 'M'],
 ['72', 'S', 'F'],
 ['64', 'S', 'F'],
 ['71', 'V', 'F'],
 ['66', 'U', 'F'],
 ['85', 'C', 'M'],
 ['87', 'C', 'F'],
 ['87', 'C', 'F'],
 ['84', 'V', 'F'],
 ['92', 'C', 'M'],
 ['67', 'S', 'F'],
 ['87', 'C', 'M'],
 ['66', 'S', 'F'],
 ['89', 'C', 'M'],
 ['72', 'S', 'M'],
 ['74', 'S', 'F'],
 ['90', 'C', 'M'],
 ['92', 'C', 'F'],
 ['92', 'C', 'M'],
 ['59', 'C', 'M'],
 ['85', 'V', 'F'],
 ['74', 'S', 'F'],
 ['85', 'D', 'F'],
 ['69', 'S', 'F'],
 ['50', 'S', 'M'],
 ['59', 'C', 'M'],
 ['60', 'D', 'F'],
 ['85', 'V',

In [418]:
# Transform the features into binarized labels using scikit's MultiLabelBinarizer
user_features = MultiLabelBinarizer().fit_transform(user_feat)
n_features = user_features.shape[1]

In [419]:
user_features

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

### Coerce the user features to a sparse matrix, which TensorRec expects


In [420]:
user_features_mat = sparse.coo_matrix(user_features)
user_features_mat

<17109x83 sparse matrix of type '<class 'numpy.int64'>'
	with 51327 stored elements in COOrdinate format>

## Content-based Recommendation
### Now that we have metadata about our user, one thing we can try is to recommend based solely on the user metadata.
### To do this, we will configure a TensorRec model to use a pass-through representation graph for item features. For us, this means that the user representations will be the same as the user features that are passed in (just the user information like gender, age etc.) and the item representations will reflect how much the item suits that particular set of user features.
#### Ideal case is when we would have item metadata as well: because that would have a greater impact on making the recommendation better- also help solving the cold start problem. There is a major weakness to this system: these features alone are not very descriptive and are not enough information to make an informed recommendation.


In [None]:
# Fit a content-based model using the user features
print("Training content-based recommender")
content_model = tensorrec.TensorRec(
    n_components=n_features,
   user_repr_graph=tensorrec.representation_graphs.FeaturePassThroughRepresentationGraph()
    
)

In [None]:
content_model.fit(interactions=sparse_train_ratings_1plus,
                  user_features=user_features_mat,
                  item_features=item_indicator_features,
                  n_sampled_items=int(n_items * .01))

In [None]:
# Check the results of the content-based model
print("Content-based recommender:")
predicted_ranks = content_model.predict_rank(user_features=user_features_mat,
                                             item_features=item_indicator_features)
check_results(predicted_ranks)

### It’s not as good as the ranking collaborative filter but it’s significantly more effective if we add it to the base collaborative filter.
#### There is a major weakness to this system: user feature alone are not very descriptive and are not enough information to make an informed recommendation. If we had more descriptive metadata and item metadata (views, clicks, basket information etc.) we may have more success with this content-based recommender system.
#### On the other hand, there is a major strength to this system: by relying on only metadata features, and not using indicator features, we can recommend products which were not present when training the model. Similarly, if we have valuable user metadata we can avoid using user indicator features and make predictions for users who’ve never interacted with a product before. This is called “cold-start” recommendation.

# Hybrid recommender: 
## Hybrid recommender systems combine two or more recommendation strategies in different ways to benefit from their complementary advantages.
### Let’s combine these two: we’ll use indicator features to get the strengths of a collaborative filter, and we’ll also use the content features to take advantage of the metadata. This combination of collaborative filtering and content-based recommendation is the hybrid model.


#### We do this by stacking the two sets of features together:

In [421]:
# Try concatenating the user features on to the indicator features for a hybrid recommender system
full_user_features = sparse.hstack([user_indicator_features, user_features_mat])
full_user_features

ValueError: blocks[0,:] has incompatible row dimensions. Got blocks[0,1].shape[0] == 17109, expected 303685.

In [427]:
(user_indicator_features)

<303685x303685 sparse matrix of type '<class 'numpy.float64'>'
	with 303685 stored elements (1 diagonals) in DIAgonal format>

In [428]:
(user_features_mat)

<17109x83 sparse matrix of type '<class 'numpy.int64'>'
	with 51327 stored elements in COOrdinate format>

In [429]:
n_users

303685

In [None]:
print("Training hybrid recommender")
hybrid_model = tensorrec.TensorRec(
    n_components=5
)
hybrid_model.fit(interactions=sparse_train_ratings_1plus,
                 user_features=full_user_features,
                 item_features=item_indicator_features,
                 n_sampled_items=int(n_items * .01))


### This performs the best even though we are using trivial features from users. If we have more metadata, we can expect larger impact of the Hybrid recommender

In [None]:
print("Hybrid recommender:")
predicted_ranks = hybrid_model.predict_rank(user_features=full_user_features,
                                            item_features=item_indicator_features)
check_results(predicted_ranks)

## Making recommendations
### We do this by passing the user’s feature vector and all the item features to predict_rank() and examining the resulting rankings

In [None]:
# Pull user features out of the user features matrix and predict for just that user
u_features = sparse.csr_matrix(user_indicator_features)[2001]
u_rankings = hybrid_model.predict_rank(user_features=u432_features,
                                          item_features=item_indicator_features)[0]

# Get internal IDs of User 432's top 10 recommendations
# These are sorted by item ID, not by rank
# This may contain items with which User 432 has already interacted
u_top_ten_recs = numpy.where(u432_rankings <= 10)[0]
print("User x: Item recommendations:")
u_top_ten_recs

#### The value of the range over which the recommender should iterate has to be the same as the # of the users

In [None]:
# Pull user features out of the user features matrix and predict for just all users
for user in range(n_users):
    u_features = sparse.csr_matrix(full_user_features)[user]
    u_rankings = hybrid_model.predict_rank(user_features=u_features,
                                          item_features=item_indicator_features)[0]
    u_top_ten_recs = numpy.where(u_rankings <= 10)[0]
    print("User"+str(user)+": Item recommendations:")
    print(u_top_ten_recs)


#### Converting Internal IDs back to the original IDs

In [None]:
for user in range(10):
    u_features = sparse.csr_matrix(full_user_features)[user]
    u_rankings = hybrid_model.predict_rank(user_features=u_features,
                                          item_features=item_indicator_features)[0]
    u_top_ten_recs = numpy.where(u_rankings <= 10)[0]
    print("User "+str(list(idcte_to_internal_user_ids.keys())[list(idcte_to_internal_user_ids.values()).index(user)])+": Item recommendations:")
    #print(list(idcte_to_internal_user_ids.keys())[list(idcte_to_internal_user_ids.values()).index(user)]) 
    for m in u_top_ten_recs:
        print(list(idfam1_to_internal_item_ids.keys())[list(idfam1_to_internal_item_ids.values()).index(m)]) 



## Creating and writing a resulting CSV for recommendations for all users in the input database

In [None]:
import pandas as pd

HybridRecommendations=pd.DataFrame([])

In [None]:
for user in range(n_users):
    u_features = sparse.csr_matrix(full_user_features)[user]
    u_rankings = hybrid_model.predict_rank(user_features=u_features,
                                          item_features=item_indicator_features)[0]
    u_top_ten_recs = numpy.where(u_rankings <= 10)[0]
    user_id =str(list(idcte_to_internal_user_ids.keys())[list(idcte_to_internal_user_ids.values()).index(user)])
    for m in u_top_ten_recs:
        items = (list(idfam1_to_internal_item_ids.keys())[list(idfam1_to_internal_item_ids.values()).index(m)]) 
        HybridRecommendations=HybridRecommendations.append(pd.DataFrame({'itemId': items,'userId': user_id}, index=[0]), ignore_index=True)

In [None]:
HybridRecommendations=HybridRecommendations[['userId','itemId']]
HybridRecommendations.head()

In [None]:
HybridRecommendations.to_csv("/user-home/libraries/Sampled_data/datasets/HybridResult.csv", index=False)

### Hybrid Recommender can also be used to predict similar items given some item IDs

In [None]:
hybrid_model.predict_similar_items(item_features=item_indicator_features,item_ids=[3,55,90], n_similar=10)