In [None]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display, HTML
# Display the value of every bare expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Ask the inline backend for 'retina' figure output.
%config InlineBackend.figure_format='retina'
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns


import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation warnings raised by the libraries used in later cells.
warnings.filterwarnings('ignore')

In [None]:
from hwer.utils import normalize_affinity_scores_by_user_item, normalize_affinity_scores_by_user

from hwer.utils import unit_length, build_user_item_dict, build_item_user_dict, cos_sim, shuffle_copy
from hwer import HybridRecommender
import tensorflow as tf
from tensorflow import keras
# NOTE(review): this name is rebound by the `surprise.model_selection` import
# further down, so the scikit-learn function is not reachable afterwards.
from sklearn.model_selection import train_test_split
from typing import List, Dict, Tuple, Sequence, Type, Set, Optional

from surprise import Dataset
from surprise import accuracy
import pandas as pd
from pathlib import Path

# NOTE(review): shadows the scikit-learn `train_test_split` imported above;
# after this line `train_test_split` refers to the surprise implementation.
from surprise.model_selection import train_test_split
import numpy as np
from tqdm import tqdm,tqdm_notebook


In [None]:


# Read the three tab-separated tables, in the order users, movies, ratings.
users, movies, ratings = (
    pd.read_csv(csv_name, sep="\t", engine="python")
    for csv_name in ("users.csv", "movies.csv", "ratings.csv")
)

# Cast every identifier column to str.
users = users.astype({'user_id': str})
movies = movies.astype({'movie_id': str})
ratings = ratings.astype({'movie_id': str, 'user_id': str})

print(users.shape, movies.shape, ratings.shape)


# Re-import hwer so that edits made to the package on disk are picked up.
from importlib import reload
import hwer
reload(hwer)




In [None]:
from ast import literal_eval

# `genres` and `keywords` are parsed with literal_eval once missing entries
# have been replaced by "[]"; the parsed keywords are then joined into a
# single space-separated string per movie.
movies["genres"] = movies["genres"].fillna("[]").apply(literal_eval)
movies["keywords"] = (
    movies["keywords"]
    .fillna("[]")
    .apply(literal_eval)
    .apply(" ".join)
)

# Missing years are replaced by -1 so the column can be cast to int.
movies["year"] = movies["year"].fillna(-1).astype(int)

# Replace missing text with "" (this covers `tagline` as well) so the string
# concatenation below never meets NaN.
text_columns = ["title", "keywords", "overview", "tagline", "original_title"]
movies[text_columns] = movies[text_columns].fillna("")

# One combined text field per movie, with single spaces between the parts.
movies["text"] = (
    movies["title"]
    + " " + movies["keywords"]
    + " " + movies["overview"]
    + " " + movies["tagline"]
    + " " + movies["original_title"]
)

# Simple numeric side features.
movies["title_length"] = movies["title"].map(len)
movies["overview_length"] = movies["overview"].map(len)
movies["runtime"] = movies["runtime"].fillna(0.0)


In [None]:
ratings.head().values
# Keep the first three fields of every ratings row as a plain list
# (presumably user id, movie id, rating -- confirm the column order of ratings.csv).
user_item_affinities = [
    [record[position] for position in (0, 1, 2)]
    for record in ratings.values
]


In [None]:

from hwer import MultiCategoricalEmbedding, FlairGlove100AndBytePairEmbedding, CategoricalEmbedding, NumericEmbedding, FlairGlove100Embedding
from hwer import Feature, FeatureSet, ContentRecommendation, FeatureType

# One embedder per feature name; the keys are the same strings that are used
# as `feature_name` for the Feature objects built further down.
embedding_mapper = {
    'gender': CategoricalEmbedding(n_dims=1),
    'age': CategoricalEmbedding(n_dims=1),
    'occupation': CategoricalEmbedding(n_dims=2),
    'zip': CategoricalEmbedding(n_dims=2),
    'text': FlairGlove100Embedding(),
    'numeric': NumericEmbedding(2),
    'genres': MultiCategoricalEmbedding(n_dims=2),
}

recsys = ContentRecommendation(
    embedding_mapper=embedding_mapper,
    knn_params=None,
    n_output_dims=8,
    rating_scale=(1, 5),
)

# User-side features: four categorical columns (all but gender cast to str).
u1 = Feature(feature_name="gender",
             feature_type=FeatureType.CATEGORICAL,
             values=users["gender"].values)
u2 = Feature(feature_name="age",
             feature_type=FeatureType.CATEGORICAL,
             values=users["age"].astype(str).values)
u3 = Feature(feature_name="occupation",
             feature_type=FeatureType.CATEGORICAL,
             values=users["occupation"].astype(str).values)
u4 = Feature(feature_name="zip",
             feature_type=FeatureType.CATEGORICAL,
             values=users["zip"].astype(str).values)
user_data = FeatureSet([u1, u2, u3, u4])

# Item-side features: combined text, genre lists and three numeric columns.
i1 = Feature(feature_name="text",
             feature_type=FeatureType.STR,
             values=movies["text"].values)
i2 = Feature(feature_name="genres",
             feature_type=FeatureType.MULTI_CATEGORICAL,
             values=movies["genres"].values)
i3 = Feature(feature_name="numeric",
             feature_type=FeatureType.NUMERIC,
             values=movies[["title_length", "overview_length", "runtime"]].values)
item_data = FeatureSet([i1, i2, i3])

kwargs = dict(user_data=user_data, item_data=item_data)

# Fit on all users and movies; the two returned values are kept as
# user_vectors and item_vectors.
user_vectors, item_vectors = recsys.fit(
    users["user_id"].values,
    movies["movie_id"].values,
    user_item_affinities,
    **kwargs
)




In [None]:
# Ask the recommender for user '1' and split the returned pairs into two tuples.
recommendations = recsys.find_items_for_user(user='1', positive=[], negative=[])
res, dist = zip(*recommendations)
res = res[:100]  # keep only the first 100 entries

# Titles of the kept recommendations vs. titles user '1' has a ratings row for.
preds = set(movies.loc[movies["movie_id"].isin(res), "title"])
rated_by_user = ratings.loc[ratings["user_id"] == '1']
actuals = set(movies.merge(rated_by_user, on='movie_id')["title"])

# Number of titles present in both sets.
len(preds & actuals)


# Code Graveyard

In [None]:

import tensorflow.keras.backend as K
class FixedNorm(tf.keras.constraints.Constraint):
    """
    Weight constraint that rescales a weight tensor to a fixed L2 norm ``m``.

    For a 2-D tensor every row (slice along the first axis) is divided by its
    L2 norm (plus 1e-6 for numerical safety) and then multiplied by ``m``.

    Refer:
    https://github.com/keras-team/keras/issues/1580
    https://github.com/tensorflow/tensorflow/issues/33755
    """
    def __init__(self, m=1.):
        # Target norm applied after normalisation.
        self.m = m

    def __call__(self, p):
        # Transpose so the reduction over axis 0 and the broadcasted division
        # line up; for a 2-D `p` this normalises each row of the original.
        p = K.transpose(p)
        unit_norm = p / (K.sqrt(K.sum(K.square(p), axis=0)) + 1e-6)
        unit_norm = K.transpose(unit_norm)
        return unit_norm * self.m

    def get_config(self):
        """Return the serialisable configuration (class name and ``m``)."""
        return {'name': self.__class__.__name__, 'm': self.m}

    @classmethod
    def from_config(cls, config):
        """Rebuild the constraint from the dict produced by ``get_config``.

        ``get_config`` includes a ``'name'`` entry that ``__init__`` does not
        accept, so it is dropped here; otherwise ``cls(**config)`` would raise
        ``TypeError`` when a saved model is deserialised.
        """
        config = dict(config)  # do not mutate the caller's dict
        config.pop('name', None)
        return cls(**config)

In [None]:

# NOTE(review): `self` and `random_item_item_aff` are not defined in any cell
# visible here; this looks like it was lifted from a method body and would
# raise NameError on a fresh kernel -- confirm before reviving.
# Pairs each value r with the cosine similarity of the two item vectors
# looked up through `item_id_to_index`.
actual_vs_pred = [(r,cos_sim(item_vectors[self.item_id_to_index[i]],item_vectors[self.item_id_to_index[j]])) for i,j,r in random_item_item_aff]
# Root-mean-square difference between the two members of every pair.
np.sqrt(np.mean(np.square(np.array([a-p for a,p in actual_vs_pred]))))

In [12]:

import tensorflow.keras.backend as K
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Batch size for the toy datasets in this cell; the sample generators also
# derive their sample counts from it.
batch_size = 2


def generate_training_samples():
    """Return a zero-argument generator function producing random training samples.

    Every sample has the form ``((a, b, c), 5)``: ``a`` and ``b`` are fresh
    ``np.random.rand(3)`` vectors, ``c`` is a ``np.random.rand()`` scalar and
    the label is the constant ``5``. One pass yields ``batch_size * 10``
    samples, where ``batch_size`` is read from module scope when the
    generator starts.
    """
    def generator():
        for _ in range(batch_size * 10):
            # Draw order matters for reproducibility under a fixed seed.
            first = np.random.rand(3)
            second = np.random.rand(3)
            third = np.random.rand()
            yield (first, second, third), 5
    return generator

# Element structure produced by the training generator:
# ((vector of 3, vector of 3, scalar), scalar label).
output_shapes = (((3), (3), ()), ())
output_types = (((tf.float32), (tf.float32), tf.float32), tf.float32)
train = tf.data.Dataset.from_generator(generate_training_samples(),
                                       output_types=output_types, output_shapes=output_shapes,)

train = train.shuffle(batch_size).batch(batch_size)

from tensorflow.keras import layers

# Three model inputs matching the three members of a sample's feature tuple.
input_1 = keras.Input(shape=(3,))
input_2 = keras.Input(shape=(3,))
input_3 = keras.Input(shape=(1,))

# Concatenate the inputs and feed them through a 16 -> 8 -> 1 dense stack.
inputs = K.concatenate([input_1, input_2, input_3])
inputs = tf.keras.layers.Flatten()(inputs)
dense_1 = layers.Dense(16, activation='relu')


x = dense_1(inputs)

x = layers.Dense(8, activation="relu")(x)

pred = layers.Dense(1, activation='linear')(x)

model = keras.Model(inputs=[input_1, input_2, input_3],
                    outputs=[pred])

# `learning_rate` is the supported argument name; `lr` is only a deprecated alias.
# NOTE(review): `epsilon=None` and `decay=0.1` are kept as written -- confirm
# they are still accepted by the TensorFlow version in use.
adam = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.1, amsgrad=False)
model.compile(optimizer=adam,
              loss=['mean_squared_error'])


model.fit(train, epochs=2)


def generate_prediction_samples():
    """Return a zero-argument generator function producing unlabeled samples.

    Every sample is a tuple ``(a, b, c)`` with ``a`` and ``b`` drawn via
    ``np.random.rand(3)`` and ``c`` via ``np.random.rand()``. One pass yields
    ``batch_size * 2`` samples, where ``batch_size`` is read from module scope
    when the generator starts.
    """
    def generator():
        for _ in range(batch_size * 2):
            # Draw order matters for reproducibility under a fixed seed.
            first = np.random.rand(3)
            second = np.random.rand(3)
            third = np.random.rand()
            yield first, second, third
    return generator

# Element structure produced by the prediction generator: (vector of 3, vector of 3, scalar).
output_shapes = (3, 3, ())
output_types = (tf.float32, tf.float32, tf.float32)
predict = tf.data.Dataset.from_generator(generate_prediction_samples(),
                                       output_types=output_types, output_shapes=output_shapes,)

predict = predict.batch(batch_size)
next(iter(predict))

# Three ways of feeding a single batch to the model.
model.predict(next(iter(predict)))
model.predict(x=tf.compat.v1.data.make_one_shot_iterator(predict).get_next())
# `Dataset.make_one_shot_iterator()` is a deprecated V1-only method; use the
# compat function (as on the line above) so this also works on V2 datasets.
model.predict(x=tf.compat.v1.data.make_one_shot_iterator(predict).get_next()).reshape((-1))

# Predict batch by batch and collect the flattened outputs.
# NOTE(review): `preds` and `x` rebind names assigned earlier in the notebook.
preds = []
for x in predict:
    preds.append(model.predict(x).reshape((-1)))
    
from more_itertools import flatten
print(preds)
list(flatten(preds))

# model.predict_generator(iter(predict), steps=2)




Epoch 1/2
Epoch 2/2
[array([0.2991138 , 0.51968646], dtype=float32), array([0.36083567, 0.08598872], dtype=float32)]


[0.2991138, 0.51968646, 0.36083567, 0.085988715]

In [None]:
def generate_prediction_samples():
    """Return a zero-argument generator function producing pre-batched samples.

    Every sample is a list ``[a, b, c]`` where ``a`` and ``b`` are
    ``np.random.rand(3)`` vectors reshaped to ``(1, 3)`` and ``c`` is a
    one-element array wrapping ``np.random.rand()``. One pass yields
    ``batch_size * 2`` samples, where ``batch_size`` is read from module scope
    when the generator starts.
    """
    def generator():
        for _ in range(batch_size * 2):
            # Draw order matters for reproducibility under a fixed seed.
            first = np.random.rand(3).reshape((-1, 3))
            second = np.random.rand(3).reshape((-1, 3))
            third = np.array([np.random.rand()])
            yield [first, second, third]
    return generator


# Feed two fresh sample generators to the model, four steps each: once
# wrapped in iter() and once passed directly.
make_samples = generate_prediction_samples()
model.predict_generator(iter(make_samples()), steps=4)
model.predict_generator(make_samples(), steps=4)


# model.predict(next(iter(generate_prediction_samples()())))
