In [None]:
import pandas as pd
import numpy as np
from myfm import MyFMRegressor, RelationBlock
from sklearn.preprocessing import OneHotEncoder
import eland as ed

In [None]:
# Prerequisite: run import-movielens-100k.ipynb first so the index is populated.
# `log_df` is an eland DataFrame bound to the given host and index name.
ES_HOST = 'localhost:9220'
LOG_INDEX = 'logana_log'
log_df = ed.DataFrame(ES_HOST, LOG_INDEX)

In [None]:
# Log field -> short column name for the (user, movie, rating) triples.
# The same mapping drives both the column selection and the rename.
rating_fields = {
    'request.id.user': 'user_id',
    'response.results.doc_movie.id': 'movie_id',
    'response.results.doc_movie.integer.rating': 'rating',
}

# Materialise the selected fields as a pandas frame; reset_index() moves the
# incoming index into an ordinary column and leaves a default positional index.
ratings = (
    ed.eland_to_pandas(log_df[list(rating_fields)])
    .rename(columns=rating_fields)
    .reset_index()
)
ratings

In [None]:
# For each id column: the sorted distinct ids, plus for every rating row the
# position of its id within that distinct array (np.unique's inverse).
user_ids, user_indices = np.unique(ratings['user_id'], return_inverse=True)
movie_ids, movie_indices = np.unique(ratings['movie_id'], return_inverse=True)

In [None]:
# User-side fields of the log: the user id plus every request attribute.
columns = [
    field for field in log_df.columns
    if field == 'request.id.user' or field.startswith('request.attributes.')
]

# Shorten each dotted field name to its last segment (e.g. 'request.id.user' -> 'user').
short_names = {field: field.split('.')[-1] for field in columns}

# One row per user: keep the first log row seen for each user id, index the
# result by user_id, and discard the zip_code column.
users = (
    ed.eland_to_pandas(log_df[columns])
    .groupby('request.id.user').head(1)
    .rename(columns=short_names)
    .rename(columns={'user': 'user_id'})
    .set_index('user_id')
    .drop(columns=['zip_code'])
)
users

In [None]:
# Movie-side fields of the log: everything under the doc_movie prefix.
columns = [
    field for field in log_df.columns
    if field.startswith('response.results.doc_movie.')
]

# Shorten each dotted field name to its last segment (e.g. '...doc_movie.id' -> 'id').
short_names = {field: field.split('.')[-1] for field in columns}

# One row per movie: keep the first log row seen for each movie id and index
# the result by movie_id. 'rating' is removed so the target value does not end
# up among the movie features.
# NOTE(review): 'zip_code' looks like a user attribute - confirm it really
# appears among the movie columns, otherwise this drop raises KeyError.
movies = (
    ed.eland_to_pandas(log_df[columns])
    .groupby('response.results.doc_movie.id').head(1)
    .rename(columns=short_names)
    .rename(columns={'id': 'movie_id'})
    .set_index('movie_id')
    .drop(columns=['rating', 'imdb_url', 'zip_code'])
)
movies

In [None]:
# reset_index() turns the id index back into a column, so the id itself is
# one-hot encoded alongside the other attributes.
user_feature_frame = users.reset_index()
movie_feature_frame = movies.reset_index()

# handle_unknown='ignore': categories not seen during fit do not raise at transform time.
user_ohe = OneHotEncoder(handle_unknown='ignore').fit(user_feature_frame)
movie_ohe = OneHotEncoder(handle_unknown='ignore').fit(movie_feature_frame)

In [None]:
# Reorder the attribute tables to follow user_ids / movie_ids, so that row i of
# each encoded matrix corresponds to user_ids[i] / movie_ids[i].
user_rows = users.reindex(user_ids).reset_index()
movie_rows = movies.reindex(movie_ids).reset_index()

X_user = user_ohe.transform(user_rows)
X_movie = movie_ohe.transform(movie_rows)

In [None]:
# One relation block per side: the per-rating row positions paired with the
# encoded feature matrix those positions point into.
block_user, block_movie = (
    RelationBlock(row_positions, features)
    for row_positions, features in (
        (user_indices, X_user),
        (movie_indices, X_movie),
    )
)

In [None]:
# Fit a rank-2 factorization machine on the observed ratings. No plain feature
# matrix is passed (X is None); all features arrive through the relation blocks.
training_blocks = [block_user, block_movie]
fm = MyFMRegressor(rank=2).fit(None, ratings['rating'], X_rel=training_blocks)

In [None]:
# Every (user, movie) combination, one row per pair, so that each pair can be
# scored whether or not it was ever rated.
#
# Built with vectorised numpy calls instead of a nested Python loop: the loop
# version allocates one dict per pair (len(user_ids) * len(movie_ids) of them)
# before pandas ever sees the data.
n_users, n_movies = len(user_ids), len(movie_ids)
prediction_df = pd.DataFrame({
    # Users vary slowest: each user is repeated once per movie ...
    'user_id': np.repeat(user_ids, n_movies),
    # ... while the whole movie list is cycled once per user.
    'movie_id': np.tile(movie_ids, n_users),
    # Positions of the ids within user_ids / movie_ids, in the same row order.
    'user_index': np.repeat(np.arange(n_users, dtype=np.int64), n_movies),
    'movie_index': np.tile(np.arange(n_movies, dtype=np.int64), n_users),
})

In [None]:
# Score every row of prediction_df: each block maps the row's user/movie
# position onto the corresponding encoded feature matrix.
all_pairs_blocks = [
    RelationBlock(prediction_df[index_column], features)
    for index_column, features in (
        ('user_index', X_user),
        ('movie_index', X_movie),
    )
]
predicted_rating = fm.predict(None, all_pairs_blocks)

In [None]:
# Store the model output next to the (user, movie) pair it was computed for.
prediction_df["prediction"] = predicted_rating

In [None]:
# Put the observed rating (renamed to `ground_truth`) beside each prediction.
# Left join: every predicted pair is kept, and pairs that were never rated get
# NaN in `ground_truth`.
# The join keys are spelled out instead of being inferred from whichever column
# names the two frames happen to share.
prediction_df.merge(
    ratings.rename(columns={'rating': 'ground_truth'}),
    on=['user_id', 'movie_id'],
    how='left',
)
