```yaml
titan: v1
service:
    image: scipy
    machine: 
        cpu: 4
        memory: 2048MB
```

In [1]:
import os
import time
import json
import functools
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

In [2]:
# 1M Dataset -- movie catalogue.
# The file is ';'-separated; only the id, title and genre columns are read,
# with explicit dtypes so the id is stored as a 32-bit integer.
df_movies = pd.read_csv(
    "https://raw.githubusercontent.com/jfuentesibanez/datasets/master/movies.csv",
    sep=';',
    usecols=['movieId', 'title', 'genre'],
    dtype={'movieId': 'int32', 'title': 'str', 'genre': 'str'},
)

In [3]:
# 1M Dataset -- user ratings.
# The file is ';'-separated; only the (userId, movieId, rating) triple is read,
# with compact 32-bit dtypes.
df_ratings = pd.read_csv(
    "https://raw.githubusercontent.com/jfuentesibanez/datasets/master/ratings.csv",
    sep=';',
    usecols=['userId', 'movieId', 'rating'],
    dtype={'userId': 'int32', 'movieId': 'int32', 'rating': 'float32'},
)

In [4]:
# Keep only the first 200,000 rating rows (positional slice).
df_ratings = df_ratings.head(200000)

# Reshape the long (userId, movieId, rating) table into a wide table with one
# row per user and one column per movie; combinations without a rating become 0.
df_movie_features = (
    df_ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

In [5]:
# Dense user x movie rating matrix as a NumPy array.
# `.values` replaces DataFrame.as_matrix(), which is deprecated and has been
# removed from current pandas releases.
R = df_movie_features.values
# Mean of each row (one value per user), taken over every movie column,
# including the entries that were zero-filled for unrated movies.
user_ratings_mean = np.mean(R, axis=1)

  """Entry point for launching an IPython kernel.


In [6]:
# Truncated SVD of the rating matrix with k = 50 singular triplets.
U, sigma, Vt = svds(R, k=50)
# svds returns the singular values as a 1-D vector; expand it to a diagonal matrix.
sigma = np.diag(sigma)
# Rank-50 reconstruction U * sigma * Vt, with each user's mean rating added to that user's row.
# NOTE(review): R is factorised without first subtracting user_ratings_mean, yet the
# mean is added back here -- confirm whether demeaning before svds was intended.
all_user_predicted_ratings = U.dot(sigma).dot(Vt) + user_ratings_mean.reshape(-1, 1)

In [7]:
# Wrap the prediction matrix in a DataFrame whose columns are the movie ids of the
# pivoted rating table; rows keep the default positional index.
preds_df = pd.DataFrame(
    data=all_user_predicted_ratings,
    columns=df_movie_features.columns,
)

In [8]:
def recommend_movies(preds_df, userID, movies_df, original_ratings_df, num_recommendations=5):
    """Return the movies a user already rated and the best predictions among the rest.

    Parameters
    ----------
    preds_df : pandas.DataFrame
        Predicted ratings, one row per user and one column per movie id.
        The row at position ``userID - 1`` is used for ``userID``.
    userID : int
        1-based user id.
    movies_df : pandas.DataFrame
        Movie catalogue; must contain a ``movieId`` column.
    original_ratings_df : pandas.DataFrame
        Ratings with ``userId``, ``movieId`` and ``rating`` columns.
    num_recommendations : int, default 5
        Maximum number of recommended movies to return.

    Returns
    -------
    (user_full, predictions) : tuple of pandas.DataFrame
        ``user_full`` holds the user's ratings joined with the catalogue, highest
        rating first. ``predictions`` holds the catalogue rows of movies the user has
        not rated, ordered by predicted rating (highest first), without the
        prediction column itself.

    Raises
    ------
    IndexError
        If ``userID`` does not map to a row of ``preds_df``.
    """
    user_row_number = userID - 1
    # A negative position would silently wrap around to another user's row.
    if not 0 <= user_row_number < len(preds_df):
        raise IndexError(
            "userID %r is out of range for a prediction table with %d rows"
            % (userID, len(preds_df))
        )

    # Name the series and its index explicitly so the merge below does not depend
    # on how preds_df happens to label its rows or columns.
    sorted_user_predictions = (
        preds_df.iloc[user_row_number]
        .sort_values(ascending=False)
        .rename('Predictions')
        .rename_axis('movieId')
    )

    # Everything the user has rated, enriched with movie details, best rated first.
    user_data = original_ratings_df[original_ratings_df.userId == userID]
    user_full = (
        user_data
        .merge(movies_df, how='left', on='movieId')
        .sort_values(['rating'], ascending=False)
    )

    # Catalogue entries the user has not rated, ranked by predicted rating.
    # Movies without a prediction get NaN and therefore sort last.
    unseen_movies = movies_df[~movies_df['movieId'].isin(user_full['movieId'])]
    predictions = (
        unseen_movies
        .merge(sorted_user_predictions.reset_index(), how='left', on='movieId')
        .sort_values('Predictions', ascending=False)
        .iloc[:num_recommendations]
        .drop(columns='Predictions')
    )

    return user_full, predictions

In [9]:
# Shared metrics state, all times in milliseconds:
#   'total'    - accumulated processing time
#   'requests' - number of measured calls
#   'recent'   - most recent processing times, newest first
metrics_ = dict(total=0, requests=0, recent=[])

# Upper bound on how many entries are kept in metrics_['recent']
max_recent_items = 20


def store_metrics(start):
  """Record one finished call in the shared ``metrics_`` state.

  ``start`` is the ``time.time()`` value taken when the call began. The elapsed
  time is stored in whole milliseconds: it is added to the running total, the
  request counter is incremented, and the value is placed at the front of the
  recent-times list, which is then cut back to ``max_recent_items`` entries.
  """
  duration_ms = int((time.time() - start) * 1000)
  metrics_['total'] += duration_ms
  metrics_['requests'] += 1

  history = metrics_['recent']
  history.insert(0, duration_ms)  # newest first
  if len(history) >= max_recent_items:
    metrics_['recent'] = history[:max_recent_items]


def measure(fn):
  """Decorator that reports the wall-clock duration of every call to ``fn``.

  The start time is handed to ``store_metrics`` once ``fn`` finishes, whether it
  returned normally or raised; the return value or exception of ``fn`` is passed
  through unchanged.
  """
  def timed(*args, **kwds):
    started_at = time.time()
    try:
      result = fn(*args, **kwds)
    finally:
      # Runs on success and on error alike, so failed calls are counted too
      store_metrics(started_at)
    return result
  return functools.wraps(fn)(timed)


def endpoint(fn):
  """Decorator that adapts ``fn`` to take a JSON request string.

  The first positional argument (or ``'{}'`` when none is given) is parsed as
  JSON; the value under its ``'args'`` key (``{}`` when absent) is passed to
  ``fn`` as the only positional argument. Keyword arguments are forwarded as-is;
  any further positional arguments are dropped.
  """
  def handler(*args, **kwds):
    raw_request = args[0] if args else '{}'
    payload = json.loads(raw_request)
    return fn(payload.get('args', {}), **kwds)
  return functools.wraps(fn)(handler)

In [10]:
def metrics():
  """Return the collected call metrics as a JSON string (indent of 2).

  All times are in milliseconds. The report contains the number of measured
  requests, their average time truncated to an integer (0 when nothing has been
  measured yet), the total time, and the list of most recent times.
  """
  elapsed_ms = metrics_['total']
  call_count = metrics_['requests']
  if call_count > 0:
    mean_ms = int(elapsed_ms / call_count)
  else:
    mean_ms = 0
  report = {
      'requests': call_count,
      'average': mean_ms,
      'total_time': elapsed_ms,
      'last_calls': metrics_['recent'],
  }
  return json.dumps(report, indent=2)

def _int_param(args, default):
  """Extract the integer request parameter from ``args``.

  The value is read from ``args['param']``; when that key is missing, the legacy
  key named ``default`` is consulted and, failing that, ``default`` itself is used.
  The value may be a list of strings (the first all-digit entry wins) or a single
  scalar, which is parsed as a whole rather than character by character.

  Raises ValueError when no digit-only value is present.
  """
  raw = args.get('param', args.get(default, default))
  candidates = raw if isinstance(raw, (list, tuple)) else [raw]
  for candidate in candidates:
    text = str(candidate).strip()
    if text.isdigit():
      return int(text)
  raise ValueError("expected a non-negative integer in 'param', got %r" % (raw,))


@measure
@endpoint
def recompute_svd(args):
  """Re-run the truncated SVD with ``k`` taken from the request (default 50).

  The module-level ``all_user_predicted_ratings`` and ``preds_df`` are replaced
  with the new reconstruction so later ``viewed`` / ``recommended`` calls use it.
  Returns ``k`` as a string.
  """
  # Without the global declaration the new predictions would be bound to locals
  # and thrown away when the function returns.
  global all_user_predicted_ratings, preds_df
  k = _int_param(args, '50')
  U, sigma, Vt = svds(R, k = k)
  sigma = np.diag(sigma)
  # Same reconstruction formula as the initial computation earlier in the notebook
  all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)
  preds_df = pd.DataFrame(all_user_predicted_ratings, columns = df_movie_features.columns)
  return str(k)

@endpoint
def viewed(args):
  """Return the titles the requested user (default user 1) has already rated, one per line."""
  user_id = _int_param(args, '001')
  already_rated, predictions = recommend_movies(preds_df, user_id, df_movies, df_ratings, 10)
  return already_rated.title.to_string(index=False)

@endpoint
def recommended(args):
  """Return the titles of the top 10 recommendations for the requested user (default user 1), one per line."""
  user_id = _int_param(args, '001')
  already_rated, predictions = recommend_movies(preds_df, user_id, df_movies, df_ratings, 10)
  return predictions.title.to_string(index=False)

In [11]:
# Mock request for exercising the endpoints locally: a JSON document whose
# 'args' entry carries the single parameter as a list of strings.
args = dict(param=['400'])
REQUEST = json.dumps(dict(args=args))

In [12]:
# GET /metrics
# Print the JSON metrics report; no @measure-decorated endpoint has been called
# at this point, so every counter is still zero.
print(metrics())

{
  "requests": 0,
  "average": 0,
  "total_time": 0,
  "last_calls": []
}


In [13]:
# POST /viewed
# Print the titles already rated by the user named in the mock request (param 400),
# ordered from highest to lowest rating.
print(viewed(REQUEST))

                                      Glory (1989)
                            American Beauty (1999)
                           Sixth Sense  The (1999)
 Star Wars: Episode V - The Empire Strikes Back...
                   Ferris Bueller's Day Off (1986)
                                Toy Story 2 (1999)
                  Silence of the Lambs  The (1991)
                                   Die Hard (1988)
                                   Election (1999)
                        Usual Suspects  The (1995)
                                   Rain Man (1988)
                           Schindler's List (1993)
                              Lethal Weapon (1987)
                                 GoodFellas (1990)
                        Wrong Trousers  The (1993)
                        Blues Brothers  The (1980)
                    When Harry Met Sally... (1989)
                                Skulls  The (2000)
                         Better Off Dead... (1985)
                             Pr

In [14]:
# POST /recommended
# Print the titles of up to 10 movies the user in the mock request has not rated,
# ordered by predicted rating.
print(recommended(REQUEST))

                  Shawshank Redemption  The (1994)
         Star Wars: Episode IV - A New Hope (1977)
                        Saving Private Ryan (1998)
 Star Wars: Episode VI - Return of the Jedi (1983)
                               Pulp Fiction (1994)
                            Terminator  The (1984)
                                  Toy Story (1995)
                        Breakfast Club  The (1985)
                                  Airplane! (1980)
                 E.T. the Extra-Terrestrial (1982)


In [15]:
# POST /recompute_svd
# Re-run the SVD with k taken from the mock request's param (400) and print the k
# that was used; this is the only call here that is timed by @measure.
print(recompute_svd(REQUEST))

400
