<a href="https://colab.research.google.com/github/joedockrill/jester-collab-filtering/blob/master/JesterCollabDemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Joke recommendation system using fastai.collab & Jester

<img src="https://drive.google.com/uc?export=download&id=1C48DTaTbg3mYeMIAhAqrFQpP41i0m6J2" align="left"/>

This demo shows the use of a [fastai](https://fast.ai) collaborative filtering model to recommend jokes from the [Jester dataset](http://eigentaste.berkeley.edu/dataset/).

The system makes predictions for a new user the model hasn't seen (because I don't like being told "you can't do that") by using Euclidean distances to find the nearest known user, which works pretty well. Since I'm computing those distances anyway, I also use them to derive a very crude confidence level for the predictions coming from the model.

The system cold-starts with a few jokes from the densest part of the ratings matrix and then switches to predictions. 

# Please Note
As with all of my Heroku-hosted demos, I need to download the model and data files from Google Drive. This may take a few seconds. If the UI hasn't rendered below, it's still downloading.

# Disclaimer
I did not create the jokes dataset, and I find it incredibly likely that you will find at least some of the jokes distasteful and/or offensive.

In [1]:
# files we need to download: model, ratings matrix, jokes
import urllib.request

# (url, local filename) for each download: ratings matrix, joke texts, exported learner
downloads = [
  ("https://drive.google.com/uc?export=download&id=1JXmaUWKoZl8J2RGQeH76zBcZKoPQQnm2", "ratings-matrix.csv"),
  ("https://drive.google.com/uc?export=download&id=1-8w7SaBaA6Yp4Rr3zlf1IcaFgYWeLKBQ", "jokes.csv"),
  ("https://drive.google.com/uc?export=download&id=1357gAY2nYI_Jf5wKrmvE-zgN2FhrQqVo", "jester.pkl"),
]

# the result is bound to x (as before) so the notebook cell does not echo it
for url, filename in downloads:
  x = urllib.request.urlretrieve(url, filename)

In [2]:
# recommendation system

from fastai.collab import *
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

# implemented as a class because it's stateful 
# contains hard-coded paths because it's a mickey-mouse demo ;-)

class JokeRecommender():
  """Stateful joke recommender for a single new user the model has not seen.

  The new user is kept as row 0 of the ratings matrix. Predictions are made by
  asking the learner what the nearest known user (euclidean distance over the
  rating rows) would think of each joke the new user has not rated yet.
  Joke ids are 1-based; matrix columns are 0-based.
  """

  # sentinel stored in the matrix for "not rated yet"
  _UNRATED = 99

  def __init__(self):
    """constructor: load model, get initial distances and preds"""
    # NOTE(review): jester.pkl is a pickle fetched over the network; loading a pickle
    # can execute arbitrary code, so only point this at a source you trust.
    self.__learn = load_learner(".", "jester.pkl")
    self.__jokes = pd.read_csv("jokes.csv")
    self.__mtx = self.__create_matrix()
    self.__start_dist = self.__max_distance()
    self.__nn_index, self.__nn_dist = self.__nearest_user()
    self.__get_preds()

  def __create_matrix(self):
    """load the ratings (dropping the first csv column) and wedge in an all-unrated fake user as row 0"""
    df = pd.read_csv("ratings-matrix.csv")
    ratings = df.iloc[:, 1:].values
    # size the fake row from the data rather than assuming exactly 100 jokes
    new_user = np.full(ratings.shape[1], self._UNRATED)
    return np.concatenate(([new_user], ratings))

  def __user_distances(self):
    """euclidean distance from the new user (row 0) to every known user, as a 1-d array"""
    # row 0 itself is left out so its zero self-distance can never be picked as a neighbour
    return euclidean_distances(self.__mtx[1:], [self.__mtx[0]])[:, 0]

  def __unrated_joke_ids(self):
    """1-based ids of the jokes the new user has not rated yet, in ascending order"""
    return [int(col) + 1 for col in np.flatnonzero(self.__mtx[0] == self._UNRATED)]

  def __max_distance(self):
    """used as a starting distance for a crude confidence level"""
    return self.__user_distances().max()

  def __nearest_user(self):
    """find our nearest neighbour, return their id (row index among known users) and distance"""
    dists = self.__user_distances()
    nn_index = dists.argmin()  # first minimum wins on ties
    return nn_index, dists[nn_index]

  def __get_preds(self):
    """fetches preds and stores them (we might stay nearest to this user for several jokes)"""
    rows = [{"user_id":self.__nn_index, "joke_id":joke, "rating":0.}
            for joke in self.__unrated_joke_ids()]
    df = pd.DataFrame(rows)

    # (prediction, joke) pairs rather than a dict keyed by prediction, so two jokes
    # with the same predicted rating cannot overwrite each other
    ranked = []
    for index, row in df.iterrows():
      _,_,pred = self.__learn.predict(df.loc[index])
      ranked.append((pred.item(), int(row.joke_id)))

    # ascending, so list.pop() hands out the best remaining prediction
    ranked.sort()
    self.__ranked = ranked

  def add_rating(self, joke, rating):
    """add a rating to the matrix, get new preds if we're now nearer a different user

    joke   -- 1-based joke id
    rating -- the user's rating for that joke
    """
    self.__mtx[0, (joke-1)] = rating

    nn_index, nn_dist = self.__nearest_user()
    neighbour_changed = (self.__nn_index != nn_index)

    # the distance moves with every rating even when the neighbour stays the same,
    # so always store it; otherwise the confidence level goes stale
    self.__nn_index = nn_index
    self.__nn_dist = nn_dist

    if neighbour_changed:
      self.__get_preds()

  def next_joke(self):
    """return the next joke to show the user and predicted rating

    Returns (joke_id, predicted_rating) for the best remaining prediction,
    or (None, None) when nothing is left to recommend.
    """
    while self.__ranked:
      pred, joke = self.__ranked.pop()
      # skip jokes rated since the preds were fetched (e.g. the cold-start jokes)
      if self.__mtx[0, joke - 1] == self._UNRATED:
        return joke, pred
    return None, None

  def joke_text(self, joke_num):
    """return joke text for the ui (joke_num is the 1-based joke id)"""
    return self.__jokes.loc[joke_num - 1, "joke"]

  @property
  def confidence(self):
    """return a very crude confidence level (0..1) in our predictions based on nn distances"""
    # this is really just a function of how close we are to a real user so it's smoke and mirrors
    # for the ui but it shows how we're warming up. * 1.2 just "feels about right".
    c = ((self.__start_dist - self.__nn_dist) / self.__start_dist) * 1.2
    return np.clip(c, 0, 1)

  @property
  def unrated_jokes(self):
    """number of jokes still unrated for the ui"""
    return int(np.count_nonzero(self.__mtx[0] == self._UNRATED))


In [3]:
# create the recommender (the constructor reads jester.pkl, jokes.csv and
# ratings-matrix.csv by relative path, so those files must already be in place)
jester = JokeRecommender()

# UI
import ipywidgets as widgets
from IPython.display import display

# UI state shared between the render functions and the button callbacks
ui_cold_start_container = None    # box holding the cold-start jokes
ui_cold_start_sliders = None      # rating sliders inside that box
ui_recommender_container = None   # box holding the joke currently being recommended
ui_slider = None                  # not referenced by the UI code below; kept so the name still exists
ui_recommender_slider = None      # rating slider for the joke currently being recommended

def create_joke(joke, predicted_rating=0, show_prediction=True):
  """Build the widgets for showing and rating one joke.

  joke             -- 1-based joke id, or None to show the "no more jokes" message
  predicted_rating -- value shown on the prediction bar
  show_prediction  -- whether the prediction bar is included in the layout

  Returns (box, slider): the container to display and the rating slider,
  whose .tag attribute carries the joke id.
  """
  if joke is None:
    joke_text = "There are no more jokes left for you to rate.<p>:-("
  else:
    joke_text = jester.joke_text(joke)

  markup = "".join(["<div style='font-size:20px;' align='center'>", joke_text, "</div>"])
  html = widgets.HTML(value=markup)
  html.layout = widgets.Layout(border="solid", padding="5px", width = "100%")

  slider = widgets.IntSlider(value=0, min=-10, max=10, step=1, readout=False)
  slider.layout.width = "100%"
  slider.tag = joke  # lets the submit handlers know which joke this slider rates

  pbar = widgets.IntProgress(value=predicted_rating, min=-10, max=10, step=1)
  pbar.layout.width = "100%"

  rating_row = [widgets.Label("Your Rating:"), slider]
  prediction_row = [widgets.Label("Predicted Rating:"), pbar] if show_prediction else []

  # two-column grid: fixed-width label on the left, control on the right
  grid = widgets.GridBox(children=rating_row + prediction_row)
  grid.layout = widgets.Layout(grid_template_columns="130px auto", width="75%")

  box = widgets.VBox([html, grid])
  box.layout = widgets.Layout(align_items="center", width="100%")
  return box, slider
  
def render_cold_start():
  """Display the cold-start UI: five fixed jokes, one submit button, no predictions.

  Returns (box, sliders): the displayed container and the list of rating sliders.
  """
  cold_start_jokes = (8, 20, 5, 17, 7)
  rendered = [create_joke(joke=joke_id, show_prediction=False) for joke_id in cold_start_jokes]
  sliders = [slider for _, slider in rendered]

  submit = widgets.Button(description="Submit")
  submit.layout.margin = "20px"
  submit.on_click(on_click_cold_start)

  # jokes first, submit button underneath
  box = widgets.VBox([container for container, _ in rendered] + [submit])
  box.layout = widgets.Layout(align_items="center", padding="10px", width="100%")
  display(box)

  return box, sliders

def on_click_cold_start(btn):
  """Cold-start submit handler: record the ratings and switch to the normal UI.

  btn -- the button that was clicked (unused; supplied by the on_click callback)
  """
  global ui_cold_start_container

  # already switched to the recommender UI (e.g. a repeated submit): nothing to do
  if ui_cold_start_container is None:
    return

  for slider in ui_cold_start_sliders:
    jester.add_rating(slider.tag, slider.value)

  ui_cold_start_container.close()
  # reset to None instead of `del` so the global name stays defined for any later access
  ui_cold_start_container = None
  render_recommender()

def render_recommender():
  """Display the next recommended joke with its prediction and the progress panel.

  Replaces the previously displayed recommender box, if any, and stores the new
  box and rating slider in the module-level UI state.
  """
  global ui_recommender_container
  global ui_recommender_slider

  if ui_recommender_container is not None:
    ui_recommender_container.close()
    # reset to None instead of `del`: if anything below raises, the name must still
    # exist or every later call would fail with NameError on the check above
    ui_recommender_container = None

  joke, pred = jester.next_joke()
  container, ui_recommender_slider = create_joke(joke, pred)

  btn = widgets.Button(description="Submit")
  btn.layout.margin = "20px"
  btn.on_click(on_click_recommender)
  btn.disabled = (joke is None)  # nothing left to rate

  # read each value once so the bar and its label always agree
  confidence = jester.confidence
  rated = 100 - jester.unrated_jokes

  items = [widgets.Label("Confidence Level:"),
           widgets.FloatProgress(value=confidence, min=0, max=1),
           widgets.Label("{:.0%}".format(confidence) + " confidence in prediction accuracy"),
           widgets.Label("Jokes Rated:"),
           widgets.IntProgress(value=rated, min=0, max=100),
           widgets.Label("You have rated " + str(rated) + " out of 100 jokes")
          ]

  grid = widgets.GridBox(children=items)
  grid.layout = widgets.Layout(grid_template_columns="110px 300px auto", grid_gap="10px",
                               width="100%", padding="5px", border="solid")

  box = widgets.VBox([container, btn, grid])
  box.layout = widgets.Layout(align_items="center", padding="10px", width="100%")
  display(box)

  ui_recommender_container = box

def on_click_recommender(btn):
  """Normal-UI submit handler: record the rating for the joke on screen, then show the next one.

  btn -- the button that was clicked (unused; supplied by the on_click callback)
  """
  current = ui_recommender_slider
  jester.add_rating(current.tag, current.value)
  render_recommender()

# start the UI: show the cold-start jokes and keep the container and sliders
# in the globals that on_click_cold_start reads
ui_cold_start_container, ui_cold_start_sliders = render_cold_start()

VBox(children=(VBox(children=(HTML(value="<div style='font-size:20px;' align='center'>Q. Did you hear about th…

VBox(children=(VBox(children=(HTML(value='<div style=\'font-size:20px;\' align=\'center\'>A guy goes into conf…

VBox(children=(VBox(children=(HTML(value='<div style=\'font-size:20px;\' align=\'center\'>A man arrives at the…