<a href="https://colab.research.google.com/github/ianwong74/Jester-Joke-Recommender-System/blob/main/csda_1040_lab1_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Install the third-party packages this notebook needs (intended for a fresh Colab runtime).
!pip install html2text
!pip install nltk
# NOTE(review): the notebook imports the standard-library `sqlite3` module (`import sqlite3 as db`);
# presumably this PyPI package is not required -- confirm before removing.
!pip install db-sqlite3
# fastai is pinned to the 1.x API used below (fastai.collab / fastai.tabular / load_learner).
!pip install fastai==1.0.61
!pip install pytest-shutil
# NOTE(review): `viola` may be a typo for `voila` (the notebook dashboard renderer) -- confirm.
!pip install viola

Collecting html2text
  Downloading https://files.pythonhosted.org/packages/ae/88/14655f727f66b3e3199f4467bafcc88283e6c31b562686bf606264e09181/html2text-2020.1.16-py3-none-any.whl
Installing collected packages: html2text
Successfully installed html2text-2020.1.16
Collecting db-sqlite3
  Downloading https://files.pythonhosted.org/packages/ff/00/e1f3d7bf1e0bff7c0574c0d5535c041e139d4ce43db196147e4c62f52ed5/db-sqlite3-0.0.1.tar.gz
Collecting db
  Downloading https://files.pythonhosted.org/packages/a9/22/f65d64c83e63790b3273c6adb3bff338ad594f46d84b41bd1f94593b40a6/db-0.1.1.tar.gz
Collecting antiorm
[?25l  Downloading https://files.pythonhosted.org/packages/0b/f8/71baa4824d9666c1be51d117119579a97f461ddbded48b2e01a6ad0554b5/antiorm-1.2.1.tar.gz (171kB)
[K     |████████████████████████████████| 174kB 27.6MB/s 
[?25hBuilding wheels for collected packages: db-sqlite3, db, antiorm
  Building wheel for db-sqlite3 (setup.py) ... [?25l[?25hdone
  Created wheel for db-sqlite3: filename=db_sqlite3

In [4]:
import sqlite3 as db
import re
import os
import codecs
import pandas as pd
import numpy as np
import html2text
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import random
import shutil
import functools
import time
import warnings
import urllib.request
warnings.filterwarnings('ignore') 
from fastai.collab import *
from fastai.tabular import *
#from google.colab import drive
#drive.mount("/content/drive", force_remount=True)
np.set_printoptions(precision=6, suppress=True)
from sklearn.metrics.pairwise import euclidean_distances
from io import StringIO
import random
import ipywidgets as widgets
from IPython.display import display

In [5]:
# Download the jester_jokes.db SQLite database from a shared Google Drive file and save it locally as `jj`.
# CSDA 1040 team: to use your own training results, replace the shared file id below; otherwise the team's master version is used.

# https://drive.google.com/file/d/1fCapOdOj5OjqX9snEWqYgBMQvU6EGWnA/view?usp=sharing
!gdown --id '1fCapOdOj5OjqX9snEWqYgBMQvU6EGWnA' --output jj 
# `cn` is the module-level connection that Jester_Jokes reads the `jokes` and `ratings` tables through.
cn = db.connect('jj')
cmd = cn.cursor()
# Optional sanity check: record count of the jokes table.
#cmd.execute('select count(1) as cnt from jokes')
#print(cmd.fetchall())

Downloading...
From: https://drive.google.com/uc?id=1fCapOdOj5OjqX9snEWqYgBMQvU6EGWnA
To: /content/jj
143MB [00:00, 185MB/s]


In [6]:
# Check Rating table
#cmd.execute('select * from ratings limit 1')
#print(cmd.fetchall())

In [7]:
# Download the exported learner file (jester_jokes.pkl) from Google Drive and save it locally as `pkl`;
# Jester_Jokes.__init__ loads it with load_learner('.', 'pkl').
# https://drive.google.com/file/d/1-1RYLX83MR5CP0E5Uw0stjNo82V9ns3o/view?usp=sharing
!gdown --id '1-1RYLX83MR5CP0E5Uw0stjNo82V9ns3o' --output pkl

Downloading...
From: https://drive.google.com/uc?id=1-1RYLX83MR5CP0E5Uw0stjNo82V9ns3o
To: /content/pkl
7.44MB [00:00, 65.6MB/s]


In [8]:
# define class & properties
class Jester_Jokes():
    """Joke recommender that pairs a nearest-neighbour lookup with a pre-trained learner.

    A fake "new user" row is placed at the top of the ratings matrix. Every time the
    visitor rates a joke the row is updated, the closest real user is looked up again,
    and the learner's predictions for that user decide which joke is shown next.

    Relies on names defined elsewhere in the notebook: the open SQLite connection `cn`,
    `load_learner`, `euclidean_distances`, `pd` and `np`.
    """

    _UNRATED = 99    # sentinel stored in the matrix for "this joke has not been rated"
    _N_JOKES = 100   # jokes are numbered 1.._N_JOKES and live in matrix columns 0.._N_JOKES-1

    def __init__(self):
      """constructor: load model, get initial distances and preds"""
      # NOTE(review): load_learner deserialises the exported file (pickle under the hood,
      # as far as fastai v1 is concerned) -- only point it at files you trust.
      self.__learn = load_learner('.','pkl')
      self.__jokes = pd.read_sql('select * from jokes', cn)
      self.__mtx = self.__create_matrix()
      self.__start_dist = self.__max_distance()
      self.__nn_index, self.__nn_dist = self.__nearest_user()
      self.__get_preds()

    def __create_matrix(self):
      """build the rating matrix: every column of `ratings` except the first, with a fake
      all-unrated user wedged in as row 0"""
      # presumably the dropped first column is an id/count column rather than a rating -- confirm
      ratings = pd.read_sql('select * from ratings', cn).iloc[:, 1:].values
      # Size the fake row from the table itself (instead of a hard-coded width) and make it
      # float so fractional slider ratings are never truncated by an integer matrix.
      new_user = np.full((1, ratings.shape[1]), self._UNRATED, dtype=float)
      return np.concatenate((new_user, ratings))

    def __distances(self):
      """1-d array of euclidean distances from each real user (rows 1..n) to the fake user (row 0)"""
      # Row 0 is sliced off so the fake user's zero distance to itself can never be picked.
      return euclidean_distances(self.__mtx, [self.__mtx[0]])[1:, 0]

    def __max_distance(self):
      """used as a starting distance for a crude confidence level"""
      return self.__distances().max()

    def __nearest_user(self):
      """find our nearest neighbour, return their 0-based row position among real users and the distance"""
      dists = self.__distances()
      # argmin returns the first minimum; no float equality test, and an exact match
      # (distance 0) resolves to the matching user instead of the fake row.
      nn_index = int(np.argmin(dists))
      return nn_index, dists[nn_index]

    def __unrated_joke_ids(self):
      """1-based numbers of the jokes the fake user has not rated yet"""
      return [i + 1 for i in range(self._N_JOKES) if self.__mtx[0][i] == self._UNRATED]

    def __get_preds(self):
      """fetches preds and stores them (we might stay nearest to this user for several jokes)"""
      rows = [{"user_id":self.__nn_index, "joke_id":joke, "rating":0.} for joke in self.__unrated_joke_ids()]
      df = pd.DataFrame(rows)

      # (prediction, joke) pairs rather than a dict keyed by prediction: two jokes with the
      # same predicted rating must both stay in the queue.
      queue = []
      for index, row in df.iterrows():
        _,_,pred = self.__learn.predict(df.loc[index])
        queue.append((pred.item(), int(row.joke_id)))

      # ascending order, so list.pop() hands out the highest prediction first
      queue.sort()
      self.__queue = queue

    def add_rating(self, joke, rating):
      """add a rating to the matrix, get new preds if we're now nearer a different user

      joke   -- 1-based joke number
      rating -- the visitor's rating for that joke
      """
      self.__mtx[0, (joke-1)] = rating

      nn_index, nn_dist = self.__nearest_user()
      # The distance changes with every rating even when the neighbour does not,
      # so always refresh it; otherwise `confidence` reports a stale value.
      self.__nn_dist = nn_dist

      if(self.__nn_index != nn_index):
        self.__nn_index = nn_index
        self.__get_preds()
      else:
        # Same neighbour: keep the cached predictions but make sure the joke that was
        # just rated is never recommended again.
        self.__queue = [(p, j) for p, j in self.__queue if j != joke]

    def next_joke(self):
      """return the next joke to show the user and predicted rating, or (None, None) when none are left"""
      if not self.__queue:
        return None, None
      pred, joke = self.__queue.pop()
      return joke, pred

    def joke_text(self, joke_num):
      """return joke text for the ui (joke_num is 1-based)"""
      return self.__jokes.loc[joke_num - 1, "joke"]

    @property
    def confidence(self):
      """return a very crude confidence level (0..1) in our predictions based on nn distances"""
      # this is really just a function of how close we are to a real user so it's smoke and mirrors
      # for the ui but it shows how we're warming up. * 1.2 just "feels about right".
      if self.__start_dist == 0:
        # degenerate matrix: avoid a NaN from 0/0
        return 0.0
      c = ((self.__start_dist - self.__nn_dist) / self.__start_dist) * 1.2
      return np.clip(c, 0, 1)

    @property
    def unrated_jokes(self):
      """number of jokes still unrated for the ui"""
      return len(self.__unrated_joke_ids())

In [9]:
# create the recommender
jester = Jester_Jokes()

# Module-level handles to the widgets. The render_* functions fill them in and the
# click handlers read them; they all start out empty.
ui_cold_start_container = ui_cold_start_sliders = None
ui_html = ui_slider = ui_pred = None
ui_confidence_bar = ui_confidence_lbl = None
ui_rated_bar = ui_rated_lbl = None
ui_sumbit = None
ui_container = None

def create_joke(joke, predicted_rating=0, show_prediction=True):
  """Build the widgets that display and rate a single joke.

  joke             -- 1-based joke number, or None for an empty placeholder
  predicted_rating -- initial value of the read-only "Predicted" slider
  show_prediction  -- when False the "Predicted" slider is left out of the layout

  Returns (box, slider): the outer VBox and the editable rating slider. The joke number
  is stored on the slider as `slider.tag`.
  """
  joke_text = jester.joke_text(joke) if joke is not None else ""

  # Widget styling reference:
  # https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Styling.html
  html = widgets.HTML(value="<br><div style='font-size:15px;' align='center'>" + joke_text + "</div><br>")
  html.layout = widgets.Layout(border="none", width = '80%', height='auto')

  # editable slider for the visitor's own rating
  slider = widgets.FloatSlider(
      description='Your Rating:', value=0, min=-10, max=10, step=0.01,
      readout=True, readout_format='.2f')
  slider.tag = joke
  slider.style.handle_color = 'lightblue'

  # read-only slider for the model's prediction (always created, only sometimes shown)
  pbar = widgets.FloatSlider(
      description='Predicted  : ', value=predicted_rating, min=-10, max=10, step=0.01,
      readout=True, readout_format='.2f', style={'bar_color': 'lightblue'}, disabled=True)

  items = [slider, pbar] if show_prediction else [slider]
  grid = widgets.GridBox(children=items)
  grid.layout = widgets.Layout(grid_template_columns='auto', width='auto')

  box = widgets.VBox([html, grid])
  box.layout = widgets.Layout(align_items="center", width='auto')
  return box, slider
  
def render_cold_start(n_jokes=10):
  """Render the cold-start UI: `n_jokes` distinct random jokes, one submit button, no predictions.

  n_jokes -- how many jokes to show (default 10); random.sample raises ValueError if it
             exceeds the 100 available jokes.

  Stores the outer container and the rating sliders in the module-level
  `ui_cold_start_container` / `ui_cold_start_sliders` for the click handler.
  """
  global ui_cold_start_container
  global ui_cold_start_sliders

  children = []
  sliders = []

  # sample without replacement so the same joke is never shown (and rated) twice
  jokes = random.sample(range(1, 101), n_jokes)

  for joke in jokes:
    container, slider = create_joke(joke=joke, show_prediction=False)
    children.append(container)
    sliders.append(slider)

  btn = widgets.Button(description="Rate")
  btn.layout.margin = '20px'
  btn.on_click(on_click_cold_start)
  children.append(btn)

  box = widgets.VBox(children)
  box.layout = widgets.Layout(align_items='center', width='auto')
  display(box)

  ui_cold_start_container = box
  ui_cold_start_sliders = sliders
  
def on_click_cold_start(btn):
  """Cold-start submit handler: record every slider's rating, then switch to the normal UI."""
  global ui_cold_start_container

  # each slider carries its joke number in `.tag`
  for rated in ui_cold_start_sliders:
    jester.add_rating(rated.tag, rated.value)

  # tear down the cold-start widgets and drop the module-level handle
  ui_cold_start_container.close()
  del ui_cold_start_container

  # reveal the recommender and load its first joke (None = nothing to rate yet)
  ui_container.layout.visibility = 'visible'
  on_click_recommender(None)

def render_recommender():
  """Build the main recommender UI, display it hidden, and publish its widgets as module globals.

  The box stays invisible until the cold-start handler flips its visibility.
  """
  global ui_html, ui_slider, ui_pred
  global ui_confidence_bar, ui_confidence_lbl
  global ui_rated_bar, ui_rated_lbl
  global ui_sumbit, ui_container

  # empty placeholder joke; its widgets are updated in place on every click
  container, _ = create_joke(None, 0)
  controls = container.children[1].children
  ui_html = container.children[0]
  ui_slider = controls[0]
  ui_pred = controls[1]

  btn = widgets.Button(description='Rate')
  btn.layout.margin = '20px'
  btn.on_click(on_click_recommender)
  ui_sumbit = btn

  confidence = jester.confidence
  rated = 100 - jester.unrated_jokes
  bar_style = {'bar_color': 'lightblue'}

  ui_confidence_bar = widgets.FloatProgress(description='Confidence Level:', value=confidence, min=0, max=1, style=bar_style)
  ui_confidence_lbl = widgets.Label('{:.0%}'.format(confidence) + ' confidence in prediction accuracy')
  ui_rated_bar = widgets.IntProgress(description='Jokes Rated:', value=rated, min=0, max=100, style=dict(bar_style))
  ui_rated_lbl = widgets.Label('You have rated ' + str(rated) + '/100 jokes')

  grid = widgets.GridBox(children=[ui_confidence_bar, ui_confidence_lbl, ui_rated_bar, ui_rated_lbl])
  grid.layout = widgets.Layout(grid_template_columns='auto', grid_gap='20px',
                               width='auto', padding='5px', border='none')

  box = widgets.VBox([container, btn, grid])
  box.layout = widgets.Layout(align_items='center', padding='10px', width='auto')
  display(box)
  # hidden until the cold-start ratings have been submitted
  box.layout.visibility = 'hidden'
  ui_container = box

def on_click_recommender(btn):
  """Normal-UI submit handler: store the current rating (if any) and show the next joke.

  btn -- the clicked button, or None when called programmatically to load the first
         joke without recording a rating.
  """
  # The ui_* widgets are only read and mutated here, never rebound, so no `global` is needed.
  if(btn is not None): jester.add_rating(ui_slider.tag, ui_slider.value)
  joke, pred = jester.next_joke()

  if(joke is not None): joke_text = jester.joke_text(joke)
  else:                 joke_text = 'Complete rating all jokes'
  # Same markup as create_joke. The previous ''...'' spelling was adjacent string-literal
  # concatenation, which silently dropped the quotes around the attribute values.
  ui_html.value = "<br><div style='font-size:15px;' align='center'>" + joke_text + "</div><br>"

  ui_slider.tag = joke
  ui_slider.value = 0
  if(pred is not None): ui_pred.value = pred

  confidence = jester.confidence
  rated = 100 - jester.unrated_jokes
  ui_confidence_bar.value = confidence
  ui_confidence_lbl.value = '{:.0%}'.format(confidence) + ' confidence in prediction accuracy'
  ui_rated_bar.value = rated
  ui_rated_lbl.value = 'Rated ' + str(rated) + ' out of 100 jokes'
  # nothing left to rate: lock the button
  ui_sumbit.disabled = (joke is None)

# Show the cold-start form first; the recommender is rendered right away too, but it sets its
# own visibility to 'hidden' and is revealed by the cold-start submit handler.
render_cold_start()
render_recommender()

VBox(children=(VBox(children=(HTML(value='<br><div style=\'font-size:15px;\' align=\'center\'>  \n  \nAn Asian…

VBox(children=(VBox(children=(HTML(value="<br><div style='font-size:15px;' align='center'></div><br>", layout=…