<a href="https://colab.research.google.com/github/ianwong74/Jester-Joke-Recommender-System/blob/main/csda_1040_lab1_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CSDA1040 - Group 5 - Lab1 - Jester Jokes Recommender
*This will take up to 1 minute for the initial load. Thank you for your patience.*

In [1]:
# Comment this block out before publishing to Binder; packages listed in requirements.txt are preinstalled when the image is built
#from IPython.utils import io
#with io.capture_output() as captured: 
#  !pip install html2text
#  !pip install nltk
#  !pip install db-sqlite3
#  !pip install fastai==1.0.61
#  !pip install pytest-shutil
#  !pip install voila

In [2]:
import sqlite3 as db
import re
import os
import codecs
import pandas as pd
import numpy as np
import html2text
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import random
import shutil
import functools
import time
import warnings
import urllib.request
warnings.filterwarnings('ignore') 
from fastai.collab import *
from fastai.tabular import *
#from google.colab import drive
#drive.mount("/content/drive", force_remount=True)
np.set_printoptions(precision=6, suppress=True)
from sklearn.metrics.pairwise import euclidean_distances
import random
import ipywidgets as widgets
from IPython.display import display

In [3]:
# download jester_jokes.db from shared file from Google drive
# CSDA 1040 Team, please update the shared file id if you want to use your train result. Otherwise you are using the team's master version.

# https://drive.google.com/file/d/1fCapOdOj5OjqX9snEWqYgBMQvU6EGWnA/view?usp=sharing
from IPython.utils import io
with io.capture_output() as captured: 
  !gdown --id '1fCapOdOj5OjqX9snEWqYgBMQvU6EGWnA' --output jj 
cn = db.connect('jj')
cmd = cn.cursor()
# Record Count
#cmd.execute('select count(1) as cnt from jokes')
#print(cmd.fetchall())

In [4]:
# Commented out to avoid output in the web app
# Check Rating table
# cmd.execute('select * from ratings limit 1')
# print(cmd.fetchall())

In [5]:
# download jester_jokes.pkl learner file from Google drive
# https://drive.google.com/file/d/1-1RYLX83MR5CP0E5Uw0stjNo82V9ns3o/view?usp=sharing
from IPython.utils import io
with io.capture_output() as captured: 
  !gdown --id '1-1RYLX83MR5CP0E5Uw0stjNo82V9ns3o' --output pkl

In [6]:
# define class & properties
class Jester_Jokes():
    """Joke recommender driven by the nearest stored user in the ratings matrix.

    Row 0 of the internal matrix belongs to the current ("dummy") user and starts
    with the value 99 in every column, which marks a joke as not rated yet. The
    remaining rows come from the `ratings` table. Every new rating re-locates the
    closest stored user by Euclidean distance; when that neighbour changes, the
    ratings of all still-unrated jokes are re-predicted with the loaded learner.

    Relies on the module-level sqlite connection `cn` and on the exported learner
    file 'pkl' in the working directory.
    """

    _UNRATED = 99   # marker in row 0 for jokes the current user has not rated
    _N_JOKES = 100  # jokes are numbered 1..100 and live in matrix columns 0..99

    def __init__(self):
        """Load learner, joke texts and ratings, then compute the first predictions."""
        # exported model prepared by csda_1040_lab1.ipynb
        self.__learn = load_learner('.','pkl')
        # joke texts from jester_jokes.db
        self.__jokes = pd.read_sql('select * from jokes', cn)
        # dummy user row followed by every stored rating row
        self.__mtx = self.__create_matrix()
        # largest distance to any stored user; baseline for `confidence`
        self.__start_dist = self.__max_distance()
        self.__nn_index, self.__nn_dist = self.__nearest_user()
        self.__get_preds()

    def __create_matrix(self):
        """Return the ratings matrix with the all-unrated dummy user as row 0."""
        df = pd.read_sql('select * from ratings', cn)
        # the first column of the table is skipped, the rest are the rating columns
        ratings = df.iloc[:, 1:].values
        # the dummy row is sized from the data instead of a hard-coded width
        new_user = np.full(ratings.shape[1], self._UNRATED, dtype=float)
        # force float so fractional slider ratings written into row 0 are not truncated
        return np.concatenate(([new_user], ratings)).astype(float)

    def __distances(self):
        """Euclidean distance of every matrix row (row 0 included) to the dummy user."""
        return np.linalg.norm(self.__mtx - self.__mtx[0], axis=1)

    def __max_distance(self):
        """Largest distance between the dummy user and any stored user."""
        return self.__distances()[1:].max()

    def __nearest_user(self):
        """Return (index, distance) of the stored user closest to the dummy user.

        The index is relative to the stored users (matrix row minus one). The dummy
        row is excluded before searching, so a stored user at distance 0 is reported
        correctly instead of being confused with the dummy row itself.
        """
        dists = self.__distances()[1:]
        nn_index = int(np.argmin(dists))
        return nn_index, dists[nn_index]

    def __get_preds(self):
        """Predict a rating for every unrated joke and queue them, best last."""
        unrated = [i + 1 for i in range(self._N_JOKES) if self.__mtx[0][i] == self._UNRATED]
        rows = [{"user_id": self.__nn_index, "joke_id": joke, "rating": 0.} for joke in unrated]
        df = pd.DataFrame(rows)
        queue = []
        for index, row in df.iterrows():
            _, _, pred = self.__learn.predict(df.loc[index])
            # keep (prediction, joke) pairs: keying a dict by the prediction would
            # silently drop jokes that happen to share the same predicted rating
            queue.append((pred.item(), int(row.joke_id)))
        queue.sort()
        self.__queue = queue

    def add_rating(self, joke, rating):
        """Store `rating` for joke number `joke` (1-based) and refresh the recommendations."""
        self.__mtx[0, (joke - 1)] = rating
        nn_index, nn_dist = self.__nearest_user()
        # always refresh the distance so `confidence` follows the latest ratings,
        # even while the nearest user stays the same
        self.__nn_dist = nn_dist
        if self.__nn_index != nn_index:
            self.__nn_index = nn_index
            self.__get_preds()
        else:
            # same neighbour: keep the predictions but never recommend the rated joke again
            self.__queue = [(p, j) for p, j in self.__queue if j != joke]

    def next_joke(self):
        """Pop the joke with the highest predicted rating.

        Returns (joke, predicted_rating), or (None, None) when nothing is left.
        """
        if not self.__queue:
            return None, None
        pred, joke = self.__queue.pop()
        return joke, pred

    def joke_text(self, joke_num):
        """Return the text of joke `joke_num` (row label joke_num - 1 of the jokes table)."""
        return self.__jokes.loc[joke_num - 1, "joke"]

    @property
    def confidence(self):
        """Confidence level in [0, 1]: grows as the nearest user gets closer."""
        c = ((self.__start_dist - self.__nn_dist) / self.__start_dist) * 1.2
        return np.clip(c, 0, 1)

    @property
    def unrated_jokes(self):
        """Number of jokes the current user has not rated yet."""
        return int(np.count_nonzero(self.__mtx[0][:self._N_JOKES] == self._UNRATED))

In [14]:
# Build the recommender once; every UI callback below talks to this instance.
jester = Jester_Jokes()

# Module-level handles to the widgets shared by the render/click functions.
# They start out empty and are filled in when the UI is rendered.
ui_cold_start_container = ui_cold_start_sliders = None
ui_html = ui_slider = ui_pred = None
ui_confidence_bar = ui_confidence_lbl = None
ui_rated_bar = ui_rated_lbl = None
ui_sumbit = ui_container = None

def ftn_create_joke(joke, predicted_rating=0, show_prediction=True):
  """Build the widgets that present one joke together with its rating slider.

  joke: joke number handed to jester.joke_text, or None for an empty text area.
  predicted_rating: initial value of the read-only "Predicted" slider.
  show_prediction: when False the "Predicted" slider is still created but is
    left out of the layout.

  Returns (box, slider): the outer VBox and the editable rating slider; the
  slider's `tag` attribute carries `joke`.
  """
  # Styling reference: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Styling.html
  joke_text = "" if joke is None else jester.joke_text(joke)

  text_area = widgets.HTML(value="<br><div style='font-size:15px;' align='center'>" + joke_text + "</div><br>")
  text_area.layout = widgets.Layout(border="none", width = '80%', height='auto')

  # both sliders share the same range, step and readout format
  slider_opts = dict(min=-10, max=10, step=0.01, readout=True, readout_format='.2f')

  rating = widgets.FloatSlider(description='Your Rating:', value=0, **slider_opts)
  rating.tag = joke
  rating.style.handle_color = 'lightblue'

  predicted = widgets.FloatSlider(description='Predicted  : ', value=predicted_rating,
                                  style={'bar_color': 'lightblue'}, disabled=True, **slider_opts)

  shown = [rating, predicted] if show_prediction else [rating]
  slider_grid = widgets.GridBox(children=shown)
  slider_grid.layout = widgets.Layout(grid_template_columns='auto', width='auto')

  outer = widgets.VBox([text_area, slider_grid])
  outer.layout = widgets.Layout(align_items="center", width='auto')
  return outer, rating
  
def ftn_render_cold_start(n_jokes=10):
  """Render the cold-start form: several jokes to rate, one "Rate" button, no predictions.

  n_jokes: how many distinct jokes to show (default 10, i.e. 10% of the 100 jokes);
    values outside 0..100 are clamped.

  Stores the displayed container and the list of rating sliders in the globals
  ui_cold_start_container and ui_cold_start_sliders for the click handler.
  """
  global ui_cold_start_container
  global ui_cold_start_sliders
  n_jokes = max(0, min(int(n_jokes), 100))
  # sample without replacement: independent randint() draws could show the same joke twice
  jokes = random.sample(range(1, 101), n_jokes)
  children = []
  sliders = []
  # one text area + rating slider per selected joke
  for joke in jokes:
    container, slider = ftn_create_joke(joke=joke, show_prediction=False)
    children.append(container)
    sliders.append(slider)
  btn = widgets.Button(description="Rate")
  btn.layout.margin = '20px'
  btn.on_click(ftn_on_click_cold_start)
  children.append(btn)
  box = widgets.VBox(children)
  box.layout = widgets.Layout(align_items='center', width='auto')
  display(box)
  ui_cold_start_container = box
  ui_cold_start_sliders = sliders
  
def ftn_on_click_cold_start(btn):
  """Handle the first "Rate" click of the cold-start form.

  Feeds every cold-start slider value to the recommender, closes and drops the
  cold-start container, reveals the recommender panel and loads its first joke.
  """
  global ui_cold_start_container, ui_container

  # each slider carries its joke number in `tag`
  for rated in ui_cold_start_sliders:
    jester.add_rating(rated.tag, rated.value)

  # the cold-start form is done: close the widget and remove the global handle
  ui_cold_start_container.close()
  del ui_cold_start_container

  ui_container.layout.visibility = 'visible'
  # passing None loads the next joke without submitting a rating
  ftn_on_click_recommender(None)

def ftn_render_recommender():
  """Build and display the recommender panel, initially hidden.

  The panel holds one joke area with rating and prediction sliders, a "Rate"
  button and the confidence / progress indicators. All widgets the click
  handler needs are stored in the ui_* globals.
  """
  global ui_html, ui_slider, ui_pred
  global ui_confidence_bar, ui_confidence_lbl
  global ui_rated_bar, ui_rated_lbl
  global ui_sumbit, ui_container

  # empty joke area; the parts are picked out of the returned container:
  # children = (html, grid) and grid.children = (rating slider, prediction slider)
  joke_box, _ = ftn_create_joke(None, 0)
  ui_html, slider_grid = joke_box.children
  ui_slider, ui_pred = slider_grid.children

  ui_sumbit = widgets.Button(description='Rate')
  ui_sumbit.layout.margin = '20px'
  ui_sumbit.on_click(ftn_on_click_recommender)

  bar_style = {'bar_color': 'lightblue'}
  confidence = jester.confidence
  ui_confidence_bar = widgets.FloatProgress(description='Confidence Level:', value=confidence, min=0, max=1, style=bar_style)
  ui_confidence_lbl = widgets.Label('{:.0%}'.format(confidence) + ' confidence in prediction accuracy')

  rated_count = 100 - jester.unrated_jokes
  ui_rated_bar = widgets.IntProgress(description='Jokes Rated:', value=rated_count, min=0, max=100, style={'bar_color': 'lightblue'})
  ui_rated_lbl = widgets.Label('You have rated ' + str(rated_count) + '/100 jokes')

  status_grid = widgets.GridBox(children=[ui_confidence_bar, ui_confidence_lbl, ui_rated_bar, ui_rated_lbl])
  status_grid.layout = widgets.Layout(grid_template_columns='auto', grid_gap='20px', width='auto', padding='5px', border='none')

  panel = widgets.VBox([joke_box, ui_sumbit, status_grid])
  panel.layout = widgets.Layout(align_items='center', padding='10px', width='auto')
  display(panel)
  # stays hidden until the cold-start form has been submitted
  panel.layout.visibility = 'hidden'
  ui_container = panel

def ftn_on_click_recommender(btn):
  """Submit the current rating (if any) and show the next recommended joke.

  btn: the clicked button, or None to only load the next joke without
    submitting a rating (used right after the cold-start form).

  When no joke is left, a completion message is shown and the "Rate" button
  is disabled. The confidence and progress indicators are refreshed each time.
  """
  # the ui_* widgets are only mutated, never rebound, so no `global` statements are needed
  if btn is not None:
    jester.add_rating(ui_slider.tag, ui_slider.value)
  joke, pred = jester.next_joke()
  if joke is not None:
    joke_text = jester.joke_text(joke)
  else:
    # was assigned to a misspelled name, leaving joke_text unbound once all jokes were rated
    joke_text = 'Complete rating all jokes'
  # same markup as ftn_create_joke; doubled single quotes are literal concatenation
  # in Python and dropped the quotes around the attribute values
  ui_html.value = "<br><div style='font-size:15px;' align='center'>" + joke_text + "</div><br>"
  ui_slider.tag = joke
  ui_slider.value = 0
  if pred is not None:
    ui_pred.value = pred
  ui_confidence_bar.value = jester.confidence
  ui_confidence_lbl.value = '{:.0%}'.format(jester.confidence) + ' confidence in prediction accuracy'
  ui_rated_bar.value = (100 - jester.unrated_jokes)
  ui_rated_lbl.value = 'Rated ' + str(100 - jester.unrated_jokes) + ' out of 100 jokes'
  ui_sumbit.disabled = (joke is None)

In [15]:
# Render the cold-start form first. The recommender panel is rendered right after it but
# stays hidden (ftn_render_recommender sets its visibility to 'hidden') until the
# cold-start "Rate" button handler makes it visible.
ftn_render_cold_start()
ftn_render_recommender()

VBox(children=(VBox(children=(HTML(value='<br><div style=\'font-size:15px;\' align=\'center\'>  \n  \nAn Asian…

VBox(children=(VBox(children=(HTML(value="<br><div style='font-size:15px;' align='center'></div><br>", layout=…