In [29]:
(ns ch7.nb
    (:require [clojupyter.misc.helper :as helper]
              [clojupyter.misc.display :as display]
              [clojure.java.io :as io]
              [incanter.charts :as c]
              [incanter.core :as i]
              [incanter.datasets :as d]
              [incanter.stats :as s]
              [clojure.string :as string]
              [medley.core :refer [map-vals]]))

# Data inspection

In [16]:
;; Peek at the first raw line of the training ratings file.
;; `with-open` closes the reader once the line has been read; `first` is
;; eager, so the string is realized before the reader is closed.
(with-open [rdr (io/reader "data/ml-100k/ua.base")]
    (first (line-seq rdr)))

"1\t1\t5\t874965758"

In [17]:
;; Peek at the first raw line of the item (movie) file.
;; A single `io/reader` call is enough (wrapping a reader in `io/reader`
;; again adds nothing), and `with-open` closes it after the line is read.
(with-open [rdr (io/reader "data/ml-100k/u.item")]
    (first (line-seq rdr)))

"1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0"

# Data parsing

In [20]:
(defn to-long
    "Parses the string `s` as a base-10 long.
    Throws NumberFormatException when `s` is not a valid integer literal."
    [s]
    (Long/parseLong s 10))

(defn line->rating
    "Turns one tab-separated line of a ratings file into a map with the keys
    :user, :item and :rating. Every field is parsed with `to-long`; fields
    beyond the third have no matching key and are left out of the map."
    [line]
    (let [fields  (string/split line #"\t")
          numbers (map to-long fields)]
        (zipmap [:user :item :rating] numbers)))

(defn load-ratings
    "Reads `file` from the data/ml-100k/ directory and returns a vector of
    rating maps, one per line (see `line->rating`). The vector is built
    eagerly so every line is consumed before the reader is closed."
    [file]
    (let [path (str "data/ml-100k/" file)]
        (with-open [rdr (io/reader path)]
            (into [] (map line->rating) (line-seq rdr)))))

#'ch7.nb/load-ratings

In [21]:
;; Show the first parsed rating of the training set.
(first (load-ratings "ua.base"))

{:user 1, :item 1, :rating 5}

In [22]:
(defn line->item-tuple
    "Turns one pipe-separated line of the item file into an `[id title]`
    pair: the first field parsed as a long, the second kept as a string.
    Any remaining fields are ignored."
    [line]
    (let [fields (string/split line #"\|")
          id     (first fields)
          title  (second fields)]
        [(to-long id) title]))

(defn load-items
    "Reads `path` from the data/ml-100k/ directory and returns a map from
    item id to item title (see `line->item-tuple`). The map is built
    eagerly so every line is consumed before the reader is closed."
    [path]
    (let [full-path (str "data/ml-100k/" path)]
        (with-open [rdr (io/reader full-path)]
            (into {} (map line->item-tuple) (line-seq rdr)))))

#'ch7.nb/load-items

In [23]:
;; Look up the title of item 1.
(get (load-items "u.item") 1)

"Toy Story (1995)"

# Recommender Systems

## Slope one

**Slope One** recommenders are a family of algorithms; we'll use the weighted version described in the [original paper](http://lemire.me/fr/abstracts/SDM2005.html).

### Difference

We have to start by calculating the average difference in ratings between every pair of items.

In [24]:
(defn conj-item-difference
    "Records the rating difference for the ordered pair of ratings `[i j]`.
    The difference (rating of j minus rating of i) is conj'ed onto the
    collection stored in `dict` under [item-of-i item-of-j]; a missing
    entry starts out as a one-element list."
    [dict [i j]]
    (let [{from :item, from-rating :rating} i
          {to :item, to-rating :rating}     j]
        (update-in dict [from to] conj (- to-rating from-rating))))

(defn collect-item-differences
    "Folds every ordered pair of distinct ratings from `items` into `dict`
    using `conj-item-difference`. Pairs whose two rating maps are equal
    are skipped."
    [dict items]
    (->> items
         (mapcat (fn [a] (map (fn [b] [a b]) items)))
         (remove (fn [[a b]] (= a b)))
         (reduce conj-item-difference dict)))

(defn item-differences
    "Builds the nested map item -> item -> collection of rating differences.
    `user-ratings` is a sequence in which each element is the collection of
    rating maps passed on to `collect-item-differences`."
    [user-ratings]
    (reduce (fn [acc ratings]
                (collect-item-differences acc ratings))
            {}
            user-ratings))

#'ch7.nb/item-differences

In [27]:
;; Show the first entry of the item-difference map built from the training set.
(let [ratings-by-user (vals (group-by :user (load-ratings "ua.base")))]
    (first (item-differences ratings-by-user)))

[893 {558 (-2 4), 453 (-1), 637 (-1), 519 (2), 1244 (0), 1097 (3), 357 (-1 0 3), 716 (1), 275 (1 0), 530 (2), 929 (0), 789 (-2 2 3), 586 (0 0), 410 (1 -2 4), 433 (1), 765 (-1), 521 (4), 451 (0 -2), 291 (1 2), 443 (0 1 4), 798 (-1), 779 (0 0), 249 (0 3), 638 (0), 299 (0 0), 121 (1 0 2 3), 287 (-2 2 0), 65 (-1), 702 (3), 70 (0 0 3 -1), 949 (0 0), 218 (-2), 1070 (4), 812 (0 -1), 62 (1 2), 1185 (0), 774 (1 -2), 475 (0 4), 497 (1 2), 1009 (2), 891 (-2), 164 (0), 1040 (0 -1), 282 (1 1 3 1), 769 (1 0), 799 (1), 273 (0 -1 0 4), 1441 (0 0), 186 (0), 430 (2), 529 (1), 898 (3 -2 -2 -2 1), 370 (-2), 834 (-2), 233 (0 1), 298 (1 4), 188 (1 4), 240 (-1 -1), 110 (-1 0), 982 (0), 620 (1 0), 311 (1 0), 931 (0), 882 (-2 0), 1409 (1), 128 (0 -2), 399 (1), 1178 (0), 989 (-2), 377 (0 -2), 468 (1), 259 (-1 1 1 0 1), 210 (1 0), 229 (1), 153 (0 1), 621 (1), 670 (0), 977 (1 -1 -1), 343 (-2 -2 3 2), 887 (0 2 4), 472 (0 2 0), 7 (1 -1 4), 894 (-2), 59 (1 3), 1234 (-1 -2), 934 (0), 473 (0 1), 1010 (4), 86 (-1 -2 0)

In [28]:
;; Print the raw differences recorded between items 893 and 343.
(let [by-user (group-by :user (load-ratings "ua.base"))
      diffs   (item-differences (vals by-user))]
    (println "893:343" (get-in diffs [893 343])))

893:343 (-2 -2 3 2)


To use the differences for prediction, we have to summarize them into a mean and keep track of the number of ratings behind each mean.

In [33]:
(defn summarize-item-differences
    "Replaces every collection of differences in `related-items` (a map from
    item id to a collection of differences) with a summary map holding the
    :mean of the differences and their :count."
    [related-items]
    (letfn [(summarize [differences]
                {:mean  (s/mean differences)
                 :count (count differences)})]
        (map-vals summarize related-items)))

(defn slope-one-recommender
    "Builds the Slope One model from `ratings`: the item-difference map from
    `item-differences`, with every inner map summarized by
    `summarize-item-differences` into {:mean .. :count ..} entries."
    [ratings]
    (let [differences (item-differences ratings)]
        (map-vals summarize-item-differences differences)))

#'ch7.nb/slope-one-recommender

In [34]:
;; Inspect the summarized difference between items 893 and 343.
(let [per-user (vals (group-by :user (load-ratings "ua.base")))
      model    (slope-one-recommender per-user)]
    (get-in model [893 343]))

{:mean 0.25, :count 4}

### Prediction

To make predictions we need the matrix of differences we calculated and the users' own previous ratings.

In [36]:
(defn candidates
    "Returns one candidate map per item that `recommender` relates to the
    rated `item`. Each candidate carries the related item's id as :item, a
    predicted :rating (the given rating plus the mean difference) and the
    :count of differences behind that mean. Returns an empty sequence when
    `recommender` has no entry for `item`."
    [recommender {:keys [rating item]}]
    (for [[related-id {avg :mean, n :count}] (get recommender item)]
        {:item   related-id
         :rating (+ rating avg)
         :count  n}))

(defn weighted-rating
    "Combines all candidate maps for one item into a single prediction.
    Takes an `[id candidates]` pair and returns a map with the item id, the
    mean of the candidate ratings weighted by their counts, and the total
    count."
    [[id candidates]]
    (let [total-count  (apply + (map :count candidates))
          weighted-sum (apply + (for [c candidates]
                                    (* (:rating c) (:count c))))]
        {:item   id
         :rating (/ weighted-sum total-count)
         :count  total-count}))

(defn slope-one-recommend
    "Returns up to `top-n` recommendations for a user.

    `recommender` is the nested map item -> related item -> {:mean :count}
    that `candidates` looks ratings up in; `rated` is the user's own
    sequence of rating maps (each with :item and :rating).

    Every rated item contributes candidates for its related items; the
    candidates are grouped per item, merged with `weighted-rating`, stripped
    of items the user has already rated, and sorted by predicted rating in
    descending order. Each result is a map with :item, :rating and :count."
    [recommender rated top-n]
    (let [already-rated (set (map :item rated))
          ;; The maps produced by `weighted-rating` carry the item id under
          ;; :item, so that is the key to check against the rated set.
          already-rated? (fn [{:keys [item]}]
                             (contains? already-rated item))
          recommendations (->> (mapcat #(candidates recommender %) rated)
                               (group-by :item)
                               (map weighted-rating)
                               (remove already-rated?)
                               (sort-by :rating >))]
        (take top-n recommendations)))

#'ch7.nb/slope-one-recommend

In [37]:
;; Train on every user except the first one, then list the titles of the
;; ten best recommendations for that first user.
(let [[user-1 & other-users] (vals (group-by :user (load-ratings "ua.base")))
      recommender (slope-one-recommender other-users)
      items       (load-items "u.item")]
    (for [recommendation (slope-one-recommend recommender user-1 10)]
        (get items (:item recommendation))))

("Someone Else's America (1995)" "Aiqing wansui (1994)" "Great Day in Harlem, A (1994)" "Pather Panchali (1955)" "Boys, Les (1997)" "Saint of Fort Washington, The (1993)" "Marlene Dietrich: Shadow and Light (1996) " "Anna (1996)" "Star Kid (1997)" "Santa with Muscles (1996)")