-
-
Notifications
You must be signed in to change notification settings - Fork 210
/
fuzzy.clj
81 lines (68 loc) · 2.89 KB
/
fuzzy.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
;; Copyright © 2013-2015 Guillaume Plique
;; Permission is hereby granted, free of charge, to any person
;; obtaining a copy of this software and associated documentation
;; files (the "Software"), to deal in the Software without
;; restriction, including without limitation the rights to use, copy,
;; modify, merge, publish, distribute, sublicense, and/or sell copies
;; of the Software, and to permit persons to whom the Software is
;; furnished to do so, subject to the following conditions:
;; The above copyright notice and this permission notice shall be
;; included in all copies or substantial portions of the Software.
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
;; BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
;; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
;; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
;; SOFTWARE.
;; -------------------------------------------------------------------
;; clj-fuzzy Levenshtein
;; -------------------------------------------------------------------
;;
;;
;; Author: PLIQUE Guillaume (Yomguithereal)
;; Version: 0.1
;; Source: https://gist.github.com/vishnuvyas/958488
;;
(ns leiningen.figwheel.fuzzy)
(defn- next-row
[previous current other-seq]
(reduce
(fn [row [diagonal above other]]
(let [update-val (if (= other current)
diagonal
(inc (min diagonal above (peek row))))]
(conj row update-val)))
[(inc (first previous))]
(map vector previous (next previous) other-seq)))
(defn levenshtein
"Compute the levenshtein distance between two [sequences]."
[sequence1 sequence2]
(peek
(reduce (fn [previous current] (next-row previous current sequence2))
(map #(identity %2) (cons nil sequence2) (range))
sequence1)))
(defn similar-key* [thresh ky ky2]
(and ((some-fn keyword? string? symbol?) ky)
((some-fn keyword? string? symbol?) ky2)
(let [dist (levenshtein (name ky) (name ky2))]
(when (<= dist thresh)
dist))))
(def similar-key (partial similar-key* 3))
#_(similar-key :hey :heeey)
(defn get-keylike [ky mp]
(when (map? mp)
(if-let [val (get mp ky)]
[ky val]
(when-let [res (not-empty
(sort-by
first
(keep (fn [[k v]]
(when-let [dist (and (map? v) (similar-key k ky))]
[dist [k v]])) mp)))]
(-> res first second)))))
(defn fuzzy-select-keys [m kys]
(into {} (map #(get-keylike % m) kys)))
(defn fuzzy-select-keys-and-fix [m kys]
(into {} (map #(let [[_ v] (get-keylike % m)] [% v]) kys)))