Add session_var setting to allow multiple caches in parallel
deanishe committed Jan 13, 2018
1 parent 79436b2 commit f991800
Showing 4 changed files with 59 additions and 13 deletions.
Binary file modified Fuzzy-Demo-0.2.alfredworkflow
40 changes: 40 additions & 0 deletions README.md
@@ -6,6 +6,22 @@ Fuzzy search for Alfred 3

![Demo animation](./demo.gif)

<!-- MarkdownTOC autolink="true" bracket="round" depth="3" autoanchor="true" -->

- [How it works](#how-it-works)
- [Example usage](#example-usage)
- [Demo](#demo)
- [Caveats](#caveats)
- [Performance](#performance)
- [Utility](#utility)
- [Technical details](#technical-details)
- [Customisation](#customisation)
- [Multiple Script Filters](#multiple-script-filters)
- [Thanks](#thanks)

<!-- /MarkdownTOC -->

<a name="how-it-works"></a>
How it works
------------

@@ -14,6 +30,7 @@ Instead of calling your script directly, you call it via `fuzzy.py`, which cache
The query is compared against each item's `matches` field if it's present, and against the item's `title` field if not.
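For illustration, a generator script might emit Alfred feedback like this (a sketch; the script name and item values are made up, but `title` and `matches` are the fields `fuzzy.py` looks at):

```python
import json

# Hypothetical generator script (say, "myscript.py") whose JSON output
# fuzzy.py caches and filters. The query is matched against "matches"
# when that field is set, and against "title" otherwise.
items = [
    {
        "title": "Safari",
        "subtitle": "/Applications/Safari.app",
        "arg": "/Applications/Safari.app",
        # match against both name and path, not just the title
        "matches": "Safari /Applications/Safari.app",
    },
]
print(json.dumps({"items": items}))
```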


<a name="example-usage"></a>
Example usage
-------------

@@ -42,32 +59,37 @@ export query="$1"
**Note**: Don't forget to turn off "Alfred filters results"!


<a name="demo"></a>
Demo
----

Grab the [Fuzzy-Demo.alfredworkflow][demo] file from this repo to try out the search and view an example implementation.


<a name="caveats"></a>
Caveats
-------

Fuzzy search, and this implementation in particular, is by no means the "search algorithm to end all algorithms".


<a name="performance"></a>
### Performance ###

By dint of being written in Python and using a more complex algorithm, `fuzzy.py` can only comfortably handle a small fraction of the number of results that Alfred's native search can. On my 2012 MBA, it becomes noticeably, though not annoyingly, sluggish at around 2,500 items.

If the script is well-received, I'll reimplement it in a compiled language. My [Go library for Alfred workflows][awgo] uses the same algorithm, and can comfortably handle 20K+ items.


<a name="utility"></a>
### Utility ###

Fuzzy search is awesome for some datasets, but sucks for others. It can work very, very well when you only want to search a single field, such as name/title or filename/filepath, but it tends to give sub-optimal results when searching across multiple fields, especially keywords/tags.

In such cases, you'll usually get better results from a word-based search.
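By way of comparison, a minimal word-based filter (a sketch, not part of this workflow) requires every query word to appear somewhere in the searched field:

```python
def word_filter(query, items, key=lambda item: item['title']):
    """Keep items whose searched field contains every query word.

    Unlike a fuzzy match, word order doesn't matter and scattered
    single-letter matches don't rank at all: each word either appears
    in the field or the item is dropped.
    """
    words = query.lower().split()
    return [item for item in items
            if all(w in key(item).lower() for w in words)]
```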


<a name="technical-details"></a>
Technical details
-----------------

@@ -76,6 +98,7 @@ The fuzzy algorithm is taken from [this gist][pyversion] by [@menzenski][menzens
The only addition is smarter handling of non-ASCII. If the user's query contains only ASCII, the search is diacritic-insensitive. If the query contains non-ASCII, the search considers diacritics.
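The folding logic can be sketched like so (an illustration of the behaviour described above, not the exact code from `fuzzy.py`):

```python
from unicodedata import normalize

def fold_diacritics(s):
    """Strip diacritics, e.g. 'Düsseldorf' -> 'Dusseldorf'."""
    return normalize('NFKD', s).encode('ascii', 'ignore').decode('ascii')

def searchable(query, text):
    """Fold diacritics in ``text`` only if ``query`` is pure ASCII."""
    try:
        query.encode('ascii')
    except UnicodeEncodeError:
        return text  # non-ASCII query: diacritics are significant
    return fold_diacritics(text)
```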


<a name="customisation"></a>
Customisation
-------------

@@ -94,6 +117,23 @@ Export different values for the following environment variables before calling `
| `separators` | `_-.([/ ` | Characters to consider separators (for the purposes of assigning `sep_bonus`) |


<a name="multiple-script-filters"></a>
### Multiple Script Filters ###

If you're chaining multiple Script Filters that use different datasets, you'll need to set the `session_var` environment variable to give each one its own cache:

```bash
# Script Filter 1
export query="$1"
./fuzzy /usr/bin/python myscript.py

# Script Filter 2 (downstream of 1)
export query="$1"
export session_var="fuzzy_filter2"
./fuzzy /usr/bin/python myotherscript.py
```

<a name="thanks"></a>
Thanks
------

26 changes: 16 additions & 10 deletions demo/fuzzy.py
@@ -42,7 +42,7 @@
from unicodedata import normalize

# Name of workflow variable storing session ID
-SID = 'fuzzy_session_id'
+SID = os.getenv('session_var') or 'fuzzy_session_id'

# Workflow's cache directory
CACHEDIR = os.getenv('alfred_workflow_cache')
@@ -59,6 +59,8 @@
sep_bonus = int(os.getenv('sep_bonus') or '10')
# Penalty for each unmatched character
unmatched_penalty = int(os.getenv('unmatched_penalty') or '-1')
+# Characters considered word separators
+separators = os.getenv('separators') or '_-.([/ '


def log(s, *args):
@@ -100,20 +102,23 @@ class Fuzzy(object):
lead_penalty (int): Penalty for each character before first match
max_lead_penalty (int): Max total ``lead_penalty``
sep_bonus (int): Bonus if after a separator
+separators (str): Characters to consider separators
unmatched_penalty (int): Penalty for each unmatched character
"""

def __init__(self, adj_bonus=adj_bonus, sep_bonus=sep_bonus,
camel_bonus=camel_bonus, lead_penalty=lead_penalty,
max_lead_penalty=max_lead_penalty,
-unmatched_penalty=unmatched_penalty):
+unmatched_penalty=unmatched_penalty,
+separators=separators):
self.adj_bonus = adj_bonus
self.sep_bonus = sep_bonus
self.camel_bonus = camel_bonus
self.lead_penalty = lead_penalty
self.max_lead_penalty = max_lead_penalty
self.unmatched_penalty = unmatched_penalty
+self.separators = separators
self._cache = {}

def filter_feedback(self, fb, query):
@@ -177,17 +182,18 @@ def match(self, query, terms):
lead_penalty = self.lead_penalty
max_lead_penalty = self.max_lead_penalty
unmatched_penalty = self.unmatched_penalty
+separators = self.separators

-score, q_idx, s_idx, q_len, s_len = 0, 0, 0, len(query), len(terms)
+score, q_idx, t_idx, q_len, t_len = 0, 0, 0, len(query), len(terms)
prev_match, prev_lower = False, False
prev_sep = True # so that matching first letter gets sep_bonus
best_letter, best_lower, best_letter_idx = None, None, None
best_letter_score = 0
matched_indices = []

-while s_idx != s_len:
+while t_idx != t_len:
p_char = query[q_idx] if (q_idx != q_len) else None
-s_char = terms[s_idx]
+s_char = terms[t_idx]
p_lower = p_char.lower() if p_char else None
s_lower, s_upper = s_char.lower(), s_char.upper()

@@ -209,7 +215,7 @@ def match(self, query, terms):
# apply penalty for each letter before the first match
# using max because penalties are negative (so max = smallest)
if q_idx == 0:
-score += max(s_idx * lead_penalty, max_lead_penalty)
+score += max(t_idx * lead_penalty, max_lead_penalty)

# apply bonus for consecutive matches
if prev_match:
@@ -223,7 +229,7 @@
if prev_lower and s_char == s_upper and s_lower != s_upper:
new_score += camel_bonus

-# update query index iff the next query letter was matched
+# update query index if the next query letter was matched
if next_match:
q_idx += 1

@@ -234,7 +240,7 @@
score += unmatched_penalty
best_letter = s_char
best_lower = best_letter.lower()
-best_letter_idx = s_idx
+best_letter_idx = t_idx
best_letter_score = new_score

prev_match = True
@@ -244,9 +250,9 @@
prev_match = False

prev_lower = s_char == s_lower and s_lower != s_upper
-prev_sep = s_char in '_ '
+prev_sep = s_char in separators

-s_idx += 1
+t_idx += 1

if best_letter:
score += best_letter_score
6 changes: 3 additions & 3 deletions fuzzy.py
@@ -42,7 +42,7 @@
from unicodedata import normalize

# Name of workflow variable storing session ID
-SID = 'fuzzy_session_id'
+SID = os.getenv('session_var') or 'fuzzy_session_id'

# Workflow's cache directory
CACHEDIR = os.getenv('alfred_workflow_cache')
@@ -95,7 +95,7 @@ def decode(s):

class Fuzzy(object):
"""Fuzzy comparison of strings.
Attributes:
adj_bonus (int): Bonus for adjacent matches
camel_bonus (int): Bonus if match is uppercase
@@ -104,7 +104,7 @@ class Fuzzy(object):
sep_bonus (int): Bonus if after a separator
separators (str): Characters to consider separators
unmatched_penalty (int): Penalty for each unmatched character
"""

def __init__(self, adj_bonus=adj_bonus, sep_bonus=sep_bonus,
