Permalink
Browse files

Store codes in Tokyo Tyrant

* Update solr config to not store codes
* Make ingest put codes into Tokyo Tyrant (TT)
* Make best_match_for_query get codes from TT to calculate scores
  • Loading branch information...
1 parent aafe9fa commit 4db306884a6304bd6d76bfe36eec06e5716f6e44 @alastair alastair committed Jun 13, 2011
Showing with 592 additions and 18 deletions.
  1. +31 −7 API/fp.py
  2. +549 −0 API/pytyrant.py
  3. +10 −10 README.md
  4. +2 −1 solr/solr/solr/conf/schema.xml
View
@@ -12,6 +12,7 @@
import pickle
from collections import defaultdict
import zlib, base64, re, time, random, string, math
+import pytyrant
try:
import json
@@ -21,7 +22,8 @@
_fp_solr = solr.SolrConnectionPool("http://localhost:8502/solr/fp")
_hexpoch = int(time.time() * 1000)
logger = logging.getLogger(__name__)
-
+_tyrant_address = ['localhost', 1978]
+_tyrant = None
class Response(object):
# Response codes
@@ -133,10 +135,15 @@ def best_match_for_query(code_string, elbow=10, local=False):
original_scores = {}
actual_scores = {}
+ trackids = [r["track_id"].encode("utf8") for r in response.results]
+ tcodes = get_tyrant().multi_get(trackids)
+
# For each result compute the "actual score" (based on the histogram matching)
- for r in response.results:
- original_scores[r["track_id"]] = int(r["score"])
- actual_scores[r["track_id"]] = actual_matches(code_string, r["fp"], elbow = elbow)
+ for (i, r) in enumerate(response.results):
+ track_id = r["track_id"]
+ original_scores[track_id] = int(r["score"])
+ track_code = tcodes[i]
+ actual_scores[track_id] = actual_matches(code_string, track_code, elbow = elbow)
#logger.debug("Actual score for %s is %d (code_len %d), original was %d" % (r["track_id"], actual_scores[r["track_id"]], code_len, top_match_score))
# Sort the actual scores
@@ -210,6 +217,12 @@ def actual_matches(code_string_query, code_string_match, slop = 2, elbow = 10):
return actual_match_list[0][1]
return 0
+def get_tyrant():
+ global _tyrant
+ if _tyrant is None:
+ _tyrant = pytyrant.PyTyrant.open(*_tyrant_address)
+ return _tyrant
+
"""
fp can query the live production flat or the alt flat, or it can query and ingest in memory.
The following few functions support local query and ingest by mimicking the response of the live server.
@@ -321,17 +334,24 @@ def delete(track_ids, do_commit=True, local=False):
# delete a code from FP flat
if local:
- print "not implemented yet"
- return
+ return local_delete(track_ids)
+
with solr.pooled_connection(_fp_solr) as host:
host.delete_many(track_ids)
+
+ try:
+ get_tyrant().multi_del(track_ids)
+ except KeyError:
+ pass
+
if do_commit:
commit()
def ingest(fingerprint_list, do_commit=True, local=False):
""" Ingest some fingerprints into the fingerprint database.
- The fingerprints should be of the form {"track_id": id, "fp": fp, "artist": artist, "release": release, "track": track, "length": length}
+ The fingerprints should be of the form
+ {"track_id": id, "fp": fp, "artist": artist, "release": release, "track": track, "length": length, "version": "codever"}
or a list of the same.
artist, release and track are not required but highly recommended.
length is the length of the track being ingested in milliseconds
@@ -344,12 +364,16 @@ def ingest(fingerprint_list, do_commit=True, local=False):
return local_ingest(fingerprint_list)
docs = []
+ codes = []
for fprint in fingerprint_list:
docs.append(fprint)
+ codes.append((fprint["track_id"], fprint["fp"]))
with solr.pooled_connection(_fp_solr) as host:
host.add_many(docs)
+ get_tyrant().multi_set(codes)
+
if do_commit:
commit()
Oops, something went wrong.

0 comments on commit 4db3068

Please sign in to comment.