Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

best-Match

  • Loading branch information...
commit eec42cbb901233a475104e44840d7a7ef387ab15 1 parent 5657ef0
@bwhitman bwhitman authored
Showing with 14 additions and 9 deletions.
  1. +2 −4 API/fp.py
  2. +12 −5 util/little_eval.py
View
6 API/fp.py
@@ -57,7 +57,7 @@ def match(self):
def inflate_code_string(s):
""" Takes an uncompressed code string consisting of 0-padded fixed-width
sorted hex and converts it to the standard code string."""
- n = int(len(s) / 9.0) # 4 bytes for hash, 5 bytes for time
+ n = int(len(s) / 13.0) # 8 bytes for hash, 5 bytes for time
def pairs(l, n=2):
"""Non-overlapping [1,2,3,4] -> [(1,2), (3,4)]"""
@@ -73,7 +73,7 @@ def pairs(l, n=2):
# Parse out n groups of 5 timestamps in hex; then n groups of 8 hash codes in hex.
end_timestamps = n*5
times = [int(''.join(t), 16) for t in pairs(s[:end_timestamps], 5)]
- codes = [int(''.join(t), 16) for t in pairs(s[end_timestamps:], 4)]
+ codes = [int(''.join(t), 16) for t in pairs(s[end_timestamps:], 8)]
assert(len(times) == len(codes)) # these should match up!
return ' '.join('%d %d' % (c, t) for c,t in zip(codes, times))
@@ -118,8 +118,6 @@ def best_match_for_query(code_string, elbow=8, local=False):
response = query_fp(code_string, rows=10, local=local, get_data=True)
logger.debug("solr qtime is %d" % (response.header["QTime"]))
- _debug_print_response(code_string, response, elbow=elbow)
-
if len(response.results) == 0:
return Response(Response.NO_RESULTS, qtime=response.header["QTime"], tic=tic)
View
17 util/little_eval.py
@@ -66,15 +66,16 @@ def get_winners(query_code_string, response, elbow=8):
def main():
- if not len(sys.argv)==3:
- print "usage: python little_eval.py [database_list | disk] query_list"
+ if not len(sys.argv)==4:
+ print "usage: python little_eval.py [database_list | disk] query_list [limit]"
sys.exit()
fp_codes = {}
+ limit = int(sys.argv[3])
if sys.argv[1] == "disk":
fp.local_load("disk.pkl")
else:
- database_list = open(sys.argv[1]).read().split("\n")
+ database_list = open(sys.argv[1]).read().split("\n")[0:limit]
for line in database_list:
(track_id, file) = line.split(" ### ")
print track_id
@@ -89,7 +90,8 @@ def main():
counter = 0
actual_win = 0
original_win = 0
- query_list = open(sys.argv[2]).read().split("\n")
+ bm_win = 0
+ query_list = open(sys.argv[2]).read().split("\n")[0:limit]
for line in query_list:
(track_id, file) = line.split(" ### ")
print track_id
@@ -98,11 +100,16 @@ def main():
counter+=1
response = fp.query_fp(fp.decode_code_string(j[0]["code"]), rows=20, local=True, get_data=True)
(winner_actual, winner_original) = get_winners(fp.decode_code_string(j[0]["code"]), response, elbow=8)
+ response = fp.best_match_for_query(j[0]["code"], local=True)
+ if(response.TRID == track_id):
+ bm_win+=1
if(winner_actual == track_id):
actual_win+=1
if(winner_original == track_id):
original_win+=1
- print "%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%)" % (actual_win, counter, (float(actual_win)/float(counter))*100.0, original_win, counter, (float(original_win)/float(counter))*100.0)
+ print "%d / %d actual (%2.2f%%) %d / %d original (%2.2f%%) %d / %d bm (%2.2f%%)" % (actual_win, counter, (float(actual_win)/float(counter))*100.0, \
+ original_win, counter, (float(original_win)/float(counter))*100.0, \
+ bm_win, counter, (float(bm_win)/float(counter))*100.0)
if __name__ == '__main__':
main()
Please sign in to comment.
Something went wrong with that request. Please try again.