Skip to content

Commit

Permalink
Implement user_words algorithm, which assigns lambdas based on the commonly used words of each user. Gives a positive score so far, and a success rate of about 50%. Testing this now with run_algorithms.
Browse files Browse the repository at this point in the history
  • Loading branch information
amrav committed Jan 26, 2012
1 parent 7e88d61 commit d47cb8f
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 3 deletions.
87 changes: 87 additions & 0 deletions algorithms/user_words.py
@@ -0,0 +1,87 @@
from __future__ import division
import re
import math

class user(object):
    """Word-usage profile of one statement author (an ``issued_by`` value).

    Derives from ``object`` so that it is a new-style class under Python 2
    as well as Python 3.

    Attributes:
        words: dict mapping a word to the number of times it has been
            counted for this user. Starts empty; build_user_words() fills it.
    """

    def __init__(self):
        # A fresh dict per instance, so profiles never share counts.
        self.words = {}

    def __repr__(self):
        # Makes debug prints of a profile readable instead of showing only
        # the default object address.
        return "user(words=%r)" % (self.words,)

# Tunable settings, kept as three parallel lists indexed by parameter:
#   index 0 -- match_threshold: run() only counts a word for a user when that
#              user's count for the word is greater than this value.
#   index 1 -- lambda_threshold: a percentage; run() divides it by 100.
# NOTE(review): ranges/steps look like the (low, high) bounds and step size
# used when searching for better parameter values -- confirm against the
# optimiser that consumes them.
params = [10, 80]
ranges = [(5, 15), (20, 99)]
steps = [2, 5]

def build_user_words(statements):
    """Build a word-frequency profile for every author seen in *statements*.

    Statements whose ``issued_by`` is ``'$$$'`` are skipped. For every other
    statement, each run of word characters in ``text_str`` that is longer
    than three characters is counted in that author's ``user.words`` dict.

    Args:
        statements: iterable of objects exposing ``issued_by`` and
            ``text_str`` attributes.

    Returns:
        dict mapping each ``issued_by`` value (other than ``'$$$'``) to its
        ``user`` profile. An author with no qualifying words still gets an
        (empty) profile.
    """
    profiles = {}
    for statement in statements:
        author = statement.issued_by
        if author == '$$$':
            continue

        if author not in profiles:
            profiles[author] = user()
        counts = profiles[author].words

        for word in re.findall(r"\W?(\w+)\W?", statement.text_str):
            # Words of three characters or fewer are ignored.
            if len(word) <= 3:
                continue
            counts[word] = counts.get(word, 0) + 1
    return profiles

def run(statements):
    """Assign per-user lambdas to every statement issued by ``'$$$'``.

    Word profiles are first built from all statements with
    build_user_words(). Then, for each statement whose ``issued_by`` is
    ``'$$$'`` (presumably the marker for an unknown author -- confirm), every
    profiled user is scored by summing that user's own counts for the
    statement's words. Only words longer than three characters, which the
    user has used more than ``match_threshold`` (``params[0]``) times, count.

    Each user's lambda is ``log(score)`` divided by the mean ``log(score)``
    over all users that scored. If the second-highest lambda is more than
    ``lambda_threshold`` (``params[1]`` percent) of the highest, the result
    is treated as ambiguous and discarded.

    Args:
        statements: re-iterable collection (it is traversed twice) of objects
            exposing ``issued_by`` and ``text_str``. Statements issued by
            ``'$$$'`` get their ``alg_lambda`` attribute assigned.

    Returns:
        None. For each ``'$$$'`` statement, ``stat.alg_lambda`` is set to a
        fresh dict mapping user to lambda (empty when nothing matched or the
        match was ambiguous). Other statements are left untouched.
    """
    match_threshold = params[0]
    # Explicit float divisor: the percentage becomes a fraction even if this
    # block is used without ``from __future__ import division``.
    lambda_threshold = params[1] / 100.0

    users = build_user_words(statements)

    for stat in statements:
        if stat.issued_by != '$$$':
            continue

        # Tokenise only the statements that are actually scored.
        words = [w for w in re.findall(r"\W?(\w+)\W?", stat.text_str)
                 if len(w) > 3]

        # Raw score per user: sum of the user's counts for matching words.
        # Users without a single qualifying word get no entry at all.
        userscore = {}
        for luser in users:
            counts = users[luser].words
            hits = [counts[w] for w in words
                    if w in counts and counts[w] > match_threshold]
            if hits:
                userscore[luser] = sum(hits)

        # Always start from a fresh dict so the result never depends on what
        # alg_lambda held before this call.
        lambdas = {}
        if userscore:
            log_scores = {}
            for luser in userscore:
                log_scores[luser] = math.log(userscore[luser])
            avg = sum(log_scores.values()) / len(log_scores)
            # avg is 0 only when every score is 1; no meaningful ratio then.
            if avg != 0:
                for luser in userscore:
                    sc = log_scores[luser] / avg
                    # A zero lambda carries no information: leave the user
                    # out rather than storing a placeholder value.
                    if sc != 0:
                        lambdas[luser] = sc

        # Ambiguity check: two candidates are enough to compare the best
        # against the runner-up. ranked[0] is never 0 because zero lambdas
        # are not stored.
        ranked = sorted(lambdas.values(), reverse=True)
        if len(ranked) >= 2 and ranked[1] / ranked[0] > lambda_threshold:
            lambdas = {}

        stat.alg_lambda = lambdas








7 changes: 4 additions & 3 deletions settings.py
Expand Up @@ -4,13 +4,14 @@
import line_context
import bracket
import addressal
import user_words

#for test_algorithm.py
prev_display_scope = 4
next_display_scope = 4

alg_list = [line_context, bracket, addressal]
test_alg = addressal
opt_alg = addressal
opt_runs = 3
test_alg = user_words
opt_alg = user_words
opt_runs = 5

0 comments on commit d47cb8f

Please sign in to comment.