Skip to content

Commit

Permalink
done with phase 1
Browse files Browse the repository at this point in the history
  • Loading branch information
chriskelvinlee committed Dec 7, 2011
1 parent ce917e6 commit 1e5dc44
Show file tree
Hide file tree
Showing 13 changed files with 256 additions and 98 deletions.
54 changes: 54 additions & 0 deletions determine.py
@@ -0,0 +1,54 @@
#!/usr/bin/env python
# encoding: utf-8
"""
determine.py
Created by Christopher K. Lee on 2011-12-07.
Copyright (c) 2011 __MyCompanyName__. All rights reserved.
"""
import time

def determineAnswer(results, choices, correct):
    """Pick an answer by majority vote over the score dicts and rate it.

    Each non-empty dict in ``results`` maps a candidate answer to a score.
    The key(s) holding the maximum score in a dict earn one point for the
    matching entry of ``choices``; the choice with the most points wins
    (ties resolve to the lowest index).

    Side effect: when a dict's minimum score is non-zero, every value in
    that dict is divided by the minimum *in place*, so callers see the
    normalized scores afterwards.

    Args:
        results: iterable of ``{answer: score}`` dicts; empty dicts are
            skipped.
        choices: list of candidate answers (any length).
        correct: index into ``choices`` of the right answer.

    Returns:
        ``[[is_correct, answer_index], confidence, de_time]`` where
        ``is_correct`` is 1 or 0, ``confidence`` accumulates the ratio of
        the winning score to each strictly-middle score, and ``de_time``
        is the elapsed wall-clock time in seconds.
    """
    determine_start_time = time.time()

    # One vote slot per choice.  Keep at least one slot so an empty
    # ``choices`` list still reports index 0 instead of raising.
    points = [0] * max(len(choices), 1)
    confidence = 0

    # Loop through all score dict results
    for r in results:
        # Score set cannot be empty
        if not r:
            continue

        highestChoice = None
        a1 = min(r.values())
        z1 = max(r.values())

        # Snapshot the items so rewriting values below cannot disturb the
        # iteration; items() works on both Python 2 and Python 3.
        for key, val in list(r.items()):
            if val == z1 and key in choices:
                # Max key is a known choice: give it a vote
                points[choices.index(key)] += 1
                highestChoice = key
            if a1 != 0:
                r[key] = float(val) / a1  # Normalize against the minimum

        # Without a winning choice in this dict there is nothing to compare
        # against (the original raised KeyError on r[""] here).
        if highestChoice is None:
            continue

        # Normalized extremes
        a2 = min(r.values())
        z2 = max(r.values())

        # Multiplier of the winner vs. every strictly-middle score.  Force
        # float division so un-normalized integer scores are not truncated,
        # and skip zero to avoid dividing by it.
        top = float(r[highestChoice])
        for val in r.values():
            if val != a2 and val != z2 and val != 0:
                confidence += top / val

    determine_stop_time = time.time()
    de_time = determine_stop_time - determine_start_time

    # Report whether the voted answer matches the expected one
    answer_index = points.index(max(points))
    is_correct = 1 if answer_index == correct else 0
    return [[is_correct, answer_index], confidence, de_time]
2 changes: 1 addition & 1 deletion old/starter_code.py
Expand Up @@ -478,7 +478,7 @@ def getSynonyms(keyword):
"""

compute_score(queryphrase="Whose favorite place to swim is in his money bin", answers=["Scrooge McDuck", "Richie Rich", "Ebenezer Scrooge"], output="results/query3.txt")
#compute_score(queryphrase="Whose favorite place to swim is in his money bin", answers=["Scrooge McDuck", "Richie Rich", "Ebenezer Scrooge"], output="results/query3.txt")

"""Results, with answer choices broken into fragments, with scores of 1 for fragment, 10 for whole answer, times 2 if near keywords:
['Whose', 'bin', 'favorite'] [WANT TO INCLUDE PLACE, SWIM]
Expand Down
50 changes: 37 additions & 13 deletions output.py
Expand Up @@ -7,23 +7,37 @@
Copyright (c) 2011 __MyCompanyName__. All rights reserved.
"""

def output(text, choices, correct, nltk_data, results, answer, conf,
nltk_time, ai_time, de_time, currentCount, outputSet):

f = open('query{ }.txt'.format(QUERYNUM), 'w')
url = nltk_time[0]
keyword_q = nltk_time[1]
keyword_a = nltk_time[2]
token_q = nltk_time[3]
token_url = nltk_time[4]
instances = nltk_time[5]
score1 = ai_time[0]
score2 = ai_time[1]
score3 = ai_time[2]
score4 = ai_time[3]
decide = de_time
nltk = keyword_q+keyword_a+token_q+token_url
ai = url+instances+score1+score2+score3+score4+decide
total = nltk+ai



# write to file
f = open(output, 'w')
print >>f, queryphrase
print >>f, keywords
print >>f, weightedquestionkeywords
print >>f, weightedanswerkeywords
print >>f, score_result1
print >>f, score_result2
print >>f, score_result3
print >>f, score_result4
f = open('results/query{}.txt'.format(currentCount), 'w')
print >>f, text
print >>f, choices
print >>f, correct
print >>f, "Answer:\t\t%d" % answer[1]
print >>f, "Confidence:\t%d" % conf
print >>f, '\n#####'
print >>f, "Total Time:\t\t%f" % total
print >>f, "NLTK Time:\t\t%f" % (keyword_q+keyword_a+token_q+token_url)
print >>f, "AI Time:\t\t%f" % (url+instances+score1+score2+score3+score4)
print >>f, "NLTK Time:\t\t%f" % nltk
print >>f, "AI Time:\t\t%f" % ai
print >>f, "URL Time:\t\t%f" % url
print >>f, "Keyword (Q) Time:\t%f" % keyword_q
print >>f, "Keyword (A) Time:\t%f" % keyword_a
Expand All @@ -34,5 +48,15 @@
print >>f, "Score2 Time:\t\t%f" % score2
print >>f, "Score3 Time:\t\t%f" % score3
print >>f, "Score4 Time:\t\t%f" % score4
print >>f, '#####'
print >>f, '#####\n'
print >>f, "Keywords (Q):\t\t%s" % str(nltk_data[0])
print >>f, "Keywords (Qw):\t\t%s" % str(nltk_data[1])
print >>f, "Keywords (Aw):\t\t%s" % str(nltk_data[2])
print >>f, "Tokens (Query):\t%s" % str(nltk_data[3])
print >>f, "Tokens (Combo):\t%s" % str(nltk_data[4])
print >>f, "score_result1:\t%s" % str(results[0])
print >>f, "score_result2:\t%s" % str(results[1])
print >>f, "score_result3:\t%s" % str(results[2])
print >>f, "score_result4:\t%s" % str(results[3])

f.close()
14 changes: 14 additions & 0 deletions questions.py
Expand Up @@ -5,3 +5,17 @@
["What is each member of a winning Super Bowl team given", ["a bronzed jersey", "a green jacket", "a ring"], 2],
["How much time does it take for the space shuttle to get into space", ["about 10 minutes", "4 hours", "2 days"], 0],
]


"""debug code for interpreter
queryphrase="What is each member of a winning Super Bowl team given"
answers=["a bronzed jersey", "a green jacket", "a ring"]
urls = getGoogleLinks(queryphrase, 1)
keywords = getSimpleQuestionKeywords(queryphrase)
weightedquestionkeywords = getWeightedQuestionKeywords(queryphrase)
weightedanswerkeywords = getAnswerKeywords(answers)
querytokens = nltk.word_tokenize(queryphrase)
combinedtokens = getTokens(urls)
instances = getInstances(keywords, combinedtokens)
"""
23 changes: 23 additions & 0 deletions results/prequery1.txt
@@ -0,0 +1,23 @@
Which of these describes the tail of a healthy platypus
['platypus', 'tail', 'describes', 'healthy']
{'healthy': (66, 'JJ'), 'tail': (24, 'NN'), 'describes': (26, 'NNS'), 'platypus': (0, 'NN')}
{'short': (619, 'JJ'), 'long': (1285, 'RB'), 'fat': (80, 'JJ'), 'strong': (569, 'JJ'), 'squishy': (0, 'JJ'), 'pinkish': (0, 'NN')}
{}
{'fat and strong': 31, 'short and pinkish': 21, 'long and squishy': 61}
{'fat and strong': 87436, 'short and pinkish': 43, 'long and squishy': 41256}
{'fat and strong': 35894, 'short and pinkish': 0, 'long and squishy': 0}
#####
Total Time: 94.278013
NLTK Time: 92.599376
AI Time: 1.678637
URL Time: 0.658323
Keyword (Q) Time: 26.682972
Keyword (A) Time: 21.475645
Tokens (Q) Time: 0.000251
Tokens (URL) Time: 44.440508
Map Instance Time: 0.023398
Score1 Time: 0.128347
Score2 Time: 0.101162
Score3 Time: 0.341729
Score4 Time: 0.425678
#####
24 changes: 24 additions & 0 deletions results/prequery2.txt
@@ -0,0 +1,24 @@
Which sport do players use a stick to cradle the ball
['sport', 'players', 'stick', 'ball']
{'players': (33, 'NNS'), 'sport': (20, 'NN'), 'stick': (59, 'NN'), 'ball': (109, 'NN')}
{'lacrosse': (0, 'NN'), 'field': (432, 'NN'), 'hockey': (1, 'NN'), 'ice': (56, 'NN')}
{'lacrosse': 156, 'field hockey': 19}
{'lacrosse': 156, 'ice hockey': 24, 'field hockey': 114}
{'lacrosse': 276910, 'ice hockey': 5484, 'field hockey': 71631}
{'lacrosse': 105350, 'ice hockey': 184708, 'field hockey': 210210}

#####
Total Time: 71.615634
NLTK Time: 70.140234
AI Time: 1.475400
URL Time: 0.645202
Keyword (Q) Time: 29.591298
Keyword (A) Time: 26.574528
Tokens (Q) Time: 0.000405
Tokens (URL) Time: 13.974003
Map Instance Time: 0.007040
Score1 Time: 0.032136
Score2 Time: 0.019258
Score3 Time: 0.151532
Score4 Time: 0.620232
#####
File renamed without changes.
51 changes: 30 additions & 21 deletions results/query1.txt

Large diffs are not rendered by default.

50 changes: 29 additions & 21 deletions results/query2.txt

Large diffs are not rendered by default.

19 changes: 9 additions & 10 deletions scoring.py
Expand Up @@ -7,19 +7,18 @@



keywords, weightedquestionkeywords, weightedanswerkeywords, querytokens, combinedtokens, instances

def score(answers, nd, scoringFunction):
def score(choices, nd, scoringFunction):
answers = choices
keywords = nd[0]
combinedtokens = nd[4]
instances = nd[5]
weightedquestionkeywords = nd[2]
weightedanswerkeywords = nd[1]
scoringFunction(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords)
weightedquestionkeywords = nd[1]
weightedanswerkeywords = nd[2]
return scoringFunction(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords)


# Use all scores
def useAllWeights(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords):
def useAllScores(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords):
time1_start_time = time.time()
weights1 = getSimpleAnswerPhraseScores(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords)
time1_stop_time = time.time()
Expand All @@ -29,7 +28,7 @@ def useAllWeights(answers, keywords, combinedtokens, instances, weightedquestion
time2_stop_time = time.time()

time3_start_time = time.time()
weights3 = getWeightedQuestionKeywordScores(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords)
weights3 = getWeightedQuestionKeywordScores(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords)
time3_stop_time = time.time()

time4_start_time = time.time()
Expand Down Expand Up @@ -108,7 +107,7 @@ def getSimpleAnswerKeywordScores(answers, keywords, combinedtokens, instances, w
#########

def getWeightedQuestionKeywordScores(answers, keywords, combinedtokens, instances, weightedquestionkeywords, weightedanswerkeywords):
rangevalue = 50
rangevalue = getRangeValue()
scores = {}
tokenrange = findrange(len(combinedtokens))
for answer in answers:
Expand Down Expand Up @@ -147,7 +146,7 @@ def getWeightedQuestionKeywordScores(answers, keywords, combinedtokens, instance
return scores

#########
## Score 3 ##
## Score 4 ##
# use function below to score, using question keywords and answer keywords
#
#########
Expand Down
55 changes: 28 additions & 27 deletions test.py
Expand Up @@ -2,52 +2,53 @@
from trivialpursuitfunctions import *
from scoring import *
from output import *
from decide import *
from determine import *

#What other options should test take in?
def test( questions = tp_Questions, scoringFunction = useAllWeights ):
numberCorrect = 0;
def runQuery( questions = tp_Questions, scoringFunction = useAllScores, outputSet = 1 ):
numberCorrect = 0
currentCount = 0
for question in questions:
currentCount += 1
print "*****"
print "Processing Query %d of %d " % (currentCount, len(questions))

# Read in question, choices, and correct answer
text = question[0]
choices = question[1]
correct = question[2]

print "Processing NLTK..."
# Parse urls, questions, answers and generate keywords
raw = NLTK_parse( queryphrase=text, answers=choices )
raw = NLTK_parse(queryphrase=text, answers=choices)
nltk_data = raw[0] # array of size 6
nltk_time = raw[1] # array of size 6


print "AI & Scoring..."
# Get answer weight with scoring function(s)
weights = score(answers, nltk_data, scoringFunction)
candidate = weights[0] # array of size 4
weights = score(choices, nltk_data, scoringFunction)
results = weights[0] # array of size 4
ai_time = weights[1] # array of size 4

print "Determining Answer..."
# Normalize to determine answer
correctness = determineAnswer(results, choices, correct)
answer = correctness[0]
conf = correctness[1]
de_time = correctness[2]

# Save the results sys.out txt
output(text, choices, correct, nltk_data, results, answer, conf,
nltk_time, ai_time, de_time, currentCount, outputSet)
print "*****"

# Determine correct results
# Print results live
bld = text + ": "
if getHighestResult(result, choices) == correct:
if answer[0] == 1:
numberCorrect += 1
bld += "Correct"
else:
bld += "Incorrect"
print bld

# Output results

print str(numberCorrect) + "/" + str(len(questions))


def getHighestResult(result, choices):
    """Return the index in ``choices`` of the best-scoring answer.

    ``result`` maps answers to scores.  The first key whose score is
    strictly greater than every score seen so far (starting from 0) wins.
    If that key is not found in ``choices`` -- including when no score is
    positive -- ``len(choices)`` is returned.
    """
    best_key, best_score = "", 0
    for candidate in result:
        candidate_score = result[candidate]
        if candidate_score > best_score:
            best_key, best_score = candidate, candidate_score

    # Locate the winner among the choices; fall through to len(choices)
    for position, choice in enumerate(choices):
        if choice == best_key:
            return position
    return len(choices)
6 changes: 3 additions & 3 deletions trivialpursuitfunctions.py
Expand Up @@ -108,7 +108,7 @@ def NLTK_parse(queryphrase="", answers=[], urls=[], scoringFunction = getSimpleA
# Get urls
url_start_time = time.time()
if urls == []:
urls = getGoogleLinks(queryphrase, 3) # may want to change this number
urls = getGoogleLinks(queryphrase, 1) # may want to change this number
url_stop_time = time.time()

# Get question keywords
Expand Down Expand Up @@ -147,10 +147,10 @@ def NLTK_parse(queryphrase="", answers=[], urls=[], scoringFunction = getSimpleA
keyword_a = (keyword_a_stop_time - keyword_a_start_time)
token_q = (tokens_q_stop_time - tokens_q_start_time)
token_url = (tokens_url_stop_time - tokens_url_start_time)
instances = (instances_stop_time - instances_start_time)
instances_t = (instances_stop_time - instances_start_time)

NLTK = [keywords, weightedquestionkeywords, weightedanswerkeywords, querytokens, combinedtokens, instances]
TIME = [url, keyword_q, keyword_a, token_q, token_url, instances]
TIME = [url, keyword_q, keyword_a, token_q, token_url, instances_t]

return [NLTK, TIME]

Expand Down
6 changes: 4 additions & 2 deletions weights.py
Expand Up @@ -18,7 +18,6 @@ def calculateAnswerKeywordWeight(answertoken, weightedanswerkeywords):
def calculateDistanceWeight(distance):
return (50 / (distance + 1)) # optimize


def calculateInstanceScore(answertoken, keywords, distances, weightedquestionkeywords, weightedanswerkeywords, full):
newscore = 0
for keyword in keywords:
Expand All @@ -28,4 +27,7 @@ def calculateInstanceScore(answertoken, keywords, distances, weightedquestionkey
newscore += calculateQuestionKeywordWeight(keyword, weightedquestionkeywords) * 10 * calculateDistanceWeight(distance) # optimize
else:
newscore += calculateQuestionKeywordWeight(keyword, weightedquestionkeywords) * calculateAnswerKeywordWeight(answertoken, weightedanswerkeywords) * calculateDistanceWeight(distance) # optimize
return newscore
return newscore

def getRangeValue():
    """Return the fixed range value (50) used by the scoring functions."""
    range_value = 50
    return range_value

0 comments on commit 1e5dc44

Please sign in to comment.