-
Notifications
You must be signed in to change notification settings - Fork 0
/
Possibilities.py
50 lines (38 loc) · 1.35 KB
/
Possibilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Interpolation weights used by P_bigramNormal; together they sum to 1:
#   lambda3 scales the bigram estimate,
#   lambda2 scales the unigram estimate,
#   lambda1 scales the constant epsilon term.
lambda1, lambda2, lambda3 = 0.10, 0.15, 0.75
# Constant term that lambda1 multiplies, so the smoothed value is never 0.
epsilon = 0.1
def P_unigram(word, M, dictData):
    """Return the unigram estimate for *word*.

    Args:
        word: Token to look up.
        M: Divisor applied to the stored count (presumably the total
            number of tokens in the corpus -- confirm against the caller).
        dictData: Mapping from token to its count.

    Returns:
        ``dictData[word] / M`` when *word* is a key of *dictData*;
        otherwise the fixed fallback value ``0.01``.
    """
    # Unknown tokens get a fixed fallback instead of a zero probability.
    if word not in dictData:
        return 0.01
    return dictData[word] / M
def P_bigram(secondWord, firstWord, dictData2D, dictData):
    """Return the bigram estimate count(firstWord, secondWord) / count(firstWord).

    Args:
        secondWord: The token whose conditional estimate is wanted.
        firstWord: The token that precedes *secondWord*.
        dictData2D: Nested mapping ``dictData2D[firstWord][secondWord]``
            holding the count of the pair.
        dictData: Mapping from token to its count.

    Returns:
        The pair count divided by the count of *firstWord*, or ``0`` when
        the pair was never seen or *firstWord* has no (non-zero) count.
    """
    pair_count = dictData2D.get(firstWord, {}).get(secondWord, 0)
    first_count = dictData.get(firstWord, 0)
    # Guard the division: the two tables can disagree (pair recorded but
    # firstWord missing or zero in dictData), which would otherwise raise
    # ZeroDivisionError.
    if pair_count == 0 or first_count == 0:
        return 0
    return pair_count / first_count
def P_bigramNormal(secondWord, firstWord, dictData2D, dictData, M):
    """Return the smoothed (interpolated) bigram estimate.

    The result is the weighted sum
    ``lambda3 * P_bigram + lambda2 * P_unigram + lambda1 * epsilon``,
    where the weights and ``epsilon`` are the module-level constants.

    Args:
        secondWord: The token being scored.
        firstWord: The token that precedes *secondWord*.
        dictData2D: Nested pair-count mapping passed through to P_bigram.
        dictData: Token-count mapping passed to P_bigram and P_unigram.
        M: Divisor passed through to P_unigram.

    Returns:
        The interpolated estimate for *secondWord* following *firstWord*.
    """
    bigram_term = lambda3 * P_bigram(secondWord, firstWord, dictData2D, dictData)
    unigram_term = lambda2 * P_unigram(secondWord, M, dictData)
    constant_term = lambda1 * epsilon
    return bigram_term + unigram_term + constant_term
def P_WL(comment, dictData, dictData2D, M):
    """Score *comment* with the interpolated bigram model.

    The text is split on single spaces. The first token is scored with
    P_unigram; every later token is scored with P_bigramNormal given the
    token directly before it. All factors are multiplied together.

    Args:
        comment: Text to score, tokens separated by single spaces.
        dictData: Token-count mapping.
        dictData2D: Nested pair-count mapping.
        M: Divisor passed through to P_unigram.

    Returns:
        The product of the per-token estimates.
    """
    tokens = comment.split(" ")
    score = P_unigram(tokens[0], M, dictData)
    # Walk adjacent (previous, current) token pairs.
    for previous, current in zip(tokens, tokens[1:]):
        score *= P_bigramNormal(current, previous, dictData2D, dictData, M)
    return score
def P_WL_Uni(comment, dictData, M):
    """Score *comment* with the unigram model.

    The text is split on single spaces and the P_unigram estimates of the
    tokens are multiplied together, each token contributing exactly once.

    Args:
        comment: Text to score, tokens separated by single spaces.
        dictData: Token-count mapping.
        M: Divisor passed through to P_unigram.

    Returns:
        The product of the unigram estimates of all tokens.
    """
    words = comment.split(" ")
    possibility = P_unigram(words[0], M, dictData)
    # Start from the second token: the first one already seeds the product,
    # so including it again would square its contribution.
    for word in words[1:]:
        possibility *= P_unigram(word, M, dictData)
    return possibility