-
Notifications
You must be signed in to change notification settings - Fork 0
/
iragent.py
162 lines (132 loc) · 5.14 KB
/
iragent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import json
import os
from pprint import pprint
#import pdb
#pdb.set_trace()
from collections import Counter
from konlpy.tag import Mecab
import random
import shutil
from wordcloud import WordCloud
from .mchatcommon import makeJSON
from .tfidf_scikit import IRSystem
from .rel_word import RelWordsSystem
class IRAgent:
    """Information-retrieval agent.

    Receives a JSON request, dispatches on the intent found in the requesting
    agent's result entry, and writes its own results into the shared
    ``variables`` dict under the key ``<name>_RESULT``.
    """

    # Number of documents requested from the similar-document search.
    SIMILAR_DOC_TOP_N = 3
    # Number of related words requested per keyword.
    REL_WORD_TOP_N = 100
    # Maximum number of entries kept in ``document_page_list``.
    MAX_DOCUMENT_PAGES = 3

    def __init__(self, name, doc_path, title_path, w2v_path, d2v_path):
        """Load the retrieval and related-word systems.

        All ``*_path`` arguments are appended verbatim to the project
        directory (two levels above this file), so they are expected to
        start with a path separator.
        """
        self.name = name
        projectdir = os.path.dirname(os.path.dirname(__file__))
        self.font_path = projectdir + '/install/pytagcloud/NANUMGOTHIC.TTF'

        self.irsys = IRSystem()
        self.irsys.read_data(projectdir + doc_path)
        self.irsys.read_title(projectdir + title_path)
        self.irsys.tfidf()

        self.rwsys = RelWordsSystem()
        self.rwsys.read_title(projectdir + title_path)
        self.rwsys.word2vec_model_load(projectdir + w2v_path)
        self.rwsys.doc2vec_model_load(projectdir + d2v_path)

        # Directory the generated word-cloud images are written to.
        self.wordclouddir = projectdir + "/mobis_chatbot/static/mobis_chatbot/img/"

        self.requestAgent = ''
        self.responseAgent = self.name
        # Key under which this agent stores its results in ``variables``.
        self.result = self.name + '_RESULT'

    def contextMgr(self, action):
        """Modify the shared context.

        For ``action == 'init'`` every key that starts with this agent's name
        (i.e. anything this agent added on an earlier turn) is removed and the
        agent's result entry is reset to an empty dict. Any other action
        leaves the context untouched.
        """
        if action != 'init':
            return
        # Iterate over a snapshot of the keys because entries are deleted.
        for key in list(self.variables):
            if key.startswith(self.name):
                del self.variables[key]
        # Reset this agent's result structure.
        self.variables[self.result] = {}

    def interface(self, request):
        """Handle one JSON-encoded request and return the response from makeJSON."""
        self.variables = json.loads(request)
        self.contextMgr(action='init')
        self.requestAgent = self.variables['REQUEST_AGENT']
        self.processing(self.requestAgent)
        self.contextMgr(action='update')
        return makeJSON(self.name, self.requestAgent, self.name, self.variables)

    def processing(self, sender):
        """Dispatch on the intent stored in ``<sender>_RESULT``.

        Intents other than the three handled below are ignored.
        """
        intent = self.variables[sender + '_RESULT']['INTENT']
        if intent == 'request_search':
            self.processing_request_search(sender)
        elif intent == 'find_similar_document':
            self.processing_find_similar_document(sender)
        elif intent == 'recommend_document':
            self.processing_recommend_document(sender)

    def processing_find_similar_document(self, sender):
        """Store documents similar to the sender's query under ``IR_RESULT``."""
        query = self.variables[sender + '_RESULT']['QUERY']
        response = self.rwsys.sim_doc_search(query, self.SIMILAR_DOC_TOP_N)
        self.variables[self.result]['IR_RESULT'] = response

    def processing_recommend_document(self, sender):
        """Store documents similar to the joined ``USER_CONTEXT`` under ``IR_RESULT``.

        ``sender`` is not used here; it is accepted so that all intent
        handlers share the same signature.
        """
        query = ' '.join(self.variables['USER_CONTEXT'])
        response = self.rwsys.sim_doc_search(query, self.SIMILAR_DOC_TOP_N)
        self.variables[self.result]['IR_RESULT'] = response

    def processing_request_search(self, sender):
        """Run a search for the sender's query and collect related words.

        Writes ``KEYWORDS``, ``IR_RESULT``, ``RW_RESULT`` and ``WORDCLOUD``
        into this agent's result entry. ``WORDCLOUD`` is the file name of the
        generated image, or an empty string when no related words were found.
        """
        query = self.variables[sender + '_RESULT']['QUERY']
        response = self.irsys.response_generation(query)
        nounlist = response['nounlist']

        # Merge the related words of every keyword into one mapping; for a
        # word related to several keywords the last keyword's value wins.
        # NOTE(review): presumably the values are weights per related word --
        # confirm against RelWordsSystem.rel_word_with_wgt_search.
        relwordDict = {}
        for noun in nounlist:
            rel_word_response = self.rwsys.rel_word_with_wgt_search(
                noun, self.REL_WORD_TOP_N)
            # Skip keywords that have no related-word entry.
            if noun not in rel_word_response:
                continue
            relwordDict.update(rel_word_response[noun])

        wordcloudname = self._save_wordcloud(relwordDict) if relwordDict else ""

        # Keep only the first few document pages in the response.
        response['document_page_list'] = \
            response['document_page_list'][:self.MAX_DOCUMENT_PAGES]

        result = self.variables[self.result]
        result['KEYWORDS'] = nounlist
        result['IR_RESULT'] = response
        result['RW_RESULT'] = relwordDict
        result['WORDCLOUD'] = wordcloudname

    def _save_wordcloud(self, frequencies):
        """Render ``frequencies`` to a PNG in ``self.wordclouddir``; return its file name."""
        # The file name combines user, session and a random number.
        # NOTE(review): USER and SESSION come from the request and are used
        # in a file name without sanitising -- confirm they are trusted.
        randname = str(random.randrange(1, 1000000))
        wordcloudname = ("wordcloud_" + self.variables['USER'] + '_'
                         + self.variables['SESSION'] + '_' + randname + '.png')
        wordcloud = WordCloud(
            font_path=self.font_path,
            width=400,
            height=200,
            background_color='black',
        ).generate_from_frequencies(frequencies)
        wordcloud.to_file(self.wordclouddir + wordcloudname)
        return wordcloudname
#if __name__ == "__main__":
# main()