-
Notifications
You must be signed in to change notification settings - Fork 1
/
Utils.py
40 lines (33 loc) · 1.19 KB
/
Utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#from bert_serving.client import BertClient
from gensim.models import Word2Vec
import numpy as np
class Utils:
    """Helpers for sentence splitting, word2vec embeddings and summary composition."""

    def __init__(self, w2v_path="./w2v_models/cnndm_w2v.model"):
        """Load the word2vec model stored at *w2v_path*.

        Args:
            w2v_path: Path to a model saved with gensim's ``Word2Vec.save``.
                Defaults to the path that was previously hard-coded.
        """
        self.w2v_path = w2v_path
        self.w2v = Word2Vec.load(self.w2v_path)
        # self.bc = BertClient()  # BERT client is currently disabled.

    def sentence_split(self, x, max_len):
        """Split a document into sentences, keeping at most *max_len* of them.

        Args:
            x: Document text whose sentences are separated by ``"\\n"``.
            max_len: Maximum number of sentences to return.

        Returns:
            List with the first *max_len* newline-separated pieces of *x*.
        """
        return x.split("\n")[:max_len]

    def sentence_embedding(self, x):
        """Embed each sentence as the mean of its in-vocabulary word vectors.

        Args:
            x: Sequence of sentences (strings); tokens are whitespace-separated.

        Returns:
            Array of shape ``(len(x), vector_size)``. A sentence without any
            in-vocabulary token keeps an all-zero row.
        """
        # Membership tests and lookups on the Word2Vec object itself are
        # deprecated in gensim 3.x and removed in 4.x; the keyed vectors live
        # in ``.wv``. Fall back to the object itself if it has no ``.wv``.
        vectors = getattr(self.w2v, "wv", self.w2v)
        embeddings = np.zeros((len(x), self.w2v.vector_size))
        for row, sentence in zip(embeddings, x):
            known = 0
            for token in sentence.split():
                if token in vectors:
                    # ``row`` is a view, so this accumulates into ``embeddings``.
                    row += vectors[token]
                    known += 1
            if known:
                row /= known
        return embeddings

    def avg_doc_embedding(self, x):
        """Return the document embedding: the mean of its sentence embeddings.

        Args:
            x: Sequence of sentences (strings).

        Returns:
            Array of shape ``(vector_size,)``; all zeros for an empty document.
        """
        emb = self.sentence_embedding(x)
        if emb.shape[0] == 0:
            # Averaging over zero sentences would produce NaN.
            return np.zeros(emb.shape[1])
        # Average over the sentence axis so the result is a vector.
        return emb.mean(axis=0)

    def compose(self, spl_document, action_seq):
        """Build the summary from the sentences whose action equals 1.

        Args:
            spl_document: List of sentences of the document.
            action_seq: Sequence of actions aligned with *spl_document*; an
                action of 1 selects the sentence at the same position.

        Returns:
            List of selected sentences, in document order.
        """
        # zip stops at the shorter input, so an action sequence padded beyond
        # the document length no longer raises IndexError.
        return [
            sentence
            for sentence, action in zip(spl_document, action_seq)
            if action == 1
        ]