/
utility.py
38 lines (30 loc) · 1.12 KB
/
utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os
import math
import numpy as np
def sigmoid(x):
    """
    logistic function 1 / (1 + exp(-x)) for a scalar x

    The branch on the sign of x keeps the argument passed to math.exp
    non-positive, so large-magnitude inputs saturate to 0.0 / 1.0 instead
    of raising OverflowError.
    """
    if x >= 0:
        # exp(-x) is in (0, 1]; it underflows to 0.0 at worst.
        return 1.0 / (1.0 + math.exp(-x))
    # Same value rewritten as exp(x) / (1 + exp(x)) so that the exponent
    # stays negative for negative x.
    z = math.exp(x)
    return z / (1.0 + z)
def construct_doc_matrix(dict, paper_list):
    """
    construct the learned embedding for document clustering

    dict: {paper_index, numpy_array}
    paper_list: ordered paper indices; row i of the result is the
        embedding stored under paper_list[i]

    Returns the embeddings stacked row-wise with np.vstack. When
    paper_list holds exactly one entry the stored value is returned
    as-is (no stacking, no copy), which is what the previous loop-based
    version did as well.

    Raises IndexError if paper_list is empty and KeyError if a paper
    index is missing from dict.
    """
    # Indexing first keeps the IndexError on an empty list.
    first_embedding = dict[paper_list[0]]
    if len(paper_list) == 1:
        return first_embedding
    # One vstack call over all rows avoids re-copying the growing matrix
    # on every iteration and does not depend on the Python 2-only xrange.
    return np.vstack([dict[paper] for paper in paper_list])
def softmax(x):
    """
    Softmax of the scores in x.

    The global maximum of x is subtracted before exponentiating so the
    largest exponent is exp(0); the exponentials are then normalised by
    their sum along axis 0.
    """
    peak = np.max(x)
    exp_scores = np.exp(x - peak)
    column_totals = exp_scores.sum(axis=0)
    return exp_scores / column_totals
def save_embedding(dict, paper_list, num_dimen,
                   output_path='../emb/doc_emb.txt'):
    """
    save the final embedding results for each document

    dict: {paper_index, numpy_array}
    paper_list: ordered paper indices; output row i holds the embedding
        stored under paper_list[i]
    num_dimen: number of embedding values per document (used for the
        header line and for the per-row format string)
    output_path: destination file; defaults to the location this
        function has always written to

    File layout: a header line "<number of papers> <num_dimen>", then one
    line per paper consisting of its 1-based position in paper_list
    followed by its embedding values printed with '%1.5f', separated by
    single spaces.

    Raises IndexError if paper_list is empty and KeyError if a paper
    index is missing from dict.
    """
    if len(paper_list) == 0:
        raise IndexError('paper_list is empty; no embeddings to save')

    # Build the whole matrix before touching the file so that a bad
    # lookup does not leave a truncated output file behind. A single
    # vstack call also yields a 2-D matrix for a one-paper list, which
    # the hstack below requires.
    D_matrix = np.vstack([dict[paper] for paper in paper_list])
    row_ids = np.arange(1, len(paper_list) + 1).reshape(-1, 1)
    D_matrix = np.hstack((row_ids, D_matrix))
    row_format = ' '.join(['%i'] + ['%1.5f'] * num_dimen)

    # The context manager guarantees the handle is flushed and closed.
    with open(output_path, 'w') as embedding_file:
        # '\n' rather than os.linesep: the file is opened in text mode,
        # which already translates newlines for the platform.
        embedding_file.write('{} {}\n'.format(len(paper_list), num_dimen))
        np.savetxt(embedding_file, D_matrix, fmt=row_format)