Skip to content

Commit 55580ea

Browse files
committed
Add Twitter sentiment as a dataset and see examples in the confusion matrix; bug fixes
1 parent 8f84534 commit 55580ea

File tree

8 files changed

+28
-26245
lines changed

8 files changed

+28
-26245
lines changed

build_graph.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def build_text_graph_dataset(dataset, window_size):
5353

5454

5555
def build_edges(doc_list, word_id_map, vocab, word_doc_freq, window_size=20):
56+
# collect the fixed-size word windows from every document
5657
windows = []
5758
for doc_words in doc_list:
5859
words = doc_words.split()
@@ -63,13 +64,15 @@ def build_edges(doc_list, word_id_map, vocab, word_doc_freq, window_size=20):
6364
for i in range(doc_length - window_size + 1):
6465
window = words[i: i + window_size]
6566
windows.append(window)
67+
# count, for each word, the number of windows it appears in
6668
word_window_freq = defaultdict(int)
6769
for window in windows:
6870
appeared = set()
6971
for word in window:
7072
if word not in appeared:
7173
word_window_freq[word] += 1
7274
appeared.add(word)
75+
# count how often each word pair co-occurs within a window
7376
word_pair_count = defaultdict(int)
7477
for window in tqdm(windows):
7578
for i in range(1, len(window)):
@@ -101,7 +104,8 @@ def build_edges(doc_list, word_id_map, vocab, word_doc_freq, window_size=20):
101104
col.append(num_docs + j)
102105
weight.append(pmi)
103106

104-
doc_word_freq = defaultdict(int) # frequency of document word pair
107+
# frequency of each (document, word) pair
108+
doc_word_freq = defaultdict(int)
105109
for i, doc_words in enumerate(doc_list):
106110
words = doc_words.split()
107111
for word in words:

config.py

Lines changed: 0 additions & 160 deletions
This file was deleted.

0 commit comments

Comments
 (0)