forked from JasonKessler/scattertext
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo_phrase_machine.py
26 lines (23 loc) · 1.25 KB
/
demo_phrase_machine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import spacy
from scattertext import SampleCorpora, PhraseMachinePhrases
from scattertext import produce_scattertext_explorer
from scattertext.CorpusFromPandas import CorpusFromPandas
from scattertext.termcompaction.CompactTerms import CompactTerms
# Load the sample data frame bundled with scattertext. The code below reads
# its 'party', 'text' and 'speaker' columns.
convention_df = SampleCorpora.ConventionData2012.get_data()

# Feature extractor handed to the corpus builder in place of its default.
phrase_extractor = PhraseMachinePhrases()

# NOTE(review): the 'en' shortcut and the parser=False keyword look like an
# older spaCy API -- confirm against the installed spaCy version.
nlp = spacy.load('en', parser=False)

# Build the corpus: one document per row of convention_df, categorised by
# the 'party' column, with features taken from the 'text' column.
corpus_factory = CorpusFromPandas(
    convention_df,
    category_col='party',
    text_col='text',
    feats_from_spacy_doc=phrase_extractor,
    nlp=nlp,
)
uncompacted_corpus = corpus_factory.build()

# Apply CompactTerms with minimum_term_count=2 to the built corpus
# (presumably drops rare and redundant terms -- verify in scattertext docs).
term_compactor = CompactTerms(minimum_term_count=2)
corpus = uncompacted_corpus.compact(term_compactor)
# Keyword options for the explorer, collected in one place.
# 'democrat' is the category value compared against everything else; the two
# *_name entries are the display labels passed for each side.
explorer_options = dict(
    category='democrat',
    category_name='Democratic',
    not_category_name='Republican',
    minimum_term_frequency=2,
    pmi_threshold_coefficient=0,
    width_in_pixels=1000,
    # The 'speaker' column is passed as the metadata argument.
    metadata=convention_df['speaker'],
)

# Render the corpus to a string holding the explorer page.
html = produce_scattertext_explorer(corpus, **explorer_options)
# Write the rendered page as UTF-8 bytes. The context manager guarantees the
# file is flushed and closed even if the write raises, instead of leaving the
# handle for the garbage collector to clean up.
with open('./demo_phrase_machine.html', 'wb') as out_file:
    out_file.write(html.encode('utf-8'))
print('Open ./demo_phrase_machine.html in Chrome or Firefox.')