In [1]:
import numpy as np
import pandas as pd
# pd.set_option('max_colwidth', 200)

import re

import spacy
from spacy.language import Language
from spacy.lang.en.stop_words import STOP_WORDS

import warnings
# Ignore DeprecationWarning for everything executed after this line. The filter
# is installed after spaCy has been imported but before the project-local and
# gensim imports below.
warnings.filterwarnings("ignore", category=DeprecationWarning)

from collections import Counter

# NOTE(review): wildcard imports. process_fb_data, split_data, clean_lemmas and
# run_lda_model, which the cells below call, are not defined in this notebook,
# so they presumably come from one of these modules — confirm which, and
# import them by name. A later wildcard import silently overrides any
# same-named object brought in by an earlier one.
from src.analyze import *
from src.vectorize import *
from src.fb_model import *
from src.model import *
from eda import *

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Notebook-level VADER analyzer instance. It is not referenced by any of the
# cells visible in this part of the notebook.
analyzer = SentimentIntensityAnalyzer()

import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel

In [2]:
# Load the spaCy pipeline named 'en_core_web_md'.
nlp = spacy.load('en_core_web_md')

# Build the working dataset with the project helper (takes no arguments).
data = process_fb_data()

# Add an 'ner' column holding the `.ents` attribute of every item in
# data['docs'], in the same order.
# NOTE(review): presumably the items are spaCy Doc objects, making `.ents` the
# recognised named-entity spans — confirm against process_fb_data.
data['ner'] = [parsed_doc.ents for parsed_doc in data['docs']]

In [3]:
# Partition the dataset into two subsets via the project helper.
# NOTE(review): the variable names suggest (liberal, conservative) — confirm
# that split_data returns them in this order.
lib_data, con_data = split_data(data)

## Liberal Data

In [4]:
# Derive the 'lemmas' column of the liberal subset from its 'docs' column.
lib_data['lemmas'] = clean_lemmas(lib_data['docs'])

# Run the project's LDA helper on those lemmas.
# NOTE(review): the positional 20, 20, 10 and the 'test_lib' string are passed
# through unchanged; what each one controls is not visible from this notebook —
# confirm against run_lda_model's signature.
lib_lda_model = run_lda_model(
    lib_data['lemmas'],
    20,
    20,
    10,
    'test_lib',
)

# Collect the per-topic summaries (10 words per topic) into a list.
lib_topics = [*lib_lda_model.print_topics(num_words=10)]

In [5]:
# Display the liberal topic summaries built in the previous cell
# (requested with num_words=10).
lib_topics

[(0,
  '0.021*"crisis" + 0.012*"people" + 0.012*"help" + 0.012*"company" + 0.011*"million" + 0.010*"climate" + 0.010*"government" + 0.009*"need" + 0.008*"stand" + 0.008*"work"'),
 (1,
  '0.032*"McConnell" + 0.032*"Senate" + 0.031*"Mitch" + 0.019*"help" + 0.013*"defeat" + 0.011*"win" + 0.011*"right" + 0.011*"poll" + 0.010*"race" + 0.009*"Trump"'),
 (2,
  '0.017*"big" + 0.016*"election" + 0.016*"state" + 0.015*"tool" + 0.013*"cut" + 0.013*"day" + 0.013*"answer" + 0.011*"country" + 0.011*"candidate" + 0.010*"support"'),
 (3,
  '0.036*"vote" + 0.034*"Court" + 0.033*"Supreme" + 0.031*"Kavanaugh" + 0.023*"Collins" + 0.019*"Brett" + 0.017*"Susan" + 0.015*"Senator" + 0.015*"right" + 0.014*"like"'),
 (4,
  '0.031*"Joe" + 0.029*"sticker" + 0.024*"sign" + 0.024*"Biden" + 0.018*"support" + 0.018*"vote" + 0.015*"petition" + 0.012*"free" + 0.010*"Intercept" + 0.009*"Americans"'),
 (5,
  '0.044*"Trump" + 0.022*"need" + 0.018*"Donald" + 0.017*"vote" + 0.015*"know" + 0.013*"President" + 0.011*"House" +

## Conservative Data

In [6]:
# Derive the 'lemmas' column of the conservative subset from its 'docs' column.
con_data['lemmas'] = clean_lemmas(con_data['docs'])

# Run the project's LDA helper on those lemmas.
# NOTE(review): the positional 20, 20, 10 and the 'test_con' string are passed
# through unchanged; what each one controls is not visible from this notebook —
# confirm against run_lda_model's signature.
con_lda_model = run_lda_model(
    con_data['lemmas'],
    20,
    20,
    10,
    'test_con',
)

# Collect the per-topic summaries (10 words per topic) into a list.
con_topics = [*con_lda_model.print_topics(num_words=10)]

In [7]:
# Display the conservative topic summaries built in the previous cell
# (requested with num_words=10).
con_topics

[(0,
  '0.038*"vote" + 0.024*"Democrats" + 0.023*"american" + 0.020*"sign" + 0.019*"people" + 0.018*"support" + 0.017*"let" + 0.016*"didn" + 0.016*"Don" + 0.015*"gop"'),
 (1,
  '0.030*"day" + 0.026*"Trump" + 0.021*"watch" + 0.021*"imagine" + 0.021*"child" + 0.019*"CRTV" + 0.018*"stand" + 0.018*"free" + 0.018*"help" + 0.015*"medium"'),
 (2,
  '0.017*"care" + 0.016*"candidate" + 0.012*"fight" + 0.012*"conservative" + 0.011*"health" + 0.010*"date" + 0.010*"feel" + 0.010*"record" + 0.009*"deadline" + 0.009*"deliver"'),
 (3,
  '0.064*"child" + 0.048*"food" + 0.035*"water" + 0.034*"help" + 0.033*"clean" + 0.032*"care" + 0.031*"need" + 0.029*"provide" + 0.027*"doesn" + 0.026*"go"'),
 (4,
  '0.049*"Trump" + 0.048*"vote" + 0.047*"want" + 0.044*"know" + 0.037*"November" + 0.035*"President" + 0.032*"Republicans" + 0.030*"6th" + 0.028*"official" + 0.028*"commit"'),
 (5,
  '0.026*"com" + 0.022*"https" + 0.022*"prageru" + 0.022*"woman" + 0.019*"sign" + 0.018*"girl" + 0.016*"yard" + 0.016*"think" + 0