In [1]:
import numpy as np
import pandas as pd
# pd.set_option('max_colwidth', 200)

import re

import spacy
from spacy.language import Language
from spacy.lang.en.stop_words import STOP_WORDS

import warnings
# Suppress DeprecationWarning messages from this point on.
warnings.filterwarnings("ignore", category=DeprecationWarning)

from collections import Counter

# NOTE(review): the wildcard imports below are the only visible source of
# process_fb_data, split_data, clean_lemmas and run_lda_model, which later
# cells call; which module defines which helper cannot be told from here.
# Prefer explicit `from module import name` once that has been confirmed.
from src.analyze import *
from src.vectorize import *
from src.fb_model import *
from src.model import *
from eda import *

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Single VADER analyzer instance, created when this cell runs.
# NOTE(review): `analyzer` is not referenced in the visible cells - confirm it
# is used further down before keeping it.
analyzer = SentimentIntensityAnalyzer()

import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel

In [2]:
# Load spaCy's medium English pipeline.
# NOTE(review): `nlp` is not referenced again in the visible cells - confirm a
# later cell needs it, since loading the model is not free.
nlp = spacy.load('en_core_web_md')

# process_fb_data is a project helper pulled in by one of the wildcard imports;
# the cells below index its result with the 'docs' and 'ner' keys.
data = process_fb_data()

# Store, for every item of data['docs'], that item's `.ents` attribute
# (presumably spaCy Doc objects and their named entities - TODO confirm).
data['ner'] = [parsed_doc.ents for parsed_doc in data['docs']]

In [4]:
# Split the processed data into the two subsets analysed below (liberal and
# conservative, going by the variable names and section headers).
# NOTE(review): the execution counter jumps from In [2] to In [4], so a cell
# was run and then removed or reordered - confirm the notebook still works
# under Restart Kernel -> Run All.
lib_data, con_data = split_data(data)

## Liberal Data

In [5]:
# Derive the 'lemmas' entry from the liberal subset's 'docs' via the project
# helper clean_lemmas (presumably cleaned lemma tokens per document - confirm).
# NOTE(review): if split_data returns a view of `data` rather than a copy, this
# assignment may raise pandas' SettingWithCopyWarning - verify against split_data.
lib_data['lemmas'] = clean_lemmas(lib_data['docs'])
# NOTE(review): the positional arguments 10, 30, 10 are unexplained here; name
# them (or pass them as keywords) once run_lda_model's signature is checked.
# 'test_lib' is presumably a label or output name for this run - confirm.
# No seed is visible in this call; unless run_lda_model fixes one internally,
# the topics will differ between runs.
lib_lda_model = run_lda_model(lib_data['lemmas'], 10, 30, 10, 'test_lib')
# Collect the model's topics, each described by its 10 highest-weighted words.
lib_topics = list(lib_lda_model.print_topics(num_words=10))

In [6]:
# Display the liberal topics as (topic_id, weighted-term string) pairs.
lib_topics

[(0,
  '0.019*"Senate" + 0.018*"Mitch" + 0.017*"right" + 0.017*"fight" + 0.016*"McConnell" + 0.015*"abortion" + 0.011*"gun" + 0.011*"need" + 0.010*"year" + 0.009*"help"'),
 (1,
  '0.039*"Trump" + 0.014*"need" + 0.013*"Donald" + 0.013*"President" + 0.013*"people" + 0.012*"know" + 0.010*"add" + 0.009*"want" + 0.008*"big" + 0.008*"join"'),
 (2,
  '0.016*"need" + 0.016*"climate" + 0.013*"New" + 0.012*"help" + 0.011*"fight" + 0.009*"change" + 0.009*"Congress" + 0.009*"Stacey" + 0.009*"Green" + 0.008*"Deal"'),
 (3,
  '0.022*"campaign" + 0.021*"help" + 0.017*"win" + 0.015*"chip" + 0.013*"donation" + 0.011*"need" + 0.010*"million" + 0.009*"right" + 0.009*"today" + 0.009*"people"'),
 (4,
  '0.013*"people" + 0.012*"refugee" + 0.010*"child" + 0.010*"know" + 0.010*"day" + 0.010*"year" + 0.009*"state" + 0.009*"help" + 0.009*"mom" + 0.009*"today"'),
 (5,
  '0.025*"vote" + 0.019*"voter" + 0.014*"Trump" + 0.013*"election" + 0.012*"Beto" + 0.011*"November" + 0.010*"poll" + 0.010*"join" + 0.010*"state" 

## Conservative Data

In [7]:
# Same pipeline as the liberal cell, applied to the conservative subset.
# NOTE(review): this cell duplicates the liberal one with only the subset and
# the label changed; a small helper taking (subset, label) would remove the
# copy-paste.
# NOTE(review): if split_data returns a view of `data` rather than a copy, this
# assignment may raise pandas' SettingWithCopyWarning - verify against split_data.
con_data['lemmas'] = clean_lemmas(con_data['docs'])
# NOTE(review): the positional arguments 10, 30, 10 are unexplained here; name
# them (or pass them as keywords) once run_lda_model's signature is checked.
# 'test_con' is presumably a label or output name for this run - confirm.
# No seed is visible in this call; unless run_lda_model fixes one internally,
# the topics will differ between runs.
con_lda_model = run_lda_model(con_data['lemmas'], 10, 30, 10, 'test_con')
# Collect the model's topics, each described by its 10 highest-weighted words.
con_topics = list(con_lda_model.print_topics(num_words=10))

In [8]:
# Display the conservative topics as (topic_id, weighted-term string) pairs.
# NOTE(review): the saved output contains tokens such as "https", "com",
# "didn", "doesn" and "don" - URL fragments and contraction stems appear to
# survive clean_lemmas; consider filtering them before fitting the model.
con_topics

[(0,
  '0.025*"need" + 0.017*"Trump" + 0.015*"want" + 0.011*"President" + 0.011*"Joe" + 0.011*"say" + 0.010*"news" + 0.010*"fight" + 0.009*"sign" + 0.009*"Media"'),
 (1,
  '0.026*"today" + 0.022*"help" + 0.019*"time" + 0.014*"gift" + 0.011*"country" + 0.011*"people" + 0.011*"run" + 0.009*"fight" + 0.009*"know" + 0.009*"public"'),
 (2,
  '0.038*"vote" + 0.023*"com" + 0.022*"https" + 0.017*"Trump" + 0.014*"Don" + 0.013*"voter" + 0.013*"keepandbear" + 0.013*"didn" + 0.013*"gop" + 0.012*"Dennis"'),
 (3,
  '0.043*"child" + 0.040*"need" + 0.033*"food" + 0.033*"care" + 0.028*"help" + 0.027*"water" + 0.027*"provide" + 0.025*"clean" + 0.024*"doesn" + 0.018*"day"'),
 (4,
  '0.021*"people" + 0.018*"american" + 0.016*"think" + 0.015*"Democrats" + 0.014*"tell" + 0.014*"know" + 0.014*"official" + 0.011*"America" + 0.011*"President" + 0.010*"Trump"'),
 (5,
  '0.041*"want" + 0.029*"stand" + 0.022*"official" + 0.020*"change" + 0.018*"don" + 0.017*"November" + 0.016*"Republican" + 0.016*"Trump" + 0.016*