In [1]:
from ast import literal_eval
from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel, ldamodel, LdaMulticore
from gensim.parsing.preprocessing import STOPWORDS
from gensim.test.utils import common_corpus, common_texts, common_dictionary
from gensim.utils import simple_preprocess
from nltk import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem.porter import *
from nltk.stem.snowball import SnowballStemmer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from statistics import mean
import gensim
import nltk
import numpy as np
import os
import pandas as pd
import random
import warnings
warnings.filterwarnings("ignore")


In [2]:
stemmer = SnowballStemmer("english")
lemmatizer = WordNetLemmatizer()

In [3]:
overview = pd.read_csv("debate_data/data_overviews/overview_1.csv")

def string_2_row(df_w_col_name):
    count = 0
    for row in df_w_col_name:
        df_w_col_name[count] = literal_eval(row)
        count+=1
        
string_2_row(overview["moderator"])
string_2_row(overview["participants"])

In [4]:
# create stop_words list to be removed when LDA analysis is being conducted

nltk_stop = list(set(stopwords.words('english')))

political_stop_words = ["president", "american", "america", "govern", "government", "campaign",\
                        "country","applause","question","governor", "congresswoman", "congress",\
                       "congressman", "partisan", "republican", "democrat", "DNC","GOP","liberal",\
                       "conservative", "senate", "state", "left","right", "nomination","parti","party",
                       "democraci","constitut","politician","political","politics","pols","politi","federal",\
                       "feder", "polici","state","minister","govern","caucus","primary"]
                       
other_stop_words = ["think", "thank", "you","would", "peopl","work","reform", "family", "administr",\
                    "program","thing","peopl", "support","person","progress",\
                    "nominate""nomination","court","argument","process","man","men","woman","women", "legisl",\
                   "languag", "respect","report", "place", "today","title","issu","issue","titl", "year",\
                   "system", "record","world","laughter","laugh","farmer","earmark","office","offic",
                   "power","church", "someth", "reason", "promise", "promis", "tonight","everybodi","formosa",
                   "someon", "creat", "simpl", "percent", "faith", "mean", "need", "pocket" "percent", \
                    "secretari", "point", "noth", "ground", "build", "public", "middl", "mean", "centuri",\
                    "minut", "becom", "answer", "matter", "folk", "cours", "mean", "order" , "percent",\
                    "term", "posit", "communiti", "health", "mandat", "children", "public", "speech", \
                    "nomine", "differ", "month", "point", "everyon", "stand","everi","believ", "opportun",\
                   "understand","bring","choice", "histori", "servic", "background", "check", "nation", \
                    "littl", "friend", "critic","inequ", "realli", "somebodi", "problem", "share", "challeng", \
                    "togeth", "street", "start", "continu", "improv", "trust", "provid", "decis"]

names = ["michael", "donald", "chris", 'hillary', 'david', 'martha', 'hillari' ,"chang","barack","putin","jeb"]

for mods in overview["moderator"]:
    try:
        for mod in mods:
            names.append(mod)
    except:
        pass

for parts in overview["participants"]:
    try:
        for part in parts:
            names.append(part[:-4])
    except:
        pass

for title in overview["debate_name"]:
    t = title.replace("-"," ").split(" ")
    for z in t:
        names.append(str(z))

names = list(set(names))


states = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', \
          'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', \
          'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Montana', 'Nebraska', 'Nevada', \
          'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', \
          'Oklahoma', 'Oregon', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',\
          'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 
          'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']

splitted_states = []
for s in states:
    for x in s.split(" "):
        splitted_states.append(x)
splitted_states = list(set(splitted_states))

stop_words = list(set(nltk_stop + political_stop_words + other_stop_words + names + splitted_states))

stop_words = [lemmatizer.lemmatize(t) for t in stop_words]
stop_words = [stemmer.stem(t) for t in stop_words]
stop_words = [t.lower() for t in stop_words]

stop_words = list(set(stop_words))

In [6]:
# function below finds the best params for the LDA model

def best_params(filename):
    
    try:
        df = pd.read_csv(directory+"/"+filename)
    except:
        df = filename
        
    processed_docs = []
    
    for row in df["speech"]:
        sentence = []
        for word in row.split(" "):
            x = re.sub('[^a-zA-Z]+', '', word)
            if len(x) > 4:
                if str(nltk.pos_tag([x])[0][1])[0] in ["N","J"]:
                    if x not in stop_words:
                        sentence.append(x)
        processed_docs.append(sentence)
        
    flat_list = []
    for sublist in processed_docs:
        for item in sublist:
            flat_list.append(item)

    vectorizer = TfidfVectorizer(analyzer='word', min_df=10, stop_words='english', lowercase=True,\
                                 token_pattern='[a-zA-Z0-9]{4,}')
    data_vectorized = vectorizer.fit_transform(flat_list)
    
    # Define Search Param
    search_params = {'n_components': [8,10,12,14,15],
                     'learning_decay': [0.6, 0.7, 0.8, 0.9]}

    # Initiate the Model
    lda = LatentDirichletAllocation()

    # Init Grid Search Class
    model = GridSearchCV(lda, param_grid = search_params)

    # Do the Grid Search
    model.fit(data_vectorized)
    
    try:
        best_lda_model = model.best_estimator_
        print("Best Model's Params: ", model.best_params_)
        print("Best Log Likelihood Score: ", model.best_score_)
    except:
        pass

    return (filename, model.best_params_)

In [None]:
# total topics

all_speech = []
directory = "debate_data/debates_part"
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        df = pd.read_csv(directory+"/"+filename)
        for row in df["speech"]:
            all_speech.append(row)

            
df = pd.DataFrame(all_speech, columns = ["speech"])         
            
x = best_params(df)

In [None]:
# best params for debates

samples = []

while(list(set(samples))) < ((0.25)*(len(overview["coder"]))):
    samples.append(overview["coder"][random.randint(0, len(overview["coder"]))])    

print(samples)
    
directory = "debate_data/debates_part"

best_param_results = []
counter = 1
for filename in os.listdir(directory):
    
    if filename.endswith(".csv"):
        
        coder = str(filename.split(".")[0])
        
        if coder in samples:

            print("number ", counter, ":", coder)
            counter+=1
            x = best_params(filename)
            best_param_results.append(x)
            
print("RESULTS FOR DEBATE TOPICS\n")
ds = []
for i in range(len(best_param_results)):
    ds.append(best_param_results[i][1])

d = {}
for i in ds:
    for k in i.keys():
        d[k] = tuple(d[k] for d in ds)

for st,vals in d.items():
    print("Average value for {} is {}".format(st,mean(vals)))
    try:
        print("Std for {} is {}".format(st,np.std(vals)))
    except:
        pass

In [6]:
directory = "debate_data/debates_part"

for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        df = pd.read_csv(directory+"/"+filename)
        coder = str(filename.split(".")[0])

        try:

            processed_docs = []

            for row in df["speech"]:

                sentence = []
                for word in row.split(" "):
                    x = re.sub('[^a-zA-Z]+', '', word)
                    if len(x) > 4:
                        if str(nltk.pos_tag([x])[0][1])[0] in ["N","J"]:
                            if x not in stop_words:
                                sentence.append(x)
                processed_docs.append(sentence)

            dictionary = gensim.corpora.Dictionary(processed_docs)
            dictionary.filter_extremes(no_above = 0.75)
            bow_corpus = [dictionary.doc2bow(doc) for doc in processed_docs]

            lda_model = LdaMulticore(bow_corpus,
                      num_topics = 4,
                      id2word = dictionary,
                      workers = 3,
                      passes = 15,
                      decay = 0.77,
                      minimum_probability = 0.0)        

            top_topics = lda_model.get_document_topics(bow_corpus, minimum_probability=0.0)

            topic_n = []
            topic_share = []
            for i in range(4):
                topic_n.append(i)
                count = 0
                n = 0
                for x in top_topics:
                    count += x[i][1]
                    n += 1
                avg = count / n
                topic_share.append(avg)

            sum_terms = []
            topic_terms = []

            for i in lda_model.print_topics(num_words = 8):
                words = []
                value = 0
                out_list = i[1].split("+")
                for x in out_list:
                    num = literal_eval(x.split("*")[0][1:])
                    value+=num

                sum_terms.append(value)

                for x in out_list:
                    t = x.replace('"', " ").strip(" ").split("*")[1]
                    words.append(t.strip(" "))
                stringed = ', '.join(words)
                topic_terms.append([stringed])

            df = pd.DataFrame(list(zip(topic_share, topic_terms, sum_terms)),
                              columns = ["topic_share", "topic_terms","sum_terms"]) 

            print(coder, "\n", df ,"\n")
            
            ###SAVE LDA MODEL UNIQUE FOR EACH DEBATE
            
        except:
            print("\n********", coder, " failed to be modelled \n")

D2000jan08 
    topic_share                                        topic_terms  sum_terms
0     0.390899  [medicar, import, secur, insur, protect, medic...      0.734
1     0.214880  [fight, second, futur, leadership, special, in...      0.818
2     0.256049  [school, teacher, propos, class, number, learn...      0.770
3     0.138170  [agricultur, access, money, futur, import, int...      0.817 

G1988oct05 
    topic_share                                        topic_terms  sum_terms
0     0.195656  [trade, market, foreign, tough, dollar, intere...      0.803
1     0.245706  [georg, commit, interest, futur, econom, major...      0.751
2     0.191602  [lloyd, futur, leadership, dollar, protect, ex...      0.716
3     0.367034  [secur, situat, defens, deficit, qualif, budge...      0.614 

R2007sep27 
    topic_share                                        topic_terms  sum_terms
0     0.217552  [individu, respons, militari, justic, equal, m...      0.710
1     0.254683  [import, school, 

D2000jan25 
    topic_share                                        topic_terms  sum_terms
0     0.210137  [import, prescript, negat, attack, benefit, ac...      0.907
1     0.380391  [medicaid, money, insur, propos, financ, medic...      0.876
2     0.169245  [fight, special, interest, import, money, insu...      0.918
3     0.240122  [budget, attack, negat, second, propos, medica...      0.887 

R2015aug06 
    topic_share                                        topic_terms  sum_terms
0     0.244017  [growth, econom, protect, actual, respons, cle...      0.587
1     0.300302  [secur, border, social, money, budget, elect, ...      0.689
2     0.191240  [proud, total, leader, illeg, israel, father, ...      0.705
3     0.264246  [militari, fight, megyn, chief, cours, command...      0.610 

R2007jun05 
    topic_share                                        topic_terms  sum_terms
0     0.254402  [nuclear, money, terror, weapon, major, fight,...      0.538
1     0.252882  [militari, saddam

R2011dec10 
    topic_share                                        topic_terms  sum_terms
0     0.249405  [number, insur, money, privat, anyth, model, c...      0.553
1     0.278303  [secur, social, economi, consist, elect, impor...      0.578
2     0.256238  [individu, oughta, obamacar, frank, import, fi...      0.598
3     0.216093  [truth, israel, palestinian, difficult, probab...      0.575 

D1999oct27 
    topic_share                                        topic_terms  sum_terms
0     0.367041  [school, financ, money, propos, import, though...      0.828
1     0.131286  [propos, futur, spend, money, school, import, ...      0.829
2     0.193962  [fight, tipper, financ, strong, spend, futur, ...      0.936
3     0.307700  [leader, import, futur, second, fight, thought...      0.959 

G2012oct11 
    topic_share                                        topic_terms  sum_terms
0     0.280148  [sanction, weapon, nuclear, troop, fight, rega...      0.559
1     0.307338  [class, incom, sm

R2012jan26 
    topic_share                                        topic_terms  sum_terms
0     0.266352  [freddi, actual, economi, fanni, medicin, seco...      0.452
1     0.244241  [legal, immigr, deport, illeg, grandmoth, sect...      0.477
2     0.266025  [insur, import, involv, space, particular, spe...      0.458
3     0.223384  [space, money, invest, trade, secur, actual, s...      0.499 

R2015nov10 
    topic_share                                        topic_terms  sum_terms
0     0.257913  [budget, militari, spend, import, china, dolla...      0.599
1     0.258982  [money, actual, bigger, energi, compani, dolla...      0.464
2     0.223632  [economi, growth, econom, higher, control, rep...      0.624
3     0.259455  [elect, capit, money, anyth, syria, frank, exe...      0.479 

G1984oct11 
    topic_share                                        topic_terms  sum_terms
0     0.246328  [soviet, union, negoti, nuclear, leader, reduc...      0.815
1     0.291171  [futur, interest,

D2008jan31 
    topic_share                                        topic_terms  sum_terms
0     0.269165  [immigr, economi, actual, comprehens, approach...      0.517
1     0.199464  [afford, insur, interest, number, actual, impe...      0.554
2     0.154938  [compani, insur, certain, negoti, import, begi...      0.590
3     0.376432  [import, clear, concern, respons, interest, ge...      0.479 

R2016mar10 
    topic_share                                        topic_terms  sum_terms
0     0.257037  [secur, social, muslim, worker, strong, budget...      0.454
1     0.237280  [elect, interest, listen, foreign, fight, fran...      0.521
2     0.257962  [trade, school, negoti, import, contribut, num...      0.478
3     0.246740  [israel, number, china, militari, generat, com...      0.448 

R2000mar02 
    topic_share                                        topic_terms  sum_terms
0     0.224264  [proud, import, clear, speak, school, exampl, ...      0.963
1     0.332374  [money, school, a

R2007nov28 
    topic_share                                        topic_terms  sum_terms
0     0.230999  [crime, mayor, school, number, abort, polic, r...      0.491
1     0.251963  [immigr, illeg, crime, stage, employ, legal, d...      0.520
2     0.188865  [enforc, pledg, spend, messag, general, indivi...      0.440
3     0.328394  [secur, money, militari, spend, anybodi, impor...      0.436 

G1976oct22 
    topic_share                                        topic_terms  sum_terms
0     0.274459  [general, decis, qualifi, militari, indic, sev...      0.625
1     0.169087  [leadership, respons, involv, purpos, present,...      0.606
2     0.238283  [increas, inflat, decis, import, elect, purpos...      0.582
3     0.318206  [unemploy, money, welfar, major, control, incr...      0.535 

D2016mar09 
    topic_share                                        topic_terms  sum_terms
0     0.266465  [industri, incom, school, import, economi, mon...      0.707
1     0.199560  [immigr, comprehe

D2015oct13 
    topic_share                                        topic_terms  sum_terms
0     0.222677  [economi, protect, actual, comprehens, money, ...      0.335
1     0.219386  [immigr, money, young, actual, billionair, inc...      0.378
2     0.252948  [syria, veteran, import, fought, econom, assad...      0.334
3     0.304979  [climat, social, secur, class, address, elect,...      0.281 

R2011sep07 
    topic_share                                        topic_terms  sum_terms
0     0.291560  [secur, social, border, commit, privat, econom...      0.544
1     0.227769  [parent, import, immigr, respons, human, solut...      0.626
2     0.267685  [ronald, interest, budget, border, money, lear...      0.546
3     0.212996  [secur, execut, number, econom, social, probab...      0.669 

R1999dec06 
    topic_share                                        topic_terms  sum_terms
0     0.260084  [trade, china, organ, secur, interest, liberti...      0.680
1     0.209572  [price, elect, fo

In [1]:
from gensim.test.utils import common_corpus, common_dictionary

lda = LdaMulticore(common_corpus,
                   id2word = common_dictionary,
                   num_topics = 5,
                   workers = 3,
                   passes = 10,
                   decay = 0.77)


def processed_docs(filename, count):
        
    df = pd.read_csv(directory+"/"+filename)
    
    processed_docs = []
    
    for row in df["speech"]:
        sentence = []
        for word in row.split(" "):
            x = re.sub('[^a-zA-Z]+', '', word)
            if len(x) > 4:
                if str(nltk.pos_tag([x])[0][1])[0] in ["N","J"]:
                    if x not in stop_words:
                        sentence.append(x)
        processed_docs.append(sentence)
        
    
    dictionary = gensim.corpora.Dictionary(processed_docs)
    dictionary.filter_extremes(no_above = 0.8)
    bow_corpus = [dictionary.doc2bow(doc) for doc in processed_docs]

    for i in range(len(rev_train)):
        top_topics = lda_train.get_document_topics(train_corpus[i], minimum_probability=0.0)
        topic_vec = [top_topics[i][1] for i in range(20)]
        topic_vec.extend([rev_train.iloc[i].real_counts]) # counts of reviews for restaurant
        topic_vec.extend([len(rev_train.iloc[i].text)]) # length review
        train_vecs.append(topic_vec)

    
    
    #lda_model = LdaMulticore(bow_corpus,
    #                 num_topics = 5,
    #                 id2word = dictionary,
    #                 workers = 3,
    #                 passes = 10,
    #                 decay = 0.74,
    #                 minimum_probability = 0.01)        
        
    
    topic_value = []
    for i in lda_model.print_topics():
        words = []
        value = 0
        out_list = i[1].split("+")
        for x in out_list:
            num = literal_eval(x.split("*")[0][1:])
            value+=num
        for x in out_list:
            t = x.replace('"', " ").strip(" ").split("*")[1]
            words.append(t.strip(" "))
        stringed = ','.join(words)
        value = round(value, 5)
        topic_value.append((value, stringed))
    
    topic_value = topic_value.sort(reverse=True)
    
    return topic_value

IndentationError: expected an indented block (<ipython-input-1-b51fa5211882>, line 33)

In [37]:
directory = "debate_data/debates_preprocessed"

iteration = 0
details = []
for filename in os.listdir(directory):
    this_file = []
    if filename.endswith(".csv"):
        print(filename)
        this_file.append(filename)
        result = processed_docs(filename, iteration)
        this_file.append(result)
        for i in result:
            print(i)
    details.append(this_file)
    iteration+=1

D2000jan08.csv
[(0.864, ' financ, control, elect, introduc, improv, trust, protect, medicar, provid, decis'), (0.858, ' medicar, fight, futur, child, budget, dollar, cover, interest, medicaid, protect'), (0.799, ' school, teacher, import, increas, propos, child, access, learn, night, europ'), (0.852, ' money, agricultur, leadership, secur, insur, number, militari, exampl, import, interest')]
G1988oct05.csv
[(0.728, ' interest, import, number, militari, poverti, dollar, advic, train, oppos, contribut'), (0.711, ' trade, experi, secur, budget, deficit, qualif, accomplish, hostag, invest, disagre'), (0.697, ' commit, futur, environment, defens, money, environ, union, protect, child, enforc'), (0.626, ' situat, market, clean, spend, respons, treati, concern, prepar, ticket, leadership')]
R2007sep27.csv
[(0.831, ' penalti, death, crime, school, involv, declar, control, level, round, individu'), (0.683, ' illeg, individu, welfar, immigr, border, anybodi, rememb, societi, economi, author'), (

[(0.5, ' trade, agreement, dollar, wealth, spread, decis, organ, import, focus, fought'), (0.566, ' import, energi, independ, increas, reduc, associ, plumber, floor, thought, fight'), (0.434, ' school, futur, economi, voucher, propos, teacher, crisi, mortgag, econom, disagre'), (0.52, ' spend, insur, money, provid, afford, budget, abort, child, exampl, employ')]
D2000jan25.csv
[(0.751, ' propos, attack, budget, welfar, price, medicar, protect, medicaid, benefit, number'), (0.746, ' child, insur, financ, import, increas, deficit, invest, prescript, welfar, advis'), (0.811, ' money, negat, fight, voucher, provid, attack, school, accept, provis, medicaid'), (0.722, ' choos, interest, incom, medicaid, compani, fought, substanc, exampl, pharmaceut, medic')]
R2015aug06.csv
[(0.614, ' border, elect, leader, respons, negoti, becam, exact, correct, spend, privat'), (0.542, ' immigr, illeg, israel, bless, import, advantag, enforc, frank, facebook, economi'), (0.495, ' militari, money, stage, tru

[(0.654, ' import, secur, number, regul, trust, energi, learn, illeg, singl, yahoo'), (0.603, ' speaker, money, obamacar, privat, insur, spend, amend, societi, growth, frank'), (0.619, ' individu, truth, stage, oughta, israel, respons, elect, interest, outsid, school'), (0.613, ' fight, economi, consist, citizen, model, solut, return, level, histor, decis')]
D1999oct27.csv
[(0.885, ' import, leader, pleas, futur, thought, parent, elect, leadership, generat, favor'), (0.804, ' teacher, child, provid, money, propos, experi, tipper, dartmouth, doctor, school'), (0.877, ' surplus, school, medicar, exampl, insur, spend, money, cover, futur, propos'), (0.892, ' financ, fight, pollut, protect, patient, interest, child, insur, favor, leadership')]
G2012oct11.csv
[(0.542, ' medicar, number, militari, budget, regard, russia, ayatollah, guarante, studi, success'), (0.636, ' weapon, sanction, incom, economi, cathol, attack, marin, hospit, ambassador, faster'), (0.572, ' secur, troop, propos, inter

[(0.47, ' growth, regul, import, repeal, interest, frank, capit, economi, econom, syria'), (0.447, ' economi, balanc, budget, militari, child, china, incom, elect, dream, number'), (0.49, ' dollar, spend, nobodi, leadership, econom, destroy, stori, reserv, marco, maria'), (0.508, ' money, execut, trade, financi, crisi, minimum, energi, import, singl, control')]
G1984oct11.csv
[(0.653, ' union, propos, negoti, interest, everyth, leadership, absolut, address, econom, control'), (0.527, ' leader, protect, deliv, patriot, interest, favor, speak, action, deficit, nicaragua'), (0.565, ' futur, school, agreement, number, lebanon, secur, covert, separ, intellig, suprem'), (0.577, ' terror, embassi, weapon, refer, human, reduc, economi, religion, abort, child')]
G2004oct05.csv
[(0.576, ' weapon, saddam, hussein, import, attack, terrorist, terror, medic, connect, specif'), (0.416, ' effort, terror, afghanistan, divid, signific, respons, success, truth, credibl, elect'), (0.515, ' compani, child,

[(0.678, ' money, china, taiwan, school, messag, comment, cathol, internet, receiv, requir'), (0.791, ' proud, roosevelt, theodor, independ, secur, heart, ronald, ambassador, exampl, messag'), (0.632, ' regret, import, limit, interest, anyth, missil, result, militari, ambassador, defens'), (0.688, ' child, account, trigger, enforc, sorri, parent, respons, polic, robertson, decis')]
G1992oct19.csv
[(0.552, ' money, trade, invest, increas, spend, agreement, incom, respons, glaspi, ambassador'), (0.598, ' econom, economi, trickl, deficit, relev, nobodi, pattern, leadership, suggest, break'), (0.507, ' mistak, spend, night, interest, anyth, budget, industri, saddam, hussein, child'), (0.451, ' thought, standard, elect, crisi, protect, taxpay, credit, lobbi, effici, interest')]
G2008sep26.csv
[(0.415, ' troop, spend, pakistan, crisi, qaeda, precondit, nobodi, elimin, agenc, petraeus'), (0.405, ' strategi, secur, afghanistan, interest, oppos, defeat, compani, judgment, respons, account'), (0

[(0.523, ' increas, indic, decis, respons, period, present, affair, interest, begin, sever'), (0.503, ' amend, handgun, crime, unemploy, sacrific, abort, growth, individu, member, decis'), (0.609, ' money, minor, burger, committe, energi, exampl, statement, enforc, instanc, addit'), (0.573, ' militari, inflat, import, accur, reduc, control, attorney, leadership, unemploy, elect')]
D2016mar09.csv
[(0.649, ' compani, bailout, overthrow, cuban, possibl, terrorist, attack, brother, oppos, learn'), (0.651, ' comprehens, immigr, incom, import, border, energi, number, secur, deserv, econom'), (0.724, ' money, immigr, interest, industri, karen, pleas, afford, respond, rescu, economi'), (0.693, ' child, school, absolut, deport, worker, stori, puerto, latino, everyth, improv')]
D2007oct30.csv
[(0.899, ' experi, child, provid, worker, advoc, specif, prevent, begin, author, secur'), (0.859, ' diplomaci, everyth, action, prevent, militari, author, colleagu, begin, provid, potenti'), (0.845, ' secur

[(0.575, ' privat, import, sector, school, budget, creation, gallon, balanc, gasolin, situat'), (0.586, ' regul, ronald, spend, broken, effort, provid, obamacar, interest, libya, execut'), (0.586, ' economi, solut, respons, market, energi, money, commit, revenu, incom, child'), (0.649, ' secur, border, number, immigr, illeg, write, benefit, experi, control, parent')]
R1999dec06.csv
[(0.698, ' interest, school, worker, hispan, product, number, effort, mother, benefit, money'), (0.722, ' trade, organ, secur, experi, particip, heart, spend, repres, return, encourag'), (0.687, ' money, import, internet, contribut, elect, school, corpor, voter, parent, anyth'), (0.748, ' incom, china, steve, child, price, natur, control, prosper, enjoy, futur')]
R2011sep05.csv
[(0.571, ' money, spend, capit, agenc, econom, adopt, growth, respons, child, corpor'), (0.614, ' union, level, suprem, oppos, bureaucrat, economi, entitl, solut, agenc, market'), (0.61, ' reserv, dollar, lincoln, number, jefferson, h

In [38]:
# manually inspect each topic and translate them to a new file