## Importing Classes & Libraries

In [86]:
import pickle
import pandas as pd
from nltk import sent_tokenize
from nltk import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer 
import seaborn as sns
import matplotlib.pyplot as plt
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import re
from nltk.tokenize import word_tokenize,sent_tokenize
import nltk 
from sklearn.decomposition import NMF
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import ToktokTokenizer
from nltk.tokenize import regexp_tokenize
import spacy
import gensim.corpora as corpora
from gensim import corpora, models, similarities, matutils
# Plotting tools
import pyLDAvis
import pyLDAvis.gensim  
import matplotlib.pyplot as plt
%matplotlib inline
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import seaborn as sns
import pyLDAvis.sklearn
import warnings
warnings.filterwarnings("ignore")
from __future__ import division
from sklearn.manifold import TSNE
import numpy as np
import os
import pickle

In [87]:
## Defining Functions
# Make sure the NLTK 'stopwords' corpus is available locally before it is read below.
nltk.download('stopwords', quiet=True, raise_on_error=True)
# English stop words as a set; remove_stopwords() tests lowercase tokens against it.
stopword_list = set(nltk.corpus.stopwords.words('english'))
# The same stop words joined into one string and re-tokenized with nltk.word_tokenize.
# NOTE(review): no other cell visible in this notebook reads this name — confirm it is still needed.
tokenized_stop_words = nltk.word_tokenize(' '.join(nltk.corpus.stopwords.words('english')))

In [88]:
tokenizer = ToktokTokenizer()


def remove_stopwords(text):
    """Strip English stop words from every document in ``text``.

    Each document is split with the module-level Toktok ``tokenizer``. Tokens
    are whitespace-stripped and dropped when their lowercase form appears in
    the module-level ``stopword_list``; the survivors are re-joined with
    single spaces.

    Parameters
    ----------
    text : iterable of str
        Documents to filter.

    Returns
    -------
    pandas.Series
        One filtered string per input document, in input order, built from a
        plain list (so it carries a fresh default index).
    """
    def _filter(document):
        stripped = (piece.strip() for piece in tokenizer.tokenize(document))
        # Membership is checked on the lowercase form; original casing is kept in the output
        return ' '.join(piece for piece in stripped if piece.lower() not in stopword_list)

    return pd.Series([_filter(document) for document in text])

In [89]:
def lemma(text, allowed_postags=('NOUN', 'ADJ', 'VERB', 'ADV')):
    """Lemmatize each document, keeping only tokens with an allowed part of speech.

    Parameters
    ----------
    text : iterable of str
        Documents to lemmatize.
    allowed_postags : collection of str, optional
        spaCy coarse POS tags (``token.pos_``) to keep. The default is an
        immutable tuple so it cannot be altered between calls.

    Returns
    -------
    pandas.Series
        One space-joined string of lemmas per input document, in input order,
        on a fresh default index.

    Notes
    -----
    The spaCy model is loaded on every call, with the parser and NER
    components disabled.
    """
    # Load the small English model without the components lemmatization does not need
    tagger = spacy.load("en_core_web_sm", disable=['parser', 'ner'])
    # nlp.pipe streams the documents through the pipeline in batches and yields
    # Docs in input order, instead of one nlp() call per document
    lemmatized = [
        " ".join(token.lemma_ for token in doc if token.pos_ in allowed_postags)
        for doc in tagger.pipe(text)
    ]
    return pd.Series(lemmatized)

In [90]:
# Plotting

def plot_top_words(model, feature_names, n_top_words, title,dim_1,dim_2):
    """Draw one horizontal bar chart of the heaviest terms for each topic.

    Parameters
    ----------
    model : object
        Fitted model exposing ``components_``, iterated one row per topic.
    feature_names : sequence of str
        Term for each column index of ``model.components_``.
    n_top_words : int
        Number of highest-weight terms to show per topic.
    title : str
        Figure-level title.
    dim_1, dim_2 : int
        Rows and columns of the subplot grid.

    Raises
    ------
    IndexError
        If the model has more topics than the ``dim_1 * dim_2`` grid has axes.
    """
    # squeeze=False always returns a 2-D array of Axes, so flatten() also works for a 1x1 grid
    fig, axes = plt.subplots(dim_1, dim_2, figsize=(30, 15), sharex=True, squeeze=False)
    axes = axes.flatten()
    # The title belongs to the figure, so set it once rather than on every loop pass
    fig.suptitle(title, fontsize=40)

    for topic_idx, topic in enumerate(model.components_):
        # argsort is ascending; this reversed slice takes the n_top_words largest weights, biggest first
        top_features_ind = topic.argsort()[:-n_top_words - 1:-1]
        top_features = [feature_names[i] for i in top_features_ind]
        weights = topic[top_features_ind]

        ax = axes[topic_idx]
        ax.barh(top_features, weights, height=0.7)
        ax.set_title(f'Topic {topic_idx +1}',
                     fontdict={'fontsize': 30})
        # Put the heaviest term at the top of each panel
        ax.invert_yaxis()
        ax.tick_params(axis='both', which='major', labelsize=20)
        for side in 'top right left'.split():
            ax.spines[side].set_visible(False)

    fig.subplots_adjust(top=0.90, bottom=0.05, wspace=0.90, hspace=0.3)
    # The former fig.tight_layout() call came after plt.show(), i.e. after the
    # figure had already been rendered, so it was dropped.
    plt.show()

## Import Data

In [91]:
# Load the pickled DataFrame produced by the cleaning notebook.
# NOTE: pickle.load can execute arbitrary code - only unpickle files you created yourself.
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\cleaned_addresses.pkl"

# The context manager closes the file handle as soon as the frame is loaded
with open(path, 'rb') as f:
    df = pickle.load(f)
df.head(100)

Unnamed: 0,president_number,term,pres_name,pres_det,president_x,address,party,Year,time_period
0,1,1,Washington,1 Washington,01.Washington.1.txt,AMONG the vicissitudes incident to life no eve...,Nonpartisan,1789,pre-1800
1,1,2,Washington,2 Washington,01.Washington.2.txt,I AM again called upon by the voice of my coun...,Nonpartisan,1793,pre-1800
2,2,1,JAdams,1 JAdams,02.JAdams.1.txt,"WHEN it was first perceived, in early times, ...",Federalist,1797,pre-1800
3,3,1,Jefferson,1 Jefferson,03.Jefferson.1.txt,CALLED upon to undertake the duties of the fi...,Democratic-Republican,1801,1800-1850
4,3,2,Jefferson,2 Jefferson,03.Jefferson.2.txt,"PROCEEDING, fellow-citizens, to that qualific...",Democratic-Republican,1805,1800-1850
5,4,1,Madison,1 Madison,04.Madison.1.txt,UNWILLING to depart from examples of the most...,Democratic-Republican,1809,1800-1850
6,4,2,Madison,2 Madison,04.Madison.2.txt,ABOUT to add the solemnity of an oath to the o...,Democratic-Republican,1813,1800-1850
7,5,1,Monroe,1 Monroe,05.Monroe.1.txt,I SHOULD be destitute of feeling if I was not ...,Democratic-Republican,1817,1800-1850
8,5,2,Monroe,2 Monroe,05.Monroe.2.txt,I SHALL not attempt to describe the grateful...,Democratic-Republican,1821,1800-1850
9,6,1,JQAdams,1 JQAdams,06.JQAdams.1.txt,IN compliance with an usage coeval with the e...,Democratic-Republican,1825,1800-1850


## Locating Additional Stop Words

In [92]:
# Take the raw address column, strip stop words, then lemmatize
text = lemma(remove_stopwords(df['address']))

# Bag-of-words over unigrams and bigrams; the token pattern matches runs of
# word characters that contain no digits
vectorizer = CountVectorizer(
    analyzer='word',
    token_pattern=r'\b[^\d\W]+\b',
    ngram_range=(1, 2),
    stop_words='english',
)

# Learn the vocabulary and build the document-term matrix in one pass
x = vectorizer.fit_transform(text)

In [93]:
# Densify the document-term matrix returned by the vectorizer so it can be shown as a DataFrame
x_back = x.toarray()

In [94]:
# View the document-term matrix: one row per address, one column per unigram/bigram.
# get_feature_names() was removed in scikit-learn 1.2, so prefer
# get_feature_names_out() and fall back only on older releases.
feature_names = (vectorizer.get_feature_names_out()
                 if hasattr(vectorizer, 'get_feature_names_out')
                 else vectorizer.get_feature_names())

pd.DataFrame(x_back, columns=feature_names).head()

Unnamed: 0,abandon,abandon act,abandon claim,abandon delusion,abandon enemy,abandon government,abandon great,abandon habit,abandon hope,abandon indignantly,...,zealous unceasing,zealously,zealously contend,zealously devote,zealously devoted,zealously enforce,zealously steadily,zealously unite,zone,zone extend
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [95]:
# Rank every vocabulary term by its total count across all addresses

sum_words = x.sum(axis=0)
words_freq = [(term, sum_words[0, column]) for term, column in vectorizer.vocabulary_.items()]
# Sort a copy so words_freq keeps its vocabulary order; list.sort is stable like sorted()
top_words = list(words_freq)
top_words.sort(key=lambda pair: pair[1], reverse=True)
#top_words

In [96]:
# Corpus-specific stop words layered on top of NLTK's English list
newStopWords = ['thing','year','ago','people','nation','states', 'make','long','come','day','know','day','way','fellow'
               ,'americans','citizens','citizen','united','america','shall','must','may','upon','every','let','one','would','great']
# NOTE: this rebinds `stopwords` (the name imported from nltk.corpus at the top) to a plain list;
# later cells pass this list to CountVectorizer(stop_words=...)
stopwords = nltk.corpus.stopwords.words('english') + newStopWords

## Create a new dataframe where each address is broken down by sentence

In [97]:
# Load en_core_web_sm with nothing disabled; the following cell reads Doc.sents from it to split sentences
nlp = spacy.load("en_core_web_sm")

In [98]:
# Replace each address string with the list of its sentence texts as segmented by spaCy.
# NOTE(review): this overwrites df['address'] in place, so the column holds lists afterwards and
# re-running the cell would hand lists (not strings) to nlp — restart from the load cell instead.
df['address'] = df['address'].apply(lambda x: [sent.text for sent in nlp(x).sents])

In [99]:
# One row per sentence: explode the per-address sentence lists and renumber the index from 0
df_sentences = df.explode("address", ignore_index=True)

In [100]:
# Inspect the column names of the sentence-level frame
df_sentences.columns

Index(['president_number', 'term', 'pres_name', 'pres_det', 'president_x',
       'address', 'party', 'Year', 'time_period'],
      dtype='object')

In [101]:
# Rename the "Unnamed: 0" column (if present) and name the index "Sentence ID".
# NOTE(review): this acts on `df` (one row per address), not on `df_sentences`, and the column
# listing printed above contains no "Unnamed: 0" entry — presumably df_sentences was intended; verify.
df.rename(columns={"Unnamed: 0": "Dialogue ID"}, inplace=True)
df.index.name = "Sentence ID"

In [102]:
# Spot-check a single sentence by its index label
df_sentences['address'][5842]

'Sustained by faith, driven by conviction and devoted to one another and the country we love with all our hearts.'

### NMF Topic Modelling by Sentence

In [103]:
# Sentence-level corpus: the exploded 'address' column, one sentence per entry
text = df_sentences['address']

In [104]:
# Remove stop words from every sentence (returns a new Series; `text` is rebound to it)
text = remove_stopwords(text)

In [105]:
# Lemmatize every sentence, keeping only the POS tags lemma() allows by default
text = lemma(text)

In [106]:
# Number of sentences left after preprocessing
text.shape

(5844,)

In [107]:
# Unigram + bigram bag-of-words over the sentence corpus, filtered with the extended stop-word list
vectorizer = CountVectorizer(
    analyzer='word',
    token_pattern=r'\b[^\d\W]+\b',
    ngram_range=(1, 2),
    stop_words=stopwords,
)
doc_word = vectorizer.fit_transform(text)

In [108]:
# Factorize the sentence-term matrix into 20 topics.
# random_state is pinned so the factorization, and therefore the topic order
# read off below, is the same on every Restart & Run All.
nmf_model = NMF(n_components=20, random_state=42)
doc_topic = nmf_model.fit_transform(doc_word)
doc_topic.shape

(5844, 20)

The **doc_topic** matrix has one row per document (here, one per sentence) and one column per topic: each row shows how strongly that document loads on each of the 20 resulting topics. We don't know yet what the topics are.

In [109]:
# Display the document-topic weights produced by nmf_model.fit_transform
doc_topic

array([[0.00025527, 0.00825237, 0.00181139, ..., 0.00153946, 0.03817631,
        0.02225889],
       [0.0981657 , 0.02302109, 0.        , ..., 0.        , 0.        ,
        0.01273704],
       [0.0961081 , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.09273179, 0.02023127, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.00072759, 0.        , 0.        , ..., 0.00460196, 0.00435538,
        0.00118353]])

As shown above, each row of the **doc_topic** matrix describes how one document is made up of the 20 resulting topics. We still don't know what the topics are; for that we look at the topic-word matrix next.

In [110]:
# Topic-term weights of the fitted model; the shape is shown as the cell output
topic_word = nmf_model.components_
topic_word.shape 

(20, 49952)

The **topic_word** matrix shows us the resulting topics, and the terms that are associated with each topic. By looking at the words below, we can figure out what the topics are.

In [111]:
# Vocabulary terms in column order.
# get_feature_names() was removed in scikit-learn 1.2, so prefer
# get_feature_names_out() and fall back only on older releases.
words = list(vectorizer.get_feature_names_out()
             if hasattr(vectorizer, 'get_feature_names_out')
             else vectorizer.get_feature_names())
# argsort is ascending, so the reversed slice -1:-7:-1 picks the 6 heaviest terms per topic, biggest first
t = nmf_model.components_.argsort(axis=1)[:,-1:-7:-1]
topic_words = [[words[term_idx] for term_idx in topic_row] for topic_row in t]
topic_words

[['country', 'industry', 'foreign', 'whole', 'part', 'section'],
 ['spirit', 'honor', 'form', 'love', 'preserve', 'wish'],
 ['world', 'new', 'old', 'free', 'freedom', 'new world'],
 ['peace', 'world', 'equally', 'free', 'peace world', 'policy'],
 ['government', 'self', 'self government', 'free', 'local', 'form'],
 ['power', 'grant', 'exercise', 'give', 'executive', 'state'],
 ['law', 'enforce', 'equal', 'pass', 'respect', 'execute'],
 ['man', 'free', 'woman', 'man woman', 'life', 'hand'],
 ['liberty', 'right', 'foreign', 'stand', 'authority', 'resource'],
 ['hope', 'american', 'find', 'future', 'support', 'many'],
 ['public', 'service', 'expenditure', 'debt', 'money', 'opinion'],
 ['time', 'history', 'first', 'change', 'need', 'first time'],
 ['well', 'give', 'never', 'life', 'opportunity', 'system'],
 ['good', 'effort', 'common', 'office', 'secure', 'ability'],
 ['duty', 'call', 'take', 'high', 'principle', 'office'],
 ['right', 'respect', 'equal', 'constitutional', 'protect', 'mainta

## Topic Modelling by Paragraph

In [142]:
# Reload the cleaned addresses into a separate frame for the paragraph-level analysis.
# NOTE: pickle.load can execute arbitrary code - only unpickle files you created yourself.
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\cleaned_addresses.pkl"

# The context manager closes the file handle as soon as the frame is loaded
with open(path, 'rb') as f:
    df_par = pickle.load(f)
df_par.head(100)

Unnamed: 0,president_number,term,pres_name,pres_det,president_x,address,party,Year,time_period
0,1,1,Washington,1 Washington,01.Washington.1.txt,AMONG the vicissitudes incident to life no eve...,Nonpartisan,1789,pre-1800
1,1,2,Washington,2 Washington,01.Washington.2.txt,I AM again called upon by the voice of my coun...,Nonpartisan,1793,pre-1800
2,2,1,JAdams,1 JAdams,02.JAdams.1.txt,"WHEN it was first perceived, in early times, ...",Federalist,1797,pre-1800
3,3,1,Jefferson,1 Jefferson,03.Jefferson.1.txt,CALLED upon to undertake the duties of the fi...,Democratic-Republican,1801,1800-1850
4,3,2,Jefferson,2 Jefferson,03.Jefferson.2.txt,"PROCEEDING, fellow-citizens, to that qualific...",Democratic-Republican,1805,1800-1850
5,4,1,Madison,1 Madison,04.Madison.1.txt,UNWILLING to depart from examples of the most...,Democratic-Republican,1809,1800-1850
6,4,2,Madison,2 Madison,04.Madison.2.txt,ABOUT to add the solemnity of an oath to the o...,Democratic-Republican,1813,1800-1850
7,5,1,Monroe,1 Monroe,05.Monroe.1.txt,I SHOULD be destitute of feeling if I was not ...,Democratic-Republican,1817,1800-1850
8,5,2,Monroe,2 Monroe,05.Monroe.2.txt,I SHALL not attempt to describe the grateful...,Democratic-Republican,1821,1800-1850
9,6,1,JQAdams,1 JQAdams,06.JQAdams.1.txt,IN compliance with an usage coeval with the e...,Democratic-Republican,1825,1800-1850


In [143]:
# Split every address on newlines so each cell holds a list of paragraph strings
df_par['address'] = [address.split("\n") for address in df_par['address']]

In [144]:
# One row per paragraph: explode the per-address paragraph lists and renumber the index from 0
df_par_tokenized = df_par.explode("address", ignore_index=True)

In [145]:
# Paragraph-level corpus: the exploded 'address' column, one paragraph per entry
text_par = df_par_tokenized['address']

In [146]:
# Remove stop words from every paragraph (returns a new Series; `text_par` is rebound to it)
text_par = remove_stopwords(text_par)

In [147]:
# Lemmatize every paragraph, keeping only the POS tags lemma() allows by default
text_par = lemma(text_par)

In [148]:
# Unigram + bigram bag-of-words over the paragraph corpus, filtered with the extended stop-word list
vectorizer_par = CountVectorizer(
    analyzer='word',
    token_pattern=r'\b[^\d\W]+\b',
    ngram_range=(1, 2),
    stop_words=stopwords,
)
doc_word_par = vectorizer_par.fit_transform(text_par)

In [149]:
# Factorize the paragraph-term matrix into 20 topics.
# random_state is pinned because the hand-written topic labels assigned to the
# columns of doc_topic_par further down are positional: they are only
# meaningful if the topic order is identical on every run.
nmf_model_par = NMF(n_components=20, random_state=42)
doc_topic_par = nmf_model_par.fit_transform(doc_word_par)
doc_topic_par.shape

(5820, 20)

In [150]:
# Topic-term weights of the paragraph-level model; the shape is shown as the cell output
topic_word_par = nmf_model_par.components_
topic_word_par.shape 

(20, 52456)

In [179]:
# Vocabulary terms in column order.
# get_feature_names() was removed in scikit-learn 1.2, so prefer
# get_feature_names_out() and fall back only on older releases.
words_par = list(vectorizer_par.get_feature_names_out()
                 if hasattr(vectorizer_par, 'get_feature_names_out')
                 else vectorizer_par.get_feature_names())
# argsort is ascending, so the reversed slice -1:-7:-1 picks the 6 heaviest terms per topic, biggest first
t_par = nmf_model_par.components_.argsort(axis=1)[:,-1:-7:-1]
topic_words_par = [[words_par[term_idx] for term_idx in topic_row] for topic_row in t_par]
topic_words_par

[['country', 'find', 'much', 'high', 'part', 'many'],
 ['power', 'grant', 'sovereignty', 'grant power', 'right', 'possess'],
 ['spirit', 'liberty', 'power', 'character', 'government', 'free'],
 ['world', 'new', 'freedom', 'american', 'work', 'time'],
 ['power', 'control', 'state', 'executive', 'government', 'officer'],
 ['law', 'man', 'enforce', 'pass', 'amendment', 'support'],
 ['party', 'political', 'peace', 'time', 'war', 'government'],
 ['revenue', 'interest', 'protection', 'duty', 'home', 'equally'],
 ['spirit', 'honor', 'interest', 'preserve', 'love', 'wish'],
 ['peace', 'policy', 'war', 'world', 'foreign', 'treaty'],
 ['institution', 'interest', 'never', 'subject', 'political', 'agitation'],
 ['power', 'executive', 'act', 'judiciary', 'well', 'want'],
 ['government', 'island', 'already', 'order', 'inhabitant', 'self'],
 ['war', 'force', 'invasion', 'power', 'time', 'naval'],
 ['party', 'whole', 'liberty', 'interest', 'spirit', 'country'],
 ['public', 'duty', 'good', 'service', '

### These topics seem to be relatively distinct, with some overlap. I will group together similar columns.

In [281]:
# Look at the first preprocessed paragraph (stop words removed, lemmatized)
text_par[0]

'vicissitude incident life event fill great anxiety notification transmit order receive 14th day present month hand summon country voice never hear veneration love retreat choose fond predilection flatter hope immutable decision asylum decline year retreat render day necessary dear addition habit inclination frequent interruption health gradual waste commit time hand magnitude difficulty trust voice country call sufficient awaken wise experienced citizen distrustful scrutiny qualification overwhelm despondence inherit inferior endowment nature unpractice duty civil administration peculiarly conscious deficiency conflict emotion dare aver faithful study collect duty appreciation circumstance affected dare hope execute task much swayed grateful remembrance former instance affectionate sensibility transcendent proof confidence fellow citizen thence little consult incapacity disinclination weighty untried care error palliate motive mislead consequence judge country share partiality origina

In [261]:
# Wrap the paragraph-topic weights in a DataFrame with one hand-written label per topic column.
# The labels are assigned by position, so they are only valid for the NMF fit they were written against.
# NOTE(review): confirm the labels still match the current topic_words_par ordering before relying on them.
df_topics = pd.DataFrame(doc_topic_par, columns = ['other1','power_granted1','american_spirit1','world_freedom1',
                        'power_granted2','upholding_ammendments1','war/peace/internal1','protecting_us_interests1',
                        'preserve_values1','ware/peace/external1','public_service1','other2','other3','war','american_spirit2',
                        'political_agitation1','protecting_us_interests2','businesss1'
                        ,'power_granted3','upholding_constitution'])

In [262]:
# Combine the three "other" topics into one weight per paragraph (element-wise sum)
other_sum = df_topics['other1'] +  df_topics['other2'] + df_topics['other3'] 

In [263]:
# Combine the three "power granted" topics into one weight per paragraph (element-wise sum)
power_granted_sum = df_topics['power_granted1'] + df_topics['power_granted2'] + df_topics['power_granted3']

In [264]:
# Combine the two "american spirit" topics into one weight per paragraph (element-wise sum)
american_spirit_sum = df_topics['american_spirit1'] + df_topics['american_spirit2']

In [265]:
# Combine the two "protecting US interests" topics into one weight per paragraph (element-wise sum)
protect_us_interests_sum = df_topics['protecting_us_interests1'] + df_topics['protecting_us_interests2']

In [266]:
# Start the grouped frame as a copy so df_topics keeps the original per-topic columns
df_grouped_topics = df_topics.copy()

In [267]:
# Remove the individual topic columns that are replaced by their grouped totals
df_grouped_topics = df_grouped_topics.drop(
    columns=[
        'other1','other2','other3',
        'power_granted1','power_granted2','power_granted3',
        'american_spirit1','american_spirit2',
        'protecting_us_interests1','protecting_us_interests2',
    ]
)

In [268]:
# Attach the grouped totals computed in the cells above as new columns

df_grouped_topics['other'] = other_sum
df_grouped_topics['power_granted'] = power_granted_sum
df_grouped_topics['american_spirit'] = american_spirit_sum
df_grouped_topics['protect_us_interests'] = protect_us_interests_sum 

In [269]:
# Display the grouped topic weights (one row per paragraph)
df_grouped_topics

Unnamed: 0,world_freedom1,upholding_ammendments1,war/peace/internal1,preserve_values1,ware/peace/external1,public_service1,war,political_agitation1,businesss1,upholding_constitution,other,power_granted,american_spirit,protect_us_interests
0,0.000000,0.0,0.000000,0.000000,0.000000,0.049552,0.043313,0.092621,0.000000,0.122130,0.364176,0.011362,0.037719,0.023693
1,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.100936,0.0,0.000000,0.000000,0.000000,0.000000,0.036412,0.645812,0.000000,0.016013,0.662633,0.037310,0.093600,0.012687
3,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.030213,0.0,0.000000,0.052168,0.065415,0.029714,0.000000,0.363000,0.000000,0.068867,0.525592,0.402150,0.424098,0.173195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5815,0.163729,0.0,0.006876,0.058955,0.000000,0.000000,0.000000,0.000000,0.000000,0.085092,0.009344,0.016544,0.000000,0.000000
5816,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5817,0.118475,0.0,0.013242,0.000000,0.003353,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.051681,0.021431
5818,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [294]:
# List the grouped topic columns.
# NOTE(review): the recorded output includes a 'sum' column that no cell visible here creates — confirm where it comes from.
df_grouped_topics.columns

Index(['world_freedom1', 'upholding_ammendments1', 'war/peace/internal1',
       'preserve_values1', 'ware/peace/external1', 'public_service1', 'war',
       'political_agitation1', 'businesss1', 'upholding_constitution', 'other',
       'power_granted', 'american_spirit', 'protect_us_interests', 'sum'],
      dtype='object')

In [288]:
# Attach the grouped topic weights to the paragraph rows by joining the two frames on their index

df_topic_vectors = pd.merge(df_par_tokenized,df_grouped_topics,left_index=True, right_index=True)

In [290]:
# Remove rows where 'sum' is equal to 0 - these are situations where the text is blank.
# No cell visible in this notebook creates the 'sum' column, so on a fresh kernel the
# filter would raise KeyError. Rebuild it here when it is missing.
# NOTE(review): presumably 'sum' was the row total of the grouped topic weights — verify.
if 'sum' not in df_topic_vectors.columns:
    topic_weight_columns = [col for col in df_grouped_topics.columns if col != 'sum']
    df_topic_vectors['sum'] = df_topic_vectors[topic_weight_columns].sum(axis=1)

df_topic_vectors = df_topic_vectors[df_topic_vectors['sum'] != 0]

In [295]:
# Average topic weight per address file.
# The columns are selected with a list: indexing a GroupBy with a bare tuple of
# names was deprecated in pandas 1.0 and no longer works in pandas 2.0.
df_topic_vectors.groupby(['president_x'])[['world_freedom1', 'upholding_ammendments1', 'war/peace/internal1',
       'preserve_values1', 'ware/peace/external1', 'public_service1', 'war',
       'political_agitation1', 'businesss1', 'upholding_constitution', 'other',
       'power_granted', 'american_spirit', 'protect_us_interests']].mean().reset_index()

Unnamed: 0,president_x,world_freedom1,upholding_ammendments1,war/peace/internal1,preserve_values1,ware/peace/external1,public_service1,war,political_agitation1,businesss1,upholding_constitution,other,power_granted,american_spirit,protect_us_interests
0,01.Washington.1.txt,0.026359,0.0,0.003579,0.021634,0.010903,0.022267,0.020157,0.324565,0.019194,0.054992,0.349849,0.131877,0.107122,0.049662
1,01.Washington.2.txt,0.001862,0.010584,0.0,0.033924,0.0,0.021721,0.005175,0.044521,0.005636,0.037284,0.074601,0.003439,0.0,0.0
2,02.JAdams.1.txt,0.018293,0.012883,0.031532,0.38132,0.030275,0.009011,0.02533,0.065006,0.020098,0.009714,0.179392,0.07468,0.04445,0.038008
3,03.Jefferson.1.txt,0.097554,0.219542,0.084458,0.07844,0.129924,0.02131,0.004979,0.230491,0.017337,0.009325,0.436401,0.279989,0.036569,0.094016
4,03.Jefferson.2.txt,0.033919,0.06108,0.017381,0.038806,0.045002,0.037716,0.068072,0.20448,0.020171,0.008904,0.077662,0.123399,0.042136,0.100049
5,04.Madison.1.txt,0.021737,0.024262,0.007075,0.041772,0.094691,0.018549,0.033468,0.230267,0.021723,0.019232,0.109711,0.249087,0.049657,0.043653
6,04.Madison.2.txt,0.016379,0.002422,0.010194,0.020083,0.014585,0.025826,0.228105,0.055097,0.0,0.005813,0.069925,0.055029,0.020372,0.023528
7,05.Monroe.1.txt,0.006492,0.02003,0.011166,0.033525,0.049027,0.029101,0.203837,0.131595,0.035773,0.016985,0.12945,0.130807,0.04352,0.100497
8,05.Monroe.2.txt,0.008578,0.0391,0.022224,0.031442,0.058469,0.025934,0.208096,0.045221,0.079876,0.019834,0.138539,0.135259,0.062998,0.080288
9,06.JQAdams.1.txt,0.01794,0.052562,0.494866,0.038449,0.074877,0.025983,0.058477,0.228248,0.0,0.033472,0.227195,0.337472,0.05008,0.055755


## Pickling

In [157]:
# Pickle the paragraph document-term matrix (doc_word_par)
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\doc_word_par.pkl"
with open(path, 'wb') as f:
    pickle.dump(doc_word_par, f)

In [160]:
# Pickle the fitted paragraph-level NMF model
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\nmf_model_par.pkl"
with open(path, 'wb') as f:
    pickle.dump(nmf_model_par, f)

In [153]:
# Pickle the paragraph-topic weight matrix (doc_topic_par)
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\doc_topic_par.pkl"
with open(path, 'wb') as f:
    pickle.dump(doc_topic_par, f)

In [154]:
# Pickle the fitted paragraph-level CountVectorizer
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\vectorizer_par.pkl"
with open(path, 'wb') as f:
    pickle.dump(vectorizer_par, f)

In [155]:
# Pickle the top-terms-per-topic lists (topic_words_par)
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\topic_words_par.pkl"
with open(path, 'wb') as f:
    pickle.dump(topic_words_par, f)

In [156]:
# Pickle the topic-term weight matrix (topic_word_par, i.e. nmf_model_par.components_)
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\topic_word_par.pkl"
with open(path, 'wb') as f:
    pickle.dump(topic_word_par, f)

# Testing without model

In [158]:
# Load the pickled paragraph document-term matrix
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\doc_word_par.pkl"

# The context manager closes the file handle once the object is loaded
with open(path, 'rb') as f:
    doc_word_par2 = pickle.load(f)

In [161]:
# Load the pickled paragraph-level NMF model

path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\nmf_model_par.pkl"

# The context manager closes the file handle once the object is loaded
with open(path, 'rb') as f:
    nmf_model_par2 = pickle.load(f)

In [163]:
# Load the pickled paragraph-topic weight matrix
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\doc_topic_par.pkl"

# The context manager closes the file handle once the object is loaded
with open(path, 'rb') as f:
    doc_topic_par2 = pickle.load(f)

In [165]:
# Load the pickled paragraph-level CountVectorizer
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\vectorizer_par.pkl"

# The context manager closes the file handle once the object is loaded
with open(path, 'rb') as f:
    vectorizer_par2 = pickle.load(f)

In [169]:
# Load the pickled top-terms-per-topic lists
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\topic_words_par.pkl"

# The context manager closes the file handle once the object is loaded
with open(path, 'rb') as f:
    topic_words_par2 = pickle.load(f)

In [171]:
# Load the pickled topic-term weight matrix
path = r"C:\Users\Andrew\Documents\Metis\NLP_Inaugural_Addresses\Pickled_Files\topic_word_par.pkl"

# The context manager closes the file handle once the object is loaded
with open(path, 'rb') as f:
    topic_word_par2 = pickle.load(f)