### Importing Libraries

In [6]:
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

### Reading all Presidential speech files

In [7]:
# get list of all speech files
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])
files

['1789-Washington.txt',
 '1793-Washington.txt',
 '1797-John-Adams.txt',
 '1801-Jefferson.txt',
 '1805-Jefferson.txt',
 '1809-Madison.txt',
 '1813-Madison.txt',
 '1817-Monroe.txt',
 '1821-Monroe.txt',
 '1825-John-Q-Adams.txt',
 '1829-Jackson.txt',
 '1833-Jackson.txt',
 '1837-VanBuren.txt',
 '1841-William-Harrison.txt',
 '1845-Polk.txt',
 '1849-Taylor.txt',
 '1853-Pierce.txt',
 '1857-Buchanan.txt',
 '1861-Lincoln.txt',
 '1865-Lincoln.txt',
 '1869-Grant.txt',
 '1873-Grant.txt',
 '1877-Hayes.txt',
 '1881-Garfield.txt',
 '1885-Cleveland.txt',
 '1889-Benjamin-Harrison.txt',
 '1893-Cleveland.txt',
 '1897-McKinley.txt',
 '1901-McKinley.txt',
 '1905-Theodore-Roosevelt.txt',
 '1909-Taft.txt',
 '1913-Wilson.txt',
 '1917-Wilson.txt',
 '1921-Harding.txt',
 '1925-Coolidge.txt',
 '1929-Hoover.txt',
 '1933-Franklin-D-Roosevelt.txt',
 '1937-Franklin-D-Roosevelt.txt',
 '1941-Franklin-D-Roosevelt.txt',
 '1945-Franklin-D-Roosevelt.txt',
 '1949-Truman.txt',
 '1953-Eisenhower.txt',
 '1957-Eisenhower.txt',
 

In [8]:
# read each speech file
speeches = [read_file(doc) for doc in files]
print(speeches)



### Preprocess each Presidential Speech

In [9]:
# preprocess each speech
processed_speeches = process_speeches(speeches)
print(processed_speeches)



In [10]:
# merge speeches
all_sentences = merge_speeches(processed_speeches)
print(all_sentences)



### Frequently used words for all Presidents

In [11]:
# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)
print(most_freq_words)



### Word Embeddings Model

In [12]:
# create gensim model of all speeches
all_prez_embeddings = gensim.models.Word2Vec(all_sentences, vector_size = 96, window = 5, min_count = 1, workers = 2, sg = 1)
print(all_prez_embeddings)

Word2Vec(vocab=9818, vector_size=96, alpha=0.025)


### Words similar to Freedom

In [13]:
# view words similar to freedom
similar_to_freedom = all_prez_embeddings.wv.most_similar('freedom', topn = 20)
print(similar_to_freedom)

[('human', 0.9734501242637634), ('independence', 0.9662564992904663), ('political', 0.9652782678604126), ('institutions', 0.9636476039886475), ('life', 0.9629980325698853), ('race', 0.9623410701751709), ('benefits', 0.9611639380455017), ('preservation', 0.9600702524185181), ('destruction', 0.9596888422966003), ('governments', 0.9586676359176636), ('individual', 0.9573930501937866), ('increase', 0.9571884274482727), ('citizenship', 0.9569110870361328), ('security', 0.9568981528282166), ('dignity', 0.9566564559936523), ('parts', 0.9554083943367004), ('confederacy', 0.9551719427108765), ('defense', 0.9545403122901917), ('sovereignty', 0.9543224573135376), ('forms', 0.9542988538742065)]


### Words similar to Government

In [14]:
# view words similar to government
similar_to_government = all_prez_embeddings.wv.most_similar('government', topn = 20)
print(similar_to_government)

[('federal', 0.9464930891990662), ('system', 0.9368741512298584), ('union', 0.9355406761169434), ('form', 0.9331250190734863), ('general', 0.9313462376594543), ('policy', 0.9289358258247375), ('local', 0.9288527965545654), ('revenue', 0.9251707792282104), ('itself', 0.9230284094810486), ('territory', 0.9228314757347107), ('instrument', 0.9196411967277527), ('authority', 0.9183145761489868), ('provided', 0.9182892441749573), ('self', 0.9176406860351562), ('law', 0.9171513915061951), ('party', 0.9165438413619995), ('action', 0.9163489937782288), ('independent', 0.9161813259124756), ('where', 0.9156767725944519), ('spirit', 0.915462076663971)]


### President Roosevelt's speeches

In [15]:
# get President Roosevelt sentences
roosevelt_sentences = get_president_sentences('franklin-d-roosevelt')
print(roosevelt_sentences)

[['i', 'am', 'certain', 'that', 'my', 'fellow', 'americans', 'expect', 'that', 'on', 'my', 'induction', 'into', 'the', 'presidency', 'i', 'will', 'address', 'them', 'with', 'a', 'candor', 'and', 'a', 'decision', 'which', 'the', 'present', 'situation', 'of', 'our', 'nation', 'impels'], ['this', 'is', 'preeminently', 'the', 'time', 'to', 'speak', 'the', 'truth', 'the', 'whole', 'truth', 'frankly', 'and', 'boldly'], ['nor', 'need', 'we', 'shrink', 'from', 'honestly', 'facing', 'conditions', 'in', 'our', 'country', 'today'], ['this', 'great', 'nation', 'will', 'endure', 'as', 'it', 'has', 'endured', 'will', 'revive', 'and', 'will', 'prosper'], ['so', 'first', 'of', 'all', 'let', 'me', 'assert', 'my', 'firm', 'belief', 'that', 'the', 'only', 'thing', 'we', 'have', 'to', 'fear', 'is', 'fear', 'itself', 'nameless', 'unreasoning', 'unjustified', 'terror', 'which', 'paralyzes', 'needed', 'efforts', 'to', 'convert', 'retreat', 'into', 'advance'], ['in', 'every', 'dark', 'hour', 'of', 'our', 'nat

In [16]:
# view most frequently used words of Roosevelt
roosevelt_most_freq_words = most_frequent_words(roosevelt_sentences)
print(roosevelt_most_freq_words)

[('the', 375), ('of', 321), ('and', 179), ('to', 158), ('we', 131), ('a', 121), ('in', 119), ('that', 102), ('our', 90), ('it', 71), ('is', 67), ('have', 56), ('for', 47), ('be', 41), ('i', 40), ('this', 40), ('not', 40), ('by', 38), ('will', 35), ('as', 33), ('all', 33), ('are', 32), ('which', 29), ('with', 28), ('on', 27), ('has', 27), ('they', 27), ('but', 27), ('nation', 26), ('people', 25), ('their', 25), ('government', 23), ('can', 23), ('us', 20), ('shall', 20), ('democracy', 20), ('from', 19), ('an', 19), ('men', 18), ('its', 18), ('must', 17), ('who', 17), ('been', 16), ('know', 16), ('life', 15), ('spirit', 15), ('no', 15), ('because', 15), ('there', 15), ('if', 15), ('so', 14), ('at', 14), ('more', 13), ('those', 13), ('upon', 13), ('national', 12), ('years', 12), ('may', 12), ('new', 12), ('world', 12), ('my', 11), ('every', 11), ('these', 11), ('through', 11), ('states', 11), ('way', 11), ('good', 11), ('or', 11), ('today', 10), ('great', 10), ('only', 10), ('power', 10), 

In [17]:
# create gensim model for Roosevelt
roosevelt_embeddings = gensim.models.Word2Vec(roosevelt_sentences, vector_size = 96, window = 5, min_count = 1, workers = 2, sg = 1)
print(roosevelt_embeddings)

Word2Vec(vocab=1514, vector_size=96, alpha=0.025)


In [18]:
# view words similar to freedom for Roosevelt
roosevelt_similar_to_freedom = roosevelt_embeddings.wv.most_similar('freedom', topn = 20)
roosevelt_similar_to_freedom

[('must', 0.9970712065696716),
 ('in', 0.9967290163040161),
 ('people', 0.9966597557067871),
 ('an', 0.9966448545455933),
 ('that', 0.9965879321098328),
 ('on', 0.9965408444404602),
 ('or', 0.9965154528617859),
 ('the', 0.9964438676834106),
 ('not', 0.9963942766189575),
 ('our', 0.9963911175727844),
 ('which', 0.996362566947937),
 ('it', 0.9963566064834595),
 ('shall', 0.9963180422782898),
 ('into', 0.9962964653968811),
 ('we', 0.996296226978302),
 ('to', 0.9962660670280457),
 ('are', 0.9962241649627686),
 ('upon', 0.9961763024330139),
 ('one', 0.9961669445037842),
 ('and', 0.9961580038070679)]

### Presidents Washington, Jefferson, Lincoln and Roosevelt's speeches

In [19]:
# get sentences of multiple presidents
rushmore_prez_sentences = get_presidents_sentences(["washington","jefferson","lincoln","theodore-roosevelt"])
rushmore_prez_sentences

[['fellow',
  'citizens',
  'of',
  'the',
  'senate',
  'and',
  'of',
  'the',
  'house',
  'of',
  'representatives',
  'among',
  'the',
  'vicissitudes',
  'incident',
  'to',
  'life',
  'no',
  'event',
  'could',
  'have',
  'filled',
  'me',
  'with',
  'greater',
  'anxieties',
  'than',
  'that',
  'of',
  'which',
  'the',
  'notification',
  'was',
  'transmitted',
  'by',
  'your',
  'order',
  'and',
  'received',
  'on',
  'the',
  '14th',
  'day',
  'of',
  'the',
  'present',
  'month'],
 ['on',
  'the',
  'one',
  'hand',
  'i',
  'was',
  'summoned',
  'by',
  'my',
  'country',
  'whose',
  'voice',
  'i',
  'can',
  'never',
  'hear',
  'but',
  'with',
  'veneration',
  'and',
  'love',
  'from',
  'a',
  'retreat',
  'which',
  'i',
  'had',
  'chosen',
  'with',
  'the',
  'fondest',
  'predilection',
  'and',
  'in',
  'my',
  'flattering',
  'hopes',
  'with',
  'an',
  'immutable',
  'decision',
  'as',
  'the',
  'asylum',
  'of',
  'my',
  'declining',
  '

In [20]:
# view most frequently used words of presidents
rushmore_most_freq_words = most_frequent_words(rushmore_prez_sentences)
rushmore_most_freq_words

[('the', 779),
 ('of', 500),
 ('and', 391),
 ('to', 385),
 ('in', 202),
 ('that', 163),
 ('be', 155),
 ('a', 138),
 ('which', 128),
 ('it', 124),
 ('by', 115),
 ('i', 113),
 ('is', 109),
 ('with', 99),
 ('as', 87),
 ('all', 85),
 ('our', 85),
 ('have', 84),
 ('not', 84),
 ('we', 72),
 ('this', 70),
 ('for', 68),
 ('will', 67),
 ('on', 59),
 ('no', 57),
 ('or', 57),
 ('from', 56),
 ('their', 55),
 ('but', 53),
 ('them', 51),
 ('are', 50),
 ('my', 49),
 ('can', 48),
 ('government', 46),
 ('they', 45),
 ('shall', 42),
 ('its', 41),
 ('may', 41),
 ('who', 38),
 ('any', 38),
 ('you', 37),
 ('so', 35),
 ('constitution', 34),
 ('if', 33),
 ('people', 33),
 ('us', 32),
 ('has', 31),
 ('citizens', 30),
 ('union', 29),
 ('one', 28),
 ('an', 28),
 ('public', 28),
 ('should', 28),
 ('me', 27),
 ('these', 27),
 ('there', 27),
 ('than', 26),
 ('been', 26),
 ('now', 26),
 ('your', 25),
 ('other', 25),
 ('his', 25),
 ('own', 25),
 ('would', 25),
 ('those', 25),
 ('must', 25),
 ('every', 24),
 ('such',

In [21]:
# create gensim model for the presidents
rushmore_embeddings = gensim.models.Word2Vec(rushmore_prez_sentences, vector_size = 96, window = 5, min_count = 1, workers = 2, sg = 1)
print(rushmore_embeddings)

Word2Vec(vocab=2394, vector_size=96, alpha=0.025)


In [22]:
# view words similar to freedom for presidents
rushmore_similar_to_freedom = rushmore_embeddings.wv.most_similar('freedom', topn = 20)
rushmore_similar_to_freedom

[('a', 0.9982308149337769),
 ('had', 0.9979985356330872),
 ('never', 0.9978899955749512),
 ('yet', 0.9978470206260681),
 ('more', 0.997833788394928),
 ('without', 0.9978325963020325),
 ('truth', 0.9978071451187134),
 ('can', 0.9977568984031677),
 ('must', 0.9976964592933655),
 ('will', 0.9976963996887207),
 ('either', 0.9976611137390137),
 ('for', 0.9976347088813782),
 ('justice', 0.9976271390914917),
 ('time', 0.9976249933242798),
 ('may', 0.9976241588592529),
 ('every', 0.9976229667663574),
 ('they', 0.9976216554641724),
 ('no', 0.9976145029067993),
 ('state', 0.9976073503494263),
 ('all', 0.9976064562797546)]

In [23]:
# view words similar to government
rushmore_similar_to_government = rushmore_embeddings.wv.most_similar('government', topn = 20)
rushmore_similar_to_government

[('in', 0.9986762404441833),
 ('not', 0.9984861612319946),
 ('on', 0.9984666705131531),
 ('while', 0.9984122514724731),
 ('for', 0.9984028935432434),
 ('you', 0.9983763098716736),
 ('shall', 0.9983751773834229),
 ('at', 0.9983706474304199),
 ('union', 0.9983521103858948),
 ('law', 0.9983483552932739),
 ('now', 0.9983378052711487),
 ('must', 0.9983025789260864),
 ('people', 0.9983019828796387),
 ('can', 0.998271644115448),
 ('from', 0.9982634782791138),
 ('is', 0.9982537031173706),
 ('peace', 0.9982487559318542),
 ('that', 0.9982441067695618),
 ('again', 0.9982392191886902),
 ('under', 0.9982339143753052)]