In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import requests
from bs4 import BeautifulSoup as soup

header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0"
}

# Getting The Dataframe Setup

In [2]:
url = "https://www.presidency.ucsb.edu/documents/app-categories/spoken-addresses-and-remarks/presidential/inaugural-addresses?items_per_page=60"
raw = requests.get(url, headers=header)

In [3]:
bsObj = soup(raw.content, "html.parser")  # Parse the html
links = bsObj.find_all("div", class_="field-title")
names = bsObj.find_all("div", class_="col-sm-4 margin-top")

In [4]:
president_names = [name.p.a.text for name in names]

speech_links = [title.find("a")["href"] for title in links]

In [5]:
# add second list
url = "https://www.presidency.ucsb.edu/documents/app-categories/spoken-addresses-and-remarks/presidential/inaugural-addresses?items_per_page=60&page=1"
raw = requests.get(url, headers=header)

In [6]:
bsObj = soup(raw.content, "html.parser")
links = bsObj.find_all("div", class_="field-title")
names = bsObj.find_all("div", class_="col-sm-4 margin-top")

In [7]:
president_names.extend([name.p.a.text for name in names])

speech_links.extend([title.find("a")["href"] for title in links])

In [8]:
data = {"Name": president_names, "Link": speech_links}

# Create DataFrame
df = pd.DataFrame(data)

df

Unnamed: 0,Name,Link
0,Donald J. Trump (2nd Term),/documents/inaugural-address-54
1,"Joseph R. Biden, Jr.",/documents/inaugural-address-53
2,Donald J. Trump (1st Term),/documents/inaugural-address-14
3,Barack Obama,/documents/inaugural-address-15
4,Barack Obama,/documents/inaugural-address-5
...,...,...
58,Thomas Jefferson,/documents/inaugural-address-20
59,Thomas Jefferson,/documents/inaugural-address-19
60,John Adams,/documents/inaugural-address-18
61,George Washington,/documents/inaugural-address-17


# Each President Speech

In [9]:
base_url = "https://www.presidency.ucsb.edu/"

In [10]:
# Method to search

unifying_words = [
    "together", "unity", "united", "one nation", "one people", "common good", 
    "shared purpose", "cooperation", "reconciliation", "harmony", "bridge divides", 
    "come together", "join hands", "heal", "bond", "mutual respect", "compromise", 
    "bipartisan", "consensus", "all Americans", "fellow citizens", "national spirit", 
    "collective effort", "civic spirit", "shared prosperity", "common destiny", 
    "national renewal", "rebuilding trust", "strength in diversity", "enduring values", 
    "our shared history", "bridging differences", "fostering dialogue", 
    "shared responsibility", "working side by side", "democratic principles", 
    "collective resilience", "moral courage", "upholding traditions", "healing divisions", 
    "inclusive leadership", "neighborly love", "faith in each other", 
    "building for future generations", "honoring our past", "a more perfect union", 
    "promise of America", "shared sacrifice", "rising together", 
    "responsibility to one another", "mutual understanding", "forging a new path", 
    "hand in hand", "strengthening democracy", "joining forces", "seeking harmony", 
    "one future", "unity of purpose", "guiding principles", "national stability", 
    "rekindling hope", "national strength", "common values", "renewed commitment", 
    "working as one", "enduring freedom", "standing together", "our duty", 
    "greater purpose", "moral foundation", "common ground", "hope for the future", 
    "courage and faith", "building a better future", "respect for all", 
    "principles of justice", "national character", "spirit of service", 
    "duty to country", "honoring sacrifice", "prosperity for all", "defending democracy", 
    "equal opportunity", "vision for tomorrow", "lifting each other up", 
    "shared dreams", "democratic ideals", "a nation of laws", "service to others", 
    "public virtue", "union of states", "our Constitution", "divine Providence", 
    "fraternal bonds", "sacred trust", "republican virtue", "justice and wisdom", 
    "faithful execution", "preservation of peace", "prosperity and liberty", 
    "honorable government", "wise counsel", "our liberties", "common defense", 
    "general welfare", "mutual concession", "domestic tranquility", 
    "prudence and moderation", "cordial cooperation", "blessings of liberty", 
    "equal justice", "public confidence", "principles of freedom", "civic harmony", 
    "our great republic", "the people's voice", "national prosperity", 
    "virtuous government", "devotion to the Union", "sacred duty", 
    "free institutions", "equal laws", "public happiness", "grateful service", 
    "safety and honor", "peace and prosperity", "national honor", "domestic peace", 
    "preserving unity", "wise leadership", "faithful stewardship", "great experiment", 
    "civic responsibility", "spirit of fraternity", "dignity of the people", 
    "safeguarding liberty", "harmony among states", "solemn obligation", 
    "devotion to peace", "public faith", "government by the people", 
    "respect for tradition", "national concord", "brotherhood of citizens", 
    "binding ties", "common sacrifices", "securing blessings", "promoting welfare", 
    "virtue and duty", "principles of order", "equal and exact justice", 
    "sacred honor", "voice of the people", "path of peace", "covenant of liberty", 
    "tranquil government", "firm reliance", "brethren", "justice and honor", 
    "spirit of amity", "enlightened government", "patriotic attachment", 
    "public tranquility", "mutual duty", "peaceful relations", "common interests", 
    "domestic safety", "federal harmony", "civil liberty", "national dignity", 
    "righteous cause", "spirit of conciliation", "lasting peace", "respect for rights", 
    "enlightened patriotism", "public unity", "moral obligation", "equal footing", 
    "safeguarding our liberties", "harmony of interests", "constitutional faith", 
    "ordered liberty", "mutual protection", "brotherhood", "public welfare", 
    "fidelity to the nation"
]

polarizing_words = [
    "silent majority", "real Americans", "true patriots", "taking back", 
    "making America great again", "ideological battle", "radical", "corrupt", 
    "enemy", "betrayal", "stolen", "attack", "disgrace", "destroy", "defeat", 
    "overthrow", "rigged", "illegitimate", "danger", "threat", "invasion", 
    "catastrophe", "collapse", "disaster", "crime wave", "carnage", "poisoned", 
    "crisis", "war on", "taking away", "crushing", "oppressors", "false narratives", 
    "treasonous", "swamp", "deep state", "elite class", "failed policies", "tyranny", 
    "deception", "conspiracy", "internal sabotage", "recklessness", "reckless", 
    "failure", "incompetence", "disgraceful actions", "puppet masters", 
    "dark forces", "traitorous", "anti-American", "illegitimate rulers", 
    "selling out our country", "globalist agenda", "failed leadership", 
    "two-faced politicians", "hypocrites", "reckoning day", "stolen future", 
    "collapsing system", "deep-rooted rot", "false leaders", "bureaucratic swamp", 
    "anti-democratic forces", "dismantling our freedoms", "forced submission", 
    "puppet class", "those who seek to control us", "abandoned values", 
    "destabilization", "surrendering sovereignty", "political fraud", 
    "rotten to the core", "weak leadership", "orchestrated chaos", "national decline", 
    "ceding our power", "hidden agenda", "unwavering resolve", "history is on our side", 
    "holding the line", "staying vigilant", "rising tide", "the people's mandate", 
    "true defenders", "restoring what was lost", "holding power accountable", 
    "demanding justice", "aliens", "criminals", "rapist", "failing institutions", 
    "a war against us", "plotting against the people", "manipulated elections", 
    "shadow government", "ruining our country", "enemies of freedom", 
    "destroying our heritage", "exposing the lies", "traitors within", 
    "sham elections", "rigging the system", "stealing our rights", 
    "subverting our nation", "forces against us", "dark times ahead", 
    "fight for survival", "battle for America", "struggle for our nation", 
    "usurpation", "traitors", "subversion", "despotism", "factions", 
    "insurrection", "anarchy", "peril", "wickedness", "falsehood", "sedition", 
    "treason", "malice", "discord", "false prophets", "treachery", "schemes", 
    "dangerous doctrines", "disloyalty", "agitators", "dissolution", "lawlessness", 
    "disunion", "intrigue", "menace", "evil designs", "manipulation", "shadows", 
    "deceit", "intrusion", "anarchists", "false patriots", "disruptors", "misrule", 
    "violators", "dissenters", "enemies within", "seductive influence", 
    "arrogance of power", "perverters of justice", "subjugation", "illegitimate rule", 
    "unlawful ambition", "ruin", "infamy", "calamity", "cunning", "unworthy leaders", 
    "conspirators", "unholy influence", "political decay", "ruinous measures", 
    "dangerous excess", "dissensions", "plotters", "ruthless ambition", "usurpers", 
    "sowers of discord", "violators of rights", "threats to the republic", 
    "treacherous men", "designs against liberty", "delusions", "dangerous alliances", 
    "injustice", "depravity", "hidden dangers", "chaos", "malign influences", 
    "puppet rulers", "sacrilege", "tyrannical force", "destructive ambition", 
    "selfish factions", "desperate measures", "blind fanaticism", "alien influence", 
    "internal ruin", "opposing forces", "destructive counsels", "plunderers",  
    "destruction of principle", "sinister forces", "lawless ambition", 
    "false doctrines", "fearful threats", "machinations", "threatening forces", 
    "radical experiment", "dangerous hands", "wanton", "self-serving interests", 
    "perils of ambition", "untrustworthy leaders", "enemies of the Constitution", 
    "the great crime", "vicious ambition", "plotters of ruin", "unholy", 
    "destructive spirits", "ill-fated ambition", "irresponsible leadership", 
    "opportunistic", "false preachers", "cynical manipulators", "blind arrogance", 
    "cunning deceivers", "undermining order", "depraved rule", "unrestrained ambition", 
    "misguided leaders", "tyrannical aspirations", "shattered peace", 
    "ravenous ambition", "betrayers", "lawless forces", "dismantling justice", 
    "enemies of order", "shameful violations", "dangerous incompetence", 
    "zealots", "blind hatred", "degrading influences", "pernicious doctrines", 
    "subversion of truth", "enemies of peace", "unfit", "tyrannical rulers",  
    "destructive disunion", "ruinous disruption", "dark intrigues", 
    "poisonous influences", "dark plots", "unrelenting ambition", 
    "unworthy politicians", "destructive rulers", "ruthless oppression", 
    "traitorous leaders", "unprincipled power", "perverting justice", 
    "faithless stewards", "shameless manipulators", "unlawful intrusions", 
    "perversion", "insatiable ambition", "blind destruction", "tyrannical designs", 
    "lawless destruction", "wicked dominion", "evildoers", "robbers", 
    "demagogue", "saboteurs"
]
    
def get_unifying_words(text):
    total = 0
    for phrase in unifying_words:
        count = text.count(phrase)
        total += count

    return total


def get_polarizing_words(text):
    total = 0
    for phrase in polarizing_words:
        count = text.count(phrase)
        total += count

    return total


def get_total_word_count(text):
    total = 0
    words = text.strip().split(" ")  # Split the text into words
    # print(words)
    for word in words:
        if word != "":
            total += 1
    return total

In [11]:
# This takes several seconds to run
unifying_words_count = []
polarizing_words_count = []
total_word_count = []

for end in df["Link"].values.tolist():
    url = base_url + end
    # print(url)
    raw = requests.get(url, headers=header)
    bsObj = soup(raw.content, "html.parser")

    speech = bsObj.find_all(class_="field-docs-content")

    # replace all new lines and get rid of empty spaces and make sure every thing is lower case before putting it into a string
    speech_text = ("".join([s.get_text().replace("\n", " ") for s in speech])).lower()
    speech_text = speech_text.replace("(applause.)", "")  # gets rid of applause pauses
    unifying_words_count.append(get_unifying_words(speech_text))
    polarizing_words_count.append(get_polarizing_words(speech_text))
    total_word_count.append(get_total_word_count(speech_text))
    # print(speech_text)
    # print(get_unifying_words(speech_text))
    # print(get_polarizing_words(speech_text))
    # df

In [12]:
df["Unifying Words Count"] = unifying_words_count
df["Polarizing Words Count"] = polarizing_words_count
df["Total Words Count"] = total_word_count

# Adding Additional Columns 

### Adding Political Party Affiliation

In [13]:
# Create dictionary with each president's party
party_mapping = {
    "Donald J. Trump (2nd Term)": "Republican",
    "Joseph R. Biden, Jr.": "Democrat",
    "Donald J. Trump (1st Term)": "Republican",
    "Barack Obama": "Democrat",
    "George W. Bush": "Republican",
    "William J. Clinton": "Democrat",
    "George Bush": "Republican",
    "Ronald Reagan": "Republican",
    "Jimmy Carter": "Democrat",
    "Richard Nixon": "Republican",
    "Lyndon B. Johnson": "Democrat",
    "John F. Kennedy": "Democrat",
    "Dwight D. Eisenhower": "Republican",
    "Harry S Truman": "Democrat",
    "Franklin D. Roosevelt": "Democrat",
    "Herbert Hoover": "Republican",
    "Calvin Coolidge": "Republican",
    "Warren G. Harding": "Republican",
    "Woodrow Wilson": "Democrat",
    "William Howard Taft": "Republican",
    "Theodore Roosevelt": "Republican",
    "William McKinley": "Republican",
    "Grover Cleveland": "Democrat",
    "Benjamin Harrison": "Republican",
    "Chester A. Arthur": "Republican",
    "James A. Garfield": "Republican",
    "Rutherford B. Hayes": "Republican",
    "Ulysses S. Grant": "Republican",
    "Andrew Johnson": "Democrat (Union)",
    "Abraham Lincoln": "Republican",
    "James Buchanan": "Democrat",
    "Franklin Pierce": "Democrat",
    "Zachary Taylor": "Whig",
    "James K. Polk": "Democrat",
    "John Tyler": "Whig (later Unaffiliated)",
    "William Henry Harrison": "Whig",
    "Martin van Buren": "Democrat",
    "Andrew Jackson": "Democrat",
    "John Quincy Adams": "Democratic-Republican",
    "James Monroe": "Democratic-Republican",
    "James Madison": "Democratic-Republican",
    "Thomas Jefferson": "Democratic-Republican",
    "John Adams": "Federalist",
    "George Washington": "No formal party",
}

# Map political party onto president
df["Political Party"] = df["Name"].map(party_mapping)
df.head(3)

Unnamed: 0,Name,Link,Unifying Words Count,Polarizing Words Count,Total Words Count,Political Party
0,Donald J. Trump (2nd Term),/documents/inaugural-address-54,21,22,2905,Republican
1,"Joseph R. Biden, Jr.",/documents/inaugural-address-53,29,11,2532,Democrat
2,Donald J. Trump (1st Term),/documents/inaugural-address-14,10,4,1455,Republican


### Combining "Democrat" and "Democrat (Union)" values in Political Party 
These parties are essentially the same and combining them will simplify our analysis.

In [14]:
df["Political Party"] = df["Political Party"].replace("Democrat (Union)", "Democrat")

### Adding Overall Word Count 

In [15]:
df["Overall Language"] = df["Unifying Words Count"] > df["Polarizing Words Count"]
df["Overall Language"] = df["Overall Language"].replace(
    {True: "unifying", False: "polarizing"}
)


df.head(3)

Unnamed: 0,Name,Link,Unifying Words Count,Polarizing Words Count,Total Words Count,Political Party,Overall Language
0,Donald J. Trump (2nd Term),/documents/inaugural-address-54,21,22,2905,Republican,polarizing
1,"Joseph R. Biden, Jr.",/documents/inaugural-address-53,29,11,2532,Democrat,unifying
2,Donald J. Trump (1st Term),/documents/inaugural-address-14,10,4,1455,Republican,unifying


### Adding Unifying/Polarizing Word Count Ratios

In [17]:
# Calculate ratios
df["Unifying Words Ratio"] = round(
    df["Unifying Words Count"] / df["Total Words Count"], 5
)
df["Polarizing Words Ratio"] = round(
    df["Polarizing Words Count"] / df["Total Words Count"], 5
)
df.head(3)

Unnamed: 0,Name,Link,Unifying Words Count,Polarizing Words Count,Total Words Count,Political Party,Overall Language,Unifying Words Ratio,Polarizing Words Ratio
0,Donald J. Trump (2nd Term),/documents/inaugural-address-54,21,22,2905,Republican,polarizing,0.00723,0.00757
1,"Joseph R. Biden, Jr.",/documents/inaugural-address-53,29,11,2532,Democrat,unifying,0.01145,0.00434
2,Donald J. Trump (1st Term),/documents/inaugural-address-14,10,4,1455,Republican,unifying,0.00687,0.00275


### Adding Overall Ratio 

Overall Ratio = Unifying Words Ratio / Polarizing Words Ratio 
So, a higher overall ratio means the president used more unifying language. 
Specifically, overall ratio > 1 means more unifying than polarizing language, and overall ratio <= 1 means more polarizing than unifying language. 
We calculate the overall ratio from the unifying and polarizing ratios rather than just dividing unifying or polarizing words by total words for each observation because this accounts for some presidents using more words in their speeches than others. 

In [18]:
df["Overall Ratio"] = round(
    df["Unifying Words Ratio"] / df["Polarizing Words Ratio"], 4
)
df.head(3)

Unnamed: 0,Name,Link,Unifying Words Count,Polarizing Words Count,Total Words Count,Political Party,Overall Language,Unifying Words Ratio,Polarizing Words Ratio,Overall Ratio
0,Donald J. Trump (2nd Term),/documents/inaugural-address-54,21,22,2905,Republican,polarizing,0.00723,0.00757,0.9551
1,"Joseph R. Biden, Jr.",/documents/inaugural-address-53,29,11,2532,Democrat,unifying,0.01145,0.00434,2.6382
2,Donald J. Trump (1st Term),/documents/inaugural-address-14,10,4,1455,Republican,unifying,0.00687,0.00275,2.4982


# Handle Missing and Infinite Values 

In [19]:
# Check for missing values
df.isna().sum()

Name                      0
Link                      0
Unifying Words Count      0
Polarizing Words Count    0
Total Words Count         0
Political Party           0
Overall Language          0
Unifying Words Ratio      0
Polarizing Words Ratio    0
Overall Ratio             0
dtype: int64

In [20]:
# View row with missing value

In [21]:
# Overall Ratio is missing for this row because no polarizing or unifying words were found, so we'll replace this with 0.
df["Overall Ratio"] = df["Overall Ratio"].fillna(0)

In [22]:
# Check that there are no more missing values
df.isna().sum()

Name                      0
Link                      0
Unifying Words Count      0
Polarizing Words Count    0
Total Words Count         0
Political Party           0
Overall Language          0
Unifying Words Ratio      0
Polarizing Words Ratio    0
Overall Ratio             0
dtype: int64

In [23]:
# Check for infinite values
inf_count = df.isin([float("inf"), float("-inf")]).sum()
inf_count

Name                      0
Link                      0
Unifying Words Count      0
Polarizing Words Count    0
Total Words Count         0
Political Party           0
Overall Language          0
Unifying Words Ratio      0
Polarizing Words Ratio    0
Overall Ratio             3
dtype: int64

In [24]:
# View rows with infinite values
inf_rows = df["Overall Ratio"].isin([float("inf"), float("-inf")])
df[inf_rows]

Unnamed: 0,Name,Link,Unifying Words Count,Polarizing Words Count,Total Words Count,Political Party,Overall Language,Unifying Words Ratio,Polarizing Words Ratio,Overall Ratio
20,Franklin D. Roosevelt,/documents/inaugural-address-6,3,0,545,Democrat,unifying,0.0055,0.0,inf
46,Zachary Taylor,/documents/inaugural-address-31,5,0,1087,Whig,unifying,0.0046,0.0,inf
61,George Washington,/documents/inaugural-address-17,2,0,135,No formal party,unifying,0.01481,0.0,inf


In [25]:
# The infinite values result from diving by 0, so we will replace them with 0's
df.replace([np.inf, -np.inf], 0, inplace=True)

# Check
df.isin([float("inf"), float("-inf")]).sum()

Name                      0
Link                      0
Unifying Words Count      0
Polarizing Words Count    0
Total Words Count         0
Political Party           0
Overall Language          0
Unifying Words Ratio      0
Polarizing Words Ratio    0
Overall Ratio             0
dtype: int64

# Drop Columns 

In [26]:
# Drop Link column
df.drop(["Link"], axis=1, inplace=True)
df.head(3)

Unnamed: 0,Name,Unifying Words Count,Polarizing Words Count,Total Words Count,Political Party,Overall Language,Unifying Words Ratio,Polarizing Words Ratio,Overall Ratio
0,Donald J. Trump (2nd Term),21,22,2905,Republican,polarizing,0.00723,0.00757,0.9551
1,"Joseph R. Biden, Jr.",29,11,2532,Democrat,unifying,0.01145,0.00434,2.6382
2,Donald J. Trump (1st Term),10,4,1455,Republican,unifying,0.00687,0.00275,2.4982


# Export Data 

In [27]:
df.to_csv("./speech_data.csv", index=False)