# Trump press releases

#### Load python tools

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import os
import datetime as dt
import smtplib
from email.message import EmailMessage

In [2]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
pd.options.display.max_colwidth = None

In [3]:
today = pd.to_datetime("today").strftime("%Y-%m-%d")

---

In [4]:
trump_url = "https://www.donaldjtrump.com/news/P"

In [5]:
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
}

In [6]:
soups = []

for i in range(0, 10, 10):
    r = requests.get(trump_url + str(i), headers=headers)
    soups.append(BeautifulSoup(r.text, "html.parser"))

In [7]:
dicts = []

for soup in soups:
    for s in soup.find_all("a", class_="item"):
        url = s["href"]
        date = s.find("p").text
        headline = s.find("h2", class_="title-med-2").text
        data_dict = {
            "date": date,
            "headline": headline,
            "url": str("https://www.donaldjtrump.com") + url,
        }
        dicts.append(data_dict)

In [8]:
df = pd.DataFrame(dicts).drop_duplicates()

In [9]:
df["date"] = pd.to_datetime(df["date"])

In [10]:
df.head(10)

Unnamed: 0,date,headline,url
0,2022-04-18,ICYMI: “Biden’s insulting response to our inflation crisis”,https://www.donaldjtrump.com/news/news-vfeuperd9m0
1,2022-04-18,ICYMI: “Inflation hits another 40-year high as recession fears loom”,https://www.donaldjtrump.com/news/news-y8emd3mjhe0
2,2022-04-18,"Statement by Donald J. Trump, 45th President of the United States of America",https://www.donaldjtrump.com/news/news-rnct2f8cmm0
3,2022-04-17,"Statement by Donald J. Trump, 45th President of the United States of America",https://www.donaldjtrump.com/news/news-qcqzbjgwxf0
4,2022-04-17,"Statement by Donald J. Trump, 45th President of the United States of America",https://www.donaldjtrump.com/news/news-zqxen5324v0
5,2022-04-16,ICYMI: “‘America First’ Policy Group Calls on Wisconsin Legislature to Follow Special Counsel’s Election Integrity Reforms”,https://www.donaldjtrump.com/news/news-awucgmvrd40
6,2022-04-16,ICYMI: “Georgia Gov. Brian Kemp confronted on election integrity at Fulton County GOP event”,https://www.donaldjtrump.com/news/news-ssscmj2csr0
7,2022-04-15,PRESIDENT DONALD J. TRUMP ENDORSES J.D. VANCE FOR THE UNITED STATES SENATE IN OHIO,https://www.donaldjtrump.com/news/news-ub5egtsvmq0
8,2022-04-14,ICYMI: “Poll: Paxton Establishes Wide Lead in AG Runoff”,https://www.donaldjtrump.com/news/news-exghayqwkc1871
9,2022-04-14,ICYMI: “Letitia James takes eye off the ball and other commentary”,https://www.donaldjtrump.com/news/news-nqtehrbqtb1872


In [11]:
urls = df["url"].to_list()

release_soup = []

for url in urls:
    release_page = requests.get(url, headers=headers, timeout=15)
    url_text_dict = {
        'url': url,
        'page': BeautifulSoup(release_page.text, "html.parser"),
    }
    release_soup.append(url_text_dict)

In [12]:
body_text_dicts = []

for release in release_soup:
    for rl in release['page'].find_all("main", class_="vp-80"):
        try:
            headline = rl.find("h1", class_="title").text
            date = rl.find("p", class_="date").text
            body_text = rl.find("div", class_="body").text
            url = release['url']
            text_dict = {
                "headline": headline,
                "date": date,
                "body_text": body_text,
                "url": url,
            }
            body_text_dicts.append(text_dict)
        except:
            continue

In [13]:
text_df = pd.DataFrame(body_text_dicts).drop_duplicates()

In [14]:
text_df.head()

Unnamed: 0,headline,date,body_text,url
0,ICYMI: “Biden’s insulting response to our inflation crisis”,04/18/22,\nRead the full article from the New York Post here. \n,https://www.donaldjtrump.com/news/news-vfeuperd9m0
1,ICYMI: “Inflation hits another 40-year high as recession fears loom”,04/18/22,"\nRead the full article by Tom Howell Jr., Haris Alic, and Mica Soellner with the Washington Times here. \n",https://www.donaldjtrump.com/news/news-y8emd3mjhe0
2,"Statement by Donald J. Trump, 45th President of the United States of America",04/18/22,"\nWith the horrible Subway Shootings and Violent Crime in New York being at an all-time high, where people are afraid to walk the streets, the racist and highly partisan Attorney General of New York State, failed Gubernatorial candidate Letitia James, should focus her efforts on saving the State of New York and ending its reputation as a Crime Capital of the World, instead of spending millions of dollars and utilizing a large portion of her office in going after Donald J. Trump and the Trump Organization (for many years!), who have probably done more for New York than virtually any other person or group, including employing many wonderful people and paying millions and millions of dollars in taxes. This never-ending Witch Hunt must stop. We don’t need racist political hacks going after good, hardworking people for highly partisan political gain. The people of our Country see right through it all, and won’t take this Radical Left “sickness” anymore. Make New York Great Again! \n",https://www.donaldjtrump.com/news/news-rnct2f8cmm0
3,"Statement by Donald J. Trump, 45th President of the United States of America",04/17/22,"\nHappy Easter to failed gubernatorial candidate and racist Attorney General Letitia James. May she remain healthy despite the fact that she will continue to drive business out of New York while at the same time keeping crime, death, and destruction in New York!\n",https://www.donaldjtrump.com/news/news-qcqzbjgwxf0
4,"Statement by Donald J. Trump, 45th President of the United States of America",04/17/22,"\nHappy Easter to all including the Radical Left Maniacs who are doing everything possible to destroy our Country. May they not succeed, but let them, nevertheless, be happy, healthy, wealthy, and well!\n",https://www.donaldjtrump.com/news/news-zqxen5324v0


In [15]:
text_df["body_text"] = text_df["body_text"].str.replace("\n", "")
text_df["date"] = pd.to_datetime(text_df["date"])

In [16]:
new_df = pd.merge(df, text_df, on=["date", "headline", "url"])

In [18]:
new_df["year"] = new_df["date"].dt.year
new_df["month"] = new_df["date"].dt.month_name()
new_df["weekday"] = new_df["date"].dt.day_name()
new_df["month_year"] = pd.to_datetime(new_df["date"]).dt.to_period("M")

In [19]:
new_df.to_csv(f"data/processed/all_press_releases_latest.csv", index=False)

In [20]:
archive_df = pd.read_csv('data/processed/all_press_releases_archive.csv')

In [21]:
new_archive_df = pd.concat([archive_df, new_df]).drop_duplicates().reset_index(drop='True')

In [22]:
new_archive_df['date'] = pd.to_datetime(new_archive_df['date'])

In [23]:
new_archive_df = new_archive_df.sort_values('date', ascending=False).reset_index(drop='True')

In [24]:
new_archive_df.to_csv("data/processed/archives_timeseries/all_press_releases_archive.csv", index=False)
new_archive_df.to_csv(f"data/processed/archives_timeseries/all_press_releases_archive_{today}.csv", index=False)
new_archive_df.to_csv(f"data/processed/all_press_releases_archive.csv", index=False)

In [25]:
len(new_archive_df)

1526

In [26]:
diff = (len(new_archive_df) - len(archive_df)) - len(new_df)

In [27]:
# if diff >= 1:
#     if diff > 1:
#         email = f"We've scraped {diff} new items from the former president's news site. See the latest here: https://github.com/gridviz/trump-releases/blob/main/data/processed/all_press_releases_latest.csv"
#         subject = f'New Trump scraper result: {diff} new items!'
#     else: 
#         email = f"We've scraped a new item from the former president's news site. See the latest here: https://github.com/gridviz/trump-releases/blob/main/data/processed/all_press_releases_latest.csv"
#         subject = f'New Trump scraper result: one new item!'
# else: 
#     email = 'The scrape turned up nothing new.'
#     subject = 'New scraper result: Nothing to see here.'

In [28]:
# # get email and password from environment variables
# EMAIL_ADDRESS = os.environ.get('EMAIL_ADDRESS')
# EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD')
# EMAIL_RECIPIENT = os.environ.get('EMAIL_RECIPIENT')
    
# # set up email content
# msg = EmailMessage()
# msg['Subject'] = subject
# msg['From'] = EMAIL_ADDRESS
# msg['To'] = EMAIL_RECIPIENT
# msg.set_content(f'{email}')
    
# # send email
# with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
#     smtp.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
#     smtp.send_message(msg)