In [1]:
#!/usr/bin/env python
# coding: utf-8

In [2]:
import pandas as pd
import requests
import os
import datetime as dt
import smtplib
from email.message import EmailMessage

In [3]:
# Get latest Biden approval trends from FiveThirtyEight (538)
url = "https://projects.fivethirtyeight.com/biden-approval-data/approval_topline.csv"

In [4]:
# Download the approval topline CSV and load it into a DataFrame.
src = pd.read_csv(filepath_or_buffer=url)

In [5]:
# Preview the first five rows of the raw download.
src.head(n=5)

Unnamed: 0,president,subgroup,modeldate,approve_estimate,approve_hi,approve_lo,disapprove_estimate,disapprove_hi,disapprove_lo,timestamp
0,Joe Biden,All polls,3/1/2022,41.077404,45.172793,36.982015,53.449495,58.263632,48.635358,09:39:17 1 Mar 2022
1,Joe Biden,Adults,3/1/2022,41.286335,45.765989,36.806681,53.006933,56.937209,49.076656,09:39:22 1 Mar 2022
2,Joe Biden,Voters,3/1/2022,41.074998,45.153974,36.996022,53.244756,58.275744,48.213769,09:39:26 1 Mar 2022
3,Joe Biden,All polls,2/28/2022,40.663074,44.745382,36.580765,53.301438,58.194941,48.407935,14:56:20 28 Feb 2022
4,Joe Biden,Adults,2/28/2022,40.764375,45.196005,36.332745,52.617486,56.504165,48.730806,14:56:26 28 Feb 2022


In [6]:
# Give the columns used downstream shorter names.
src = src.rename(
    columns={
        'modeldate': 'date',
        'approve_estimate': 'approve',
        'disapprove_estimate': 'disapprove',
    }
)

In [7]:
# Net approval: approve minus disapprove (computed before either is rounded).
src['spread'] = src['approve'].sub(src['disapprove'])

In [8]:
# Round the three headline measures to two decimal places.
for measure in ('disapprove', 'approve', 'spread'):
    src[measure] = src[measure].round(2)

In [9]:
# Parse the model-run timestamp, then derive two string renderings of it:
# 'date' (mm-dd-yy, replacing the renamed modeldate column) and a long-form
# 'date_display' for human-readable text.
src = src.assign(
    datetime=lambda frame: pd.to_datetime(frame['timestamp']),
    date=lambda frame: frame['datetime'].dt.strftime('%m-%d-%y'),
    date_display=lambda frame: frame['datetime'].dt.strftime('%B %d, %Y'),
)

In [10]:
# Preview the first five rows after the rename, rounding and date columns.
src.head(n=5)

Unnamed: 0,president,subgroup,date,approve,approve_hi,approve_lo,disapprove,disapprove_hi,disapprove_lo,timestamp,spread,datetime,date_display
0,Joe Biden,All polls,03-01-22,41.08,45.172793,36.982015,53.45,58.263632,48.635358,09:39:17 1 Mar 2022,-12.37,2022-03-01 09:39:17,"March 01, 2022"
1,Joe Biden,Adults,03-01-22,41.29,45.765989,36.806681,53.01,56.937209,49.076656,09:39:22 1 Mar 2022,-11.72,2022-03-01 09:39:22,"March 01, 2022"
2,Joe Biden,Voters,03-01-22,41.07,45.153974,36.996022,53.24,58.275744,48.213769,09:39:26 1 Mar 2022,-12.17,2022-03-01 09:39:26,"March 01, 2022"
3,Joe Biden,All polls,02-28-22,40.66,44.745382,36.580765,53.3,58.194941,48.407935,14:56:20 28 Feb 2022,-12.64,2022-02-28 14:56:20,"February 28, 2022"
4,Joe Biden,Adults,02-28-22,40.76,45.196005,36.332745,52.62,56.504165,48.730806,14:56:26 28 Feb 2022,-11.85,2022-02-28 14:56:26,"February 28, 2022"


In [11]:
# Keep a single row per (date, subgroup) pair: the last one in current row order.
# NOTE(review): whether "last in row order" is the most recent model run for that
# day depends on how the upstream file is sorted — confirm.
src = src[~src.duplicated(subset=['date', 'subgroup'], keep='last')].copy()

In [12]:
# Preview the first five rows after de-duplication.
src.head(n=5)

Unnamed: 0,president,subgroup,date,approve,approve_hi,approve_lo,disapprove,disapprove_hi,disapprove_lo,timestamp,spread,datetime,date_display
0,Joe Biden,All polls,03-01-22,41.08,45.172793,36.982015,53.45,58.263632,48.635358,09:39:17 1 Mar 2022,-12.37,2022-03-01 09:39:17,"March 01, 2022"
1,Joe Biden,Adults,03-01-22,41.29,45.765989,36.806681,53.01,56.937209,49.076656,09:39:22 1 Mar 2022,-11.72,2022-03-01 09:39:22,"March 01, 2022"
2,Joe Biden,Voters,03-01-22,41.07,45.153974,36.996022,53.24,58.275744,48.213769,09:39:26 1 Mar 2022,-12.17,2022-03-01 09:39:26,"March 01, 2022"
3,Joe Biden,All polls,02-28-22,40.66,44.745382,36.580765,53.3,58.194941,48.407935,14:56:20 28 Feb 2022,-12.64,2022-02-28 14:56:20,"February 28, 2022"
4,Joe Biden,Adults,02-28-22,40.76,45.196005,36.332745,52.62,56.504165,48.730806,14:56:26 28 Feb 2022,-11.85,2022-02-28 14:56:26,"February 28, 2022"


In [13]:
# Write one full-history CSV per subgroup; the subgroup label is lower-cased
# and its spaces replaced with underscores to form the file name.
# Create the output folder first: to_csv does not create missing directories.
os.makedirs("data/processed", exist_ok=True)
for subgroup_name in src["subgroup"].unique():
    file_slug = subgroup_name.replace(' ', '_').lower()
    src[src["subgroup"] == subgroup_name].to_csv(
        f"data/processed/biden_approval_trend_538_{file_slug}.csv",
        index=False,
    )

In [14]:
# Reshape to long format: one row per (date, measure) pair.
# 'variable' holds the measure name (approve / disapprove / spread) and
# 'value' holds the number; the two labels were previously swapped.
# NOTE(review): 'subgroup' is not carried as an id column, so rows from
# different subgroups are indistinguishable in df_long — confirm that is intended.
df_long = pd.melt(
    src,
    id_vars="date",
    value_vars=["approve", "disapprove", "spread"],
    var_name="variable",
    value_name="value",
)

In [15]:
# 'date' holds mm-dd-yy strings produced by the earlier strftime('%m-%d-%y')
# call; parse them with that exact format instead of relying on format
# inference for an ambiguous two-digit-year layout.
src['date'] = pd.to_datetime(src['date'], format='%m-%d-%y')
# Keep only the rows for the most recent date (one per subgroup after de-duplication).
latest_df = src[src['date'] == src['date'].max()]

In [16]:
# Write one "latest" CSV per subgroup containing only the most recent date's row(s).
# Create the output folder first: to_csv does not create missing directories.
os.makedirs("data/processed", exist_ok=True)
for subgroup_name in latest_df["subgroup"].unique():
    file_slug = subgroup_name.replace(' ', '_').lower()
    latest_df[latest_df["subgroup"] == subgroup_name].to_csv(
        f"data/processed/biden_approval_trend_538_latest_{file_slug}.csv",
        index=False,
    )

In [17]:
# Display the rows for the most recent date.
latest_df

Unnamed: 0,president,subgroup,date,approve,approve_hi,approve_lo,disapprove,disapprove_hi,disapprove_lo,timestamp,spread,datetime,date_display
0,Joe Biden,All polls,2022-03-01,41.08,45.172793,36.982015,53.45,58.263632,48.635358,09:39:17 1 Mar 2022,-12.37,2022-03-01 09:39:17,"March 01, 2022"
1,Joe Biden,Adults,2022-03-01,41.29,45.765989,36.806681,53.01,56.937209,49.076656,09:39:22 1 Mar 2022,-11.72,2022-03-01 09:39:22,"March 01, 2022"
2,Joe Biden,Voters,2022-03-01,41.07,45.153974,36.996022,53.24,58.275744,48.213769,09:39:26 1 Mar 2022,-12.17,2022-03-01 09:39:26,"March 01, 2022"


In [18]:
# Pull the headline numbers for the notification from the "All polls" row of
# the latest date. Rows and columns are addressed by label rather than by
# position, so the lookup does not silently pick the wrong field if the
# upstream CSV gains or reorders columns, or if the subgroup order changes.
all_polls_rows = latest_df[latest_df["subgroup"] == "All polls"]
# Fall back to the first available row when no "All polls" row exists.
headline_idx = (all_polls_rows if not all_polls_rows.empty else latest_df).index[0]

date = latest_df.at[headline_idx, "date_display"]
approve = latest_df.at[headline_idx, "approve"].round(1)
disapprove = latest_df.at[headline_idx, "disapprove"].round(1)
spread = latest_df.at[headline_idx, "spread"].round(1)

In [19]:
# Plain-text body of the notification email. The link targets the "All polls"
# trend file, whose name carries the same "_538_" infix the export cell writes.
email = (
    "Yes! We've scraped President Biden's latest polling average from 538. "
    f"As of {date}, his approval rating is {approve}%. "
    f"His disapproval rating is {disapprove}%. "
    f"That's a spread of {spread} percentage points. "
    "Get the latest data here: "
    "https://github.com/stiles/biden-polls/blob/main/data/processed/biden_approval_trend_538_all_polls.csv"
)
  

In [20]:
  
# Read the sender address, its password and the recipient from the environment.
# A variable that is not set yields None.
EMAIL_ADDRESS, EMAIL_PASSWORD, EMAIL_RECIPIENT = (
    os.environ.get(var_name)
    for var_name in ('EMAIL_ADDRESS', 'EMAIL_PASSWORD', 'EMAIL_RECIPIENT')
)

In [21]:
# Assemble the notification: headers first, then the plain-text body.
msg = EmailMessage()
for header_name, header_value in (
    ('Subject', 'Github Actions: New Biden polling results from 538!'),
    ('From', EMAIL_ADDRESS),
    ('To', EMAIL_RECIPIENT),
):
    msg[header_name] = header_value
msg.set_content(email)

In [22]:
# send email
# The credentials come from os.environ.get, which returns None for unset
# variables. Report any missing one by name before connecting to the SMTP server.
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ('EMAIL_ADDRESS', EMAIL_ADDRESS),
        ('EMAIL_PASSWORD', EMAIL_PASSWORD),
        ('EMAIL_RECIPIENT', EMAIL_RECIPIENT),
    )
    if not setting_value
]
if missing_settings:
    raise RuntimeError(
        'Cannot send notification email; missing environment variable(s): '
        + ', '.join(missing_settings)
    )

# Connect over implicit TLS, authenticate, and hand the prepared message to the server.
with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
    smtp.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
    smtp.send_message(msg)