In [1]:
import re
import warnings

import pandas as pd
import requests
from bs4 import BeautifulSoup

# NOTE(review): this silences every warning category for the rest of the
# notebook session, so pandas/bs4 deprecation or copy warnings raised by the
# cells below will not be shown — confirm that is intended.
warnings.filterwarnings("ignore")


In [2]:
def district(text):
    """Normalize a ``STATE-DISTRICT`` label to the compact ``STATEnn`` form.

    The part before the first dash is kept as-is and the part after it is
    rendered as a zero-padded, two-digit district number. Labels whose
    district part is missing or not an integer (for example an at-large
    marker such as ``"AK-AL"``) get the district code ``"00"``.

    Parameters
    ----------
    text : str
        Label such as ``"NC-12"``, ``"CA-8"`` or ``"AK-AL"``.

    Returns
    -------
    str
        Label such as ``"NC12"``, ``"CA08"`` or ``"AK00"``.
    """
    parts = text.split("-")

    # Parse with int() rather than eval(): the value is scraped from a web
    # page, and eval() would also choke on zero-padded numbers like "08".
    try:
        number = int(parts[1])
    except (IndexError, ValueError):
        # No district part at all, or a non-numeric one -> at-large code.
        return parts[0] + "00"

    return f"{parts[0]}{number:02d}"


def progressive_punch_scores(house: str) -> pd.DataFrame:
    """Scrape the legislators' score table from progressivepunch.org.

    According to progressivepunch.org, a progressive score indicates a
    legislator's voting history during ideologically polarized debates, and
    state_tilt refers to the likelihood of the represented district/state
    voting for a liberal Democrat.

    Parameters
    ----------
    house : str
        Chamber to fetch; it is interpolated into the request URL. ``"house"``
        selects the House-specific post-processing (district codes are
        normalized with ``district`` and the acronym is the capitalized first
        three letters of the surname). Any other value, e.g. ``"senate"``,
        uses the senate-style acronym ``"Sur, Gi"``.

    Returns
    -------
    pd.DataFrame
        Columns ``name_punch``, ``Party``, ``St/Dis``, ``progressive_score``,
        ``state_tilt`` and ``name_acronym``.
    """
    url = "https://progressivepunch.org/scores.htm?house=%s" % house

    # read_html returns one frame per <table>; the code relies on the scores
    # being the fourth table on the page, addressed by positional columns.
    scraped = pd.read_html(url)[3]

    # rename() returns a new frame, so the column assignments below work on an
    # independent object instead of a slice of the scraped table.
    df = scraped[[1, 2, 3, 6, 8]].rename(
        columns={
            1: "name_punch",
            2: "Party",
            3: "St/Dis",
            6: "progressive_score",
            8: "state_tilt",
        }
    )

    if house == "house":
        df["St/Dis"] = df["St/Dis"].apply(district)
        df["name_acronym"] = df["name_punch"].apply(
            lambda name: (name.split(",")[0][:3]).capitalize()
        )
    else:
        # "Baldwin, Tammy" -> "Bal, Ta": the slice after the comma keeps the
        # leading space, which is what the other senate sources produce too.
        df["name_acronym"] = df["name_punch"].apply(
            lambda name: name.split(",")[0][:3] + "," + name.split(",")[1][:3]
        )
    return df

In [197]:
def get_house_reps():
    """Scrape the House press gallery table of members' official Twitter handles.

    Returns
    -------
    pd.DataFrame
        The first table on the page, using its second row as column labels
        and dropping the two leading non-data rows. Two columns are derived:
        ``name`` (``"LastName, FirstName"``) is added and ``Twitter Handle``
        has every ``"@"`` removed. The index is reset to 0..n-1.
    """
    # link us reps website
    url = "https://pressgallery.house.gov/member-data/members-official-twitter-handles"

    print("***Fetching house reps ***")
    tables = pd.read_html(url)
    print("***House reps response received***")
    raw = tables[0]

    # Row 1 carries the column labels; rows 0 and 1 are not member records.
    reps = raw.drop(index=[0, 1])
    reps.columns = raw.iloc[1]

    # map(str) so a missing name part becomes text instead of breaking the join.
    reps["name"] = reps["LastName"].map(str) + ", " + reps["FirstName"].map(str)
    # "@" is a literal here, not a pattern.
    reps["Twitter Handle"] = reps["Twitter Handle"].str.replace("@", "", regex=False)

    return reps.reset_index(drop=True)

In [198]:
def get_senators_handles():
    """Scrape U.S. senators' Twitter handles from https://ucsd.libguides.com.

    Every table row that contains a link whose text looks like
    ``"Last, First"`` yields one record: the handle is the last path segment
    of the link target, the second and third cells of the row supply the
    state and party.

    Returns
    -------
    pd.DataFrame
        De-duplicated records with columns ``Twitter Handle``, ``name``,
        ``name_acronym`` (e.g. ``"Bal, Ta"``), ``St/Dis`` and ``Party``.

    Raises
    ------
    requests.HTTPError
        If the page request returns an error status.
    """
    url = "https://ucsd.libguides.com/congress_twitter/senators"

    # A timeout keeps a stalled server from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")

    senators_info = []
    for table in soup.find_all("tbody"):
        # Only the rows of *this* table body; scanning the whole document on
        # every pass produced one duplicate of each row per table.
        for row in table.find_all("tr"):
            link = row.find("a", href=True)
            if link is None:
                continue

            name = link.text
            name_split = name.split(",")
            if len(name_split) < 2:
                # Not a "Last, First" senator link.
                continue

            senator_dict = {
                "Twitter Handle": link["href"].rstrip("/").split("/")[-1],
                "name": name,
                "name_acronym": name_split[0][:3] + "," + name_split[1][:3],
            }

            tds = row.find_all("td")
            if len(tds) > 1:
                senator_dict["St/Dis"] = tds[1].text
            if len(tds) > 2:
                senator_dict["Party"] = tds[2].text

            senators_info.append(senator_dict)

    senator_df = pd.DataFrame(senators_info)
    # Nested table bodies can still surface the same row more than once.
    senator_df = senator_df.drop_duplicates()

    return senator_df

### Member Scores from GovTrack.us



In [199]:
def govt_track_score_cards(house):
    """Scrape the GovTrack.us 2020 ideology report card for one chamber.

    GovTrack.us computes the scores from the co-sponsors of each member's
    bills. Each table row with a member link becomes one record.

    Parameters
    ----------
    house : str
        Chamber name interpolated into the report-card URL. ``"house"``
        additionally shortens the acronym to the surname prefix and
        normalizes the district to ``STATEnn`` (``"00"`` when no digits are
        present); any other value, e.g. ``"senate"``, keeps ``"Sur, Gi"``
        acronyms and the state code as scraped.

    Returns
    -------
    pd.DataFrame
        De-duplicated records with columns ``name_gov``, ``name_acronym``,
        ``St/Dis``, ``Party``, ``rank`` and ``score``.

    Raises
    ------
    requests.HTTPError
        If the page request returns an error status.
    """
    url = (
        "https://www.govtrack.us/congress/members/report-cards/2020/{}/ideology".format(
            house
        )
    )

    # A timeout keeps a stalled server from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "lxml")

    members_info = []

    for row in soup.find_all("tr"):
        link = row.find("a", href=True)
        if link is None:
            continue

        # The fourth path segment of the link is expected to be
        # "<first>_<last>"; anything else is not a member row. Skip it rather
        # than abandoning every row that follows.
        href_parts = link["href"].split("/")
        name_split = href_parts[3].split("_") if len(href_parts) > 3 else []
        if len(name_split) < 2:
            continue
        first, last = name_split[0], name_split[1]

        # NOTE(review): the slicing assumes the link text ends in
        # "[P-STnn]" — confirm against the live page. Splitting on the LAST
        # dash keeps hyphenated surnames from shifting the party letter.
        party_state_list = link.text.rsplit("-", 1)
        st = party_state_list[-1][:-1]  # state/dist without the trailing char
        party = party_state_list[0][-1:]

        if house == "house":
            name_acronym = last[:3].capitalize()
            state = "".join(ch for ch in st if not ch.isdigit())
            district_code = "".join(ch for ch in st if ch.isdigit())
            # Two-digit district, "00" when the label carries no number.
            st = state + (f"{int(district_code):02d}" if district_code else "00")
        else:
            name_acronym = last[:3].capitalize() + ", " + first[:2].capitalize()

        member_dict = {
            "name_gov": last.capitalize() + ", " + first.capitalize(),
            "name_acronym": name_acronym,
            "St/Dis": st,
            "Party": party,
        }

        tds = row.find_all("td")
        if len(tds) > 0:
            member_dict["rank"] = tds[0].text.strip()
        if len(tds) > 1:
            # float() instead of eval(): the cell text comes from a remote
            # page and must never be executed as code.
            try:
                member_dict["score"] = float(tds[1].text.strip())
            except ValueError:
                member_dict["score"] = None

        members_info.append(member_dict)

    members_score_df = pd.DataFrame(members_info)
    members_score_df = members_score_df.drop_duplicates()

    return members_score_df

In [200]:
# Gather the three senate sources: Twitter handles, ProgressivePunch scores
# and GovTrack scores.
senator_handles = get_senators_handles()
senators_progressive_scores = progressive_punch_scores("senate")
senators_govtrack_scores = govt_track_score_cards("senate")

# Left-join both score tables onto the handles so every senator with a handle
# is kept even when a score source has no matching row.
df_senate = senator_handles.merge(
    senators_progressive_scores,
    on=["name_acronym", "St/Dis", "Party"],
    how="left",
)
df_senate = df_senate.merge(
    senators_govtrack_scores,
    on=["name_acronym", "St/Dis", "Party"],
    how="left",
)

In [201]:
# House of Representatives inputs, fetched in the same order as before:
# ProgressivePunch scores, official Twitter handles, then GovTrack scores.
congress_progressive_score = progressive_punch_scores(house="house")
congress_handles = get_house_reps()
congress_govtrack_scores = govt_track_score_cards(house="house")


***Fetching house reps ***
***House reps response received***


In [202]:
# Attach the ProgressivePunch scores to the House handles by party and
# district; this join is what brings in the name_acronym column.
df_house = congress_handles.merge(
    congress_progressive_score,
    on=["Party", "St/Dis"],
    how="left",
)

# Then attach the GovTrack scores, additionally matching on name_acronym.
df_house = df_house.merge(
    congress_govtrack_scores,
    on=["name_acronym", "St/Dis", "Party"],
    how="left",
)

In [203]:
# Display the merged House table (handles joined with both score sources)
# to eyeball the result of the left joins.
df_house

Unnamed: 0,FirstName,LastName,Twitter Handle,St/Dis,Party,name,name_punch,progressive_score,state_tilt,name_acronym,name_gov,rank,score
0,Alma,Adams,RepAdams,NC12,D,"Adams, Alma","Adams, Alma",96.91,Strong Dem,Ada,"Adams, Alma",#372,0.22
1,Robert,Aderholt,Robert_Aderholt,AL04,R,"Aderholt, Robert","Aderholt, Robert",5.96,Strong Rep,Ade,"Aderholt, Robert",#146,0.65
2,Pete,Aguilar,RepPeteAguilar,CA31,D,"Aguilar, Pete","Aguilar, Pete",92.43,Strong Dem,Agu,"Aguilar, Pete",#296,0.30
3,Rick,Allen,RepRickAllen,GA12,R,"Allen, Rick","Allen, Rick",0.64,Strong Rep,All,"Allen, Rick",#26,0.82
4,Colin,Allred,RepColinAllred,TX32,D,"Allred, Colin","Allred, Colin",95.00,Leans Dem,All,"Allred, Colin",#217,0.44
...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,Joe,Wilson,RepJoeWilson,SC02,R,"Wilson, Joe","Wilson, Joe",2.22,Strong Rep,Wil,"Wilson, Joe",#61,0.75
440,Robert,Wittman,RobWittman,VA01,R,"Wittman, Robert","Wittman, Rob",4.77,Leans Rep,Wit,"Wittman, Robert",#75,0.74
441,Steve,Womack,rep_stevewomack,AR03,R,"Womack, Steve","Womack, Steve",5.09,Strong Rep,Wom,"Womack, Steve",#157,0.63
442,John,Yarmuth,RepJohnYarmuth,KY03,D,"Yarmuth, John","Yarmuth, John",96.14,Strong Dem,Yar,"Yarmuth, John",#311,0.29


In [204]:
# Display the merged Senate table (handles joined with both score sources)
# to eyeball the result of the left joins.
df_senate

Unnamed: 0,Twitter Handle,name,name_acronym,St/Dis,Party,name_punch,progressive_score,state_tilt,name_gov,rank,score
0,SenatorBaldwin,"Baldwin, Tammy","Bal, Ta",WI,D,"Baldwin, Tammy",95.27,Swing,"Baldwin, Tammy",#86,0.21
1,SenJohnBarrasso,"Barrasso, John","Bar, Jo",WY,R,"Barrasso, John",2.51,Strong Rep,"Barrasso, John",#17,0.88
2,SenatorBennet,"Bennet, Michael","Ben, Mi",CO,D,"Bennet, Michael",88.85,Strong Dem,"Bennet, Michael",#67,0.32
3,MarshaBlackburn,"Blackburn, Marsha","Bla, Ma",TN,R,"Blackburn, Marsha",1.45,Strong Rep,"Blackburn, Marsha",#1,1.00
4,SenBlumenthal,"Blumenthal, Richard","Blu, Ri",CT,D,"Blumenthal, Richard",96.42,Strong Dem,"Blumenthal, Richard",#93,0.13
...,...,...,...,...,...,...,...,...,...,...,...
95,SenWarren,"Warren, Elizabeth","War, El",MA,D,"Warren, Elizabeth",98.84,Strong Dem,"Warren, Elizabeth",#85,0.21
96,SenWhitehouse,"Whitehouse, Sheldon","Whi, Sh",RI,D,"Whitehouse, Sheldon",94.17,Strong Dem,"Whitehouse, Sheldon",#83,0.22
97,SenatorWicker,"Wicker, Roger","Wic, Ro",MS,R,"Wicker, Roger",7.94,Strong Rep,"Wicker, Roger",#21,0.84
98,RonWyden,"Wyden, Ron","Wyd, Ro",OR,D,"Wyden, Ron",92.68,Strong Dem,"Wyden, Ron",#88,0.19


In [205]:
# Stack the House and Senate tables, keeping only the columns both share,
# in a fixed order.
df = pd.concat(
    [
        chamber[
            [
                "name",
                "name_punch",
                "name_gov",
                "Twitter Handle",
                "St/Dis",
                "Party",
                "progressive_score",
                "state_tilt",
                "rank",
                "score",
            ]
        ]
        for chamber in (df_house, df_senate)
    ]
)

# Persist the combined table; the row index is not written.
df.to_csv("handles_scores.csv", index=False)