In [35]:
%load_ext jupyter_black

In [2]:
import re
import json
import time
import requests
from datetime import datetime, date

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
import tabula
import pandas as pd

## The Govtrack ideology scores downloaded from Govtrack.us
The 2020 report covers Jan 3, 2019 to Jan 3, 2021, whereas the 2022 report covers Jan 3, 2021 to Jan 3, 2022. The GovTrack
report for the current legislators has not been released yet.

In [39]:
def _normalise_district(dis):
    """Return a district label as text: strings pass through, 3.0 becomes "3"."""
    # The original lambda returned `dis` in both branches, so numeric district
    # values were never converted and the column stayed mixed-type.
    return dis if isinstance(dis, str) else str(int(dis))


# read the 2020 reports and merge
df_2020_rep = pd.read_csv("raw_data/govtrack-stats-2020-house-ideology.csv")
df_2020_sen = pd.read_csv("raw_data/govtrack-stats-2020-senate-ideology.csv")

df_2020 = pd.concat([df_2020_rep, df_2020_sen])
# rows with no district value are labelled "Senator"
# (presumably these are exactly the senate rows; verify against the CSVs)
df_2020.district = df_2020.district.fillna("Senator")
df_2020["period"] = 2020
df_2020.district = df_2020.district.apply(_normalise_district)

# read the 2022 reports
df_2022_rep = pd.read_csv("raw_data/govtrack-stats-2022-house-ideology.csv")
df_2022_sen = pd.read_csv("raw_data/govtrack-stats-2022-senate-ideology.csv")

df_2022 = pd.concat([df_2022_rep, df_2022_sen])
df_2022["period"] = 2022
df_2022.district = df_2022.district.fillna("Senator")
df_2022.district = df_2022.district.apply(_normalise_district)

### Religious denominations in Congress

In [40]:
# Extract the religion composition tables from the two source PDFs.
# `pages="all"` asks tabula to scan every page of each document.

religion_pdf_2020 = "raw_data/Faith-on-the-Hill-116-detailed-tables.pdf"
religion_pdf_2022 = "raw_data/01.04.21_faith_on_the_hill_detailed.table_.update.pdf"

dfs_religion_2020 = tabula.read_pdf(religion_pdf_2020, pages="all")
dfs_religion_2022 = tabula.read_pdf(religion_pdf_2022, pages="all")

In [41]:
def concat_pdf(dfs_, year):
    """Stack the per-page religion tables extracted from a PDF into one frame.

    For every page except the first page of the 2020 report, the column labels
    are treated as a data row: they are moved back into the table as its first
    row, and the page then receives the proper column names.

    Parameters
    ----------
    dfs_ : list of pandas.DataFrame
        Page tables in document order. Each must end up with a ``District``
        column once the column names are applied.
    year : int
        ``2022`` applies the fixed six-column 2022 layout to every page; any
        other value reuses the columns of ``dfs_[0]`` (for ``2020`` the first
        page is taken as-is).

    Returns
    -------
    pandas.DataFrame
        All pages stacked with a fresh 0..n-1 index. ``District`` has
        "Sen"/"Senator" normalised to "Senator" and "At-Large"/"At-large"
        normalised to "0".
    """
    columns_2022 = [
        "State",
        "District",
        "Name",
        "Party",
        "Freshman",
        "Denominational family",
    ]
    district_labels = {"Sen": "Senator", "At-Large": "0", "At-large": "0"}

    pages = []
    for i, page in enumerate(dfs_):
        if i == 0 and year == 2020:
            # First 2020 page already carries the real header; copy it so the
            # caller's DataFrame is not modified by the District clean-up below.
            page = page.copy()
        else:
            # The labels of this page are really a data row: put it back on top
            # so rows stay in document order.
            header_row = page.columns.to_frame().T
            page = pd.concat([header_row, page], ignore_index=True)
            # Use the columns of the first page that was passed in rather than
            # a notebook-level global, so the function works on any input list.
            page.columns = columns_2022 if year == 2022 else dfs_[0].columns

        page["District"] = page["District"].map(
            lambda label: district_labels.get(label, label)
        )
        pages.append(page)

    return pd.concat(pages, ignore_index=True)

In [42]:
df_religion_2020 = concat_pdf(dfs_religion_2020, 2020)
df_religion_2022 = concat_pdf(dfs_religion_2022, 2022)

# Build the full name from the first/middle and last name columns.
# Vectorised concatenation: a missing name part yields NaN for that row
# instead of raising a TypeError inside a row-wise lambda.
df_religion_2020["Full_Name"] = (
    df_religion_2020["First/middle"] + " " + df_religion_2020["Last"]
)

## Veterans in Congress - Data from (militarytimes.com)

In [43]:
def _state_from_label(label):
    """Return the state part of a district label (a bare "Georgia" maps to "GA")."""
    return "GA" if label == "Georgia" else label.split(" ")[0]


def _district_from_label(label):
    """Return "Senator", "0" or the second token of a district label."""
    parts = label.split(" ")
    # A label with no second token, or one marked "(House)", is stored as "0".
    if len(parts) == 1 or parts[1] == "(House)":
        return "0"
    if parts[1] in ("Sen", "Senate"):
        return "Senator"
    return parts[1]


def get_veterans(url):
    """Scrape the veterans-in-Congress table from an Infogram page.

    Opens the page in a Selenium-driven Chrome session, waits (up to 20 s) for
    the ``.igc-table-container`` element to become visible, and parses the
    first HTML table found inside it.

    Parameters
    ----------
    url : str
        Address of the Infogram page holding the table.

    Returns
    -------
    pandas.DataFrame
        The scraped table with a new ``State`` column, ``District`` rewritten
        to "Senator" / "0" / the district token, and ``Party`` recoded
        ("GOP"/"Republican" -> "R", "Dem" -> "D", anything else unchanged).
        The table is expected to provide ``District`` and ``Party`` columns.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If the table container does not become visible in time.
    ValueError
        If the container or a table inside it cannot be found.
    """
    from io import StringIO  # local import keeps this cell self-contained

    # Create a new Chrome session and make sure it is always closed again,
    # even when loading or waiting fails.
    driver = webdriver.Chrome()
    try:
        wait = WebDriverWait(driver, 20)

        # Load the web page
        driver.get(url)
        driver.maximize_window()

        # Wait for the table container to be rendered
        wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, ".igc-table-container"))
        )
        # short pause so the remaining elements can load once the first one is visible
        time.sleep(0.5)

        page_source = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(page_source, "lxml")
    div = soup.select_one("div.igc-table-container")
    if div is None:
        raise ValueError("No 'div.igc-table-container' element found at %s" % url)

    # read_html expects a file-like object; literal HTML strings are deprecated
    df = pd.read_html(StringIO(str(div)))[0]

    # Both derived columns are computed from the raw labels before District is
    # overwritten.
    raw_labels = df["District"]
    df["State"] = raw_labels.apply(_state_from_label)
    df["District"] = raw_labels.apply(_district_from_label)

    party_codes = {"GOP": "R", "Republican": "R", "Dem": "D"}
    df["Party"] = df["Party"].apply(lambda party: party_codes.get(party, party))

    return df

In [44]:
# Veterans table saved as the 116th Congress file
veterans_url_2020 = (
    "https://e.infogram.com/b28274cb-26b9-441f-9994-a8eb8558839b?src=embed"
)
df_mil_2020 = get_veterans(veterans_url_2020)
df_mil_2020.to_csv("raw_data/veterans_in_congress_116.csv", index=False)

# Veterans table saved as the 117th Congress file
veterans_url_2022 = "https://infogram.com/veterans-in-117th-congress-1hxj48ppv9zz52v"
df_mil_2022 = get_veterans(veterans_url_2022)
df_mil_2022.to_csv("raw_data/veterans_in_congress_117.csv", index=False)

In [46]:
# Inspect the 2020 veterans table; the merge with the religion data happens in a later cell

df_mil_2020

Unnamed: 0,District,Party,Name,In office now?,Gender,Service,Era,Combat tours,State
0,9,R,Doug Collins,Yes,Male,AF Reserve,2000s-2010s,OIF,GA
1,Senator,R,Mike Enzi,Yes,Male,Air Force,1960s-1970s,none,WY
2,Senator,R,Lindsey Graham,Yes,Male,Air Force,1980s-2010s,OIF,SC
3,6,D,Chrissy Houlahan,No,Female,Air Force,1980s,none,PA
4,5,R,Denver Riggleman,No,Male,Air Force,1990s,none,VA
...,...,...,...,...,...,...,...,...,...
91,1,R,Andy Harris,Yes,Male,Navy Reserve,1980s-2010s,Desert Storm,MD
92,20,D,Jimmy Panetta,Yes,Male,Navy Reserve,2000s,OEF,CA
93,3,R,Jim Banks,Yes,Male,Navy Reserve,2010s,OEF,IN
94,4,R,Steven Palazzo,Yes,Male,Marine Corps/Army,1980s-2010s,"Desert Storm, OIF",MS


In [47]:
# match the names and merge the dataframes using Levenshtein logic

import fuzzy_pandas as fpd

In [48]:
# Rename the name columns so the two sources stay distinguishable after matching

df_religion_2020 = df_religion_2020.rename(
    columns={"Full_Name": "Name_Rel", "Last": "Last_Name"}
)
df_mil_2020 = df_mil_2020.rename(columns={"Name": "Name_Mil"})

matches = fpd.fuzzy_merge(
    df_religion_2020,
    df_mil_2020,
    left_on=["Name_Rel", "State", "District", "Party"],
    right_on=["Name_Mil", "State", "District", "Party"],
    method="levenshtein",
    threshold=0.55,
    ignore_case=True,
    ignore_nonalpha=True,
    ignore_nonlatin=True,
    ignore_order_words=True,
)
matches["Military"] = "Yes"

# left join df_religion with matches
# The join keys are spelled out (they were previously inferred from the shared
# column names), and each member keeps at most one match so a fuzzy
# one-to-many hit cannot duplicate rows of the religion table.
match_keys = ["Name_Rel", "Last_Name"]
veteran_flags = matches[["Name_Rel", "Name_Mil", "Last_Name", "Military"]].drop_duplicates(
    subset=match_keys
)

df_religion_2020 = df_religion_2020.merge(veteran_flags, on=match_keys, how="left")
df_religion_2020["Military"] = df_religion_2020["Military"].fillna("No")
# keep only the first word of the last name; missing names stay missing
df_religion_2020["Last_Name"] = df_religion_2020["Last_Name"].str.split(" ").str[0]

## Other Congress biographies and social links - source https://github.com/unitedstates/congress-legislators

In [49]:
# Historical and current legislators, stacked under one fresh 0..n-1 index
congress_historical = pd.read_csv("raw_data/legislators-historical.csv")
congress_current = pd.read_csv("raw_data/legislators-current.csv")
congress_data = pd.concat(
    [congress_historical, congress_current], ignore_index=True
)

# 116th Congress members guide: one CSV per chamber
senate_guide_path = "raw_data/116th Congress Members Guide with Elections and Demographic Data by District - Senate.csv"
house_guide_path = "raw_data/116th Congress Members Guide with Elections and Demographic Data by District - House.csv"

congress_2020_senate = pd.read_csv(senate_guide_path)
congress_2020_house = pd.read_csv(house_guide_path)

In [50]:
# Preview the first four rows of the combined legislators table
congress_data.head(4)

Unnamed: 0,last_name,first_name,middle_name,suffix,nickname,full_name,birthday,gender,type,state,...,opensecrets_id,lis_id,fec_ids,cspan_id,govtrack_id,votesmart_id,ballotpedia_id,washington_post_id,icpsr_id,wikipedia_id
0,Bassett,Richard,,,,,1745-04-02,M,sen,DE,...,,,,,401222,,,,507.0,Richard Bassett (Delaware politician)
1,Bland,Theodorick,,,,,1742-03-21,M,rep,VA,...,,,,,401521,,,,786.0,Theodorick Bland (congressman)
2,Burke,Aedanus,,,,,1743-06-16,M,rep,SC,...,,,,,402032,,,,1260.0,Aedanus Burke
3,Carroll,Daniel,,,,,1730-07-22,M,rep,MD,...,,,,,402334,,,,1538.0,Daniel Carroll


In [51]:
# Split the house `Code` column on "-" into a state part and a district part.
code_parts = congress_2020_house["Code"].str.split("-")
district_part = code_parts.str[1]

# "AL" (at-large) becomes the string "0" so the column holds one type only;
# the previous integer 0 was mixed in with string districts.
congress_2020_house["Distr"] = district_part.where(district_part != "AL", "0")
congress_2020_house["State"] = code_parts.str[0]

In [52]:
# Preview the senate guide; note that State holds full state names at this point
congress_2020_senate.head()

Unnamed: 0,State,Year,Class,Party,First_Name,Last_Name,First Elected,Birth Year,Gender,Race_Ethnicity,Religion,LGBTQ,Pre_2018_Incumbent
0,Alabama,2017,II,Democratic,Doug,Jones,2017 Special,1954,Man,White - Non-Hispanic,Christian - Methodist,No,Lost in primary
1,Alabama,2016,III,Republican,Richard,Shelby,1986,1934,Man,White - Non-Hispanic,Christian - Presbyterian,No,Not up
2,Alaska,2014,II,Republican,Dan,Sullivan,2014,1964,Man,White - Non-Hispanic,Christian - Roman Catholic,No,Not up
3,Alaska,2016,III,Republican,Lisa,Murkowski,2002 Appointment,1957,Woman,White - Non-Hispanic,Christian - Roman Catholic,No,Not up
4,Arizona,2018,I,Democratic,Kyrsten,Sinema,2018,1976,Woman,White - Non-Hispanic,Unaffiliated,Bisexual,Open - Retired


In [53]:
# Lookup from full US state / district / territory names to two-letter codes

_STATE_ABBREVS = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
}

_DISTRICT_AND_TERRITORY_ABBREVS = {
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# States first, then DC and the territories
us_state_to_abbrev = {**_STATE_ABBREVS, **_DISTRICT_AND_TERRITORY_ABBREVS}

In [54]:
# Replace full state names with their abbreviations. Values that are not keys
# of the lookup (for example abbreviations left by an earlier run of this cell)
# are kept, so re-running no longer raises KeyError.
congress_2020_senate["State"] = congress_2020_senate["State"].map(
    lambda name: us_state_to_abbrev.get(name, name)
)

# Still fail loudly if something is neither a known name nor a known code.
unknown_states = set(congress_2020_senate["State"]) - set(us_state_to_abbrev.values())
assert not unknown_states, f"Unrecognised state values: {unknown_states}"

In [55]:
# concatenate the senate and the house dataframes
df_bios_2020 = (
    pd.concat([congress_2020_senate, congress_2020_house])
    .drop(columns=["Year", "Code", "District"])
    .rename(columns={"Distr": "District", "Class": "Senate Class"})
    .reset_index(drop=True)
)

# Rows without a district (the senate rows have no "Distr" value) are senators.
# Assign the result back instead of calling fillna(inplace=True) on the
# attribute, which is not guaranteed to update the frame.
df_bios_2020["District"] = df_bios_2020["District"].fillna("Senator")
df_bios_2020["Type"] = df_bios_2020["District"].map(
    lambda district: "sen" if district == "Senator" else "rep"
)

# Recode gender to single letters; any other value is left unchanged
gender_codes = {"Man": "M", "Woman": "F"}
df_bios_2020["Gender"] = df_bios_2020["Gender"].map(
    lambda gender: gender_codes.get(gender, gender)
)

In [56]:
# Parse birthdays once, then derive the birth year used as a join key later
birthdays = pd.to_datetime(congress_data["birthday"])
congress_data["birthday"] = birthdays
congress_data["Birth Year"] = birthdays.dt.year

In [57]:
# Spot check: look up the legislator rows whose last name contains "Manchin"
congress_data[congress_data.last_name.str.contains("Manchin")]

Unnamed: 0,last_name,first_name,middle_name,suffix,nickname,full_name,birthday,gender,type,state,...,lis_id,fec_ids,cspan_id,govtrack_id,votesmart_id,ballotpedia_id,washington_post_id,icpsr_id,wikipedia_id,Birth Year
12162,Manchin,Joe,,III,,"Joe Manchin, III",1947-08-24,M,sen,WV,...,S338,S0WV00090,62864.0,412391,7547.0,Joe Manchin III,,40915.0,Joe Manchin,1947.0


In [58]:
# Attach the legislators data to each 2020 biography row, matching on
# last name, state, chamber type, gender and birth year

bio_keys = ["Last_Name", "State", "Type", "Gender", "Birth Year"]
legislator_keys = ["last_name", "state", "type", "gender", "Birth Year"]

matches = pd.merge(
    df_bios_2020,
    congress_data,
    how="left",
    left_on=bio_keys,
    right_on=legislator_keys,
)

# keep only the biographies that found a legislator record
new_df = matches[matches["govtrack_id"].notna()]

In [59]:
# Add the 2020 GovTrack ideology score to each member, joining on the
# GovTrack id together with the bioguide id

scores_2020 = df_2020[["ideology", "id", "bioguide_id"]]

df_ideology = pd.merge(
    new_df,
    scores_2020,
    how="left",
    left_on=["govtrack_id", "bioguide_id"],
    right_on=["id", "bioguide_id"],
)

In [60]:
def age(born, as_of=None):
    """Return the age in whole years of someone born on ``born``.

    Parameters
    ----------
    born : date-like
        Object exposing ``year``, ``month`` and ``day``.
    as_of : date-like, optional
        Reference date for the calculation. Defaults to today's date; pass a
        fixed date to make the result reproducible.

    Returns
    -------
    The age in years, or ``born`` unchanged when it is not a usable date
    (for example ``None`` or a string).
    """
    try:
        today = date.today() if as_of is None else as_of
        # True (counts as 1) when this year's birthday has not happened yet
        birthday_pending = (today.month, today.day) < (born.month, born.day)
        return today.year - born.year - birthday_pending
    except (AttributeError, ValueError, TypeError):
        # AttributeError covers inputs without .year/.month/.day
        return born  # leave unchanged


# Derive each member's age from the parsed birthday column via age()
df_ideology["age"] = df_ideology.birthday.apply(age)

In [61]:
# Show only the members that have a Twitter handle on record
df_ideology[~df_ideology.twitter.isna()]

Unnamed: 0,State,Senate Class,Party,First_Name,Last_Name,First Elected,Birth Year,Gender,Race_Ethnicity,Religion,...,cspan_id,govtrack_id,votesmart_id,ballotpedia_id,washington_post_id,icpsr_id,wikipedia_id,ideology,id,age
2,AK,II,Republican,Dan,Sullivan,2014,1964.0,M,White - Non-Hispanic,Christian - Roman Catholic,...,1023262.0,412665.0,114964.0,Daniel S. Sullivan,,41500.0,Dan Sullivan (U.S. senator),0.714706,412665.0,58
3,AK,III,Republican,Lisa,Murkowski,2002 Appointment,1957.0,F,White - Non-Hispanic,Christian - Roman Catholic,...,1004138.0,300075.0,15841.0,Lisa Murkowski,,40300.0,Lisa Murkowski,0.568338,300075.0,66
4,AZ,I,Democratic,Kyrsten,Sinema,2018,1976.0,F,White - Non-Hispanic,Unaffiliated,...,68489.0,412509.0,28338.0,Kyrsten Sinema,,21300.0,Kyrsten Sinema,0.675805,412509.0,46
6,AR,II,Republican,Tom,Cotton,2014,1977.0,M,White - Non-Hispanic,Christian - Methodist,...,63928.0,412508.0,135651.0,Tom Cotton,,21301.0,Tom Cotton,0.933535,412508.0,46
7,AR,III,Republican,John,Boozman,2010,1950.0,M,White - Non-Hispanic,Christian - Southern Baptist,...,92069.0,400040.0,27958.0,John Boozman,,20101.0,John Boozman,0.839497,400040.0,72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,WI,,Democratic,Mark,Pocan,2012,1964.0,M,White - Non-Hispanic,Unknown/Refused,...,79688.0,412585.0,26238.0,Mark Pocan,,21370.0,Mark Pocan,0.174459,412585.0,58
501,WI,,Democratic,Gwen,Moore,2004,1951.0,F,Black - African-American,Christian - Baptist,...,42548.0,400661.0,3457.0,Gwen Moore,,20537.0,Gwen Moore,0.126118,400661.0,72
503,WI,,Republican,Glenn,Grothman,2014,1955.0,M,White - Non-Hispanic,Christian - Lutheran,...,77282.0,412661.0,3493.0,Glenn Grothman,,21559.0,Glenn Grothman,0.816250,412661.0,67
504,WI,,Republican,Tom,Tiffany,2020 Special,1957.0,M,White - Non-Hispanic,Christian,...,,456791.0,,Tom Tiffany,,,Tom Tiffany,0.578802,456791.0,65


In [62]:
# Save the merged biography + ideology dataframe (row index is not written)

df_ideology.to_csv("raw_data/congress_2020_bios.csv", index=False)

### Twitter data and merge with the Biography data

In [63]:
# Read the tweet-level data (one row per tweet, with sentiment columns as shown in the preview below)

twitter_data = pd.read_csv("raw_data/vader_sentiment_tweets.csv")

In [64]:
# Preview the first five tweets
twitter_data.head(5)

Unnamed: 0,tweet_id,username,party,tweet,clean_text,favorite_count,retweet_count,created_at,source,social_policy,...,rank,score,govtrack_cluster,progressive_cluster,govtrack_class,positive_sentiment,neutral_sentiment,negative_sentiment,compound_sentiment,sentiment_text
0,1453097115664662536,RepMikeGarcia,R,I joined fellow China Task Force lawmakers in ...,joined fellow china task force lawmakers in s...,43,29,2021-10-26 20:32:04+00:00,Twitter Web App,,...,#218,0.44,2,3,Centrist,1e-06,0.807001,0.193001,-0.771699,negative
1,1511713375218245640,RepGallagher,R,"Rep. Gallagher: ""If we do not invest in hard p...",rep gallagher if we do not invest in hard powe...,25,11,2022-04-06 14:32:09+00:00,Twitter Web App,,...,#52,0.76,4,4,Far Right,0.113001,0.786001,0.101001,-0.128999,negative
2,1381021863313350658,RepDLesko,R,"CBP is currently encountering over 5,000 peopl...",cbp is currently encountering over 000 people ...,283,82,2021-04-10 23:10:45+00:00,Twitter for iPhone,Social Policies,...,#7,0.91,4,4,Far Right,0.029001,0.860001,0.110001,-0.678299,negative
3,1362468292569534465,RepDebDingell,D,It’s past time our country ban discrimination ...,it is past time our country ban discrimination...,92,21,2021-02-18 18:25:29+00:00,Twitter Web App,Social Policies,...,#351,0.25,1,0,Left Centrist,0.086001,0.799001,0.115001,-0.226299,negative
4,1551368623142363138,RepCarbajal,D,Proud to present the Woman of the Year Award t...,proud to present the woman of the year award t...,17,1,2022-07-25 00:47:57+00:00,Twitter for iPhone,Social Policies,...,#283,0.32,1,0,Left Centrist,0.329001,0.671001,1e-06,0.940301,positive


In [65]:
# merge the biographies with the twitter dataset on username

_PARTY_CODES = {"Republican": "R", "Democratic": "D", "Independent": "I"}


def _party_code(party):
    """Return the one-letter party code; unknown values are returned unchanged."""
    # Labels such as "Independent - Dem Caucus" occur in the biography data and
    # were previously left untouched by the exact "Independent" comparison.
    if isinstance(party, str) and party.startswith("Independent"):
        return "I"
    return _PARTY_CODES.get(party, party)


df = twitter_data.merge(df_ideology, left_on="username", right_on="twitter", how="left")

df["Party"] = df["Party"].apply(_party_code)

In [66]:
# Discard tweets whose author could not be matched to a GovTrack id
df = df.dropna(subset=["govtrack_id"])

In [67]:
# include the military and religion

# df = df.merge(df_religion_2020[["Last_Name", "District", "State", "Party", "Military"]], on=["State", "Last_Name", "District", "Party"], how="left")
# df.Military = df["Military"].fillna("No")

## **Multiple Regression Test**

The 2020 Congress report data is used in this section.
<br />
The dependent variable of the model is the compound sentiment of the tweet, whereas the independent variables
are the legislator's ideology score, party, LGBTQ identity, religion, race/ethnicity and age. Military history is not part of the fitted formula.


In [1]:
import statsmodels.formula.api as smf
import numpy as np

In [15]:
SENTIMENT_FORMULA = (
    "compound_sentiment ~ ideology + Party + LGBTQ + Religion + Race_Ethnicity + age"
)


def model(data, formula=SENTIMENT_FORMULA):
    """Fit an ordinary least squares regression and return the fitted results.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column named in ``formula``.
    formula : str, optional
        Patsy-style model formula. Defaults to compound sentiment regressed on
        ideology, party, LGBTQ, religion, race/ethnicity and age.

    Returns
    -------
    The results object produced by ``smf.ols(...).fit()``; call ``.summary()``
    on it for the regression table.
    """
    return smf.ols(formula=formula, data=data).fit()

In [3]:
# Load the table used for the regressions below.
# NOTE(review): this rebinds `df` (previously the tweets/biographies merge) and reads
# "vader_sentiment_tweets.csv" from the working directory, while the earlier cell reads
# "raw_data/vader_sentiment_tweets.csv" — confirm which file is intended and that it
# contains the Religion, Race_Ethnicity, Party, ideology and age columns used below.
df = pd.read_csv("vader_sentiment_tweets.csv")

In [11]:
# Detailed religion labels that all collapse into the single "Christian" group
_CHRISTIAN_LABELS = (
    "Christian",
    "Christian - Roman Catholic",
    "Christian - Baptist",
    "Christian - Mormon",
    "Christian - Catholic",
    "Christian - Evangelical Protestant",
    "Christian - Church of God",
    "Christian - Presbyterian",
    "Christian - Methodist",
    "Christian - Lutheran",
    "Christian - Episcopalian",
    "Christian - Southern Baptist",
    "Christian - Protestant",
    "Christian - Restorationist",
    "Christian - Disciples of Christ",
    "Christian - Nondenominational",
    "Unitarian Universalist",
    "Christian - Evangelical Free Church",
    "Christian - Pentecostal",
    "Christian - Seventh-Day Adventist",
    "Christian - Nondenominational Protestant",
    "Christian - African Methodist Episcopal",
    "Christian - Chaldean Catholic",
    "Christian - Congregationalist",
    "Christian - United Brethren",
    "Christian - Reformed (Calvinist)",
)

# Lookup from detailed religion label to the collapsed group used in the model
christian_categories = dict.fromkeys(_CHRISTIAN_LABELS, "Christian")
christian_categories.update(
    {
        "Hindu": "Hindu",
        "Jewish": "Jewish",
        "Unknown/Refused": "Unknown",
        "Unaffiliated": "Unaffiliated",
        # Eastern Orthodox is kept as its own group rather than "Christian"
        "Christian - Eastern Orthodox": "Orthodox",
        "Buddhist - Soka Gakkai": "Buddhist",
        "Muslim": "Muslim",
    }
)

In [12]:
# Collapsed race/ethnicity group -> the detailed labels that belong to it
_ETHNICITY_GROUPS = {
    "Hispanic": (
        "Hispanic - Mexican",
        "Hispanic",
        "Hispanic - Puerto Rican",
        "Hispanic - Mexican / Jewish",
        "Hispanic - Cuban",
        "Hispanic - Dominican",
        "Hispanic - Mexican/Colombian",
        "Hispanic - Guatemalan",
    ),
    "White": (
        "White - Non-Hispanic",
        "White - Portuguese-American",
        "White - Middle Eastern - Lebanese",
        "White - Middle Eastern - Palestinian",
    ),
    "Asian": (
        "Asian - Thai",
        "Asian - Indian",
        "Asian - Korean",
        "Asian - Chinese",
        "Asian - Japanese",
        "Asian - Taiwanese",
    ),
    "Black": (
        "Black - African-American",
        "Black - African-American/Asian - Filipino",
        "Black - Jamaican",
        "Black - Eritrean",
        "Black - African-American/Trinidadian",
    ),
    "Native American": (
        "Native American - Chickasaw",
        "Native American - Ho-Chunk",
    ),
}

# Lookup from detailed label to collapsed group, built by inverting the groups
ethnicity = {
    label: group for group, labels in _ETHNICITY_GROUPS.items() for label in labels
}

In [13]:
# Collapse religion and ethnicity into their broad groups.
# Labels missing from a lookup (including already-collapsed values left by an
# earlier run of this cell) are kept as they are, so the cell can be re-run
# without raising KeyError.

df["Religion"] = df["Religion"].map(
    lambda label: christian_categories.get(label, label)
)
df["Race_Ethnicity"] = df["Race_Ethnicity"].map(
    lambda label: ethnicity.get(label, label)
)

In [16]:
# Fit the regression on the full dataset
res = model(df)

In [17]:
# List the party labels present in the regression data
df.Party.unique()

array(['Democratic', 'Republican', 'Independent - Dem Caucus'],
      dtype=object)

In [18]:
# Print the regression table for the model fitted on the full dataset
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:     compound_sentiment   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.010
Method:                 Least Squares   F-statistic:                     88.76
Date:                Fri, 16 Jun 2023   Prob (F-statistic):               0.00
Time:                        21:56:22   Log-Likelihood:            -1.4273e+05
No. Observations:              151338   AIC:                         2.855e+05
Df Residuals:                  151319   BIC:                         2.857e+05
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [19]:
# regression tests per policy

import json
import string

# open json file consisting of policies and its sub-groups
# (the encoding is given explicitly so the platform default cannot garble
# non-ASCII text in the file)
with open("data.json", encoding="utf-8") as json_file:
    categories = json.load(json_file)

In [20]:
# words that keep their original casing when a policy name is turned into a title
articles = ["and", "LGBTQ", "the", "of", "CHIPS", "Ukraine-Russia"]

# policies: names from both policy families, without the catch-all "All" entry
policies = {**categories["Geo Political Policies"], **categories["Social Policies"]}
policies = [name for name in policies if name != "All"]


for policy in sorted(policies, key=str.lower):
    title = " ".join(
        word if word in articles else word.capitalize() for word in policy.split(" ")
    )

    try:
        # category df: tweets whose `policies` text matches the policy name.
        # The flag is passed by keyword: given positionally, re.I lands on the
        # `case` parameter and the match stays case-sensitive. Rows with a
        # missing `policies` value count as non-matching.
        cat_df = df[
            df.policies.str.contains(policy, flags=re.IGNORECASE, regex=True, na=False)
        ]

        res = model(cat_df)

        print(title)
        print(res.summary())
        print(" ")
    except ValueError as err:
        # Best effort: report the policy that could not be modelled and carry on
        print("%s: skipped (%s)" % (title, err))
        print(" ")

Abortion
                            OLS Regression Results                            
Dep. Variable:     compound_sentiment   R-squared:                       0.015
Model:                            OLS   Adj. R-squared:                  0.013
Method:                 Least Squares   F-statistic:                     6.417
Date:                Fri, 16 Jun 2023   Prob (F-statistic):           1.46e-14
Time:                        21:57:01   Log-Likelihood:                -6030.0
No. Observations:                6741   AIC:                         1.209e+04
Df Residuals:                    6724   BIC:                         1.221e+04
Df Model:                          16                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------