In [None]:
# inspiration: https://docs.google.com/spreadsheets/d/1wZhPLMCHKJvwOkP4juclhjFgqIY8fQFMemwKL2c64vk/htmlview row 1252
# senate data: https://senatestockwatcher.com/api
# house data: https://housestockwatcher.com/api
# historical legiislator data: https://github.com/unitedstates/congress-legislators

In [3]:
import pandas as pd

## Process legislator file

In [81]:
congress_historical = pd.read_csv("https://theunitedstates.io/congress-legislators/legislators-historical.csv",
                                  usecols=["first_name","middle_name", "last_name", "full_name", "type", "party"])

congress_historical = congress_historical[(congress_historical.type == "sen") & (congress_historical.full_name.notna())]

congress_current = pd.read_csv("https://theunitedstates.io/congress-legislators/legislators-current.csv",
                               usecols=["first_name","middle_name", "last_name", "full_name", "type", "party"])

congress_current = congress_current[(congress_current.type == "sen") & (congress_current.full_name.notna())]

congress = pd.concat([congress_historical, congress_current])

congress.head()

Unnamed: 0,last_name,first_name,middle_name,full_name,type,party
11634,Inouye,Daniel,K.,Daniel K. Inouye,sen,Democrat
11635,Akaka,Daniel,Kahikina,Daniel K. Akaka,sen,Democrat
11636,Bingaman,Jeff,,Jeff Bingaman,sen,Democrat
11637,Conrad,Kent,Kent,Kent Conrad,sen,Democrat
11638,Hutchison,Kay,Bailey,Kay Bailey Hutchison,sen,Republican


In [83]:
congress["name_cleaned"] = congress["full_name"].replace({", IV": "", "III": ""}, regex=True)

congress.name_cleaned.unique()

array(['Daniel K. Inouye', 'Daniel K. Akaka', 'Jeff Bingaman',
       'Kent Conrad', 'Kay Bailey Hutchison', 'Herb Kohl',
       'Joseph I. Lieberman', 'Richard G. Lugar', 'Ben Nelson',
       'Olympia J. Snowe', 'Jim Webb', 'Scott P. Brown', 'Jim DeMint',
       'John F. Kerry', 'Frank R. Lautenberg', 'William M. Cowan',
       'Jeff Chiesa', 'Max Baucus', 'Mark Begich', 'Saxby Chambliss',
       'Kay R. Hagan', 'Tom Harkin', 'Tim Johnson', 'Mary L. Landrieu',
       'Carl Levin', 'Mark L. Pryor', 'John D. Rockefeller', 'Mark Udall',
       'Tom Coburn', 'John E. Walsh', 'Mike Johanns', 'Kelly Ayotte',
       'Barbara Boxer', 'Daniel Coats', 'Mark Kirk',
       'Barbara A. Mikulski', 'Harry Reid', 'David Vitter',
       'Jeff Sessions', 'Al Franken', 'Luther Strange', 'Thad Cochran',
       'John McCain', 'Jon Kyl', 'Bob Corker', 'Orrin G. Hatch',
       'Claire McCaskill', 'Bill Nelson', 'Joe Donnelly', 'Jeff Flake',
       'Dean Heller', 'Heidi Heitkamp', 'Johnny Isakson',
       'M

In [84]:
def clean_congress_name(fullname):
    names = fullname.split(" ")
    fullname = names[0] + " " + names[-1]
    return fullname

congress["fullname_cleaned"] = congress["name_cleaned"].apply(clean_congress_name)
congress.head()

Unnamed: 0,last_name,first_name,middle_name,full_name,type,party,name_cleaned,fullname_cleaned
11634,Inouye,Daniel,K.,Daniel K. Inouye,sen,Democrat,Daniel K. Inouye,Daniel Inouye
11635,Akaka,Daniel,Kahikina,Daniel K. Akaka,sen,Democrat,Daniel K. Akaka,Daniel Akaka
11636,Bingaman,Jeff,,Jeff Bingaman,sen,Democrat,Jeff Bingaman,Jeff Bingaman
11637,Conrad,Kent,Kent,Kent Conrad,sen,Democrat,Kent Conrad,Kent Conrad
11638,Hutchison,Kay,Bailey,Kay Bailey Hutchison,sen,Republican,Kay Bailey Hutchison,Kay Hutchison


In [85]:
congress.to_csv("data/senators.csv", index=False)

## Get daily senate trade summary file 

In [37]:
# url = "https://senate-stock-watcher-data.s3-us-west-2.amazonaws.com/aggregate/all_daily_summaries.json"
txns = pd.read_json("https://senate-stock-watcher-data.s3-us-west-2.amazonaws.com/aggregate/all_transactions.json")

print(txns.shape)
txns.head(1)

(8831, 11)


Unnamed: 0,transaction_date,owner,ticker,asset_description,asset_type,type,amount,comment,senator,ptr_link,disclosure_date
0,08/30/2021,Spouse,BABA,Alibaba Group Holding Limited American Depositary,Stock,Sale (Full),"$1,001 - $15,000",Due to standing separation agreement with fina...,Sheldon Whitehouse,https://efdsearch.senate.gov/search/view/ptr/2...,09/13/2021


In [38]:
txns = txns[["senator", "transaction_date", "disclosure_date", "ticker", "asset_description", "asset_type", "amount"]]
txns.head()

Unnamed: 0,senator,transaction_date,disclosure_date,ticker,asset_description,asset_type,amount
0,Sheldon Whitehouse,08/30/2021,09/13/2021,BABA,Alibaba Group Holding Limited American Depositary,Stock,"$1,001 - $15,000"
1,Sheldon Whitehouse,08/27/2021,09/13/2021,--,"MACYS RETAIL HLDGS INC <div class=""text-muted""...",Corporate Bond,"$15,001 - $50,000"
2,Sheldon Whitehouse,08/30/2021,09/13/2021,MCHP,Microchip Technology Incorporated - Common Stock,Stock,"$15,001 - $50,000"
3,Sheldon Whitehouse,08/30/2021,09/13/2021,T,AT&amp;T Inc.,Stock,"$15,001 - $50,000"
4,Patrick J Toomey,01/16/2021,09/09/2021,--,Fitso (Exchanged) <br> Zomato (Received) <div ...,Non-Public Stock,"$1,001 - $15,000"


In [69]:
txns["name_cleaned"] = txns["senator"].replace({", Jr.": "", ", Iv" : "", "Ladda ": "", "Rafael": "Ted", ", Iii": ""}, regex=True)
txns.name_cleaned.unique()


array(['Sheldon Whitehouse', 'Patrick J Toomey', 'Thomas H Tuberville',
       'Mark R Warner', 'Thomas R Carper', 'Christopher A Coons',
       'Roger W Marshall', 'Shelley M Capito', 'John Hoeven', 'Rand Paul',
       'John W Hickenlooper', 'Gary C Peters', 'Angus S King',
       'Susan M Collins', 'Michael F Bennet', 'Ron L Wyden',
       'William F Hagerty', 'Rick Scott', 'John Boozman',
       'A. Mitchell Mcconnell', 'Mike Rounds', 'Jerry Moran,',
       'Richard M Burr', 'Tina Smith', 'Tammy Duckworth',
       'Daniel S Sullivan', 'James M Inhofe', 'Pat Roberts',
       'Jacklyn S Rosen', 'William Cassidy', 'Richard Blumenthal',
       'David A Perdue , Jr', 'Kelly Loeffler', 'Timothy M Kaine',
       'Jeanne Shaheen', 'Ron Johnson', 'Roger F Wicker',
       'Dianne Feinstein', 'Lamar Alexander', 'Roy Blunt',
       'John N Kennedy', 'Ted E Cruz', 'Thomas Udall', 'John F Reed',
       'Thomas R Tillis', 'John A Barrasso', 'Robert P Casey',
       'Robert J Portman', 'Benjamin L 

In [76]:
def get_first_last_name(row):
    fullname = row["name_cleaned"]
    names = fullname.split(" ")
    first_name = names[0]
    last_name = names[-1]
    full_name = f"{first_name} {last_name}"
    return full_name, first_name, last_name

In [77]:
txns[["full_name", "first_name", "last_name"]] = txns.apply(get_first_last_name, axis=1, result_type="expand")
txns.head()

Unnamed: 0,senator,transaction_date,disclosure_date,ticker,asset_description,asset_type,amount,name_cleaned,first_name,last_name,full_name
0,Sheldon Whitehouse,08/30/2021,09/13/2021,BABA,Alibaba Group Holding Limited American Depositary,Stock,"$1,001 - $15,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse
1,Sheldon Whitehouse,08/27/2021,09/13/2021,--,"MACYS RETAIL HLDGS INC <div class=""text-muted""...",Corporate Bond,"$15,001 - $50,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse
2,Sheldon Whitehouse,08/30/2021,09/13/2021,MCHP,Microchip Technology Incorporated - Common Stock,Stock,"$15,001 - $50,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse
3,Sheldon Whitehouse,08/30/2021,09/13/2021,T,AT&amp;T Inc.,Stock,"$15,001 - $50,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse
4,Patrick J Toomey,01/16/2021,09/09/2021,--,Fitso (Exchanged) <br> Zomato (Received) <div ...,Non-Public Stock,"$1,001 - $15,000",Patrick J Toomey,Patrick,Toomey,Patrick Toomey


In [86]:
txns.to_csv("data/txns_09132021.csv", index=False)


## Join congress to txns

In [87]:
merged = txns.merge(congress,
                    left_on="full_name",
                    right_on="fullname_cleaned",
                    how="left")
merged.head()

Unnamed: 0,senator,transaction_date,disclosure_date,ticker,asset_description,asset_type,amount,name_cleaned_x,first_name_x,last_name_x,full_name_x,last_name_y,first_name_y,middle_name,full_name_y,type,party,name_cleaned_y,fullname_cleaned
0,Sheldon Whitehouse,08/30/2021,09/13/2021,BABA,Alibaba Group Holding Limited American Depositary,Stock,"$1,001 - $15,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse,Whitehouse,Sheldon,,Sheldon Whitehouse,sen,Democrat,Sheldon Whitehouse,Sheldon Whitehouse
1,Sheldon Whitehouse,08/27/2021,09/13/2021,--,"MACYS RETAIL HLDGS INC <div class=""text-muted""...",Corporate Bond,"$15,001 - $50,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse,Whitehouse,Sheldon,,Sheldon Whitehouse,sen,Democrat,Sheldon Whitehouse,Sheldon Whitehouse
2,Sheldon Whitehouse,08/30/2021,09/13/2021,MCHP,Microchip Technology Incorporated - Common Stock,Stock,"$15,001 - $50,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse,Whitehouse,Sheldon,,Sheldon Whitehouse,sen,Democrat,Sheldon Whitehouse,Sheldon Whitehouse
3,Sheldon Whitehouse,08/30/2021,09/13/2021,T,AT&amp;T Inc.,Stock,"$15,001 - $50,000",Sheldon Whitehouse,Sheldon,Whitehouse,Sheldon Whitehouse,Whitehouse,Sheldon,,Sheldon Whitehouse,sen,Democrat,Sheldon Whitehouse,Sheldon Whitehouse
4,Patrick J Toomey,01/16/2021,09/09/2021,--,Fitso (Exchanged) <br> Zomato (Received) <div ...,Non-Public Stock,"$1,001 - $15,000",Patrick J Toomey,Patrick,Toomey,Patrick Toomey,Toomey,Patrick,J.,Patrick J. Toomey,sen,Republican,Patrick J. Toomey,Patrick Toomey


In [91]:
merged[merged.fullname_cleaned.isna()].full_name_x.unique()

array(['Thomas Tuberville', 'Angus King', 'William Hagerty',
       'A. Mcconnell', 'Jerry Moran,', 'Daniel Sullivan', 'Jacklyn Rosen',
       'William Cassidy', 'David Jr', 'Timothy Kaine', 'Thomas Udall',
       'John Reed', 'Thomas Tillis', 'Robert Casey', 'Robert Portman',
       'Joseph Manchin', 'Michael Crapo', 'Ronald Wyden'], dtype=object)