In [1]:
import pandas as pd

### Read in tables pulled from Wikipedia

In [30]:
house = pd.read_csv("wiki_house_of_rep.csv")
house['District'] = house['District'].str.replace(r'[0-9]+', '', regex=True)
house.head(3)

Unnamed: 0,District,Member,Party,Party.1,Prior experience,Education,Assumed office,Residence,Born[2]
0,Alabama,Jerry Carl,,Republican,Mobile County Commission,Florida Gateway College,2021,Mobile,"June 17, 1958 (age 63)"
1,Alabama,Barry Moore,,Republican,Alabama House of Representatives,Enterprise State Community College (AS)Auburn ...,2021,Enterprise,"September 26, 1966 (age 55)"
2,Alabama,Mike Rogers,,Republican,Calhoun County CommissionerAlabama House of Re...,"Jacksonville State University (BA, MPA)Birming...",2003,Anniston[3],"July 16, 1958 (age 63)"


In [5]:
senators = pd.read_csv("wiki_senators.csv")
senators.head(3)

Unnamed: 0,State,Portrait,Senator,Party,Party.1,Born,Occupation(s),Previous electiveoffice(s),Education,Assumed office,Term up,Residence[2]
0,Alabama,,Richard Shelby,,Republican[d],"May 6, 1934 (age 87)",Lawyer,U.S. HouseAlabama Senate,"University of Alabama (BA, LLB) Birmingham Sch...","January 3, 1987",2022,Tuscaloosa
1,Alabama,,Tommy Tuberville,,Republican,"September 18, 1954 (age 67)","College football coachPartner, investment mana...",,Southern Arkansas University (BS),"January 3, 2021",2026,Auburn[3]
2,Alaska,,Lisa Murkowski,,Republican,"May 22, 1957 (age 64)",Lawyer,Alaska House of Representatives,Georgetown University (AB) Willamette Universi...,"December 20, 2002[e]",2022,Girdwood


In [7]:
governors = pd.read_csv("wiki_governors.csv")
governors.columns = governors.iloc[0] #export error, change column names to first row
governors = governors.drop(0)
governors.dropna(axis=1, how='all', inplace=True) #drop columns of only null values
governors.head(3)

Unnamed: 0,State,Governor,Party,Born,Prior public experience,Inauguration,End of term,Past governors
1,Alabama,Kay Ivey,Republican,"October 15, 1944 (age 77)","Lieutenant Governor, Treasurer","April 10, 2017",2023,List
2,Alaska,Mike Dunleavy,Republican,"May 5, 1961 (age 60)",Alaska Senate,"December 3, 2018",2022,List
3,Arizona,Doug Ducey,Republican,"April 9, 1964 (age 57)",Treasurer,"January 5, 2015",2023 (term limits),List
4,Arkansas,Asa Hutchinson,Republican,"December 3, 1950 (age 71)",Under Secretary of Homeland Security for Borde...,"January 13, 2015",2023 (term limits),List
5,California,Gavin Newsom,Democratic,"October 10, 1967 (age 54)","Lieutenant Governor, Mayor of San Francisco","January 7, 2019",2023,List


In [24]:
military = pd.read_csv("wiki_US_military_officers.csv")
military.dropna(axis=1, how='all', inplace=True)
military['Title'] = military.Incumbent.str[:7]
military['Incumbent'] = military.Incumbent.str[7:]
military.head(3)

Unnamed: 0,Position,Incumbent,Service branch,Title
0,Chairman of the Joint Chiefs of Staff (CJCS),Mark A. Milley,U.S. Army,General
1,Vice Chairman of the Joint Chiefs of Staff (VJCS),Christopher W. Grady,U.S. Navy,Admiral
2,"Commander, U.S. Africa Command (USAFRICOM)",Stephen J. Townsend,U.S. Army,General


In [59]:
world = pd.read_csv("wiki_world_leaders.csv")
world.head(3)

Unnamed: 0,State,Head of state,Head of government
0,Afghanistan,Leader – Hibatullah Akhundzada,Acting Prime Minister – Hasan Akhund
1,Albania,President – Ilir Meta,Prime Minister – Edi Rama
2,Algeria,President – Abdelmadjid Tebboune,Prime Minister – Aymen Benabderrahmane


In [60]:
#split world into head of states, head of government to turn into dictionary later
headofstate = world.drop('Head of government', axis=1)
headofstate[['Position', 'Name']] = headofstate['Head of state'].str.split('–', 1, expand=True)
headofstate.dropna(axis=0, inplace=True) 
headofstate.head(3)

Unnamed: 0,State,Head of state,Position,Name
0,Afghanistan,Leader – Hibatullah Akhundzada,Leader,Hibatullah Akhundzada
1,Albania,President – Ilir Meta,President,Ilir Meta
2,Algeria,President – Abdelmadjid Tebboune,President,Abdelmadjid Tebboune


In [72]:
headofgov = world.drop('Head of state', axis=1)
headofgov[['Position', 'Name']] = headofgov['Head of government'].str.split('–', 1, expand=True)
headofgov.dropna(axis=0, inplace=True)
headofgov.head(3)

Unnamed: 0,State,Head of government,Position,Name
0,Afghanistan,Acting Prime Minister – Hasan Akhund,Acting Prime Minister,Hasan Akhund
1,Albania,Prime Minister – Edi Rama,Prime Minister,Edi Rama
2,Algeria,Prime Minister – Aymen Benabderrahmane,Prime Minister,Aymen Benabderrahmane


## Create Dictionary 

In [73]:
dict_house = house.set_index('Member').T.to_dict('list') 
for key, value in dict_house.items():
    #print(value[0], value[2])
    dict_house[key] = value[0] + " " + value[2] + " House Representative"
print(dict_house) #not sure where '\xa0' is coming from? it's not there if you print value[0] and value[2]

{'Jerry Carl': 'Alabama\xa0 Republican House Representative', 'Barry Moore': 'Alabama\xa0 Republican House Representative', 'Mike Rogers': 'Alabama\xa0 Republican House Representative', 'Robert Aderholt': 'Alabama\xa0 Republican House Representative', 'Mo Brooks': 'Alabama\xa0 Republican House Representative', 'Gary Palmer': 'Alabama\xa0 Republican House Representative', 'Terri Sewell': 'Alabama\xa0 Democratic House Representative', 'Don Young': 'Alaska\xa0at-large Republican House Representative', "Tom O'Halleran": 'Arizona\xa0 Democratic House Representative', 'Ann Kirkpatrick': 'Arizona\xa0 Democratic House Representative', 'Raúl Grijalva': 'Arizona\xa0 Democratic House Representative', 'Paul Gosar': 'Arizona\xa0 Republican House Representative', 'Andy Biggs': 'Arizona\xa0 Republican House Representative', 'David Schweikert': 'Arizona\xa0 Republican House Representative', 'Ruben Gallego': 'Arizona\xa0 Democratic House Representative', 'Debbie Lesko': 'Arizona\xa0 Republican House Re

  """Entry point for launching an IPython kernel.


In [75]:
#convert dictionary to dataframe for easy viewing
pd.DataFrame.from_dict(dict_house, orient='index')

Unnamed: 0,0
Jerry Carl,Alabama Republican House Representative
Barry Moore,Alabama Republican House Representative
Mike Rogers,Alabama Republican House Representative
Robert Aderholt,Alabama Republican House Representative
Mo Brooks,Alabama Republican House Representative
...,...
Scott Fitzgerald,Wisconsin Republican House Representative
Glenn Grothman,Wisconsin Republican House Representative
Tom Tiffany,Wisconsin Republican House Representative
Mike Gallagher,Wisconsin Republican House Representative


In [35]:
dict_senators = senators.set_index('Senator').T.to_dict('list')
for key, value in dict_senators.items():
    dict_senators[key] = value[0] + " " + value[3] + " Senator"
print(dict_senators)

{'Richard Shelby': 'Alabama Republican[d] Senator', 'Tommy Tuberville': 'Alabama Republican Senator', 'Lisa Murkowski': 'Alaska Republican Senator', 'Dan Sullivan': 'Alaska Republican Senator', 'Kyrsten Sinema': 'Arizona Democratic Senator', 'Mark Kelly': 'Arizona Democratic Senator', 'John Boozman': 'Arkansas Republican Senator', 'Tom Cotton': 'Arkansas Republican Senator', 'Dianne Feinstein': 'California Democratic Senator', 'Alex Padilla': 'California Democratic Senator', 'Michael Bennet': 'Colorado Democratic Senator', 'John Hickenlooper': 'Colorado Democratic Senator', 'Richard Blumenthal': 'Connecticut Democratic Senator', 'Chris Murphy': 'Connecticut Democratic Senator', 'Tom Carper': 'Delaware Democratic Senator', 'Chris Coons': 'Delaware Democratic Senator', 'Marco Rubio': 'Florida Republican Senator', 'Rick Scott': 'Florida Republican Senator', 'Jon Ossoff': 'Georgia Democratic Senator', 'Raphael Warnock': 'Georgia Democratic Senator', 'Brian Schatz': 'Hawaii Democratic Senat

In [76]:
#convert dictionary to dataframe for easy viewing
pd.DataFrame.from_dict(dict_senators, orient='index')

Unnamed: 0,0
Richard Shelby,Alabama Republican[d] Senator
Tommy Tuberville,Alabama Republican Senator
Lisa Murkowski,Alaska Republican Senator
Dan Sullivan,Alaska Republican Senator
Kyrsten Sinema,Arizona Democratic Senator
...,...
Shelley Moore Capito,West Virginia Republican Senator
Ron Johnson,Wisconsin Republican Senator
Tammy Baldwin,Wisconsin Democratic Senator
John Barrasso,Wyoming Republican Senator


In [15]:
dict_gov = governors.set_index('Governor').T.to_dict('list') 
for key, value in dict_gov.items():
    #print(value[0], value[2])
    dict_gov[key] = value[0] + " " + value[1] + " Governor"
print(dict_gov) 

#convert dictionary to dataframe for easy viewing
pd.DataFrame.from_dict(dict_gov, orient='index')

{'Kay Ivey': 'Alabama Republican Governor', 'Mike Dunleavy': 'Alaska Republican Governor', 'Doug Ducey': 'Arizona Republican Governor', 'Asa Hutchinson': 'Arkansas Republican Governor', 'Gavin Newsom': 'California Democratic Governor', 'Jared Polis': 'Colorado Democratic Governor', 'Ned Lamont': 'Connecticut Democratic Governor', 'John Carney': 'Delaware Democratic Governor', 'Ron DeSantis': 'Florida Republican Governor', 'Brian Kemp': 'Georgia Republican Governor', 'David Ige': 'Hawaii Democratic Governor', 'Brad Little': 'Idaho Republican Governor', 'J. B. Pritzker': 'Illinois Democratic Governor', 'Eric Holcomb': 'Indiana Republican Governor', 'Kim Reynolds': 'Iowa Republican Governor', 'Laura Kelly': 'Kansas Democratic Governor', 'Andy Beshear': 'Kentucky Democratic Governor', 'John Bel Edwards': 'Louisiana Democratic Governor', 'Janet Mills': 'Maine Democratic Governor', 'Larry Hogan': 'Maryland Republican Governor', 'Charlie Baker': 'Massachusetts Republican Governor', 'Gretchen 

Unnamed: 0,0
Kay Ivey,Alabama Republican Governor
Mike Dunleavy,Alaska Republican Governor
Doug Ducey,Arizona Republican Governor
Asa Hutchinson,Arkansas Republican Governor
Gavin Newsom,California Democratic Governor
Jared Polis,Colorado Democratic Governor
Ned Lamont,Connecticut Democratic Governor
John Carney,Delaware Democratic Governor
Ron DeSantis,Florida Republican Governor
Brian Kemp,Georgia Republican Governor


In [26]:
dict_mil = military.set_index('Incumbent').T.to_dict('list') 
for key, value in dict_mil.items():
    dict_mil[key] = value[1] + " " + value[2] #used service branch instead of specific position
print(dict_mil) 

#convert dictionary to dataframe for easy viewing
pd.DataFrame.from_dict(dict_mil, orient='index')

{'Mark A. Milley': 'U.S. Army General', 'Christopher W. Grady': 'U.S. Navy Admiral', 'Stephen J. Townsend': 'U.S. Army General', 'Kenneth F. McKenzie Jr.Retiring[1]': 'U.S. Marine Corps General', 'Paul M. Nakasone': 'U.S. Army General', 'Tod D. Wolters': 'U.S. Air Force General', 'John C. Aquilino': 'U.S. Navy Admiral', 'Glen D. VanHerck': 'U.S. Air Force General', 'Laura J. Richardson': 'U.S. Army General', 'James H. Dickinson': 'U.S. Army General', 'Richard D. Clarke Jr.': 'U.S. Army General', 'Charles A. Richard': 'U.S. Navy Admiral', 'Jacqueline D. Van Ovost': 'U.S. Air Force General', 'Daniel R. Hokanson': 'U.S. Army General', 'Paul J. LaCamera': 'U.S. Army General', 'James C. McConville': 'U.S. Army General', 'Joseph M. Martin': 'U.S. Army General', 'Michael X. Garrett': 'U.S. Army General', 'Edward M. Daly': 'U.S. Army General', 'Paul E. Funk II': 'U.S. Army General', 'Christopher G. Cavoli': 'U.S. Army General', 'Charles A. Flynn': 'U.S. Army General', 'David H. Berger': 'U.S. 

Unnamed: 0,0
Mark A. Milley,U.S. Army General
Christopher W. Grady,U.S. Navy Admiral
Stephen J. Townsend,U.S. Army General
Kenneth F. McKenzie Jr.Retiring[1],U.S. Marine Corps General
Paul M. Nakasone,U.S. Army General
Tod D. Wolters,U.S. Air Force General
John C. Aquilino,U.S. Navy Admiral
Glen D. VanHerck,U.S. Air Force General
Laura J. Richardson,U.S. Army General
James H. Dickinson,U.S. Army General


In [32]:
anti_nuke = pd.DataFrame(['Abalone Alliance', 
"Alliance for Nuclear Accountability",
"Alliance for Nuclear Responsibility",
"Arms Control Association",
"Beyond Nuclear",
"Cactus Alliance",
"Catfish Alliance",
"Citizen's Committee for Protection of the Environment",
"Citizens Energy Council",
"Clamshell Alliance",
"Coalition Against Nukes",
"Coalition for Nuclear Power Postponement",
"Committee for a Nuclear Free Island",
"Committee for a Nuclear Overkill Moratorium",
"Committee for Nuclear Responsibility",
"Corporate Accountability International",
"Council for a Livable World",
"Crabshell Alliance (Seattle)",
"Critical Mass",
"Don't Make a Wave Committee"
"Economists for Peace and Security",
"Environmental Coalition on Nuclear Power",
"Federation of American Scientists",
"Friends of the Earth",
"Greenpeace",
"Heart of America Northwest",
"Institute for Energy and Environmental Research",
"Lawyers' Committee on Nuclear Policy",
"Nuclear Age Peace Foundation",
"Nuclear Control Institute",
"Nuclear Disarmament Partnership",
"Nuclear Energy Information Service",
"Nuclear Policy Research Institute",
"Nuclear Threat Initiative",
"Peace Action",
"People's Alliance for Clean Energy",
"Public Citizen",
"White House Peace Vigil",
"Wisconsin Project on Nuclear Arms Control"], columns=['Name'])
anti_nuke['Type'] = ['anti-nuclear group' for i in range(len(anti_nuke))]
display(anti_nuke.head())
anti_nuke.set_index('Name').to_dict()

Unnamed: 0,Name,Type
0,Abalone Alliance,anti-nuclear group
1,Alliance for Nuclear Accountability,anti-nuclear group
2,Alliance for Nuclear Responsibility,anti-nuclear group
3,Arms Control Association,anti-nuclear group
4,Beyond Nuclear,anti-nuclear group


{'Type': {'Abalone Alliance': 'anti-nuclear group',
  'Alliance for Nuclear Accountability': 'anti-nuclear group',
  'Alliance for Nuclear Responsibility': 'anti-nuclear group',
  'Arms Control Association': 'anti-nuclear group',
  'Beyond Nuclear': 'anti-nuclear group',
  'Cactus Alliance': 'anti-nuclear group',
  'Catfish Alliance': 'anti-nuclear group',
  "Citizen's Committee for Protection of the Environment": 'anti-nuclear group',
  'Citizens Energy Council': 'anti-nuclear group',
  'Clamshell Alliance': 'anti-nuclear group',
  'Coalition Against Nukes': 'anti-nuclear group',
  'Coalition for Nuclear Power Postponement': 'anti-nuclear group',
  'Committee for a Nuclear Free Island': 'anti-nuclear group',
  'Committee for a Nuclear Overkill Moratorium': 'anti-nuclear group',
  'Committee for Nuclear Responsibility': 'anti-nuclear group',
  'Corporate Accountability International': 'anti-nuclear group',
  'Council for a Livable World': 'anti-nuclear group',
  'Crabshell Alliance (Se

In [65]:
dict_world = headofstate.set_index('Name').T.to_dict('list')
for key, value in dict_world.items():
    #print(value[0], value[2])
    dict_world[key] = value[0] + " " + value[2]
print(dict_world)

{' Hibatullah Akhundzada': 'Afghanistan Leader\xa0', ' Ilir Meta': 'Albania President\xa0', ' Abdelmadjid Tebboune': 'Algeria President\xa0', " Joan Enric Vives i SicíliaCo-Prince's Representative\xa0– Josep Maria MauriFrench Co-Prince\xa0– Emmanuel Macron[α]Co-Prince's Representative\xa0– Patrick Strzoda": 'Andorra Episcopal Co-Prince\xa0', ' João Lourenço': 'Angola President\xa0', ' Elizabeth II[β]Governor-General\xa0– Sir Rodney Williams': 'Antigua and Barbuda Queen\xa0', ' Alberto Fernández': 'Argentina President\xa0', ' Alen Simonyan': 'Armenia Acting President\xa0', ' Elizabeth II[β]Governor-General\xa0– David Hurley': 'Australia Queen\xa0', ' Alexander Van der Bellen': 'Austria President\xa0', ' Ilham Aliyev': 'Azerbaijan President\xa0', ' Elizabeth II[β]Governor-General\xa0– Sir Cornelius A. Smith': 'Bahamas, The Queen\xa0', ' Sheikh Hamad bin Isa Al Khalifa': 'Bahrain King\xa0', ' Abdul Hamid': 'Bangladesh President\xa0', ' Dame Sandra Mason': 'Barbados President\xa0', ' Alexa

  """Entry point for launching an IPython kernel.


In [69]:
dict_world2 = headofgov.set_index('Name').T.to_dict('list')
for key, value in dict_world2.items():
    #print(value[0], value[2])
    dict_world2[key] = value[0] + " " + value[2]
print(dict_world2)

{' Hasan Akhund': 'Afghanistan Acting Prime Minister\xa0', ' Edi Rama': 'Albania Prime Minister\xa0', ' Aymen Benabderrahmane': 'Algeria Prime Minister\xa0', ' Xavier Espot Zamora': 'Andorra Prime Minister\xa0', ' João Lourenço': 'Angola President\xa0', ' Gaston Browne': 'Antigua and Barbuda Prime Minister\xa0', ' Alberto Fernández': 'Argentina President\xa0', ' Nikol Pashinyan': 'Armenia Prime Minister\xa0', ' Scott Morrison': 'Australia Prime Minister\xa0', ' Karl Nehammer': 'Austria Chancellor\xa0', ' Ali Asadov': 'Azerbaijan Prime Minister\xa0', ' Philip Davis': 'Bahamas, The Prime Minister\xa0', ' Prince Salman bin Hamad Al Khalifa': 'Bahrain Prime Minister\xa0', ' Sheikh Hasina': 'Bangladesh Prime Minister\xa0', ' Mia Mottley': 'Barbados Prime Minister\xa0', ' Roman Golovchenko': 'Belarus Prime Minister\xa0', ' Alexander De Croo': 'Belgium Prime Minister\xa0', ' Juan Briceño': 'Belize Prime Minister\xa0', ' Patrice Talon': 'Benin President\xa0', ' Lotay Tshering': 'Bhutan Prime M

  """Entry point for launching an IPython kernel.


In [80]:
#convert dictionary to dataframe for easy viewing
pd.DataFrame.from_dict(dict_world2, orient='index')

Unnamed: 0,0
Hasan Akhund,Afghanistan Acting Prime Minister
Edi Rama,Albania Prime Minister
Aymen Benabderrahmane,Algeria Prime Minister
Xavier Espot Zamora,Andorra Prime Minister
João Lourenço,Angola President
...,...
Nguyễn Phú Trọng,Vietnam General Secretary of the Communist Party
Phạm Minh Chính,Vietnam Prime Minister
Maeen Abdulmalik Saeed,Yemen Prime Minister
Hakainde Hichilema,Zambia President
