# Votes (Divisions) in the Oireachtas: 01 - Data

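The Oireachtas now provides an open data API; see https://data.oireachtas.ie/ for documentation.

This notebook downloads the "divisions" (votes) results from that API one year at a time, combines the yearly files into a single dataframe, flattens the nested columns and saves the result to `divisions.p.xz`.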

In [1]:
# Commonly useful notebook functions:
import sys
sys.path.insert(0,'../lib')
from common import *

In [2]:
# Save json from https://api.oireachtas.ie/
# Max results available for paging through with limit & skip is 10,000 so splitting by years works best
def save_page(year, overwrite=False):
    """Save the 'divisions' results for a single year.

    Thin wrapper that pins the first argument of ``save_page_year`` to
    'divisions' so that the function can be mapped over a sequence of years.

    year      -- the year to request; forwarded unchanged.
    overwrite -- forwarded unchanged (default False).
                 NOTE(review): presumably forces a fresh download when a saved
                 copy already exists -- confirm in save_page_year, which is not
                 defined in this notebook.
    """
    endpoint = 'divisions'
    save_page_year(endpoint, year, overwrite)
    
# Entire dataset: 11475 as of 15-12-2017, starts at 1922
# NOTE(review): that count is a snapshot; the summary output saved further down this notebook
# reports 11487 rows with a last date of 2018-01-23.
# range() excludes its upper bound, hence the +1 so that the current year is included.
years = range(1922, datetime.now().year + 1)
# process_with_progress is not defined in this notebook; presumably it calls save_page once
# per year while showing progress -- TODO confirm.
process_with_progress(save_page, years)
print()





In [3]:
# To update after loading everything already: Grab the latest year and overwrite it.
# Only the current calendar year is requested here; earlier years are left as they are on disk.
save_page(datetime.now().year, overwrite=True)

In [7]:
# Explore an example year of results (2002 has very few votes for some reason):
# Open the xz file in text mode so the UTF-8 decoding and JSON parsing happen in one step.
with lzma.open('data/year/2002.json.xz', 'rt', encoding='utf-8') as f:
    preview = json.load(f)

In [8]:
# RenderJSON is not defined in this notebook (presumably provided by the star-import at the top).
RenderJSON(preview) # Preview all: the complete decoded response for the example year

In [9]:
# Show only the 'division' object of the first entry in 'results'.
# NOTE(review): this raises if the example year has no results at all.
RenderJSON(preview['results'][0]['division']) # Preview single vote

In [4]:
# Load Data from a single file and return dataframe
def divisions_df(fname):
    """Load one xz-compressed JSON results file into a DataFrame.

    Parameters
    ----------
    fname : str or path-like
        Path to an xz-compressed, UTF-8 encoded JSON document whose top level
        has a "results" list in which every item has a "division" object.

    Returns
    -------
    pandas.DataFrame
        One row per item of "results", one column per top-level key of the
        "division" objects. Nested objects are left as-is in their cells.

    Raises
    ------
    KeyError
        If the document has no "results" key or an item has no "division" key.
    """
    # Local import so this cell does not depend on what the notebook's
    # star-import happens to export.
    from io import StringIO

    with lzma.open(fname, 'rt', encoding='utf-8') as f:
        data = json.load(f)

    # We don't care about the "head" part for now, just "results"
    records = json.dumps([r['division'] for r in data['results']])

    # Keep going through read_json (rather than the DataFrame constructor) so its
    # parsing stays the same as before. The text is wrapped in StringIO because
    # passing a literal JSON string to read_json is deprecated in recent pandas.
    return pd.read_json(StringIO(records), orient='records', dtype=False)

In [5]:
# Process all the files and stick them together in a single dataframe:
# glob() returns its matches in arbitrary order, so sort them to get the same
# row order in df_raw on every run and on every machine.
files = sorted(glob('data/year/*.json.xz'))
if not files:
    # Fail early with a clear message instead of a less obvious error further down.
    raise FileNotFoundError('No files match data/year/*.json.xz - run the download cell first')

# process_with_progress is not defined in this notebook; presumably it returns one
# DataFrame per file, in order -- TODO confirm.
data_frames = process_with_progress(divisions_df, files)
# NOTE(review): each per-file frame keeps its own index, so index labels in df_raw
# repeat across files. ignore_index=True would make them unique; it is left unchanged
# here because later cells and the saved pickle may rely on the current index.
df_raw = pd.concat(data_frames, axis=0)




In [6]:
# Not all the columns are useful, we can transform and drop the redundant ones:
# A fixed random_state makes the preview show the same rows on every run
# (the value itself is arbitrary).
df_raw.sample(3, random_state=0)

Unnamed: 0,category,chamber,date,datetime,debate,house,isBill,outcome,subject,tallies,tellers,uri,voteId
207,Division,"{'showAs': 'Seanad Éireann', 'uri': '/ie/oirea...",1991-03-13,NaT,"{'debateSection': 'dbsect_3', 'showAs': 'Order...","{'showAs': '19th Seanad', 'houseNo': '19', 'ch...",False,Carried,"{'showAs': 'Question put.', 'uri': None}","{'staonVotes': None, 'taVotes': {'showAs': 'Tá...","Tellers: Tá, Senators Wright and Fitzgerald; N...",/ie/oireachtas/division/house/seanad/19/1991-0...,vote_1
40,Division,"{'showAs': 'Dáil Éireann', 'uri': '/ie/oireach...",1982-03-24,NaT,{'showAs': 'Private Members' Business. - Emplo...,"{'chamberType': 'house', 'showAs': '23rd Dáil'...",False,Carried,"{'showAs': 'Amendment put.', 'uri': None}",{'taVotes': {'members': [{'member': {'memberCo...,"Tellers: Tá, Deputies B. Ahern and Briscoe; Ní...",/ie/oireachtas/division/house/dail/23/1982-03-...,vote_1
5,Division,"{'showAs': 'Dáil Éireann', 'uri': '/ie/oireach...",1928-11-29,NaT,"{'debateSection': 'dbsect_16', 'showAs': 'PUBL...","{'committeeCode': '', 'houseNo': '6', 'showAs'...",False,_,"{'showAs': 'Amendment put.', 'uri': None}","{'nilVotes': {'showAs': 'Níl', 'members': [{'m...",Tellers:—Tá: Deputies Boland and Allen. Níl: ...,/ie/oireachtas/division/house/dail/6/1928-11-2...,vote_2


In [7]:
# Expand the nested columns into dotted "parent.child" columns and remove the ones that
# carry no information. flatten_dataframe_columns is not defined in this notebook.
flatten_columns = ['chamber','debate','house','subject'] # deal with tallies later

# category is always "Division", isBill is always False, not used in "Divisions"
# debate.formats.pdf, subject.uri are always empty for this data
# A single chained assignment (no inplace=True) means df_raw is only rebound once both
# steps have succeeded, so a failure in drop() cannot leave it half-transformed.
df_raw = flatten_dataframe_columns(df_raw, flatten_columns).drop(
    columns=['category','isBill','debate.formats.pdf','subject.uri']
)

In [8]:
# Preview a few rows after flattening. A fixed random_state makes the preview show
# the same rows on every run (the value itself is arbitrary).
df_raw.sample(3, random_state=0)

Unnamed: 0,chamber.showAs,chamber.uri,debate.debateSection,debate.formats.xml.uri,debate.showAs,debate.uri,house.chamberType,house.committeeCode,house.houseCode,house.houseNo,house.showAs,house.uri,subject.showAs,date,datetime,outcome,tallies,tellers,uri,voteId
150,Seanad Éireann,/ie/oireachtas/house/seanad,dbsect_6,/akn/ie/debateRecord/seanad/2010-03-31/debate/...,Lost at Sea Scheme: Motion.,/akn/ie/debateRecord/seanad/2010-03-31/debate/...,house,,seanad,23,23rd Seanad,/ie/oireachtas/house/seanad/23,"The Seanad divided: Tá,",2010-03-31,NaT,Carried,"{'taVotes': {'showAs': 'Tá', 'tally': 29, 'mem...","Tellers: Tá, Senators Niall Ó Brolcháin and Di...",/ie/oireachtas/division/house/seanad/23/2010-0...,vote_1
38,Dáil Éireann,/ie/oireachtas/house/dail,dbsect_10,/akn/ie/debateRecord/dail/1924-11-20/debate/mu...,"LOCAL GOVERNMENT BILL, 1924,—THIRD STAGE (RESU...",/akn/ie/debateRecord/dail/1924-11-20/debate/main,house,,dail,4,4th Dáil,/ie/oireachtas/house/dail/4,"The Committee divided: Tá,",1924-11-20,NaT,_,"{'nilVotes': {'tally': 37, 'members': [{'membe...",,/ie/oireachtas/division/house/dail/4/1924-11-2...,vote_1
127,Dáil Éireann,/ie/oireachtas/house/dail,dbsect_19,/akn/ie/debateRecord/dail/2005-03-01/debate/mu...,Social Welfare and Pensions Bill 2005: Report ...,/akn/ie/debateRecord/dail/2005-03-01/debate/main,house,,dail,29,29th Dáil,/ie/oireachtas/house/dail/29,"The Dáil divided: Tá,",2005-03-01,NaT,Carried,"{'staonVotes': None, 'nilVotes': {'members': [...","Tellers: Tá, Deputies Kitt and Kelleher; Níl, ...",/ie/oireachtas/division/house/dail/29/2005-03-...,vote_1


In [9]:
# Per-column summary ("Stats" and "Top N" tables) for every column except 'tallies',
# which is excluded by name. describe_with_top_n is not defined in this notebook.
# NOTE(review): the saved output lists at most 10 values per column although n=50 is
# passed -- confirm what n controls.
describe_with_top_n(df_raw, exclude_columns=['tallies'], n=50)

----------

# chamber.showAs

###  Stats:

Unnamed: 0,chamber.showAs
count,11487
unique,69
top,Dáil Éireann
freq,7348
missing,0


###  Top N:

Unnamed: 0,chamber.showAs
Dáil Éireann,7348
Seanad Éireann,3546
SELECT COMMITTEE ON FINANCE AND THE PUBLIC SERVICE,71
SELECT COMMITTEE ON ENVIRONMENT AND LOCAL GOVERNMENT,61
"Select Sub-Committee on the Environment, Community and Local Government",45
"SELECT COMMITTEE ON JUSTICE, EQUALITY, DEFENCE AND WOMEN’S RIGHTS",33
SELECT COMMITTEE ON TRANSPORT,25
SELECT COMMITTEE ON EDUCATION AND SCIENCE,24
SELECT COMMITTEE ON ENTERPRISE AND SMALL BUSINESS,23
"SELECT COMMITTEE ON JUSTICE, EQUALITY AND WOMEN’S RIGHTS",22


----------

# chamber.uri

###  Stats:

Unnamed: 0,chamber.uri
count,11487
unique,3
top,/ie/oireachtas/house/dail
freq,7348
missing,0


###  Top N:

Unnamed: 0,chamber.uri
/ie/oireachtas/house/dail,7348
/ie/oireachtas/house/seanad,3546
/ie/oireachtas/committee,593


----------

# debate.debateSection

###  Stats:

Unnamed: 0,debate.debateSection
count,11487
unique,109
top,dbsect_3
freq,1356
missing,0


###  Top N:

Unnamed: 0,debate.debateSection
dbsect_3,1356
dbsect_2,691
dbsect_4,660
dbsect_5,540
dbsect_6,459
dbsect_8,396
dbsect_7,392
dbsect_10,301
dbsect_9,292
dbsect_11,270


----------

# debate.formats.xml.uri

###  Stats:

Unnamed: 0,debate.formats.xml.uri
count,11487
unique,5295
top,/akn/ie/debateRecord/select_sub-committee_on_t...
freq,41
missing,0


###  Top N:

Unnamed: 0,debate.formats.xml.uri
/akn/ie/debateRecord/select_sub-committee_on_the_environment_community_and_local_government/2012-01-18/debate/mul@/main.xml,41
/akn/ie/debateRecord/dail/1933-07-14/debate/mul@/main.xml,30
/akn/ie/debateRecord/dail/1935-07-05/debate/mul@/main.xml,22
/akn/ie/debateRecord/dail/1929-05-30/debate/mul@/main.xml,22
/akn/ie/debateRecord/dail/1967-07-21/debate/mul@/main.xml,20
/akn/ie/debateRecord/dail/1927-08-03/debate/mul@/main.xml,17
/akn/ie/debateRecord/dail/1923-07-12/debate/mul@/main.xml,17
/akn/ie/debateRecord/seanad/2015-12-01/debate/mul@/main.xml,17
/akn/ie/debateRecord/dail/1933-06-02/debate/mul@/main.xml,16
/akn/ie/debateRecord/dail/1928-06-20/debate/mul@/main.xml,16


----------

# debate.showAs

###  Stats:

Unnamed: 0,debate.showAs
count,11487
unique,6319
top,Order of Business
freq,512
missing,0


###  Top N:

Unnamed: 0,debate.showAs
Order of Business,512
Order of Business.,425
Order of Business.,137
"Livestock Marts Bill, 1967: Committee Stage (Resumed).",56
Suspension of Member.,49
Water Services (Amendment) Bill 2011 [Seanad]: Committee Stage (Resumed),41
Nomination of Taoiseach.,35
Order of Business (Resumed),29
Water Services Bill 2014: Committee Stage (Resumed),23
"Public Business. - Juries (Protection) Bill, 1929—Committee (Resumed).",22


----------

# debate.uri

###  Stats:

Unnamed: 0,debate.uri
count,11487
unique,5295
top,/akn/ie/debateRecord/select_sub-committee_on_t...
freq,41
missing,0


###  Top N:

Unnamed: 0,debate.uri
/akn/ie/debateRecord/select_sub-committee_on_the_environment_community_and_local_government/2012-01-18/debate/main,41
/akn/ie/debateRecord/dail/1933-07-14/debate/main,30
/akn/ie/debateRecord/dail/1929-05-30/debate/main,22
/akn/ie/debateRecord/dail/1935-07-05/debate/main,22
/akn/ie/debateRecord/dail/1967-07-21/debate/main,20
/akn/ie/debateRecord/dail/1923-07-12/debate/main,17
/akn/ie/debateRecord/seanad/2015-12-01/debate/main,17
/akn/ie/debateRecord/dail/1927-08-03/debate/main,17
/akn/ie/debateRecord/dail/1933-06-02/debate/main,16
/akn/ie/debateRecord/dail/1928-06-20/debate/main,16


----------

# house.chamberType

###  Stats:

Unnamed: 0,house.chamberType
count,11487
unique,2
top,house
freq,10894
missing,0


###  Top N:

Unnamed: 0,house.chamberType
house,10894
committee,593


----------

# house.committeeCode

###  Stats:

Unnamed: 0,house.committeeCode
count,11487.0
unique,66.0
top,
freq,10894.0
missing,10894.0


###  Top N:

Unnamed: 0,house.committeeCode
,10894
select_committee_on_finance_and_the_public_service,71
select_committee_on_environment_and_local_government,61
select_sub-committee_on_the_environment_community_and_local_government,45
"select_committee_on_justice,_equality,_defence_and_women’s_rights",33
select_committee_on_transport,25
select_committee_on_education_and_science,24
select_committee_on_enterprise_and_small_business,23
"select_committee_on_justice,_equality_and_women’s_rights",22
"select_committee_on_communications,_marine_and_natural_resources",19


----------

# house.houseCode

###  Stats:

Unnamed: 0,house.houseCode
count,11487
unique,2
top,dail
freq,7941
missing,0


###  Top N:

Unnamed: 0,house.houseCode
dail,7941
seanad,3546


----------

# house.houseNo

###  Stats:

Unnamed: 0,house.houseNo
count,11487
unique,36
top,24
freq,1349
missing,0


###  Top N:

Unnamed: 0,house.houseNo
24,1349
8,741
31,722
23,621
29,615
6,607
20,564
26,538
4,474
27,407


----------

# house.showAs

###  Stats:

Unnamed: 0,house.showAs
count,11487
unique,126
top,24th Seanad
freq,1032
missing,0


###  Top N:

Unnamed: 0,house.showAs
24th Seanad,1032
8th Dáil,721
6th Dáil,593
23rd Seanad,593
31st Dáil,577
26th Dáil,538
4th Dáil,471
29th Dáil,424
27th Dáil,400
20th Dáil,364


----------

# house.uri

###  Stats:

Unnamed: 0,house.uri
count,11487
unique,139
top,/ie/oireachtas/house/seanad/24
freq,1032
missing,0


###  Top N:

Unnamed: 0,house.uri
/ie/oireachtas/house/seanad/24,1032
/ie/oireachtas/house/dail/8,721
/ie/oireachtas/house/seanad/23,593
/ie/oireachtas/house/dail/6,593
/ie/oireachtas/house/dail/31,577
/ie/oireachtas/house/dail/26,538
/ie/oireachtas/house/dail/4,471
/ie/oireachtas/house/dail/29,424
/ie/oireachtas/house/dail/27,400
/ie/oireachtas/house/dail/20,364


----------

# subject.showAs

###  Stats:

Unnamed: 0,subject.showAs
count,11487
unique,1450
top,Question put.
freq,2626
missing,0


###  Top N:

Unnamed: 0,subject.showAs
Question put.,2626
Amendment put.,1944
Amendment put:,1097
"The Seanad divided: Tá,",676
"The Dáil divided: Tá,",634
"The Committee divided: Tá,",580
Question put:,411
"Question put: ""That the words proposed to be deleted stand.""",397
"Question put: ""That the motion, as amended, be agreed to.""",139
"The Select Committee divided: Tá,",104


----------

# date

###  Stats:

Unnamed: 0,date
count,11487
unique,4475
top,2012-01-18 00:00:00
freq,46
first,1922-09-11 00:00:00
last,2018-01-23 00:00:00
missing,0


###  Top N:

Unnamed: 0,date
2012-01-18,46
1933-07-14,30
1935-07-05,22
1929-05-30,22
2017-12-13,20
2014-12-17,20
1967-07-21,20
2015-12-01,20
1929-07-03,18
1927-08-03,17


----------

# datetime

###  Stats:

Unnamed: 0,datetime
count,2066
unique,705
top,2012-01-18 11:00:00
freq,41
first,2012-01-18 10:30:00
last,2018-01-23 14:00:00
missing,9421


###  Top N:

Unnamed: 0,datetime
2012-01-18 11:00:00,41
2015-12-01 14:35:00,17
2017-04-13 09:30:00,16
2017-05-18 11:00:00,14
2017-12-13 14:40:00,14
2014-12-19 10:00:00,14
2014-12-17 10:30:00,13
2015-12-02 09:30:00,12
2013-12-18 10:30:00,12
2015-07-09 08:30:00,12


----------

# outcome

###  Stats:

Unnamed: 0,outcome
count,11487
unique,3
top,Carried
freq,6294
missing,0


###  Top N:

Unnamed: 0,outcome
Carried,6294
Lost,3192
_,2001


----------

# tellers

###  Stats:

Unnamed: 0,tellers
count,11487.0
unique,5730.0
top,
freq,1301.0
missing,1301.0


###  Top N:

Unnamed: 0,tellers
,1301
Tellers:—Tá: Deputies Little and Smith; Níl: Deputies Doyle and Bennett.,196
"Tellers: Tá, Deputies Kelly and B. Desmond; Níl, Deputies Lalor and Browne.",105
"Tellers: Tá, Senators Ned O'Sullivan and Diarmuid Wilson; Níl, Senators Paul Coghlan and Aideen Hayden.",90
"Tellers: Tá, Senators Paul Coghlan and Aideen Hayden; Níl, Senators Ned O'Sullivan and Diarmuid Wilson.",85
Tellers:—Tá: Deputies Little and Traynor; Níl: Deputies Doyle and Bennett.,76
"Tellers: Tá, Deputies Kitt and Kelleher; Níl, Deputies Kehoe and Stagg.",73
"Tellers: Tá, Deputies J. Higgins and B. Fitzgerald; Níl, Deputies D. Ahern and Callely.",73
"Tellers: Tá, Deputies Emmet Stagg and Paul Kehoe; Níl, Deputies Aengus Ó Snodaigh and Seán Ó Fearghaíl.",67
"Tellers: Tá, Deputies Lalor and Browne; Níl, Deputies Kelly and B. Desmond.",53


----------

# uri

###  Stats:

Unnamed: 0,uri
count,11487
unique,11483
top,/ie/oireachtas/division/house/seanad/1934/1936...
freq,2
missing,0


###  Top N:

Unnamed: 0,uri
/ie/oireachtas/division/house/seanad/1934/1936-01-01/vote_1,2
/ie/oireachtas/division/house/seanad/1934/1936-01-01/vote_2,2
/ie/oireachtas/division/house/seanad/1934/1936-01-01/vote_4,2
/ie/oireachtas/division/house/seanad/1934/1936-01-01/vote_3,2
/ie/oireachtas/division/house/dail/10/1941-04-30/vote_1,1
/ie/oireachtas/division/house/dail/20/1977-02-22/vote_1,1
/ie/oireachtas/division/house/dail/26/1991-12-10/vote_2,1
/ie/oireachtas/division/house/dail/8/1935-07-05/vote_18,1
/ie/oireachtas/division/house/seanad/3/1943-02-04/vote_1,1
/ie/oireachtas/division/house/dail/27/1996-10-09/vote_3,1


----------

# voteId

###  Stats:

Unnamed: 0,voteId
count,11487
unique,195
top,vote_1
freq,5220
missing,0


###  Top N:

Unnamed: 0,voteId
vote_1,5220
vote_2,2631
vote_3,1372
vote_4,749
vote_5,434
vote_6,268
vote_7,155
vote_8,104
vote_9,72
vote_10,50


In [10]:
# Persist the flattened dataframe ('tallies' is still nested at this point).
# NOTE(review): pickle files should only ever be loaded from a trusted source.
df_raw.to_pickle('divisions.p.xz', compression='xz') # gzip is faster, but xz makes much smaller files for git