In [53]:
import pandas as pd
import data_loading
import data_cleaning
from data_cleaning import full_names

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Source Vote Data

The main dataset we work with is the voting data for all National Council members between 2007 and 2019.

In [2]:
# Load the raw voting records through the project's data_loading helper
# and preview the first rows.
source_vote_data = data_loading.source_vote_data()
source_vote_data.head()

Unnamed: 0,AffairShortId,AffairTitle,VoteRegistrationNumber,VoteDate,VoteMeaningYes,VoteMeaningNo,DivisionText,VoteSubmissionText,VoteFilteredYes,VoteFilteredNo,...,VoteFilteredExcused,VoteFilteredPresident,CouncillorId,CouncillorName,CouncillorYes,CouncillorNo,CouncillorAbstain,CouncillorNotParticipated,CouncillorExcused,CouncillorPresident
0,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,Fri Dec 21 2007 09:50:38 GMT+0100 (heure norma...,,,Ja,Vote final,196,1,...,0,1,3923,Marra Ada,1,0,0,0,0,0
1,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,Fri Dec 21 2007 09:50:38 GMT+0100 (heure norma...,,,Ja,Vote final,196,1,...,0,1,3883,Glauser-Zufferey Alice,1,0,0,0,0,0
2,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,Fri Dec 21 2007 09:50:38 GMT+0100 (heure norma...,,,Ja,Vote final,196,1,...,0,1,3907,Thorens Goumaz Adèle,1,0,0,0,0,0
3,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,Fri Dec 21 2007 09:50:38 GMT+0100 (heure norma...,,,Ja,Vote final,196,1,...,0,1,3913,Wyss Brigit,1,0,0,0,0,0
4,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,Fri Dec 21 2007 09:50:38 GMT+0100 (heure norma...,,,Ja,Vote final,196,1,...,0,1,3878,Flückiger-Bäni Sylvia,1,0,0,0,0,0


The voting data has some minor issues:

- The 'VoteDate' is not formatted as a datetime
- Some duplicates are present
- 'Bernasconi Maria' changed her name to 'Roth-Bernasconi Maria'

### VoteDate
The vote dates are not parsed properly because they contain some additional text.

In [4]:
# Look at one raw 'VoteDate' value (row label 0) to see the unparsed text format.
source_vote_data['VoteDate'].loc[0]

'Fri Dec 21 2007 09:50:38 GMT+0100 (heure normale d’Europe centrale)'

### Duplicates Count
There are not too many duplicates, so dropping them should not be an issue.

In [5]:
# Number of rows that exactly repeat an earlier row, i.e. the rows that
# drop_duplicates() would remove. int() keeps the displayed result a plain int.
int(source_vote_data.duplicated().sum())

2200

### Roth-Bernasconi Maria
Listed as Roth-Bernasconi Maria on Wikipedia: https://de.wikipedia.org/wiki/Maria_Roth-Bernasconi

But she is listed as Bernasconi Maria on the website of the Swiss Parliament: https://www.parlament.ch/de/biografie/maria-bernasconi/350

In [6]:
# Show one raw vote row recorded under the name 'Bernasconi Maria'.
source_vote_data.loc[source_vote_data['CouncillorName'].eq('Bernasconi Maria')].head(1)

Unnamed: 0,AffairShortId,AffairTitle,VoteRegistrationNumber,VoteDate,VoteMeaningYes,VoteMeaningNo,DivisionText,VoteSubmissionText,VoteFilteredYes,VoteFilteredNo,...,VoteFilteredExcused,VoteFilteredPresident,CouncillorId,CouncillorName,CouncillorYes,CouncillorNo,CouncillorAbstain,CouncillorNotParticipated,CouncillorExcused,CouncillorPresident
48,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,Fri Dec 21 2007 09:50:38 GMT+0100 (heure norma...,,,Ja,Vote final,196,1,...,0,1,350,Bernasconi Maria,1,0,0,0,0,0


# Vote Data
vote_data is the cleaned source_vote_data dataset

In [69]:
# Derive the cleaned vote table from the raw one using the project's
# data_cleaning helper, then preview the first rows.
vote_data = data_cleaning.get_vote_data(source_vote_data)
vote_data.head()

Unnamed: 0,AffairShortId,AffairTitle,VoteRegistrationNumber,VoteDate,VoteMeaningYes,VoteMeaningNo,DivisionText,VoteSubmissionText,VoteFilteredYes,VoteFilteredNo,...,VoteFilteredExcused,VoteFilteredPresident,CouncillorId,CouncillorName,CouncillorYes,CouncillorNo,CouncillorAbstain,CouncillorNotParticipated,CouncillorExcused,CouncillorPresident
0,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,2007-12-21 09:50:38+00:00,,,Ja,Vote final,196,1,...,0,1,3923,Marra Ada,1,0,0,0,0,0
1,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,2007-12-21 09:50:38+00:00,,,Ja,Vote final,196,1,...,0,1,3883,Glauser-Zufferey Alice,1,0,0,0,0,0
2,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,2007-12-21 09:50:38+00:00,,,Ja,Vote final,196,1,...,0,1,3907,Thorens Goumaz Adèle,1,0,0,0,0,0
3,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,2007-12-21 09:50:38+00:00,,,Ja,Vote final,196,1,...,0,1,3913,Wyss Brigit,1,0,0,0,0,0
4,20070464,Prorogation de la loi fédérale sur l'adaptatio...,248,2007-12-21 09:50:38+00:00,,,Ja,Vote final,196,1,...,0,1,3878,Flückiger-Bäni Sylvia,1,0,0,0,0,0


# Source Members

The second dataset we work with is the background information on members of the Swiss parliament.
The dataset contains information from 1884-2019 and entries for the National Council, the Council of States and the Federal Council.

In [71]:
# Load the raw parliament-member records through the project's data_loading
# helper and preview the first rows.
source_members = data_loading.source_members()
source_members.head()

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
0,False,Giuseppe,a Marca,m,Grisons,GR,Conseil des Etats,Centre,MC,Conservateurs,Cons*,,,,,1849-12-01,1851-07-01,Soazza (GR),1799-07-29,1866-07-16
1,False,Alois,Ab Yberg,m,Schwyz,SZ,Conseil national,Groupe radical-démocratique,R,Parti radical-démocratique suisse,PRD,,,,,1928-12-03,1935-12-01,Schwyz (SZ),1878-10-06,1959-10-17
2,False,Fabio,Abate,m,Tessin,TI,Conseil national,Groupe radical-libéral,RL,Parti radical-démocratique suisse,PRD,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2007-12-03,2011-12-04,Cabbio (TI),1966-01-04,NaT
3,False,Fabio,Abate,m,Tessin,TI,Conseil national,Groupe radical-libéral,R,Parti radical-démocratique suisse,PRD,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2000-09-25,2003-11-30,Cabbio (TI),1966-01-04,NaT
4,False,Fabio,Abate,m,Tessin,TI,Conseil des Etats,Groupe libéral-radical,RL,PLR.Les Libéraux-Radicaux,PLR,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2011-12-05,2015-11-29,Cabbio (TI),1966-01-04,NaT


We are only interested in the data between 2007 and 2019. As usual, there are some issues with the data:

- 'Imfeld Adrian' is actually called 'Imfeld Adriano'
- 'Bignasca Giuliano' is completely missing from the dataset for the period between 2007-2019
- 'Bruderer Pascale' is also present as 'Bruderer Wyss Pascale'
- 'Diener Verena' is also present as 'Diener Lenz Verena'

### Imfeld Adrian

The name in vote_data does not match the name in source_members.

According to Wikipedia he is called Adriano: https://de.wikipedia.org/wiki/Adriano_Imfeld

In [72]:
# First source_members row whose full name is spelled 'Imfeld Adrian'.
source_members.loc[full_names(source_members) == 'Imfeld Adrian'].head(1)

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
2875,False,Adrian,Imfeld,m,Obwald,OW,Conseil national,Groupe démocrate-chrétien,C,Parti démocrate-chrétien suisse,PDC,,Bâle,Bâle-Ville,1986-1990 Einwohnergemeinderat von Sarnen; 199...,2001-11-16,2003-11-30,Lungern (OW),1954-12-04,NaT


In [73]:
# First occurrence of the spelling 'Imfeld Adriano' in the cleaned vote data.
vote_data['CouncillorName'].loc[lambda names: names == 'Imfeld Adriano'].head(1)

2903    Imfeld Adriano
Name: CouncillorName, dtype: object

### Bignasca Giuliano
There are no entries in the source_members DataFrame for Bignasca Giuliano after 2003. But he voted in the National Council in 2007.

In [74]:
# Earliest vote timestamp recorded for 'Bignasca Giuliano' in the vote data.
(
    vote_data
    .loc[vote_data['CouncillorName'].eq('Bignasca Giuliano'), 'VoteDate']
    .min()
)

Timestamp('2007-03-05 16:28:58+0000', tz='GMT')

In [75]:
# Latest 'DateLeaving' among the source_members rows for 'Bignasca Giuliano'.
source_members['DateLeaving'][full_names(source_members) == 'Bignasca Giuliano'].max()

datetime.date(2003, 11, 30)

### Bruderer Pascale / Diener Verena

They changed family names between their time in the Council of States and the National Council.

In [76]:
# First source_members row whose full name is 'Bruderer Pascale'.
source_members[full_names(source_members) == 'Bruderer Pascale'].head(1)

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
774,False,Pascale,Bruderer,f,Argovie,AG,Conseil national,Groupe socialiste,S,Parti socialiste suisse,PSS,,Baden,Argovie,Legislative der Gemeinde (Einwohnerrätin) Bade...,2002-04-15,2003-11-30,"Rorschach (SG),Baden (AG),Baar (ZG),Römerswil ...",1977-07-28,NaT


In [77]:
# First source_members row whose full name is 'Bruderer Wyss Pascale'.
source_members[full_names(source_members) == 'Bruderer Wyss Pascale'].head(1)

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
778,False,Pascale,Bruderer Wyss,f,Argovie,AG,Conseil des Etats,Groupe socialiste,S,Parti socialiste suisse,PSS,,Baden,Argovie,Legislative der Gemeinde (Einwohnerrätin) Bade...,2011-12-05,2015-11-29,"Rorschach (SG),Baden (AG),Baar (ZG),Römerswil ...",1977-07-28,NaT


In [78]:
# First source_members row whose full name is 'Diener Verena'.
source_members[full_names(source_members) == 'Diener Verena'].head(1)

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
1445,False,Verena,Diener,f,Zurich,ZH,Conseil national,Groupe PDC/PEV/PVL,CEg,Parti vert'libéral,pvl,,Zurich,Zurich,Exekutive des Kantons (Regierungsrätin): von A...,2007-12-03,2007-12-05,Maur (ZH),1949-03-27,NaT


In [79]:
# First source_members row whose full name is 'Diener Lenz Verena'.
source_members[full_names(source_members) == 'Diener Lenz Verena'].head(1)

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
1449,False,Verena,Diener Lenz,f,Zurich,ZH,Conseil des Etats,Groupe vert'libéral,GL,Parti vert'libéral,pvl,,Zurich,Zurich,Exekutive des Kantons (Regierungsrätin): von A...,2011-12-05,2015-11-29,Maur (ZH),1949-03-27,NaT


# Members

members is the cleaned source_members dataset. 

Note that members from all three councils are present.

In [80]:
# Build the cleaned members table from the raw member records.
# NOTE(review): vote_data is passed in as well, presumably so the helper can
# reconcile member names with the names used in the votes; confirm in data_cleaning.
members = data_cleaning.get_members(source_members, vote_data)
members.head()

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
0,False,Fabio,Abate,m,Tessin,TI,Conseil national,Groupe radical-libéral,RL,Parti radical-démocratique suisse,PRD,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2007-12-03,2011-12-04,Cabbio (TI),1966-01-04,NaT
1,False,Fabio,Abate,m,Tessin,TI,Conseil des Etats,Groupe libéral-radical,RL,PLR.Les Libéraux-Radicaux,PLR,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2011-12-05,2015-11-29,Cabbio (TI),1966-01-04,NaT
2,True,Fabio,Abate,m,Tessin,TI,Conseil des Etats,Groupe libéral-radical,RL,PLR.Les Libéraux-Radicaux,PLR,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2015-11-30,2019-12-01,Cabbio (TI),1966-01-04,NaT
3,False,Fabio,Abate,m,Tessin,TI,Conseil national,Groupe radical-libéral,R,Parti radical-démocratique suisse,PRD,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2003-12-01,2007-12-02,Cabbio (TI),1966-01-04,NaT
4,True,Jean-Luc,Addor,m,Valais,VS,Conseil national,Groupe de l'Union démocratique du Centre,V,Union Démocratique du Centre,UDC,,Lausanne,Vaud,Député au Grand Conseil: 2005-2015; Conseiller...,2015-11-30,2019-12-01,"Ste-Croix (VD),Savièse (VS)",1964-04-22,NaT


# National Council Members

National council members are stored in the 'nc_members' DataFrame.

In [81]:
# Derive the National Council subset of the cleaned members table via the
# project's data_cleaning helper, then preview the first rows.
nc_members = data_cleaning.get_national_council_members(members)
nc_members.head()

Unnamed: 0,Active,FirstName,LastName,GenderAsString,CantonName,CantonAbbreviation,CouncilName,ParlGroupName,ParlGroupAbbreviation,PartyName,PartyAbbreviation,MaritalStatusText,BirthPlace_City,BirthPlace_Canton,Mandates,DateJoining,DateLeaving,Citizenship,DateOfBirth,DateOfDeath
0,False,Fabio,Abate,m,Tessin,TI,Conseil national,Groupe radical-libéral,RL,Parti radical-démocratique suisse,PRD,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2007-12-03,2011-12-04,Cabbio (TI),1966-01-04,NaT
1,False,Fabio,Abate,m,Tessin,TI,Conseil national,Groupe radical-libéral,R,Parti radical-démocratique suisse,PRD,,Locarno,Tessin,Incarichi esecutivi presso il comune di Locarn...,2003-12-01,2007-12-02,Cabbio (TI),1966-01-04,NaT
2,True,Jean-Luc,Addor,m,Valais,VS,Conseil national,Groupe de l'Union démocratique du Centre,V,Union Démocratique du Centre,UDC,,Lausanne,Vaud,Député au Grand Conseil: 2005-2015; Conseiller...,2015-11-30,2019-12-01,"Ste-Croix (VD),Savièse (VS)",1964-04-22,NaT
3,False,Jean-Luc,Addor,m,Valais,VS,Conseil national,,,Union Démocratique du Centre,UDC,,Lausanne,Vaud,Député au Grand Conseil: 2005-2015; Conseiller...,2019-12-02,NaT,"Ste-Croix (VD),Savièse (VS)",1964-04-22,NaT
4,False,Andreas,Aebi,m,Berne,BE,Conseil national,Groupe de l'Union démocratique du Centre,V,Union Démocratique du Centre,UDC,,Burgdorf,Berne,Legislative der Gemeinde: seit Januar 2000; Ge...,2007-12-03,2011-12-04,Wynigen (BE),1958-11-26,NaT


# Full Votes

We join the vote data and the national council members datasets to get one single dataset.

In [85]:
# Combine the cleaned votes with the National Council member records into a
# single table using the project's data_cleaning helper, then preview it.
full_votes = data_cleaning.get_full_votes(vote_data, nc_members)
full_votes.head()

Unnamed: 0,Active,AffairShortId,AffairTitle,BirthPlace_Canton,BirthPlace_City,CantonAbbreviation,CantonName,Citizenship,CouncilName,CouncillorAbstain,...,VoteFilteredAbstain,VoteFilteredExcused,VoteFilteredNo,VoteFilteredNotParticipated,VoteFilteredPresident,VoteFilteredYes,VoteMeaningNo,VoteMeaningYes,VoteRegistrationNumber,VoteSubmissionText
0,False,20070464,Prorogation de la loi fédérale sur l'adaptatio...,Vaud,Lausanne,VD,Vaud,Paudex (VD),Conseil national,0,...,1,0,1,1,1,196,,,248,Vote final
1,False,20070464,Prorogation de la loi fédérale sur l'adaptatio...,Valais,Sierre,VD,Vaud,Champvent (VD),Conseil national,0,...,1,0,1,1,1,196,,,248,Vote final
2,False,20070464,Prorogation de la loi fédérale sur l'adaptatio...,Soleure,Soleure,VD,Vaud,"Ste-Croix (VD),Villarzel (VD)",Conseil national,0,...,1,0,1,1,1,196,,,248,Vote final
3,False,20070464,Prorogation de la loi fédérale sur l'adaptatio...,Soleure,Lüsslingen,SO,Soleure,Lüsslingen (SO),Conseil national,0,...,1,0,1,1,1,196,,,248,Vote final
4,False,20070464,Prorogation de la loi fédérale sur l'adaptatio...,Argovie,Aarau,AG,Argovie,"Schöftland (AG),Rüegsau (BE)",Conseil national,0,...,1,0,1,1,1,196,,,248,Vote final


Check that full_votes and vote_data contain exactly the same index labels, i.e. every full_votes entry has a corresponding entry in vote_data and vice versa.

In [88]:
# Verify that the join neither dropped nor introduced rows: both frames must
# carry exactly the same set of index labels.
# The explicit set method is used instead of `Index ^ Index`: the operator's
# set-operation meaning was deprecated, and from pandas 2.0 on `^` is an
# element-wise XOR, so `len(a ^ b) == 0` no longer tests for an empty
# symmetric difference.
full_votes.index.symmetric_difference(vote_data.index).empty

True