In [1]:
# imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import show

%matplotlib inline
sns.set_context('notebook')
pd.options.mode.chained_assignment = None  # default='warn'
pd.set_option('display.max_columns', 500) # show more columns
nan = np.nan # store numpy.nan in 'nan'

In [2]:
# Read the semicolon-separated export; header=1 takes the column names from
# the second line of the file.
raw_data = pd.read_csv(
    'politique_data_utf_d.csv',
    sep=';',
    header=1,
)
raw_data.head(n=10)

Unnamed: 0,Bezirk (>>) / Gemeinde (......),Bezirk (>>) / Gemeinde (......).1,Jahr,Jahr.1,Partei,Partei.1,Ergebnis,Ergebnis.1,Unnamed: 8
0,101,>> Bezirk Affoltern,2015,2015,1,FDP/PLR (PRD),1,Parteistimmen,95805.0000000000
1,101,>> Bezirk Affoltern,2015,2015,1,FDP/PLR (PRD),2,Parteistärke in %,15.7782920000
2,101,>> Bezirk Affoltern,2015,2015,2,CVP/PDC,1,Parteistimmen,24001.0000000000
3,101,>> Bezirk Affoltern,2015,2015,2,CVP/PDC,2,Parteistärke in %,3.9527664000
4,101,>> Bezirk Affoltern,2015,2015,3,SP/PS,1,Parteistimmen,112705.0000000000
5,101,>> Bezirk Affoltern,2015,2015,3,SP/PS,2,Parteistärke in %,18.5615824000
6,101,>> Bezirk Affoltern,2015,2015,4,SVP/UDC,1,Parteistimmen,206032.0000000000
7,101,>> Bezirk Affoltern,2015,2015,4,SVP/UDC,2,Parteistärke in %,33.9317682000
8,101,>> Bezirk Affoltern,2015,2015,5,LPS/PLS,1,Parteistimmen,...
9,101,>> Bezirk Affoltern,2015,2015,5,LPS/PLS,2,Parteistärke in %,...


In [3]:
# Number of columns holding at least one missing value (expected: 0).
int(raw_data.isnull().any(axis=0).sum())

0

In [4]:
# Keep only the 2015 election. The year is matched exactly rather than with a
# lower bound: the per-commune aggregation later in the notebook sums every
# remaining row, so a newer election in a refreshed export must not slip in.
# .copy() makes `data` an independent frame, so the in-place edits applied to
# it afterwards never act on a slice of raw_data.
data = raw_data[raw_data['Jahr'] == 2015].copy()

# Election years present in the raw export, for reference.
raw_data['Jahr'].unique()

array([2015, 2011, 2007, 2003, 1999, 1995, 1991, 1987, 1983, 1979, 1975])

In [5]:
# The frame is already filtered by year, so both year columns can be dropped.
data = data.drop(columns=['Jahr', 'Jahr.1'])

In [6]:
# Show the exact name of the second column (the district / commune label).
commune_label_column = data.columns[1]
commune_label_column

'Bezirk (>>) / Gemeinde (......).1'

In [7]:
# Keep only the commune rows: their label carries the dotted '...' marker,
# whereas district rows start with '>>'.
label_column = data.columns[1]
is_commune = data[label_column].str.contains('...', regex=False, na=False)
data = data[is_commune]

In [8]:
# Swap the German export headers for short English identifiers.
column_names = {
    'Bezirk (>>) / Gemeinde (......)': 'commune_id',
    'Bezirk (>>) / Gemeinde (......).1': 'commune_name',
    'Partei': 'party_id',
    'Partei.1': 'party_name',
    'Ergebnis': 'result_id',
    'Ergebnis.1': 'result_name',
    'Unnamed: 8': 'value',
}
data = data.rename(columns=column_names)

In [9]:
#remove '....' from commune_name
def _remove_dots(row):
    return row['commune_name'].replace('...... ', '')
# Strip the '...... ' marker from the commune names with the vectorised
# string accessor instead of a Python-level call per row; regex=False keeps
# the dots literal.
data['commune_name'] = data['commune_name'].str.replace('...... ', '', regex=False)
data.head()

Unnamed: 0,commune_id,commune_name,party_id,party_name,result_id,result_name,value
528,1,Aeugst am Albis,1,FDP/PLR (PRD),1,Parteistimmen,4906.0
529,1,Aeugst am Albis,1,FDP/PLR (PRD),2,Parteistärke in %,18.69166
530,1,Aeugst am Albis,2,CVP/PDC,1,Parteistimmen,545.0
531,1,Aeugst am Albis,2,CVP/PDC,2,Parteistärke in %,2.0764278
532,1,Aeugst am Albis,3,SP/PS,1,Parteistimmen,4894.0


In [10]:
# A '...' entry is treated as 0. Only the 'value' column is rewritten, so an
# identical string in any other column is left untouched; the column is then
# converted to a numeric dtype.
data['value'] = pd.to_numeric(data['value'].replace('...', 0.0))

In [11]:
# Distinct party labels found in the data.
parties = pd.unique(data['party_name'])
parties

array(['FDP/PLR (PRD)', 'CVP/PDC', 'SP/PS', 'SVP/UDC', 'LPS/PLS',
       'LdU/AdI', 'EVP/PEV', 'CSP/PCS', 'GLP/PVL', 'BDP/PBD', 'PdA/PST',
       'PSA', 'POCH', 'GPS/PES', 'FGA/AVF', 'Sol.', 'SD/DS', 'Rep./Rép.',
       'EDU/UDF', 'FPS/PSL', 'Lega', 'MCR', 'Sep./Sép.', 'Übrige/Autres'], dtype=object)

In [12]:
# Add one column per party: the row's value when the row belongs to that
# party, 0.0 otherwise. Series.where fills a whole column at once, replacing
# a row-by-row apply over the full frame for every party.
for party in parties:
    data[party] = data['value'].where(data['party_name'] == party, 0.0)

In [13]:
# Split into two frames: rows labelled 'Parteistimmen' (vote counts) and all
# remaining rows ('Parteistärke in %').
is_vote_count = data['result_name'] == 'Parteistimmen'
data_voices = data[is_vote_count]
data_percent = data[~is_vote_count]
data_voices.head()

Unnamed: 0,commune_id,commune_name,party_id,party_name,result_id,result_name,value,FDP/PLR (PRD),CVP/PDC,SP/PS,SVP/UDC,LPS/PLS,LdU/AdI,EVP/PEV,CSP/PCS,GLP/PVL,BDP/PBD,PdA/PST,PSA,POCH,GPS/PES,FGA/AVF,Sol.,SD/DS,Rep./Rép.,EDU/UDF,FPS/PSL,Lega,MCR,Sep./Sép.,Übrige/Autres
528,1,Aeugst am Albis,1,FDP/PLR (PRD),1,Parteistimmen,4906.0,4906.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
530,1,Aeugst am Albis,2,CVP/PDC,1,Parteistimmen,545.0,0.0,545.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
532,1,Aeugst am Albis,3,SP/PS,1,Parteistimmen,4894.0,0.0,0.0,4894.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
534,1,Aeugst am Albis,4,SVP/UDC,1,Parteistimmen,8118.0,0.0,0.0,0.0,8118.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536,1,Aeugst am Albis,5,LPS/PLS,1,Parteistimmen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Preview the first rows of the percentage frame.
data_percent.head(n=5)

Unnamed: 0,commune_id,commune_name,party_id,party_name,result_id,result_name,value,FDP/PLR (PRD),CVP/PDC,SP/PS,SVP/UDC,LPS/PLS,LdU/AdI,EVP/PEV,CSP/PCS,GLP/PVL,BDP/PBD,PdA/PST,PSA,POCH,GPS/PES,FGA/AVF,Sol.,SD/DS,Rep./Rép.,EDU/UDF,FPS/PSL,Lega,MCR,Sep./Sép.,Übrige/Autres
529,1,Aeugst am Albis,1,FDP/PLR (PRD),2,Parteistärke in %,18.69166,18.69166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
531,1,Aeugst am Albis,2,CVP/PDC,2,Parteistärke in %,2.076428,0.0,2.076428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
533,1,Aeugst am Albis,3,SP/PS,2,Parteistärke in %,18.64594,0.0,0.0,18.64594,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
535,1,Aeugst am Albis,4,SVP/UDC,2,Parteistärke in %,30.929249,0.0,0.0,0.0,30.929249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
537,1,Aeugst am Albis,5,LPS/PLS,2,Parteistärke in %,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# The result type is now implied by which frame a row sits in, so the two
# result columns are dropped from both.
result_columns = ['result_name', 'result_id']
data_voices = data_voices.drop(columns=result_columns)
data_percent = data_percent.drop(columns=result_columns)
data_percent.head()

Unnamed: 0,commune_id,commune_name,party_id,party_name,value,FDP/PLR (PRD),CVP/PDC,SP/PS,SVP/UDC,LPS/PLS,LdU/AdI,EVP/PEV,CSP/PCS,GLP/PVL,BDP/PBD,PdA/PST,PSA,POCH,GPS/PES,FGA/AVF,Sol.,SD/DS,Rep./Rép.,EDU/UDF,FPS/PSL,Lega,MCR,Sep./Sép.,Übrige/Autres
529,1,Aeugst am Albis,1,FDP/PLR (PRD),18.69166,18.69166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
531,1,Aeugst am Albis,2,CVP/PDC,2.076428,0.0,2.076428,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
533,1,Aeugst am Albis,3,SP/PS,18.64594,0.0,0.0,18.64594,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
535,1,Aeugst am Albis,4,SVP/UDC,30.929249,0.0,0.0,0.0,30.929249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
537,1,Aeugst am Albis,5,LPS/PLS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
def _sum_by_commune(frame):
    """Collapse a long per-party frame to one row per commune.

    Drops the descriptive columns, then sums the remaining columns for each
    commune_id; commune_id becomes the index of the returned frame.
    """
    descriptive = ['commune_name', 'party_id', 'party_name', 'value']
    return frame.drop(columns=descriptive).groupby('commune_id').sum()


# Both frames go through the same aggregation. The former axis=0 and
# as_index=True arguments were the defaults; groupby's axis keyword is
# deprecated in recent pandas, so it is no longer passed.
data_percent = _sum_by_commune(data_percent)
data_voices = _sum_by_commune(data_voices)

In [17]:
# Label every party column as a percentage.
data_percent = data_percent.add_prefix('percentages ')
data_percent.head()

Unnamed: 0_level_0,percentages FDP/PLR (PRD),percentages CVP/PDC,percentages SP/PS,percentages SVP/UDC,percentages LPS/PLS,percentages LdU/AdI,percentages EVP/PEV,percentages CSP/PCS,percentages GLP/PVL,percentages BDP/PBD,percentages PdA/PST,percentages PSA,percentages POCH,percentages GPS/PES,percentages FGA/AVF,percentages Sol.,percentages SD/DS,percentages Rep./Rép.,percentages EDU/UDF,percentages FPS/PSL,percentages Lega,percentages MCR,percentages Sep./Sép.,percentages Übrige/Autres
commune_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,18.69166,2.076428,18.64594,30.929249,0.0,0.0,3.467063,0.0,8.43525,2.617442,0.167638,0.0,0.0,7.075094,0.0,0.0,0.312417,0.0,4.575761,0.0,0.0,0.0,0.0,3.006058
2,14.226624,4.585387,19.080314,33.785786,0.0,0.0,5.464827,0.0,7.357859,4.164299,0.190049,0.0,0.0,6.211047,0.0,0.0,0.19098,0.0,1.577217,0.0,0.0,0.0,0.0,3.165612
3,16.472636,3.378541,20.403264,29.100156,0.0,0.0,3.143003,0.0,11.862398,3.803109,0.112518,0.0,0.0,6.661066,0.0,0.0,0.166527,0.0,1.74928,0.0,0.0,0.0,0.0,3.147504
4,12.793652,2.881915,19.393304,34.937369,0.0,0.0,2.569875,0.0,8.748273,4.656087,0.193911,0.0,0.0,8.021664,0.0,0.0,0.184995,0.0,1.64044,0.0,0.0,0.0,0.0,3.978514
5,15.805827,3.918166,22.478008,30.114599,0.0,0.0,3.589299,0.0,9.625938,3.768864,0.227988,0.0,0.0,6.466387,0.0,0.0,0.018158,0.0,1.821887,0.0,0.0,0.0,0.0,2.164878


In [18]:
# Label every party column as a vote total.
data_voices = data_voices.add_prefix('total voices ')
data_voices.head()

Unnamed: 0_level_0,total voices FDP/PLR (PRD),total voices CVP/PDC,total voices SP/PS,total voices SVP/UDC,total voices LPS/PLS,total voices LdU/AdI,total voices EVP/PEV,total voices CSP/PCS,total voices GLP/PVL,total voices BDP/PBD,total voices PdA/PST,total voices PSA,total voices POCH,total voices GPS/PES,total voices FGA/AVF,total voices Sol.,total voices SD/DS,total voices Rep./Rép.,total voices EDU/UDF,total voices FPS/PSL,total voices Lega,total voices MCR,total voices Sep./Sép.,total voices Übrige/Autres
commune_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
1,4906.0,545.0,4894.0,8118.0,0.0,0.0,910.0,0.0,2214.0,687.0,44.0,0.0,0.0,1857.0,0.0,0.0,82.0,0.0,1201.0,0.0,0.0,0.0,0.0,789.0
2,15271.0,4922.0,20481.0,36266.0,0.0,0.0,5866.0,0.0,7898.0,4470.0,204.0,0.0,0.0,6667.0,0.0,0.0,205.0,0.0,1693.0,0.0,0.0,0.0,0.0,3398.0
3,10980.0,2252.0,13600.0,19397.0,0.0,0.0,2095.0,0.0,7907.0,2535.0,75.0,0.0,0.0,4440.0,0.0,0.0,111.0,0.0,1166.0,0.0,0.0,0.0,0.0,2098.0
4,5740.0,1293.0,8701.0,15675.0,0.0,0.0,1153.0,0.0,3925.0,2089.0,87.0,0.0,0.0,3599.0,0.0,0.0,83.0,0.0,736.0,0.0,0.0,0.0,0.0,1785.0
5,7834.0,1942.0,11141.0,14926.0,0.0,0.0,1779.0,0.0,4771.0,1868.0,113.0,0.0,0.0,3205.0,0.0,0.0,9.0,0.0,903.0,0.0,0.0,0.0,0.0,1073.0


In [19]:
# Write both cleaned, commune-indexed frames to CSV.
exports = (
    (data_voices, 'cleaned_politique_party_total_voices2015.csv'),
    (data_percent, 'cleaned_politique_party_percentages2015.csv'),
)
for frame, target in exports:
    frame.to_csv(target)