In [1]:
# imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import show

%matplotlib inline
sns.set_context('notebook')
pd.options.mode.chained_assignment = None  # default='warn'
pd.set_option('display.max_columns', 500) # show more columns
nan = np.nan # store numpy.nan in 'nan'

In [2]:
data_to_merge = pd.read_csv('cleaned_data/2015/cleaned_politique_party_percentages2015.csv')
data_commune = pd.read_csv('../municipalities/2015/orig_data_commune.csv')
data_to_merge.rename(columns={'commune_id':'id'}, inplace=True)

In [3]:
data_to_merge.head()

Unnamed: 0,id,FDP/PLR (PRD),CVP/PDC,SP/PS,SVP/UDC,GLP/PVL,BDP/PBD,GPS/PES,Other/Autres
0,1,18.69,2.08,18.65,30.93,8.44,2.62,7.08,11.54
1,2,14.23,4.59,19.08,33.79,7.36,4.16,6.21,10.59
2,3,16.47,3.38,20.4,29.1,11.86,3.8,6.66,8.32
3,4,12.79,2.88,19.39,34.94,8.75,4.66,8.02,8.56
4,5,15.81,3.92,22.48,30.11,9.63,3.77,6.47,7.82


In [4]:
data_commune.head()

Unnamed: 0,commune,id,total_inhabitants,percentage_18,percentage_40,percentage_65,percentage_100,percentage_men,percentage_swiss
0,Aeugst am Albis,1,1981,0.19,0.23,0.42,0.16,0.5,0.87
1,Affoltern am Albis,2,11707,0.18,0.3,0.35,0.17,0.5,0.73
2,Bonstetten,3,5326,0.22,0.26,0.37,0.15,0.49,0.86
3,Hausen am Albis,4,3477,0.2,0.23,0.4,0.17,0.49,0.86
4,Hedingen,5,3659,0.21,0.25,0.39,0.15,0.5,0.85


In [5]:
merged = data_commune.merge(data_to_merge, on='id')
merged.head()

Unnamed: 0,commune,id,total_inhabitants,percentage_18,percentage_40,percentage_65,percentage_100,percentage_men,percentage_swiss,FDP/PLR (PRD),CVP/PDC,SP/PS,SVP/UDC,GLP/PVL,BDP/PBD,GPS/PES,Other/Autres
0,Aeugst am Albis,1,1981,0.19,0.23,0.42,0.16,0.5,0.87,18.69,2.08,18.65,30.93,8.44,2.62,7.08,11.54
1,Affoltern am Albis,2,11707,0.18,0.3,0.35,0.17,0.5,0.73,14.23,4.59,19.08,33.79,7.36,4.16,6.21,10.59
2,Bonstetten,3,5326,0.22,0.26,0.37,0.15,0.49,0.86,16.47,3.38,20.4,29.1,11.86,3.8,6.66,8.32
3,Hausen am Albis,4,3477,0.2,0.23,0.4,0.17,0.49,0.86,12.79,2.88,19.39,34.94,8.75,4.66,8.02,8.56
4,Hedingen,5,3659,0.21,0.25,0.39,0.15,0.5,0.85,15.81,3.92,22.48,30.11,9.63,3.77,6.47,7.82


In [6]:
# all merged and no left out (all outputs must be True)
print(sum(merged['id'] != merged['id']) == 0)
print(len(merged) == len(data_commune) == len(data_to_merge))

True
False


In [7]:
if 'commune_id' in merged.columns:
    merged.drop('commune_id', axis=1, inplace=True)

In [8]:
merged.head()

Unnamed: 0,commune,id,total_inhabitants,percentage_18,percentage_40,percentage_65,percentage_100,percentage_men,percentage_swiss,FDP/PLR (PRD),CVP/PDC,SP/PS,SVP/UDC,GLP/PVL,BDP/PBD,GPS/PES,Other/Autres
0,Aeugst am Albis,1,1981,0.19,0.23,0.42,0.16,0.5,0.87,18.69,2.08,18.65,30.93,8.44,2.62,7.08,11.54
1,Affoltern am Albis,2,11707,0.18,0.3,0.35,0.17,0.5,0.73,14.23,4.59,19.08,33.79,7.36,4.16,6.21,10.59
2,Bonstetten,3,5326,0.22,0.26,0.37,0.15,0.49,0.86,16.47,3.38,20.4,29.1,11.86,3.8,6.66,8.32
3,Hausen am Albis,4,3477,0.2,0.23,0.4,0.17,0.49,0.86,12.79,2.88,19.39,34.94,8.75,4.66,8.02,8.56
4,Hedingen,5,3659,0.21,0.25,0.39,0.15,0.5,0.85,15.81,3.92,22.48,30.11,9.63,3.77,6.47,7.82


In [9]:
[i for i in np.array(data_commune['id']) if i not in np.array(data_to_merge['id'])]

[296]

In [10]:
# not in data_to_merge:
data_commune[data_commune.id == 296]

Unnamed: 0,commune,id,total_inhabitants,percentage_18,percentage_40,percentage_65,percentage_100,percentage_men,percentage_swiss
119,Illnau-Effretikon,296,16327,0.18,0.3,0.33,0.2,0.49,0.74


In [11]:
not_in_data_commune = [i for i in np.array(data_to_merge['id']) if i not in np.array(data_commune['id'])]
np.array(not_in_data_commune)

array([9012, 9022, 9030, 9040, 9052, 9100, 9112, 9120, 9160, 9161, 9162,
       9170, 9182, 9190, 9200, 9211, 9212, 9220, 9222, 9230, 9232, 9250,
       9252])

In [12]:
len(not_in_data_commune)

23

In [13]:
data_to_merge[data_to_merge['id'].isin(not_in_data_commune)]

Unnamed: 0,id,FDP/PLR (PRD),CVP/PDC,SP/PS,SVP/UDC,GLP/PVL,BDP/PBD,GPS/PES,Other/Autres
2324,9012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2325,9022,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2326,9030,16.88,16.66,19.76,20.65,7.78,2.21,14.5,1.57
2327,9040,0.0,25.32,0.0,26.58,0.0,0.0,25.32,22.78
2328,9052,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2329,9100,10.98,21.11,23.96,25.47,4.15,1.08,9.78,3.47
2330,9112,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2331,9120,19.98,5.14,34.68,14.89,7.11,1.25,11.58,5.37
2332,9160,0.0,62.79,32.56,0.0,0.0,0.0,0.0,4.65
2333,9161,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
merged.shape

(2324, 17)

In [15]:
data_commune.shape

(2325, 9)

In [16]:
data_to_merge.shape

(2347, 9)

In [17]:
(merged.isnull() == True).any()

commune              False
id                   False
total_inhabitants    False
percentage_18        False
percentage_40        False
percentage_65        False
percentage_100       False
percentage_men       False
percentage_swiss     False
FDP/PLR (PRD)        False
CVP/PDC              False
SP/PS                False
SVP/UDC              False
GLP/PVL              False
BDP/PBD              False
GPS/PES              False
Other/Autres         False
dtype: bool

In [19]:
merged.to_csv('../municipalities/2015/data_commune.csv', index=False)