Data from https://www.pxweb.bfs.admin.ch/default.aspx

In [1]:
# imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import show

%matplotlib inline
sns.set_context('notebook')
pd.options.mode.chained_assignment = None  # default='warn'
pd.set_option('display.max_columns', 500) # show more columns
nan = np.nan # store numpy.nan in 'nan'

In [2]:
raw_o40= pd.read_csv('../data/data_to_be_processed/raw_data/deaths_over40_utf8_raw.csv', sep=';', header=1)
raw_sub40 = pd.read_csv('../data/data_to_be_processed/raw_data/deaths_sub40_utf8_raw.csv', sep=';', header=1)
raw_tot = pd.read_csv('../data/data_to_be_processed/raw_data/deaths_total_utf8_raw.csv', sep=';', header=1)

# rename the columns
d = {'Jahr':'year', 'Kanton (-) / Bezirk (>>) / Gemeinde (......)': 'commune_name', 
     'Zivilstand - Total Unter 20 Jahre':'death age 0-20',
     'Zivilstand - Total 20-29 Jahre':'death age 20-29',
     'Zivilstand - Total 30-39 Jahre':'death age 30-39',
                   'Zivilstand - Total 40-49 Jahre':'death age 40-49',
                   'Zivilstand - Total 50-59 Jahre':'death age 50-59',
                    'Zivilstand - Total 60-69 Jahre':'death age 60-69',
                    'Zivilstand - Total 70-79 Jahre':'death age 70-79',
                    'Zivilstand - Total 80-89 Jahre':'death age 80-89',
                    'Zivilstand - Total 80 Jahre und mehr':'death age 90+',
    'Zivilstand - Total Altersklasse - Total':'total deaths'}
raw_sub40.rename(columns=d, inplace=True)
raw_o40.rename(columns=d, inplace=True)
raw_tot.rename(columns=d, inplace=True)

raw_o40.drop(['Geschlecht', 'Staatsangehörigkeit'], inplace=True, axis=1)
raw_sub40.drop(['Geschlecht', 'Staatsangehörigkeit'], inplace=True, axis=1)
raw_tot.drop(['Geschlecht'], inplace=True, axis=1)


In [3]:
# remove cantons and bezirk and remove '......' from names
def _remove_dots(row):
    return row['commune_name'].replace('......', '')
def _keep_only_communes(df):
    tmp = df[df['commune_name'].str.find('....') >= 0]
    tmp['commune_name'] = tmp.apply(_remove_dots, axis=1)
    return tmp

In [4]:
raw_o40 = _keep_only_communes(raw_o40)
raw_sub40 = _keep_only_communes(raw_sub40)
raw_tot = _keep_only_communes(raw_tot)

In [5]:
raw_o40.head()

Unnamed: 0,year,commune_name,death age 40-49,death age 50-59,death age 60-69,death age 70-79,death age 80-89,Zivilstand - Total 90 Jahre und mehr
3,2013,0001 Aeugst am Albis,1,1,0,3,7,1
4,2013,0002 Affoltern am Albis,1,3,10,19,28,13
5,2013,0003 Bonstetten,2,1,3,6,6,6
6,2013,0004 Hausen am Albis,2,1,8,8,10,5
7,2013,0005 Hedingen,1,5,2,4,5,5


In [6]:
raw_sub40.head()

Unnamed: 0,year,commune_name,death age 0-20,death age 20-29,death age 30-39
3,2013,0001 Aeugst am Albis,0,0,0
4,2013,0002 Affoltern am Albis,0,1,0
5,2013,0003 Bonstetten,0,0,0
6,2013,0004 Hausen am Albis,0,1,0
7,2013,0005 Hedingen,0,0,0


In [7]:
raw_tot.head()

Unnamed: 0,year,commune_name,Staatsangehörigkeit,total deaths
9,2013,0001 Aeugst am Albis,Staatsangehörigkeit - Total,13
10,2013,0001 Aeugst am Albis,Schweiz,12
11,2013,0001 Aeugst am Albis,Ausland,1
12,2013,0002 Affoltern am Albis,Staatsangehörigkeit - Total,75
13,2013,0002 Affoltern am Albis,Schweiz,67


Continue only with total deaths

In [8]:
# parse commune_id
def _parse_commune_id(row):
    return int(row['commune_name'].split()[0])

def _remove_id_from_name(row):
    return ' '.join(row['commune_name'].split()[1:])

In [9]:
data = raw_tot
data['id'] = data.apply(_parse_commune_id, axis=1)
data['commune_name'] = data.apply(_remove_id_from_name, axis=1)

In [10]:
data = data[data['Staatsangehörigkeit'] == 'Staatsangehörigkeit - Total']
data.drop(['Staatsangehörigkeit', 'commune_name'], axis=1, inplace=True)
data.head()

Unnamed: 0,year,total deaths,id
9,2013,13,1
12,2013,75,2
15,2013,24,3
18,2013,35,4
21,2013,22,5


In [11]:
#split years:
def _split_years(year):
    return data[data['year'] == year]
data2013 = _split_years(2013)
data2014 = _split_years(2014)
data2015 = _split_years(2015)
data2013.drop('year', axis=1, inplace=True)
data2014.drop('year', axis=1, inplace=True)
data2015.drop('year', axis=1, inplace=True)

In [12]:
data2013.head()

Unnamed: 0,total deaths,id
9,13,1
12,75,2
15,24,3
18,35,4
21,22,5


In [13]:
data2013.to_csv("../data/municipalities/2013/deaths.csv", index=False)
data2014.to_csv("../data/municipalities/2014/deaths.csv", index=False)
data2015.to_csv("../data/municipalities/2015/deaths.csv", index=False)
