The data used here can be found [here](https://www.pxweb.bfs.admin.ch/Selection.aspx?px_language=fr&px_db=px-x-0702000000_104&px_tableid=px-x-0702000000_104\px-x-0702000000_104.px&px_type=PX).

We gathered the data for 2013–2015 (2016 is not available). However, the names of the municipalities are those from the year 2016 (or later). In order to have data for the right municipality names, we assume that the cow/inhabitant ratio is the same for all municipalities that merged together.

In [2]:
# imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import show
import re

In [3]:
# Load the raw cow counts. The file is semicolon-separated and Latin-1
# encoded; its first two lines are skipped.
df = pd.read_csv(
    '../data/data_to_be_processed/raw_data/cows.csv',
    sep=';',
    skiprows=2,
    encoding='latin_1',
)

In [3]:
# The farming-system column is not used in this notebook: discard it and give
# the three remaining columns short English names.
df = df.drop(columns=["Système d'exploitation"])
df.columns = ['year', 'commune', 'cows']
df.head()


Unnamed: 0,year,commune,cows
0,2013,- Suisse,1557474
1,2013,- Zürich,93233
2,2013,>> Bezirk Affoltern,9315
3,2013,......0001 Aeugst am Albis,622
4,2013,......0002 Affoltern am Albis,596


### Preprocessing

In [4]:
# Keep only municipality rows. District rows contain '>>'; canton rows and the
# national total contain '- ' (so '- Suisse' needs no filter of its own).
# Both markers are matched literally, and rows with a missing label are
# dropped as well, which is what the former `== False` comparison did.
is_district = df['commune'].str.contains('>>', regex=False, na=True)
is_canton_or_total = df['commune'].str.contains('- ', regex=False, na=True)
# .copy() makes the result an independent frame, so the column assignments
# below do not write into a slice of the raw data (SettingWithCopyWarning).
df = df.loc[~(is_district | is_canton_or_total)].copy()

# Labels look like '......0001 Aeugst am Albis': split once, on the first
# space, into the dotted id and the municipality name. Indexing the split
# result replaces tuple-unpacking of the `.str` accessor, which current pandas
# versions no longer support.
label_parts = df['commune'].str.split(' ', n=1)
df['id'] = label_parts.str[0]
df['commune'] = label_parts.str[1]

# Remove the six-dot padding and store the id as an integer. Assigning the
# result back avoids an in-place replace on a column selection.
df['id'] = df['id'].str.replace(r"\.{6}", '', regex=True).astype(int)

# Discard rows whose name part consists only of digits.
df = df[~df['commune'].str.isnumeric()]

In [5]:
# Municipality merger mapping (pre-merger id/name -> post-merger id/name),
# restricted to the rows whose date contains '2016'.
f = pd.read_csv("../data/data_to_be_processed/cleaned_data/fusion_mappingsBefore02-01-2016.csv")
merged_in_2016 = f['date'].str.contains('2016')
f = f.loc[merged_in_2016]
f.head()

Unnamed: 0,pre_id,pre_name,post_id,post_name,date
698,175,Kyburg,296,Illnau-Effretikon,01.01.2016
699,174,Illnau-Effretikon,296,Illnau-Effretikon,01.01.2016
700,532,Bangerten,310,Rapperswil (BE),01.01.2016
701,417,Niederösch,405,Ersigen,01.01.2016
702,419,Oberösch,405,Ersigen,01.01.2016


In [6]:
# Attach the 2016 merger mapping to the cow counts by post-merger name. The
# right join keeps every cow row; a municipality that results from a merger
# is repeated once per pre-merger municipality.
fu = pd.merge(f, df, left_on='post_name', right_on='commune', how='right')

# Keep the id the cow count was reported under: it is the key later used to
# look up the matching population when computing the ratio.
fu['compute_ratio_id'] = fu['id'].astype(int)

# Where a mapping row matched, switch name and id to the pre-merger
# municipality; otherwise keep the values from the cow data.
fu['commune'] = fu['pre_name'].fillna(fu['commune'])
fu['id'] = fu['pre_id'].fillna(fu['id']).astype(int)

# The mapping columns are no longer needed once name and id are resolved.
fu = fu.drop(columns=['pre_id', 'pre_name', 'post_id', 'post_name', 'date'])
fu.head()

Unnamed: 0,year,commune,cows,id,compute_ratio_id
0,2013,Kyburg,1573,175,296
1,2013,Illnau-Effretikon,1573,174,296
2,2014,Kyburg,1590,175,296
3,2014,Illnau-Effretikon,1590,174,296
4,2015,Kyburg,1675,175,296


In [7]:
# Sanity check: total number of rows divided by 3.
len(fu) / 3

2324.0

In [11]:
# Municipality table for 2016, without the columns at positions 1 and 3-8.
mun = pd.read_csv('../data/municipalities/2016/data_commune.csv')
mun = mun.drop(columns=mun.columns[[1, 3, 4, 5, 6, 7, 8]])
data = mun

# Merger mapping restricted to rows whose date contains '2016'. The path
# previously lacked the '.' before 'csv'; it now names the same mapping file
# that is read earlier in this notebook.
fusion = pd.read_csv("../data/data_to_be_processed/cleaned_data/fusion_mappingsBefore02-01-2016.csv")
# .copy() so the dtype conversion below does not write into a filtered slice.
fusion = fusion[fusion['date'].str.contains('2016')].copy()
fusion['post_id'] = fusion['post_id'].astype(int)

# TODO: if this is used again, check whether outer is the right join.
merged = pd.merge(fusion, data, left_on='pre_id', right_on='id', how='outer')
# Re-key each municipality that took part in a merger to its post-merger id,
# then sum the rows that now share an id.
merged['id'] = merged['post_id'].fillna(merged['id']).astype(int)
merged = merged.drop(columns=['pre_id', 'pre_name', 'post_id', 'post_name', 'date'])
merged = merged.groupby(['id'], as_index=False).sum()
merged.head()

Unnamed: 0,id,total_inhabitants
0,1,1981
1,2,11707
2,3,5326
3,4,3477
4,5,3659


In [12]:
# Write one cow-per-inhabitant file per year. The 2016 file is built from the
# 2015 counts.
for year in range(2013, 2017):
    data_year = 2015 if year == 2016 else year
    tmp = fu[fu['year'] == data_year].drop('year', axis=1)

    # Look up the population through the id the cow count was reported under.
    cows = pd.merge(tmp, merged, left_on='compute_ratio_id', right_on='id', how='left')
    cows['cow_ratio'] = (cows['cows'] / cows['total_inhabitants']).round(2)

    # Keep only name, municipality id and ratio; the remaining id column
    # carries the merge suffix, so the columns are renamed by position.
    cows = cows.drop(columns=['compute_ratio_id', 'id_y', 'total_inhabitants', 'cows'])
    cows.columns = ['commune', 'id', 'cow_ratio']
    cows.to_csv('../data/municipalities/%d/data_cows.csv' % year, index=False)
cows.head()

Unnamed: 0,commune,id,cow_ratio
0,Kyburg,175,0.1
1,Illnau-Effretikon,174,0.1
2,Bangerten,532,0.66
3,Rapperswil (BE),310,0.66
4,Niederösch,417,0.5


In [13]:
# Municipalities with more than two cows per inhabitant (last year written).
cows.loc[cows['cow_ratio'] > 2]

Unnamed: 0,commune,id,cow_ratio
215,La Ferrière,435,2.07
216,Mont-Tramelan,437,5.35
228,Champoz,683,2.20
241,Schelten,708,8.16
242,Seehof,709,7.74
245,Rebévelier,715,7.53
246,Petit-Val,716,3.27
285,Meienried,389,2.52
322,Oeschenbach,335,2.25
326,Rohrbachgraben,339,2.34
