# Slovakia

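Note: Valaškovce (vojenský obvod, i.e. a military district) has a population of 0, so it has no election results; it is the only municipality left without a turnout value after the merge below.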

In [2]:
import pandas as pd
import geopandas as gpd
from bs4 import BeautifulSoup
import numpy as np

pd.options.mode.chained_assignment = None

## Fetch results

In [382]:
# Raw HTML: one <select> element per region, in the same order as the `regions` list.
# Each <option> carries an electoral district code (its `value`) and the district name (its text).
# NOTE(review): presumably copied from the volby.statistics.sk results pages — confirm the source.
selects = """<select id="obvodSelect" class="ObvodSelect">    <option value="3216">Bratislava</option><option value="3217">Malacky</option><option value="3218">Pezinok</option><option value="3219">Senec</option></select>
<select id="obvodSelect" class="ObvodSelect">    <option value="3220">Dunajská Streda</option><option value="3221">Galanta</option><option value="3222">Piešťany</option><option value="3223">Senica</option><option value="3224">Trnava</option></select>
<select id="obvodSelect" class="ObvodSelect">    <option value="3225">Bánovce nad Bebravou</option><option value="3226">Nové Mesto nad Váhom</option><option value="3227">Považská Bystrica</option><option value="3228">Prievidza</option><option value="3229">Trenčín</option></select>
<select id="obvodSelect" class="ObvodSelect">    <option value="3230">Komárno</option><option value="3231">Levice</option><option value="3232">Nitra</option><option value="3233">Nové Zámky</option><option value="3234">Šaľa</option><option value="3235">Topoľčany</option></select>
<select id="obvodSelect" class="ObvodSelect">    <option value="3236">Čadca</option><option value="3237">Dolný Kubín</option><option value="3238">Liptovský Mikuláš</option><option value="3239">Martin</option><option value="3240">Námestovo</option><option value="3241">Ružomberok</option><option value="3242">Žilina</option></select>
<select id="obvodSelect" class="ObvodSelect">    <option value="3243">Banská Bystrica</option><option value="3244">Brezno</option><option value="3245">Lučenec</option><option value="3246">Rimavská Sobota</option><option value="3247">Veľký Krtíš</option><option value="3248">Zvolen</option><option value="3249">Žiar nad Hronom</option></select>
<select id="obvodSelect" class="ObvodSelect">    <option value="3250">Bardejov</option><option value="3251">Humenné</option><option value="3252">Kežmarok</option><option value="3253">Poprad</option><option value="3254">Prešov</option><option value="3255">Stará Ľubovňa</option><option value="3256">Stropkov</option><option value="3257">Svidník</option><option value="3258">Vranov nad Topľou</option></select>
<select id="obvodSelect" class="ObvodSelect">    <option value="3259">Košice</option><option value="3260">Košice - okolie</option><option value="3261">Michalovce</option><option value="3262">Rožňava</option><option value="3263">Spišská Nová Ves</option><option value="3264">Trebišov</option></select>"""

In [383]:
# Region (kraj) selector markup: one <option> per region, with the region name as its text.
# The parser is named explicitly so the parse tree does not depend on which optional
# parsers happen to be installed, and so bs4 does not emit GuessedAtParserWarning.
# NOTE(review): `h` is not referenced by any later cell visible here; the `regions`
# list is hard-coded instead. Remove this cell if it is no longer needed.
h = BeautifulSoup("""<select id="krajSelect" class="KrajSelect"><option value="3129">Bratislavský kraj</option>
<option value="3130">Trnavský kraj</option>
<option value="3131">Trenčiansky kraj</option>
<option value="3132">Nitriansky kraj</option>
<option value="3133">Žilinský kraj</option>
<option value="3134">Banskobystrický kraj</option>
<option value="3135">Prešovský kraj</option>
<option value="3136">Košický kraj</option>
    </select>""", 'html.parser')

In [384]:
# Region names, one per <select> element in `selects` and in the same order:
# the two sequences are paired by position when the district links are built.
regions = [
    'Bratislavský kraj',
    'Trnavský kraj',
    'Trenčiansky kraj',
    'Nitriansky kraj',
    'Žilinský kraj',
    'Banskobystrický kraj',
    'Prešovský kraj',
    'Košický kraj',
]

In [385]:
def get_municipalities(code):
    """Download the results table for one electoral district.

    Parameters
    ----------
    code
        District code interpolated into the results-page URL (the notebook
        passes the ``value`` attribute of a district <option>).

    Returns
    -------
    pandas.DataFrame
        The first table that ``pandas.read_html`` finds on the page.
    """
    page_url = f'http://volby.statistics.sk/ep/ep2014/EP-dv/Tabulka12_Obvod_{code}_en.html'
    tables = pd.read_html(page_url, encoding='utf8')
    return tables[0]

In [386]:
# Parse the raw markup into its <select> elements (one per region).
# `selects` is rebound from a str to a list of tags, so the isinstance guard keeps
# the cell safe to re-run. The parser is named explicitly so parsing does not depend
# on which optional parsers are installed (and bs4 does not warn about guessing one).
if isinstance(selects, str):
    selects = BeautifulSoup(selects, 'html.parser').select('select')

In [387]:
# One (region name, district code, district name) tuple per district <option>.
# <select> elements and region names are paired by position.
links = [
    (region_name, option.attrs['value'], option.text)
    for select_tag, region_name in zip(selects, regions)
    for option in select_tag.select('option')
]

In [388]:
%%time
# Fetch every district's table and tag each row with its region and district name.
data = [
    get_municipalities(district_code).assign(region=region_name, district=district_name)
    for region_name, district_code, district_name in links
]

CPU times: user 1.21 s, sys: 227 ms, total: 1.44 s
Wall time: 6.45 s


In [389]:
# Stack the per-district tables into one frame. ignore_index=True gives the result a
# fresh unique index; without it every district table contributes its own row labels
# again, leaving duplicate index labels that make label-based operations ambiguous.
df = pd.concat(data, ignore_index=True)

In [390]:
# Keep only the columns used downstream. The header strings must match the parsed
# tables exactly, including the run-together words (presumably line breaks in the
# HTML headers that were dropped during parsing — confirm against the source page).
wanted_columns = [
    'Commune, town, town district',
    'Number ofreturnedenvelopes',
    'Number of votersregistered in listsof eligible voters',
    'Voterturnoutin %',
    'district',
    'region',
]
df = df[wanted_columns]

In [391]:
# Short column names, in the same order as the columns selected in the previous cell.
df.columns = ['commune', 'voters', 'eligible', 'turnout', 'district', 'region']

# Counts are strings containing spaces; strip the spaces before converting to numbers.
# regex=False makes the literal match explicit instead of relying on the pandas
# version's default for `regex`.
for count_column in ('voters', 'eligible'):
    df[count_column] = df[count_column].str.replace(' ', '', regex=False).astype(float)

# Turnout is given in percent; store it as a fraction.
# NOTE(review): this line is not idempotent — re-running the cell divides by 100 again.
df['turnout'] = df['turnout'] / 100

# Normalise ' - ' inside commune names to a plain hyphen.
df['commune'] = df['commune'].str.replace(' - ', '-', regex=False)

# Drop rows without a commune name and the per-district total rows.
df = df[df['commune'].notnull() & (df['commune'] != 'Total for electoral district')]

# Remove the trailing ' kraj' from region names. Removing the literal suffix (instead of
# slicing off the last five characters) leaves already-stripped names unchanged.
df['region'] = df['region'].str.replace(' kraj', '', regex=False)

In [393]:
# Save the cleaned results (without the index) so they can be reloaded from disk
# instead of being scraped again.
df.to_csv('../../data/raw/svk.csv', index=False)

In [3]:
# Reload the cleaned results from the CSV written in the "Fetch results" section.
df = pd.read_csv('../../data/raw/svk.csv')

## Merge

In [4]:
# Load the shapefile and reproject it to EPSG:4326 in one step.
geo = gpd.read_file('../../data/shapefiles/svk/obec_3.shp').to_crs(epsg=4326)

In [5]:
# Maps a district name as it appears in the shapefile's NM3 column (key) to the name of
# the electoral district it is merged under (value). Districts not listed here keep
# their own NM3 name.
# NOTE(review): presumably these are districts that have no results page of their own
# and are counted within a neighbouring electoral district — confirm against the source.
remap = {
    'Banská Štiavnica': 'Žiar nad Hronom',
    'Myjava': 'Nové Mesto nad Váhom',
    'Revúca': 'Rimavská Sobota',
    'Snina': 'Humenné',
    'Skalica': 'Senica',
    'Sabinov': 'Prešov',
    'Poltár': 'Lučenec',
    'Krupina': 'Zvolen',
    'Žarnovica': 'Žiar nad Hronom',
    'Gelnica': 'Spišská Nová Ves',
    'Bytča': 'Žilina',
    'Detva': 'Zvolen',
    'Hlohovec': 'Trnava',
    'Ilava': 'Trenčín',
    'Kysucké Nové Mesto': 'Žilina',
    'Levoča': 'Poprad',
    'Medzilaborce': 'Humenné',
    'Partizánske': 'Prievidza',
    'Púchov': 'Považská Bystrica',
    'Sobrance': 'Michalovce',
    'Turčianske Teplice': 'Martin',
    'Tvrdošín': 'Námestovo',
    'Zlaté Moravce': 'Nitra'
}

In [6]:
# NOTE(review): the dissolve step below is commented out in the original notebook;
# delete it if it is no longer needed.
#geo = geo.dissolve(by='IDN4')
# Districts listed in `remap` get the mapped electoral district; all others keep NM3.
geo['electoral_district'] = geo['NM3'].map(remap).fillna(geo['NM3'])

In [7]:
# Collapse the numbered city districts (e.g. 'Bratislava I') to the bare city name,
# i.e. the first word of each listed name.
city_district_names = [
    'Bratislava I',
    'Bratislava II',
    'Bratislava III',
    'Bratislava IV',
    'Bratislava V',
    'Košice I',
    'Košice II',
    'Košice III',
    'Košice IV',
]
geo['electoral_district'] = geo['electoral_district'].replace(
    {name: name.split(' ')[0] for name in city_district_names}
)

In [8]:
# Attach the results to the geometries by (district, municipality) name pairs.
# how='left' keeps every shapefile row; validate='one_to_one' makes pandas raise if
# either side has duplicate key pairs.
shape_keys = ['electoral_district', 'NM4']
result_keys = ['district', 'commune']
geo = geo.merge(
    df,
    how='left',
    left_on=shape_keys,
    right_on=result_keys,
    validate='one_to_one',
)

In [9]:
# Sanity check: difference between total voters in the merged frame and in the results.
# 0.0 means no results row was lost in the left merge.
geo.voters.sum() - df.voters.sum()

0.0

In [10]:
# Same sanity check for eligible voters: 0.0 means the merged total equals the results total.
geo.eligible.sum() - df.eligible.sum()

0.0

In [11]:
# Shapefile rows that have no turnout value after the left merge.
geo[geo.turnout.isnull()]

Unnamed: 0,DOW,FACC,IDN4,NM4,IDN3,NM3,IDN2,NM2,VYMERA,Shape_Leng,Shape_Area,geometry,electoral_district,commune,voters,eligible,turnout,district,region
1534,2019-04-30,FA004,518638,Valaškovce (vojenský obvod),702,Humenné,7,Prešovský,119228967.0,62231.247432,118814800.0,"POLYGON ((22.15891375373651 48.94651634121639,...",Humenné,,,,,,


## Export

In [12]:
# Reduce to the exported columns, rename the municipality column to 'name',
# then tag every row with the country code.
export_columns = ['NM4', 'turnout', 'geometry']
geo = geo[export_columns].rename(columns={'NM4': 'name'})
geo['country'] = 'svk'

In [14]:
# Write the processed layer to disk.
# NOTE(review): the path has no file extension and no driver is given, so the output
# format is geopandas' default — presumably an ESRI Shapefile written into a directory
# named 'svk'; confirm and consider passing `driver=` explicitly.
geo.to_file('../../data/processed/svk')