# Lithuania

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup

%config InlineBackend.figure_format = 'retina'
pd.options.mode.chained_assignment = None

## Fetch results

In [21]:
# Site root; relative links scraped from the results pages are appended to it.
base_url = 'https://www.vrk.lt'

# Path of the results landing page that lists one link per municipality.
results_path = ('/statiniai/puslapiai/2014_ep_rinkimai/output_lt/'
                'rezultatai_daugiamand_apygardose/rezultatai_daugiamand_apygardose1turas.html')
home_url = base_url + results_path

In [290]:
# Request headers sent with every scrape request (browser-like user agent plus
# session cookies).
# NOTE(review): `__cfduid` / `cf_clearance` look like short-lived, session-bound
# anti-bot cookies; when responses start containing "Checking your browser"
# they presumably have to be refreshed from a real browser session — confirm.
# No `if-none-match` header is sent: a conditional GET may be answered with an
# empty `304 Not Modified` response, which would leave nothing to parse.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
    'cookie': '__cfduid=da62b75860db603501549d9bf586b94e21557856557; cf_clearance=e02672356d73813751c650754f05920ae79bbfb7-1557856561-1800-150'
}

In [291]:
def get_municipalities():
    """Scrape the results landing page for the per-municipality links.

    Requests `home_url` with the module-level `headers`, takes the third
    nested `table.partydata` table and reads the anchor inside each
    left-aligned cell.

    Returns:
        list[tuple[str, str]]: one `(href, link text)` pair per cell that
        contains a link, in page order.

    Raises:
        ValueError: the response is the "Checking your browser" page instead
            of the results.
        requests.HTTPError: the server answered with an error status.
        IndexError: the page has fewer than three nested `partydata` tables.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(home_url, headers=headers, timeout=30)
    r.encoding = 'utf8'
    # Checked before the status code so the browser-check page is always
    # reported with the same, more specific error.
    if 'Checking your browser' in r.text:
        raise ValueError('Browser check')
    r.raise_for_status()
    # NOTE(review): the parser is left to BeautifulSoup's auto-detection;
    # pinning one could change how the nested tables are indexed — verify
    # before fixing it.
    html = BeautifulSoup(r.text)
    table = html.select('table.partydata table.partydata')[2]
    anchors = (td.select_one('a') for td in table.select('td[align=left]'))
    # Look each anchor up once; cells without a link carry no municipality.
    return [(a.attrs['href'], a.text) for a in anchors if a is not None]

In [292]:
def get_district_turnouts(url):
    """Download one municipality page and return its district table.

    Args:
        url: site-relative link to the municipality page; it is appended to
            `base_url`.

    Returns:
        pandas.DataFrame: the third nested `table.partydata` table, parsed
        with its second row as the header and with the last row dropped
        (presumably a totals row — confirm against the page).

    Raises:
        ValueError: the response is the "Checking your browser" page instead
            of the results.
        requests.HTTPError: the server answered with an error status.
        IndexError: the page has fewer than three nested `partydata` tables.
    """
    from io import StringIO  # local import: only needed for read_html below

    full_url = f'{base_url}{url}'
    # A timeout keeps a stalled connection from hanging the whole crawl.
    r = requests.get(full_url, headers=headers, timeout=30)
    r.encoding = 'utf8'
    # Checked before the status code so callers keep getting ValueError for
    # the browser-check page.
    if 'Checking your browser' in r.text:
        raise ValueError('Browser check')
    r.raise_for_status()
    # NOTE(review): the parser is left to BeautifulSoup's auto-detection;
    # pinning one could change how the nested tables are indexed — verify
    # before fixing it.
    html = BeautifulSoup(r.text)
    table = html.select('table.partydata table.partydata')[2]
    # read_html expects a path, URL or file-like object; passing the markup
    # itself as a plain string is deprecated in recent pandas.
    data = pd.read_html(StringIO(str(table)), header=1)[0].iloc[:-1]
    return data

In [293]:
# List of (relative URL, name) pairs, one per municipality link on the landing page.
municipalities = get_municipalities()

In [None]:
# Download every municipality's district table (one request per municipality)
# and tag each table with the municipality it came from.
data = [
    get_district_turnouts(m_url).assign(municipality=m_name)
    for m_url, m_name in municipalities
]

In [295]:
# Stack the per-municipality tables into one frame. `ignore_index=True` gives
# the result a fresh 0..n-1 index instead of repeating each table's own row
# labels, so later label-aligned assignments cannot hit duplicate labels.
df = pd.concat(data, ignore_index=True)

In [296]:
# Keep only the columns used downstream: district, eligible voters, votes cast
# (count and percentage) and the municipality tag.
wanted_columns = ['Apylinkė', 'Rinkėjų skaičius', 'skaičius', 'proc.', 'municipality']
df = df.loc[:, wanted_columns]

In [297]:
# Sanity check: overall turnout (votes cast / eligible voters) across all
# scraped districts should come out at 47.35%.
total_voters = df['skaičius'].sum()
total_eligible = df['Rinkėjų skaičius'].sum()
total_voters / total_eligible

0.4735350573701597

In [298]:
# Relabel the columns positionally with English names; the order must match
# the columns currently held by `df`.
df.columns = ['district', 'eligible', 'voters', 'turnout', 'municipality']

In [299]:
# Drop rows whose district name contains "diplomat" (presumably polling
# stations at diplomatic missions, which have no polygon to map — confirm).
is_diplomatic = df['district'].str.contains('diplomat')
df = df[~is_diplomatic]

In [300]:
# Turn the raw percentage strings into floats by inserting a decimal point
# after the second character (e.g. '4735' -> 47.35).
# NOTE(review): this assumes every value has exactly two integer digits; a
# turnout below 10% or of exactly 100% would be misplaced — check the raw
# values before relying on such rows.
df['turnout'] = df['turnout'].map(lambda raw: float(f'{raw[:2]}.{raw[2:]}'))

In [301]:
# The district label starts with its number followed by a dot; keep that
# number as an integer key. This has to happen while the numeric prefix is
# still part of `district`.
df['APL_NUM'] = df['district'].str.split('.').str[0].astype(int)

In [302]:
# Strip the leading "<number>. " prefix from both name columns.
# `regex=True` is required: recent pandas treats the pattern of
# `Series.str.replace` as a literal string by default, which would silently
# leave the prefixes in place. The raw string avoids the invalid `\d` escape.
number_prefix = r'^\d+\. '
df['district'] = df['district'].str.replace(number_prefix, '', regex=True).str.strip()
df['municipality'] = df['municipality'].str.replace(number_prefix, '', regex=True).str.strip()

In [303]:
# Shorten the Vilnius municipality label; the text is replaced literally
# wherever it occurs in the name.
df['municipality'] = df['municipality'].str.replace(
    'Vilniaus miesto ir užsienio', 'Vilniaus miesto', regex=False)

In [304]:
# Whole-value spelling fixes for district names (only cells that equal a key
# exactly are changed), presumably to match the spellings used in the
# shapefile they are merged with — confirm.
district_spelling = {
    'A.Vienuolio': 'A. Vienuolio',
    'J.Tumo - Vaižganto': 'J.Tumo-Vaižganto',
    'M.Račkausko': 'M. Račkausko',
}
df.district = df.district.replace(district_spelling)
df.district = df.district.replace('Vilkaviškio rytų', 'Vilkaviškio rytų')

In [314]:
# Save the cleaned district table as CSV; the index is not written.
df.to_csv('../../data/raw/ltu.csv', index=False)

In [9]:
# Reload the saved table, so the steps below can start from the CSV file
# instead of the scraped frame.
df = pd.read_csv('../../data/raw/ltu.csv')

## Merge

In [10]:
# Load the district shapefile (`apylinkes.shp`) as a GeoDataFrame.
geo = gpd.read_file('../../data/shapefiles/ltu/apylinkes.shp')

In [11]:
# Reproject the geometries to EPSG:4326 (WGS 84 longitude/latitude).
geo = geo.to_crs(epsg=4326)

In [12]:
# Attach the turnout rows to the polygons. A left join keeps every polygon;
# polygons with no matching row end up with missing turnout values.
shape_keys = ['SAVIVALD', 'APL_PAV', 'APL_NUM']
result_keys = ['municipality', 'district', 'APL_NUM']
geo = geo.merge(df, how='left', left_on=shape_keys, right_on=result_keys)

In [310]:
# Reduce to the output schema: district name, turnout, geometry and a
# constant country code.
geo = (
    geo[['district', 'turnout', 'geometry']]
    .rename(columns={'district': 'name'})
    .assign(country='ltu')
)

## Export

In [312]:
# Write the merged layer to disk; no driver is passed, so geopandas' default
# output format is used.
geo.to_file('../../data/processed/ltu')