# Spain

In [1]:
import pandas as pd
import geopandas as gpd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import numpy as np
import requests
from fuzzywuzzy import process, fuzz

%config InlineBackend.figure_format = 'retina'
pd.options.mode.chained_assignment = None

## Fetch results

In [71]:
# URL of the index document that get_index() downloads; the community,
# province and municipality ids used further down are read from it.
home_url = 'http://www.infoelectoral.mir.es/infoelectoral/flash/swf/flashdata/map99_0.xml'

In [72]:
def get_index():
    """Download the index document at `home_url` and return it parsed.

    Returns:
        BeautifulSoup: the parsed index document.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Bound the wait and fail loudly on an HTTP error instead of parsing an
    # error page as if it were the index.
    response = requests.get(home_url, timeout=60)
    response.raise_for_status()
    # Decode the body as UTF-8 regardless of what requests detected.
    response.encoding = 'utf8'
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one
    # is installed; pin one if results must be identical across machines.
    return BeautifulSoup(response.text)

In [499]:
def construct_municipality_url(row):
    """Build the results-summary URL for one municipality.

    `row` must expose `community_id`, `province_id` and `municipality_id`
    attributes. Their values are appended, in that order, as the
    `codComunidad`, `codProvincia` and `codMunicipio` query parameters of the
    fixed base query.
    """
    base = ('http://www.infoelectoral.mir.es/infoelectoral/min/mapa.html?'
            'private=0&vuelta=1&method=getXmlResumen&codTipoEleccion=7&codPeriodo=201405&codEstado=99')
    query_parts = (
        ('codComunidad', row.community_id),
        ('codProvincia', row.province_id),
        ('codMunicipio', row.municipality_id),
    )
    return base + ''.join(f'&{key}={value}' for key, value in query_parts)

In [500]:
def parse_municipality(url):
    """Fetch one municipality summary and extract its turnout figures.

    Returns a dict with the keys:
      - 'municipality': the `titulo` attribute of the `resumen` element
      - 'eligible': int parsed from the text after 'electoral:</b>'
      - 'voters': int parsed from the text after 'votantes:</b>'
      - 'turnout': float parsed from the parenthesised part of that same text

    Raises IndexError when one of the two text markers is missing from the
    response body.
    """
    response = requests.get(url)
    body = response.text
    soup = BeautifulSoup(body)
    name = soup.select_one('resumen').attrs['titulo']

    def field_after(marker):
        # Text between `marker` and the next '<br/>', stripped of whitespace.
        return body.split(marker)[1].split('<br/>')[0].strip()

    # The voters field holds a count followed by a parenthesised figure.
    count_text, pct_text = field_after('votantes:</b>').split('(')
    # '.' is removed before the integer conversion.
    n_voters = int(count_text.replace('.', '').strip())
    # Drop the last two characters and turn the decimal comma into a point.
    pct = float(pct_text[:-2].replace(',', '.'))
    n_eligible = int(field_after('electoral:</b>').replace('.', ''))
    return {'municipality': name, 'eligible': n_eligible, 'voters': n_voters, 'turnout': pct}

In [73]:
# Download and parse the index document (performs a network request).
index = get_index()

In [502]:
# Flatten the community -> province -> municipality hierarchy of the index
# into one record per municipality. The first `comunidadautonoma` element is
# skipped.
meta = pd.DataFrame([
    {
        'community_id': community.attrs['id'],
        'community_name': community.attrs['nombre'],
        'province_id': province.attrs['id'],
        'province_name': province.attrs['nombre'],
        'municipality_id': municipality.attrs['id'],
        'municipality_name': municipality.attrs['nombre'],
    }
    for community in index.select('comunidadautonoma')[1:]
    for province in community.select('provincia')
    for municipality in province.select('municipio')
])

In [503]:
# Build each municipality's results URL from the ids in its row.
meta['url'] = meta.apply(construct_municipality_url, axis=1)

In [504]:
# Placeholder column for the scraped results of each municipality.
meta['data'] = None

In [505]:
%%time
# Scrape every municipality page. Results are collected in a list and assigned
# to the column in one go: writing to the `row` yielded by iterrows() is not
# guaranteed to reach `meta`, because the row may be a copy.
results = []
for i, url in meta.url.items():
    # Lightweight progress indicator; the full run takes a long time.
    if i % 1000 == 0:
        print(i)
    try:
        results.append(parse_municipality(url))
    except IndexError:
        # Page without the expected markers: keep the row, leave it empty.
        results.append(None)
meta['data'] = results

0
1000
2000
3000
4000
5000
6000
7000
8000
CPU times: user 58.6 s, sys: 5.14 s, total: 1min 3s
Wall time: 23min 39s


In [506]:
# Expand the per-row result dicts into their own columns, appended after the
# existing ones, and discard the raw `data` column.
expanded = meta['data'].apply(pd.Series)
meta = pd.concat([meta.drop(columns='data'), expanded], axis=1)

In [507]:
def fix_cesuras():
    """Hack to fix municipality of Oza-Cesuras. Numbers from website.

    The rows named 'Cesuras' and 'Oza dos Ríos' each receive half of the
    hard-coded eligible/voter totals and the same turnout. Mutates the global
    `meta` in place.
    """
    is_oza_cesuras = meta.municipality_name.isin(['Cesuras', 'Oza dos Ríos'])
    overrides = {'eligible': 4729 / 2, 'voters': 2032 / 2, 'turnout': 42.97}
    for column, value in overrides.items():
        meta.loc[is_oza_cesuras, column] = value

fix_cesuras()

In [508]:
def fix_petilla():
    """Hack around a mistake in the map.

    Reassigns the shape named 'Petilla de Aragón' to NAME_1
    'Comunidad Foral de Navarra' and NAME_2 'Navarra'. Mutates the global
    `geo` in place.
    """
    # NOTE(review): `geo` is only loaded in a later cell, so on a fresh
    # top-to-bottom run this fails unless the shapefile has been read first.
    is_petilla = geo.NAME_4 == 'Petilla de Aragón'
    corrections = {'NAME_1': 'Comunidad Foral de Navarra', 'NAME_2': 'Navarra'}
    for column, value in corrections.items():
        geo.loc[is_petilla, column] = value

fix_petilla()

In [509]:
# Pull out the parenthesised, digit-free part of each name into `prefix`.
# The pattern is a raw string: '\(' and '\D' are invalid escape sequences in a
# normal string literal and trigger a warning on recent Python versions.
meta['prefix'] = meta.municipality_name.str.extract(r'\((\D+)\)')[0]
# Literal (non-regex) replacement of the elided article.
meta['prefix'] = meta['prefix'].str.replace("l'", "L'", regex=False)
meta.prefix = meta.prefix.str.capitalize()

def move_prefix(row):
    """Return the municipality name with its prefix moved to the front.

    `row` must expose `prefix` and `municipality_name`. When `prefix` is null
    the name is returned untouched. Otherwise the prefix is put in front of
    the name and everything from the first '(' onwards is dropped.
    """
    if pd.isnull(row.prefix):
        return row.municipality_name
    # "L'" attaches directly to the name; any other prefix gets a space.
    separator = '' if row.prefix == "L'" else ' '
    combined = f'{row.prefix}{separator}{row.municipality_name}'
    return combined.split('(')[0].strip()

# Rewrite every name with its prefix in front (row-wise, via move_prefix).
meta['municipality_name'] = meta.apply(move_prefix, axis=1)

In [510]:
# Exact-value renames of community names.
# NOTE(review): presumably these target the spellings used in the shapefile's
# NAME_1 / NAME_2 columns, which the later merge joins on; confirm.
community_renames = {
    'Comunitat Valenciana': 'Comunidad Valenciana',
    'Illes Balears': 'Islas Baleares',
    'Canarias': 'Islas Canarias',
    'Ciudad de Ceuta': 'Ceuta y Melilla',
    'Ciudad de Melilla': 'Ceuta y Melilla',
}
meta.community_name = meta.community_name.replace(community_renames)

# Keep only the part of the province name before ' /', then rename.
province_renames = {
    'Guipuzcoa': 'Guipúzcoa',
    'Illes Balears': 'Baleares',
}
province_first_part = meta.province_name.str.split(' /', expand=True)[0]
meta.province_name = province_first_part.replace(province_renames)

In [511]:
# Exact-value renames of individual municipalities. No replacement value is
# itself a key, so applying them in one pass equals applying them one by one.
municipality_renames = {
    'El Herradón': 'Herradón de Pinares',
    'La Villajoyosa/Vila Joiosa': 'Villajoyosa',
    'Vila-real': 'Villarreal',
    'El Pinós': 'Pinoso',
    'El Fondó de les Neus': 'Hondón de las Nieves',
    'Orkoien': 'Orcoyen',
    # NOTE(review): 'Mungai' may be a typo or may mirror the map's spelling; confirm.
    'Mungia': 'Mungai',
    'Garai': 'Garay',
    'Ager': 'Àger',
    'Boadella i les Escaules': "Boadella d'Empordà",
    "Cabrera d'Anoia": "Cabrera d'Igualada",
    "L'Alqueria de la Comtessa": "Alquería de la Condesa",
    "La Aldea de San Nicolás": "San Nicolás de Tolentino",
    "Llocnou de la Corona": "Lugar Nuevo de la Corona",
    "Maó": "Mahón",
    "Valle de Villaverde": "Villaverde de Trucíos",
}
meta.municipality_name = meta.municipality_name.replace(municipality_renames)

# Substring match: any name containing 'Valle de elorz' becomes 'Noáin'.
is_elorz = meta.municipality_name.str.contains('Valle de elorz')
meta.loc[is_elorz, 'municipality_name'] = 'Noáin'

# This one changes the province of the row, not its name.
is_orduna = meta.municipality_name == 'Urduña-Orduña'
meta.loc[is_orduna, 'province_name'] = 'Álava'

In [512]:
# Fuzzy match the rest
missing = meta[~meta.municipality_name.isin(list(geo.NAME_4))].copy()

missing['fuzz'] = missing.municipality_name.apply(lambda x:
    process.extractOne(x,
                       geo.NAME_4[~geo.NAME_4.isin(meta.municipality_name)],
                       scorer=fuzz.token_set_ratio))

missing = missing[missing[['municipality_name', 'fuzz']].apply(lambda x: x.fuzz[1] >= 89, axis=1)]
missing['municipality_name_fuzz'] = missing.fuzz.apply(lambda x: x[0])
meta = meta.merge(missing.municipality_name_fuzz, left_index=True, right_index=True, how='left')
meta.municipality_name = meta.municipality_name_fuzz.fillna(meta.municipality_name)

In [516]:
# Persist the scraped and cleaned results (without the index column).
meta.to_csv('../../data/raw/esp.csv', index=False)

In [7]:
# Reload the saved results from disk.
# NOTE(review): presumably so the merge can run without re-scraping; confirm.
meta = pd.read_csv('../../data/raw/esp.csv')

## Merge

In [4]:
# Load the shapefile; its NAME_1 / NAME_2 / NAME_4 columns are the merge keys.
geo = gpd.read_file('../../data/shapefiles/esp/gadm36_ESP_4.shp')

In [5]:
# Patch two NAME_4 values in the shapefile (exact-value replacement).
name_4_fixes = {
    'La JoyosaJ': 'La Joyosa',
    'Karrantza Harana': 'Harana',
}
geo.NAME_4 = geo.NAME_4.replace(name_4_fixes)

In [8]:
# Attach turnout to each shape by matching the three name levels. A left join
# keeps every shape, including those without a result row.
geo_keys = ['NAME_1', 'NAME_2', 'NAME_4']
meta_keys = ['community_name', 'province_name', 'municipality_name']
geo = geo.merge(
    meta[meta_keys + ['turnout']],
    left_on=geo_keys,
    right_on=meta_keys,
    how='left',
)

## Export

In [519]:
# Reduce to the exported columns, rename NAME_4 to `name`, and tag every row
# with the country code.
export_columns = ['NAME_4', 'turnout', 'geometry']
geo = geo[export_columns].rename(columns={'NAME_4': 'name'})
geo['country'] = 'esp'

In [520]:
# Write the processed layer to disk.
geo.to_file('../../data/processed/esp')