In [190]:
from decimal import Decimal

import pandas as pd
import numpy as np

In [191]:
# Load every input table from CSV files located in the working directory.
voice = pd.read_csv(filepath_or_buffer='voice.csv')
# latitude / longitude are parsed with Decimal instead of the default float parser.
geo = pd.read_csv(
    filepath_or_buffer='geo-categorized.csv',
    converters=dict.fromkeys(('latitude', 'longitude'), Decimal),
)
tih_coefficients = pd.read_csv(filepath_or_buffer='tihanovkaja_photo_coefficients.csv')
protest_coefficients = pd.read_csv(filepath_or_buffer='protest_registered_coefficients.csv')
alt_coefficients = pd.read_csv(filepath_or_buffer='alt_candidates_coefficients.csv')
trusted_vp = pd.read_csv(filepath_or_buffer='trusted-for-alternative-fixed.csv')

In [192]:
# Polling-station id -> area name, taken from the geo table.
vp_to_area = dict((row['id'], row['area']) for _, row in geo.iterrows())
# Polling-station id -> {'latitude': ..., 'longitude': ...}.
vp_to_coordinates = dict(
    (row['id'], {key: row[key] for key in ('latitude', 'longitude')})
    for _, row in geo.iterrows()
)

# Per-area multipliers read from the coefficient tables.
area_to_tih_coeff = dict(
    (row['area'], row['coefficient']) for _, row in tih_coefficients.iterrows()
)
area_to_protest_coeff = dict(
    (row['area'], row['protest_coefficient']) for _, row in protest_coefficients.iterrows()
)
# Keyed by the string "<area>_<candidate>".
area_candidate_to_alt_coeff = dict(
    ('{}_{}'.format(row['area'], row['candidate']), row['coefficient'])
    for _, row in alt_coefficients.iterrows()
)

# Polling-station id -> the full row (a Series) of the trusted table.
trusted_rows = dict((row['id'], row) for _, row in trusted_vp.iterrows())

In [193]:
# Every candidate column handled by this notebook.
candidates = [
    'against',
    'cherechen',
    'corrupted',
    'dmitriyev',
    'kanopatskaja',
    'tihanovkaja',
]
# Same names, in the same order, without 'tihanovkaja'.
alt_candidates = [name for name in candidates if name != 'tihanovkaja']
# Alternative candidates with 'corrupted' removed (a set, so unordered).
alt_candidates_no_corrupted = {name for name in alt_candidates if name != 'corrupted'}

In [194]:
def official_candidates(data):
    """Add up the ``<candidate>_officialVotes`` entries of ``data`` for every name in ``candidates``."""
    return _generic_candidates_sum(
        data,
        source='officialVotes',
        candidates_to_sum=candidates,
    )


def official_alt_candidates(data):
    """Add up the ``<candidate>_officialVotes`` entries of ``data`` for every name in ``alt_candidates``."""
    return _generic_candidates_sum(
        data,
        source='officialVotes',
        candidates_to_sum=alt_candidates,
    )


def registered_alt_candidates(data):
    """Add up the ``<candidate>_registered`` entries of ``data`` for every name in ``alt_candidates``."""
    return _generic_candidates_sum(
        data,
        source='registered',
        candidates_to_sum=alt_candidates,
    )


def _generic_candidates_sum(data, source, candidates_to_sum):
    return sum([data[f'{candidate}_{source}'] for candidate in candidates_to_sum])

In [195]:
def calculate_alt(candidate, protest, tihanovkaja, area_name=None, coefficients=None):
    """Return the part of ``protest - tihanovkaja`` attributed to ``candidate``.

    Args:
        candidate: candidate name; used to build the coefficient key
            ``"<area>_<candidate>"``.
        protest: protest total for the polling station.
        tihanovkaja: tihanovkaja total for the same station.
        area_name: area used in the coefficient key. When omitted, the
            module-level ``area`` variable is used, which is what the original
            implementation always did (hidden notebook state). Pass it
            explicitly to avoid depending on whichever loop last assigned
            ``area``.
        coefficients: mapping from ``"<area>_<candidate>"`` to a multiplier.
            Defaults to the module-level ``area_candidate_to_alt_coeff``.

    Returns:
        ``0`` when ``tihanovkaja >= protest``; otherwise
        ``(protest - tihanovkaja)`` times the looked-up coefficient.

    Raises:
        KeyError: if the mapping has no entry for the area/candidate pair.
        NameError: if a fallback global is needed but has not been defined.
    """
    if tihanovkaja >= protest:
        return 0

    # Resolve the legacy global fallbacks only when a lookup is really needed,
    # so the early-return path above never touches notebook state.
    if area_name is None:
        area_name = area
    if coefficients is None:
        coefficients = area_candidate_to_alt_coeff

    return (protest - tihanovkaja) * coefficients[f'{area_name}_{candidate}']

In [196]:
# Build one record per row of `voice`: id, area, data source, the tihanovkaja
# figure, one figure per alternative candidate, and the station coordinates.
expected_list = []

for _, voice_row in voice.iterrows():
    poll_station_id = voice_row['id']
    # Keep this exact module-level name: calculate_alt() reads `area` as a global.
    area = vp_to_area[poll_station_id]

    if poll_station_id not in trusted_rows:
        # Station is not in the trusted table: derive the figures from the
        # voice row, scaled by the per-area coefficients.
        source = 'voice'
        tihanovkaja = voice_row['tihanovkaja_photoVoices'] * area_to_tih_coeff[area]
        protest = (
            voice_row['registered'] - voice_row['lukashenko_registered'] - voice_row['ignore_registered']
        ) * area_to_protest_coeff[area]
        alternative = {
            name: calculate_alt(name, protest, tihanovkaja)
            for name in alt_candidates
        }
    else:
        # Station is in the trusted table: take its official figures as they are.
        trusted_row = trusted_rows[poll_station_id]
        source = 'official'
        tihanovkaja = trusted_row['tihanovkaja_officialVotes']
        alternative = {
            name: trusted_row[f'{name}_officialVotes']
            for name in alt_candidates
        }

        if trusted_row['corrupted_officialVotes'] == 0:
            # Zero 'corrupted' count: move a coefficient-sized share of
            # 'against' into 'corrupted'. The share is computed from the
            # original 'against' value before that value is reduced.
            source = 'official-corrupted-fix'
            coeff = area_candidate_to_alt_coeff[f'{area}_corrupted/corrupted+against']
            moved_share = alternative['against'] * coeff
            alternative['corrupted'] = moved_share
            alternative['against'] = alternative['against'] - moved_share

    # Key order (and therefore column order) matches the order listed here.
    expected_list.append({
        'id': voice_row['id'],
        'area': area,
        'source': source,
        'tihanovkaja': tihanovkaja,
        **alternative,
        **vp_to_coordinates[poll_station_id],
    })

In [197]:
# Turn the per-station records into a table and save it as CSV
# (no index column, floats written with the %g format).
expected = pd.DataFrame(data=expected_list)
expected.to_csv(
    path_or_buf='candidates-by-poll-station.csv',
    index=False,
    float_format='%g',
)

In [198]:
# Grand total over all six candidate columns (per-column sums, then summed).
expected.loc[:, candidates].sum().sum()

2539229.301392014

In [199]:
# Column total for 'tihanovkaja' across all stations.
expected.loc[:, 'tihanovkaja'].sum()

1931485.4540000004

In [200]:
# Column total for 'against' across all stations.
expected.loc[:, 'against'].sum()

293846.18839599995

In [201]:
# Column total for 'dmitriyev' across all stations.
expected.loc[:, 'dmitriyev'].sum()

93922.93813999998

In [202]:
# Column total for 'cherechen' across all stations.
expected.loc[:, 'cherechen'].sum()

83545.83856099998

In [203]:
# Column total for 'kanopatskaja' across all stations.
expected.loc[:, 'kanopatskaja'].sum()

76625.79366299999

In [204]:
# Column total for 'corrupted' across all stations.
expected.loc[:, 'corrupted'].sum()

59803.088631999984

In [205]:
# Per-candidate totals for rows whose area is 'capital', divided by 758000.
# NOTE(review): 758000 is an unexplained constant - presumably a voter/turnout
# count for the capital; confirm and move it to a named constant.
expected.loc[expected['area'].eq('capital'), candidates].sum().div(758000)

against         0.101758
cherechen       0.022841
corrupted       0.024173
dmitriyev       0.024387
kanopatskaja    0.022519
tihanovkaja     0.703188
dtype: float64

In [206]:
# Column-wise sum over every column of the 'capital' rows; as the recorded
# output shows, the string columns (id, area, source) come out concatenated.
expected.loc[expected['area'].eq('capital')].sum()

id              05-141-002507-001-000107-001-000207-001-000307...
area            capitalcapitalcapitalcapitalcapitalcapitalcapi...
source          officialvoicevoicevoicevoiceofficialvoicevoice...
tihanovkaja                                                533017
against                                                   77132.4
cherechen                                                 17313.4
corrupted                                                 18323.3
dmitriyev                                                 18485.1
kanopatskaja                                              17069.5
latitude                                            36970.7953376
longitude                                          18906.49252911
dtype: object

In [207]:
# Grand total over all six candidate columns, restricted to 'capital' rows.
expected.loc[expected['area'].eq('capital'), candidates].sum().sum()

681340.451

In [208]:
# Per-candidate totals, cast to int (astype(int) truncates the fractional part),
# written as a one-column table named 'total' with the candidate names as index.
candidate_totals = expected[candidates].sum().astype(int)
candidate_totals.to_frame(name='total').to_csv('candidates-total.csv')