In [389]:
import pandas as pd
from decimal import Decimal
import numpy as np

In [390]:
# Load the voting table. Later cells read its `id` column and the
# `<name>_officialVotes`, `<name>_registered` and `<name>_photoVoices` columns.
voice = pd.read_csv('voice.csv')

In [391]:
# Coordinates are parsed with Decimal instead of the default float parsing,
# so the values are built directly from the text found in the CSV.
geo_data = pd.read_csv(
    'geo-categorized.csv',
    converters={
        'latitude': Decimal,
        'longitude': Decimal,
    },
)

In [392]:
# Column-name prefixes used to build `<prefix>_officialVotes`,
# `<prefix>_registered`, ... lookups. The spellings must match the CSV headers
# exactly, so they are kept as-is. 'lukashenko' is not part of this list and is
# appended separately where needed.
candidates = [
    'tihanovkaja',
    'against',
    'dmitriyev',
    'kanopatskaja',
    'cherechen',
]

In [393]:
# Station ids whose official figures are blanked out (set to NaN) further down.
broken_poll_stations = [
    '02-016-0007',
    '02-016-0043',
    '02-074-0013',
    '03-100-0010',
    '05-036-0019',
]

In [394]:
# Overwrite every official figure with NaN on the rows whose id is listed in
# broken_poll_stations. The row mask is the same for every column, so it is
# computed once up front.
broken_rows = voice['id'].isin(broken_poll_stations)

for column in ('officialVotes', 'officialVoters'):
    voice.loc[broken_rows, column] = np.nan

# 'lukashenko' is not in `candidates`, so it is appended here to blank that
# column as well.
for candidate in candidates + ['lukashenko']:
    voice.loc[broken_rows, f'{candidate}_officialVotes'] = np.nan
    

In [395]:
# Manual overrides: station id -> {column name: value to write into `voice`}.
fix_data = {
    '05-036-0022': {'officialVoters': 2854},
    '06-146-0025': {'officialVoters': 1300},
}

In [396]:
# Write each manual override into the row(s) of `voice` with the matching id.
for station_id, overrides in fix_data.items():
    station_rows = voice['id'] == station_id
    for column, value in overrides.items():
        voice.loc[station_rows, column] = value

In [397]:
def get_trusted_by(source, data=None, names=None):
    """Flag rows whose official per-candidate counts are not below another count.

    A row is flagged True only when, for every candidate prefix,
    ``<prefix>_officialVotes >= <prefix>_<source>`` holds. With ordinary
    float/int columns a comparison against NaN is False, so a row with a
    missing official count is not flagged.

    Parameters
    ----------
    source : str
        Column suffix of the count to compare against, e.g. ``'photoVoices'``.
    data : pandas.DataFrame, optional
        Frame holding the columns. Defaults to the notebook-level ``voice``.
    names : iterable of str, optional
        Candidate column prefixes. Defaults to the notebook-level
        ``candidates`` list, so the check follows that list instead of a
        second hard-coded copy of it.

    Returns
    -------
    pandas.Series
        Boolean mask sharing the index of the frame that was checked.
    """
    frame = voice if data is None else data
    if names is None:
        names = candidates

    # Start from "all trusted" and knock out rows that fail any candidate.
    trusted = pd.Series(True, index=frame.index)
    for name in names:
        trusted &= frame[f'{name}_officialVotes'] >= frame[f'{name}_{source}']
    return trusted

In [398]:
def get_trusted_by_registration(data=None, names=None, min_registered=10):
    """Flag rows whose official counts are consistent with the ``*_registered`` counts.

    The ``corrupted`` columns pass when any of these holds:

    * ``corrupted_officialVotes >= corrupted_registered``
    * ``corrupted_registered < min_registered``
    * ``corrupted_officialVotes == 0``
    * ``corrupted_officialVotes`` is missing (NaN)

    Each candidate prefix passes when
    ``<prefix>_officialVotes >= <prefix>_registered`` or
    ``<prefix>_registered < min_registered``. Unlike the ``corrupted`` check
    there is no zero/NaN escape for candidates, so a missing official count
    only passes when the registered count is below the threshold.
    NOTE(review): presumably small registered counts are ignored as noise;
    confirm the intent with the author.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame holding the columns. Defaults to the notebook-level ``voice``.
    names : iterable of str, optional
        Candidate column prefixes. Defaults to the notebook-level ``candidates``.
    min_registered : int, optional
        Registered counts strictly below this value are not held against a
        row. Defaults to 10, the value previously hard-coded in two places.

    Returns
    -------
    pandas.Series
        Boolean mask; True where the ``corrupted`` check and every candidate
        check pass.
    """
    frame = voice if data is None else data
    if names is None:
        names = candidates

    corrupted_official = frame['corrupted_officialVotes']
    corrupted_registered = frame['corrupted_registered']
    mask = (
        (corrupted_official >= corrupted_registered) |
        (corrupted_registered < min_registered) |
        (corrupted_official == 0) |
        corrupted_official.isna()
    )

    for name in names:
        official = frame[f'{name}_officialVotes']
        registered = frame[f'{name}_registered']
        mask &= (official >= registered) | (registered < min_registered)

    return mask

In [399]:
# Boolean mask: rows where every candidate's `_officialVotes` value is at least
# the value in the matching `_photoVoices` column.
photo_trust = get_trusted_by('photoVoices')

In [400]:
# Number of rows that pass the photo-based check.
int(photo_trust.sum())

867

In [401]:
# Boolean mask produced by the `*_registered` comparison in
# get_trusted_by_registration.
registered_trust = get_trusted_by_registration()

In [402]:
# Number of rows that pass both the registration-based and photo-based checks.
int((registered_trust & photo_trust).sum())

675

In [403]:
# Keep only the rows that pass both checks.
voice_trusted = voice.loc[photo_trust & registered_trust]

In [404]:
# Attach the geo columns to the trusted rows. This is an inner join on `id`,
# so a trusted row without a matching geo row is dropped.
voice_trusted_extended = geo_data.merge(
    voice_trusted,
    how='inner',
    on='id',
)

In [405]:
# Display the merged frame for a visual check.
voice_trusted_extended

Unnamed: 0,id,town,area,latitude,longitude,region,monitoredVotes,registered,photoVoices,officialVotes,...,ignore_photoVoices,kanopatskaja_registered,kanopatskaja_officialVotes,kanopatskaja_photoVoices,lukashenko_registered,lukashenko_officialVotes,lukashenko_photoVoices,tihanovkaja_registered,tihanovkaja_officialVotes,tihanovkaja_photoVoices
0,01-010-0001,Brest,city,52.089515,23.680013,1,,486,256,1319.0,...,0,0,16.0,0,5,464.0,0,466,701.0,253
1,01-010-0002,Brest,city,52.09133,23.652785,1,,474,291,2249.0,...,0,1,25.0,0,3,994.0,0,456,948.0,284
2,01-010-0004,Brest,city,52.098645,23.654222,1,,275,154,1714.0,...,0,1,26.0,0,0,1125.0,0,260,395.0,153
3,01-010-0005,Brest,city,52.099618,23.649713,1,,334,179,2122.0,...,1,1,21.0,1,2,1506.0,1,321,412.0,174
4,01-010-0006,Brest,city,52.105589,23.646314,1,,495,307,2383.0,...,0,2,28.0,2,4,923.0,0,476,1157.0,301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
670,07-008-0080,Minsk,capital,53.910628,27.485043,7,,536,340,1595.0,...,0,0,6.0,0,4,899.0,1,519,583.0,336
671,07-008-0081,Minsk,capital,53.910628,27.485043,7,,558,342,1575.0,...,0,1,25.0,0,3,838.0,1,539,597.0,336
672,07-009-0014,Minsk,capital,53.934864,27.563107,7,,342,212,1266.0,...,0,0,21.0,0,3,428.0,2,329,707.0,206
673,07-009-0017,Minsk,capital,53.929606,27.544952,7,,996,727,2304.0,...,0,0,50.0,0,3,656.0,1,971,1407.0,715


In [406]:
# Trusted rows per `area` category, one cell per category.
# voice_trusted_extended['area'].value_counts() would list them all at once.
int(voice_trusted_extended['area'].eq('village').sum())

68

In [407]:
# Trusted rows whose `area` is 'city'.
int(voice_trusted_extended['area'].eq('city').sum())

136

In [408]:
# Trusted rows whose `area` is 'town_below100'.
int(voice_trusted_extended['area'].eq('town_below100').sum())

158

In [409]:
# Trusted rows whose `area` is 'town_over100'.
int(voice_trusted_extended['area'].eq('town_over100').sum())

94

In [410]:
# Trusted rows whose `area` is 'capital'.
int(voice_trusted_extended['area'].eq('capital').sum())

81

In [411]:
# Trusted rows whose `area` is 'embassy'.
int(voice_trusted_extended['area'].eq('embassy').sum())

11

In [412]:
# Trusted rows whose `area` is 'minsk_suburb'.
int(voice_trusted_extended['area'].eq('minsk_suburb').sum())

61

In [413]:
# Trusted rows whose `area` is 'minsk_village'.
int(voice_trusted_extended['area'].eq('minsk_village').sum())

66

In [414]:
# Write the merged trusted rows to CSV without the DataFrame index.
# NOTE(review): 'tusted' looks like a typo for 'trusted'. The name is left
# unchanged because other code may read this exact path - confirm before renaming.
voice_trusted_extended.to_csv('tusted_by_voice.csv', index=False)