In [577]:
from plotly.graph_objects import Scatter, Figure
import pandas as pd
import numpy as np
from decimal import Decimal

In [578]:
# Load the polling-station table. Latitude and longitude are parsed with
# Decimal instead of the default float conversion.
df = pd.read_csv(
    'tusted_by_voice.csv',
    converters=dict.fromkeys(('latitude', 'longitude'), Decimal),
)

In [579]:
# Values of the 'area' column; draw_chart adds one scatter trace per entry.
areas = [
    'city',
    'minsk_suburb',
    'capital',
    'village',
    'town_below100',
    'town_over100',
    'embassy',
]

In [580]:
def corr(x, y):
    """Return the Pearson correlation coefficient between ``x`` and ``y``.

    Parameters
    ----------
    x, y : array-like of numbers
        Paired observations of equal length.

    Returns
    -------
    float
        The off-diagonal entry of ``np.corrcoef(x, y)``. ``nan`` is returned
        when fewer than two observations are supplied or when one of the
        inputs has zero variance, since the coefficient is undefined there.

    Raises
    ------
    ValueError
        Propagated from ``np.corrcoef`` when the inputs have different lengths.
    """
    n_x, n_y = np.size(x), np.size(y)
    # With fewer than two paired points np.corrcoef also yields nan, but only
    # after emitting "Degrees of freedom <= 0" / "Mean of empty slice"
    # warnings. Answer directly so empty slices stay quiet.
    if n_x == n_y and n_x < 2:
        return np.nan
    # A zero standard deviation makes np.corrcoef divide by zero; keep the nan
    # result but silence the floating-point warnings that come with it.
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.corrcoef(x, y)[0, 1]
    

In [581]:
def draw_chart(candidate, X_source, X_title, Y_title='People voted for Tsihanouskaya'):
    """Scatter a candidate's official votes against another per-station metric.

    Reads the module-level ``df`` and ``areas`` and uses ``corr`` for the
    correlation figures shown in the title and in the legend.

    Parameters
    ----------
    candidate : str
        Column prefix; the y values come from ``f'{candidate}_officialVotes'``.
    X_source : str
        Column suffix; the x values come from ``f'{candidate}_{X_source}'``.
        Its capitalised form is also used as the first line of the title.
    X_title : str
        Label of the x axis.
    Y_title : str, optional
        Label of the y axis. Defaults to the label that was previously
        hard-coded, so existing calls render exactly as before; pass a
        different label when charting another candidate.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    x_col = f'{candidate}_{X_source}'
    y_col = f'{candidate}_officialVotes'

    # The headline correlation leaves out embassy stations; the per-area
    # traces below still include an 'embassy' trace because it is in `areas`.
    without_embassy = df[df['area'] != 'embassy']
    overall = corr(without_embassy[x_col], without_embassy[y_col])

    fig = Figure(layout_title='%s<br>Correlation (except embassy): %.2f' % (X_source.capitalize(), overall))

    for area in areas:
        subset = df[df['area'] == area]
        cr = corr(subset[x_col], subset[y_col])

        fig.add_trace(Scatter(
            x=subset[x_col],
            y=subset[y_col],
            mode='markers',
            name=f'{area} ({cr:.2f})',  # legend entry carries the per-area correlation
            text=subset['id'],          # hover text: station id of each point
        ))

    fig.update_xaxes(title=X_title)
    fig.update_yaxes(title=Y_title)

    fig.show()


In [582]:
# Official votes vs. platform registrations, before any stations are excluded.
draw_chart(
    candidate='tihanovkaja',
    X_source='registered',
    X_title='People registered on platform',
)

In [583]:
# Official votes vs. ballot photos, before any stations are excluded.
draw_chart(
    candidate='tihanovkaja',
    X_source='photoVoices',
    X_title='Ballot photos for Tsihanouskaya',
)

In [584]:
# Polling stations to exclude, as (station id, reason) pairs. Later cells use
# the ids to filter `df` and export the pairs to 'outliers.csv' under the
# columns 'id' and 'non_trust_reason'.
# NOTE(review): several ids occur more than once with different reasons
# (e.g. '05-026-0041', '02-075-0038', '05-026-0082', '06-132-0001',
# '07-001-0029', '04-112-0018', '04-118-0027', '01-011-0066'), so the exported
# table contains repeated ids - confirm whether that is intended.
# NOTE(review): '04-122-0120' resembles '04-022-0120', which is also listed;
# possibly a typo - verify against the data.
non_trusted_vp = [
    ('04-022-0069', "Trusted. Hrodna hospital. Too many votes. It's likely that people didn't expect to vote from there." ),
    ('05-141-0021', "Trusted. Minsk region children hospital. People didn't expect to vote from there."),
    ('05-141-0022', "Trusted. Minsk region hospital. People didn't expect to vote from there."),
    ('05-141-0023', "Trusted. Cancer clinic. People didn't expect to vote from there."),
    ('05-141-0037', "Trusted. Clinic. People didn't expect to vote from there."),
    ('05-141-0026', "Trusted. Novaja Baravaja is a separate world."),
    ('05-141-0027', "Trusted. Novaja Baravaja is a separate world."),
    #('07-002-0084', 'Trusted. Too many for Tihanovkaja'), # photo outlier
    ('04-106-0002', "Less than expected. Many reports on Zubr platform."),
    ('01-047-0054', "Less than expected. Zubr report."),
    ('07-002-0103', "SPB embassy. Less than expected. Zubr report."),     
    
    ('05-026-0041', "Too many for Cherechen and Kanopatskaja. Zubr report."),
    ('05-141-0095', "Too many for Cherechen. Voted less than registered in Voice."),
    ('01-047-0042', "Too many for Cherechen. Voted less than registered in Voice."),
    ('05-026-0055', "Too many for Cherechen. No observer."),
    ('04-022-0115', "Too many for Cherechen. Zubr reports."),
    ('05-141-0036', "Too few for Cherechen. Observer is not allowed inside."),
    ('05-141-0009', "Too few for Cherechen. Observer is not allowed inside."),
    
    ('01-011-0081', "Too many for Dmitriev. Zubr report."),
    ('05-026-0064', "Too many for Dmitriev. Zubr report."),
    ('04-022-0116', "Too many for Dmitriev. No observer."),
    ('02-075-0038', "Too many for Dmitriev. No observer."),
    
    ('04-022-0100', "Too many for Against. Zubr report."),
    
    ('03-019-0045', "Too many for Kanopatskaja. Less than registered."),
    ('01-049-0004', "Too many for Kanopatskaja. Less than registered."),
    ('03-018-0021', "Too many for Kanopatskaja. Less than registered."),
    ('03-018-0037', "Too many for Kanopatskaja. Less than registered."),
    ('03-087-0014', "Too many for Kanopatskaja. Less than registered."),
    ('05-026-0041', "Too many for Kanopatskaja. Less than registered."),
    ('05-026-0041', "Too many for Kanopatskaja. Zubr report."),
    ('05-141-0065', "Too many for Kanopatskaja. Zubr report."),
    ('05-141-0066', "Too many for Kanopatskaja. Zubr report."),
    ('07-004-0051', "Too many for Kanopatskaja. Zubr report."),
    ('07-001-0029', "Too many for Kanopatskaja. Zubr report."),
    ('03-018-0028', "Too many for Kanopatskaja. Zubr report."),
    ('06-132-0001', "Too many for Kanopatskaja. Zubr report."),
    ('03-096-0041', "Too many for Kanopatskaja. Zubr report."),
    ('06-012-0069', "Too many for Kanopatskaja. No observer."),
    ('02-016-0039', "Too many for Kanopatskaja. No observer."),
    ('05-026-0062', "Too many for Kanopatskaja. Few observers."),
    
    ('05-141-0074', "Too few for Against. Zubr report."),
    ('07-006-0038', "Too few for Against. Zubr report."),
    ('07-008-0080', "Too few for Against. Zubr report."),
    ('07-008-0081', "Too few for Against. Zubr report."),
    ('07-006-0020', "Too few for Against. Zubr report."),
    ('07-006-0033', "Too few for Against. Zubr report."),
    ('07-009-0021', "Too few for Against. Zubr report."),
    ('02-075-0038', "Too few for Against. Zubr report."),
    ('05-026-0082', "Too few for Against. No observers."),
    ('05-026-0082', "Too many for Against. One observer."),
    
    ('05-041-0025', "Too many for corrupted. One observer."),
    
    ('03-096-0025', "Too many outliers. One observer."),
    ('01-060-0010', "Too many outliers. One observer."),
    ('01-054-0001', "No observer."),
    ('05-036-0051', "No observer."),
    ('05-042-0023', "No observer."),
    ('01-042-0007', "No observer."),
    ('01-051-0003', "One outlier. One observer."),
    ('04-114-0017', "One outlier. One observer."),
    ('05-042-0017', "Two outliers. One observer."),
    ('05-042-0019', "Two outliers. One observer."),
    ('02-016-0040', "Two outliers. One observer."),
    
    ('03-018-0048', "Outliers. Zubr report."),
    ('04-022-0066', "Outliers. Zubr report."),
    ('04-022-0090', "Outliers. Zubr report."),
    ('05-026-0023', "Outliers. Zubr report."),
    ('05-026-0028', "Outliers. Zubr report."),
    ('05-026-0091', "Outliers. Zubr report."),
    ('05-141-0024', "Outliers. Zubr report."),
    ('05-141-0059', "Outliers. Zubr report."),
    ('06-012-0076', "Outliers. Zubr report."),
    ('06-132-0001', "Outliers. Zubr report."),
    ('07-001-0029', "Outliers. Zubr report."),
    
    ('05-141-0020', "Duplicate from 05-141-0024."),
    ('05-026-0018', "Unrealistic votes distribuation. Many outliers."),
    
    ('01-011-0058', 'Mess in the report.'),
    ('04-112-0013', 'Mess in the report.'),
    ('05-141-0013', 'Mess in the report.'),
    ('07-006-0049', 'Mess in the report.'),
    
    ('01-054-0026', 'Zubr report.'),
    ('01-010-0002', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0005', 'Too many for Lukashenko. Mess in Zubr data.'),
    ('01-010-0006', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0008', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-010-0011', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-010-0015', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0020', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0021', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0022', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-010-0027', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-010-0023', 'Too many for Lukashenko. No observers.'),
    ('01-010-0031', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0035', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0036', 'Too many for Lukashenko. Zubr report.'),
    ('01-010-0040', 'Too many for Lukashenko. No observers.'),
    ('01-010-0042', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0005', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0010', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0041', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0042', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0047', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0048', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0062', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0063', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-011-0066', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-011-0080', 'Too many for Lukashenko. Zubr report.'),
    ('01-049-0008', 'Too many for Lukashenko. One observer.'),
    ('01-054-0010', 'Too many for Lukashenko. Zubr report.'),
    ('01-054-0043', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-055-0004', 'Too many for Lukashenko. Zubr report.'),
    ('01-056-0007', 'Too many for Lukashenko. Zubr report.'),
    ('01-056-0020', 'Too many for Lukashenko. Zubr report.'),
    ('01-056-0043', 'Too many for Lukashenko. No observers.'),
    ('01-057-0037', 'Too many for Lukashenko. No observers.'),
    ('01-057-0043', 'Too many for Lukashenko. No observers.'),
    ('01-050-0011', 'Too many for Lukashenko. No observers.'),
    ('01-060-0028', 'Too many for Lukashenko. High turnout. One observer.'),
    ('01-063-0049', 'Too many for Lukashenko. No observers.'),
    ('01-063-0015', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0078', 'Too many for Lukashenko. Conflict reported.'),    
    
    ('02-014-0016', 'Too many for Lukashenko. No observers.'),   
    ('02-014-0005', 'Too many for Lukashenko. High turnout. One observer.'),
    ('02-015-0040', 'Too many for Lukashenko. Zubr report.'),  
    ('02-015-0045', 'Too many for Lukashenko. High turnout. One observer.'),
    ('02-016-0012', 'Too many for Lukashenko. No observers.'),
    ('02-016-0044', 'Too many for Lukashenko. Zubr report.'),  
    ('02-016-0059', 'Too many for Lukashenko. One observer. Zubr report.'),  
    ('02-065-0004', 'Too many for Lukashenko. Zubr report.'), 
    ('02-067-0002', 'Too many for Lukashenko. Zubr report.'),
    ('02-067-0005', 'Too many for Lukashenko. One observer.'),
    ('02-067-0015', 'Too many for Lukashenko. No observers.'),
    ('02-068-0013', 'Too many for Lukashenko. No observers.'),
    ('02-069-0005', 'Too many for Lukashenko. Zubr report.'), 
    ('02-074-0006', 'Too many for Lukashenko. Zubr report.'),    
    ('02-074-0014', 'Too many for Lukashenko. Zubr report.'),    
    ('02-073-0003', 'Too many for Lukashenko. Zubr report.'),    
    ('02-075-0001', 'Too many for Lukashenko. Zubr report.'),
    ('02-075-0051', 'Too many for Lukashenko. High turnout. One observer.'),
    ('02-076-0020', 'Too many for Lukashenko. One observer.'),
    ('02-076-0039', 'Too many for Lukashenko. High turnout. One observer.'),
    ('02-076-0040', 'Too many for Lukashenko. High turnout. One observer.'),
    ('02-077-0001', 'Too many for Lukashenko. Zubr report.'),  
    ('02-077-0006', 'Too many for Lukashenko. One observer.'),
    ('02-078-0008', 'Too many for Lukashenko. High turnout. One observer.'),
    ('02-143-0009', 'Too many for Lukashenko. Zubr report.'),  
    ('02-075-0013', 'Too many for Lukashenko. No observers.'),   
    ('02-074-0023', 'Too many for Lukashenko. No observers.'),
    
    ('03-019-0019', 'Too many for Lukashenko. One observer.'),
    ('03-087-0009', 'Too many for Lukashenko. One observer.'),
    ('03-088-0027', 'Too many for Lukashenko. No observer.'),
    ('03-091-0001', 'Too many for Lukashenko. Zubr report.'),
    ('03-091-0005', 'Too many for Lukashenko. Conflict reported.'),
    ('03-091-0009', 'Too many for Lukashenko. One observer.'),
    ('03-091-0024', 'Too many for Lukashenko. Zubr report.'),
    ('03-091-0036', 'Too many for Lukashenko. No observer.'),
    ('03-091-0043', 'Too many for Lukashenko. No observers.'),
    ('03-096-0009', 'Too many for Lukashenko. No observers.'),
    ('03-096-0026', 'Too many for Lukashenko. No observers.'),
    ('03-096-0036', 'Too many for Lukashenko. High turnout. One observer.'),
    ('03-096-0045', 'Too many for Lukashenko. Zubr report.'),
    ('03-096-0058', 'Too many for Lukashenko. Zubr report.'),
    ('03-100-0009', 'Too many for Lukashenko. High turnout. One observer.'),
    ('03-100-0057', 'Too many for Lukashenko. High turnout. One observer.'),
    ('03-100-0014', 'Too many for Lukashenko. Zubr report. One observer.'),
    
    ('04-022-0097', 'Too many for Lukashenko. No report.'),
    ('04-021-0002', 'Too many for Lukashenko. Zubr report.'),
    ('04-021-0022', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-021-0046', 'Too many for Lukashenko. Zubr report.'),
    ('04-021-0047', 'Too many for Lukashenko. Zubr report.'),
    ('04-021-0062', 'Too many for Lukashenko. No observers.'),
    ('04-022-0076', 'Too many for Lukashenko. Zubr report.'),
    ('04-022-0077', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-022-0087', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-022-0105', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-022-0106', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-022-0119', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-022-0120', 'Too many for Lukashenko. Zubr report.'),
    ('04-022-0123', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-022-0124', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-022-0130', 'Too many for Lukashenko. Zubr reports.'),
    ('04-022-0092', 'Too many for Lukashenko. Zubr report.'),
    ('04-022-0093', 'Too many for Lukashenko. Zubr report (see 0092).'),
    ('04-022-0094', 'Too many for Lukashenko. Zubr report (see 0092).'),
    ('04-106-0011', 'Too many for Lukashenko. Zubr report.'),
    ('04-106-0012', 'Too many for Lukashenko. Zubr report.'),
    ('04-112-0010', 'Too many for Lukashenko. One observer.'),
    ('04-112-0011', 'Too many for Lukashenko. Zubr report.'),
    ('04-112-0014', 'Too many for Lukashenko. Zubr report.'),
    ('04-112-0018', 'Too many for Lukashenko. Zubr report. One observer.'),
    ('04-112-0024', 'Too many for Lukashenko. One observer.'),
    ('04-112-0028', 'Too many for Lukashenko. One observer.'),
    ('04-112-0027', 'Too many for Lukashenko. Zubr report.'),
    ('04-110-0001', 'Too many for Lukashenko. No observers.'),
    ('04-112-0006', 'Too many for Lukashenko. Zubr report. One observer.'),
    ('04-112-0008', 'Too many for Lukashenko. Zubr report.'),
    ('04-112-0018', 'Too many for Lukashenko. Zubr report.'),
    ('04-112-0019', 'Too many for Lukashenko. No observers.'),
    ('04-112-0033', 'Too many for Lukashenko. Zubr report.'),
    ('04-112-0035', 'Too many for Lukashenko. Zubr report. One observer.'),
    ('04-112-0036', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-112-0059', 'Too many for Lukashenko. No observers.'),
    ('04-112-0065', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-112-0070', 'Too many for Lukashenko. No observers.'),
    ('04-114-0001', 'Too many for Lukashenko. One observer.'),
    ('04-114-0003', 'Too many for Lukashenko. Zubr report.'),
    ('04-114-0018', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-114-0028', 'Too many for Lukashenko. Zubr report.'),
    ('04-118-0010', 'Too many for Lukashenko. Zubr report.'),
    ('04-118-0011', 'Too many for Lukashenko. Zubr report.'),
    ('04-118-0014', 'Too many for Lukashenko. Zubr report.'),
    ('04-118-0019', 'Too many for Lukashenko. Zubr report.'),
    ('04-118-0024', 'Too many for Lukashenko. Zubr report.'),
    ('04-118-0027', 'Too many for Lukashenko. One observer.'),
    ('04-118-0002', 'Too many for Lukashenko. No observers.'),
    ('04-118-0026', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-118-0027', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-119-0002', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-119-0007', 'Too many for Lukashenko. No observers.'),
    ('04-119-0017', 'Too many for Lukashenko. Zubr report.'),
    ('04-145-0001', 'Too many for Lukashenko. Zubr report.'),
    ('04-145-0002', 'Too many for Lukashenko. High turnout. One observer.'),
    ('04-145-0023', 'Too many for Lukashenko. One observer.'),
    ('04-122-0120', 'Too many for Lukashenko. Zubr report.'),
    ('04-022-0099', 'Too many for Lukashenko. Zubr report.'),
    ('04-022-0098', 'Too many for Lukashenko. Same school as 0099.'),    
    ('04-022-0118', 'Too many for Lukashenko. Zubr report.'),
    
    ('05-026-0010', 'Too many for Lukashenko. Zubr report.'),
    ('05-026-0012', 'Too many for Lukashenko. Zubr report.'),
    ('05-026-0014', 'Too many for Lukashenko. Zubr report.'),
    ('05-026-0022', 'Too many for Lukashenko. Zubr report.'),
    ('05-026-0029', 'Too many for Lukashenko. Zubr report.'),
    ('05-026-0042', 'Too many for Lukashenko. Zubr report.'),
    ('05-026-0066', 'Too many for Lukashenko. No observers.'),
    ('05-026-0068', 'Too many for Lukashenko. High turnout. One observer.'),
    ('05-026-0087', 'Too many for Lukashenko. Zubr report.'),
    ('05-027-0001', 'Too many for Lukashenko. Zubr report.'),
    ('05-027-0013', 'Too many for Lukashenko. Zubr report.'),
    ('05-027-0005', 'Too many for Lukashenko. No observers.'),
    ('05-027-0009', 'Too many for Lukashenko. High turnout. One observer.'),
    ('05-027-0015', 'Too many for Lukashenko. Zubr report.'),
    ('05-029-0021', 'Too many for Lukashenko. Zubr report.'),
    ('05-029-0025', 'Too many for Lukashenko. Zubr report.'),
    ('05-029-0026', 'Too many for Lukashenko. Zubr report.'),
    ('05-029-0027', 'Too many for Lukashenko. Zubr report.'),
    ('05-029-0030', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0007', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0001', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0010', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0011', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0020', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0021', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0022', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0027', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0030', 'Too many for Lukashenko. Zubr report/No observers.'),
    ('05-036-0035', 'Too many for Lukashenko. Zubr report.'),
    ('05-036-0076', 'Too many for Lukashenko. One observer.'),
    ('05-038-0001', 'Too many for Lukashenko. Zubr report.'),
    ('05-038-0014', 'Too many for Lukashenko. Zubr report.'),
    ('05-038-0032', 'Too many for Lukashenko. Zubr report.'),
    ('05-039-0007', 'Too many for Lukashenko. Zubr report.'),
    ('05-039-0009', 'Too many for Lukashenko. Zubr report.'),
    ('05-039-0028', 'Too many for Lukashenko. No observers.'),
    ('05-039-0046', 'Too many for Lukashenko. High turnout. One observer.'),
    ('05-039-0050', 'Too many for Lukashenko. Conflict reported.'),
    ('05-040-0018', 'Too many for Lukashenko. No observers.'),
    ('05-043-0005', 'Too many for Lukashenko. Zubr report.'),
    ('05-044-0002', 'Too many for Lukashenko. High turnout. One observer.'),
    ('05-041-0005', 'Too many for Lukashenko. Zubr report.'),
    ('05-041-0007', 'Too many for Lukashenko. Zubr report.'),
    ('05-041-0022', 'Too many for Lukashenko. Zubr report.'),
    ('05-041-0032', 'Too many for Lukashenko. No observers.'),
    ('05-044-0004', 'Too many for Lukashenko. No observers.'),
    ('05-045-0010', 'Too many for Lukashenko. No observers.'),
    ('05-045-0014', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0001', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0004', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0006', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0012', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0028', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0032', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0034', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0039', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0063', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0068', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0030', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0031', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0033', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0035', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0038', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0047', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0048', 'Too many for Lukashenko. High turnout. One observer.'),
    ('05-141-0060', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0062', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0067', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0071', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0073', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0076', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0082', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0085', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0086', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0088', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0089', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0099', 'Too many for Lukashenko. Zubr report.'),
    ('05-141-0102', 'Too many for Lukashenko. High turnout. One observer.'),
    ('05-141-0106', 'Too many for Lukashenko. Zubr report.'),
    
    ('06-012-0056', 'Too many for Lukashenko. Conflict reported.'),
    ('06-012-0058', 'Too many for Lukashenko. High turnout. One observer.'),
    ('06-012-0085', 'Too many for Lukashenko. No observers.'),
    ('06-012-0067', 'Too many for Lukashenko. Zubr report.'),
    ('06-012-0080', 'Too many for Lukashenko. Zubr report.'),
    ('06-012-0082', 'Too many for Lukashenko. High turnout. One observer.'),
    ('06-012-0083', 'Too many for Lukashenko. High turnout. One observer.'),
    ('06-013-0010', 'Too many for Lukashenko. Zubr report.'),
    ('06-013-0014', 'Too many for Lukashenko. No observers.'),
    ('06-013-0038', 'Too many for Lukashenko. Zubr report.'),
    ('06-121-0006', 'Too many for Lukashenko. High turnout. One observer.'),
    ('06-125-0010', 'Too many for Lukashenko. Zubr report.'),
    ('06-135-0001', 'Too many for Lukashenko. High turnout. One observer.'),
    ('06-135-0020', 'Too many for Lukashenko. High turnout. One observer.'),
    ('06-135-0007', 'Too many for Lukashenko. Zubr report.'),
    
    ('01-047-0030', 'Too many for Lukashenko. Zubr report.'),
    ('01-049-0014', 'Too many for Lukashenko. No signature.'),
    ('01-057-0002', 'Too many for Lukashenko. Zubr report.'),
    ('01-063-0021', 'Too many for Lukashenko. Zubr report.'),
    ('01-011-0075', 'Too many for Lukashenko. Conflict reported.'),
    
    ('02-016-0033', 'Too many for Lukashenko. Late report.'),
    ('04-022-0074', 'Too many for Lukashenko. Zubr report.'),
    ('04-022-0075', 'Too many for Lukashenko. Zubr report.'),
    ('04-112-0054', 'Too many for Lukashenko. No observers.'),
    
    ('07-001-0061', 'Too many for Lukashenko. One observer.'),
    ('07-001-0064', 'Too many for Lukashenko. Zubr report. Conflict.'),
    ('07-001-0088', 'Too many for Lukashenko. Zubr report. Conflict.'),
    ('07-006-0005', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0012', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0021', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0023', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0035', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0041', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0044', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0048', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0007', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0043', 'Too many for Lukashenko. Zubr report.'),
    ('07-006-0059', 'Too many for Lukashenko. Zubr report.'),
    ('07-009-0017', 'Too many for Lukashenko. Zubr report.'),
    ('07-003-0020', 'Too many for Lukashenko. Zubr report.'),
    ('07-002-0043', 'Too many for Lukashenko. Zubr report.'),
    ('07-002-0050', 'Too many for Lukashenko. Zubr report.'),
    ('07-002-0065', 'Too many for Lukashenko. Zubr report.'),
    ('07-003-0010', 'Too many for Lukashenko. Zubr report.'),
    ('07-003-0011', 'Too many for Lukashenko. Zubr report.'),
    ('07-003-0019', 'Too many for Lukashenko. Zubr report.'),
    ('07-003-0021', 'Too many for Lukashenko. Zubr report.'),
    ('07-003-0061', 'Too many for Lukashenko. Zubr report.'),
    ('07-003-0093', 'Too many for Lukashenko. Zubr report.'),
    ('07-004-0018', 'Too many for Lukashenko. Zubr report.'),
    ('07-004-0047', 'Too many for Lukashenko. Zubr report.'),
    ('07-005-0006', 'Too many for Lukashenko. Zubr report.'),
    ('07-005-0014', 'Too many for Lukashenko. Zubr report.'),  
    ('07-006-0070', 'Too many for Lukashenko. Zubr report.'),
    ('07-002-0115', 'Too many for Lukashenko. Zubr report.'),
    
    ('01-055-0017', 'Too few Tihanovkaja. No observers.'),
    ('01-056-0048', 'Too few Tihanovkaja. Conflict reported.'),
    ('02-065-0015', 'Too few Tihanovkaja. No observers.'),
    ('02-069-0006', 'Too few Tihanovkaja. No observers.'),
    ('02-074-0015', 'Too few Tihanovkaja. Zubr report.'),
    ('02-075-0056', 'Too few Tihanovkaja. Zubr report.'),
    ('02-077-0016', 'Too few Tihanovkaja. Zubr report/No observers.'),
    ('03-091-0029', 'Too few Tihanovkaja. Zubr report.'),
    ('03-091-0048', 'Too few Tihanovkaja. No observers.'),
    ('03-096-0004', 'Too few Tihanovkaja. Zubr report.'),
    ('03-100-0045', 'Too few Tihanovkaja. No observers.'),
    ('04-021-0040', 'Too few Tihanovkaja. No observers.'),
    ('04-119-0024', 'Too few Tihanovkaja. No observers.'),
    ('06-024-0050', 'Too few Tihanovkaja. No observers.'),
    ('05-041-0028', 'Too few Tihanovkaja. Zubr report.'),
    ('05-141-0016', 'Too few Tihanovkaja. No observers.'),
    ('05-141-0029', 'Too few Tihanovkaja. Zubr report.'),
    ('05-141-0007', 'Too few Tihanovkaja. Zubr report.'),
    ('05-141-0084', 'Too few Tihanovkaja. Zubr report.'),
    ('06-013-0017', 'Too few Tihanovkaja. Conflict reported.'),
    ('06-135-0009', 'Too few Tihanovkaja. Zubr report.'),
    ('07-001-0078', 'Too few Tihanovkaja. Conflict reported.'),
    ('07-006-0016', 'Too few Tihanovkaja. Zubr report.'),
    ('07-006-0071', 'Too few Tihanovkaja. Zubr report.'),
    
    ('01-011-0007', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('01-011-0036', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('01-011-0046', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('01-011-0076', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('02-074-0005', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('02-066-0013', 'High turnout, many for Lukashenko. Minor zubr report.'),
    
    ('04-021-0011', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('04-022-0068', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('05-026-0009', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('06-023-0135', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('07-001-0059', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('07-004-0028', 'High turnout, many for Lukashenko. Minor zubr report.'),
    ('07-005-0018', 'High turnout, many for Lukashenko. Minor zubr report.'),
    
    ('01-010-0004', 'High turnout, many for Lukashenko. No zubr report.'),
    ('01-011-0066', 'High turnout, many for Lukashenko. No zubr report.'),
    ('01-011-0088', 'High turnout, many for Lukashenko. No zubr report.'),
    ('01-049-0006', 'High turnout, many for Lukashenko. No zubr report.'),
    ('01-054-0005', 'High turnout, many for Lukashenko. No zubr report.'),
    ('01-054-0009', 'High turnout, many for Lukashenko. No zubr report.'),
    ('02-016-0056', 'High turnout, many for Lukashenko. No zubr report.'),
    ('02-016-0071', 'High turnout, many for Lukashenko. No zubr report.'),
    ('02-075-0002', 'High turnout, many for Lukashenko. No zubr report.'),
    ('03-096-0031', 'High turnout, many for Lukashenko. No zubr report.'),
    ('03-096-0043', 'High turnout, many for Lukashenko. No zubr report.'),
    ('04-022-0082', 'High turnout, many for Lukashenko. No zubr report.'),
    ('04-022-0095', 'High turnout, many for Lukashenko. No zubr report.'),
    ('04-112-0023', 'High turnout, many for Lukashenko. No zubr report.'),
    ('04-119-0011', 'High turnout, many for Lukashenko. No zubr report.'),
    ('04-119-0019', 'High turnout, many for Lukashenko. No zubr report.'),
    ('05-026-0017', 'High turnout, many for Lukashenko. No zubr report.'),
    ('05-038-0003', 'High turnout, many for Lukashenko. No zubr report.'),
    ('06-012-0084', 'High turnout, many for Lukashenko. No zubr report.'),
    ('01-010-0030', 'Trusted. High turnout, many for Lukashenko. No zubr report.'),
    ('05-041-0011', 'Trusted. High turnout, many for Lukashenko. No zubr report.'),
    ('05-041-0026', 'Trusted. High turnout, many for Lukashenko. No zubr report.'),
    
]

In [585]:
# Keep only the stations whose id is not listed in non_trusted_vp.
df = df.loc[
    ~df['id'].isin([station_id for station_id, _reason in non_trusted_vp])
]

In [586]:
# Attach the columns from zubr.csv. The join is an inner join on 'id', so
# only stations present in both tables remain in df.
zubr = pd.read_csv('zubr.csv')
df = pd.merge(df, zubr, on='id', how='inner')

In [587]:
# The 'reason' column of the violation-code table; later used as the list of
# per-code column names to drop from df.
reasons = pd.read_csv('zubr-violation-codes.csv').loc[:, 'reason']

In [588]:
# Combine the individual violation columns with `|` into two summary columns.
df = df.assign(**{
    'major-violations': df['non-transparent-counting'] | df['late-report'],
    'minor-violations': (
        df['accreditation-reject']
        | df['let-observer-in-violation']
        | df['no-let-observer-in']
        | df['observer-pushed-away']
        | df['force-beforehand-voting']
        | df['home-voting-violation']
        | df['no-medcine-on-poll-station']
        | df['other']
        | df['observer-limitations']
    ),
})


In [589]:
# Remove every column named in `reasons`, keeping only 'wrong-voters-number'.
df = df.drop(columns=list(set(reasons) - {'wrong-voters-number'}))

In [590]:
# Persist the filtered station table and the exclusion list next to it.
df.to_csv('trusted-for-alternative.csv', index=False)
pd.DataFrame(
    data=non_trusted_vp,
    columns=['id', 'non_trust_reason'],
).to_csv('outliers.csv', index=False)

In [591]:
# Same chart as before, now drawn from the filtered and merged df.
draw_chart(
    candidate='tihanovkaja',
    X_source='photoVoices',
    X_title='Ballot photos for Tsihanouskaya',
)

In [592]:
# Same chart as before, now drawn from the filtered and merged df.
draw_chart(
    candidate='tihanovkaja',
    X_source='registered',
    X_title='People registered on platform',
)

In [593]:
def tih_photo(area, coeff):
    """Plot official Tsihanouskaya votes against ballot photos for one area.

    Reads the module-level ``df``. Only stations in ``area`` with
    ``observers > 0`` are plotted. For each region number 1..8 two marker
    traces are added, split on the ``'wrong-voters-number'`` column
    ("less trusted" where it is true, "more trusted" otherwise). A final line
    trace draws ``photos * coeff`` over all selected stations.

    Parameters
    ----------
    area : str
        Value of the ``'area'`` column to select.
    coeff : float
        Multiplier applied to ``'tihanovkaja_photoVoices'`` for the line trace.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    fig = Figure()
    mask = (df['area'] == area) & (df['observers'] > 0)
    for region in range(1, 9):
        data = df[mask & (df['region'] == region)]
        flagged = data['wrong-voters-number']
        for subset, label in (
            (data[~flagged], 'more trusted'),
            (data[flagged], 'less trusted'),
        ):
            fig.add_trace(Scatter(
                x=subset['tihanovkaja_photoVoices'],
                y=subset['tihanovkaja_officialVotes'],
                mode='markers',
                name=f'{region} {label}',
                # Hover text must come from the same rows as x/y. Using the
                # unfiltered frame here would attach ids to the wrong points,
                # because text is matched to points by position.
                text=subset['id'],
            ))

    data = df[mask]

    fig.add_trace(Scatter(
        x=data['tihanovkaja_photoVoices'],
        y=data['tihanovkaja_photoVoices'] * coeff,
        mode='lines',
        # This line spans every region, so it is not labelled with the last
        # value of the loop variable.
        name='expected',
        text=data['id'],
    ))

    fig.update_xaxes(title='Photos')
    fig.update_yaxes(title='People voted for Tsihanouskaya')

    fig.show()


In [543]:
# City stations, with the expectation line drawn at photos * 3.209.
tih_photo(area='city', coeff=3.209)

In [71]:
# Capital stations, with the expectation line drawn at photos * 2.34.
tih_photo(area='capital', coeff=2.34)

In [164]:
# Show the row(s) for a single station id.
df.loc[df['id'] == '02-074-0004']

Unnamed: 0,id,town,area,latitude,longitude,region,monitoredVotes,registered,photoVoices,officialVotes,...,lukashenko_officialVotes,lukashenko_photoVoices,tihanovkaja_registered,tihanovkaja_officialVotes,tihanovkaja_photoVoices,zubr_id,observers,wrong-voters-number,major-violations,minor-violations
82,02-074-0004,Navapolack,town_over100,55.536701,28.631132,2,,375,218,1824.0,...,946.0,1,360,579.0,209,1910,3,False,False,False


In [183]:
# Registered-vs-official correlation over both town categories together.
data = df.loc[(df['area'] == 'town_over100') | (df['area'] == 'town_below100')]
corr(x=data['tihanovkaja_registered'], y=data['tihanovkaja_officialVotes'])

0.7397131543504594

In [184]:
# Same correlation, printed separately for regions 1..6 (both town categories).
for region in range(1, 7):
    data = df.loc[
        (df['region'] == region)
        & df['area'].isin(['town_over100', 'town_below100'])
    ]
    print(corr(x=data['tihanovkaja_registered'], y=data['tihanovkaja_officialVotes']))

0.8148024542782372
0.458596105796486
0.8458481076014803
0.7949299473704025
0.6725531574400494
0.16752255695803178


In [185]:
# Per-region correlation for towns in the 'town_over100' category.
for region in range(1, 7):
    data = df.loc[(df['region'] == region) & (df['area'] == 'town_over100')]
    print(corr(x=data['tihanovkaja_registered'], y=data['tihanovkaja_officialVotes']))

0.9671126139210215
0.8441627396813576
0.7664715652479651
0.8628356345129207
0.8688733037528786
0.18765332873603865


In [186]:
# Per-region correlation for towns in the 'town_below100' category.
for region in range(1, 7):
    data = df.loc[(df['region'] == region) & (df['area'] == 'town_below100')]
    print(corr(x=data['tihanovkaja_registered'], y=data['tihanovkaja_officialVotes']))

0.8038495262837639
0.29288119961165665
0.8762840180195197
0.7322163059269402
0.6247519098851327
0.06956746137832719


In [187]:
# Per-region correlation for 'city' stations. The recorded output shows nan
# for two of the regions, together with numpy warnings.
for region in range(1, 7):
    data = df.loc[(df['region'] == region) & (df['area'] == 'city')]
    print(corr(x=data['tihanovkaja_registered'], y=data['tihanovkaja_officialVotes']))

0.7798940192198369
0.4732193614434622
nan
0.6823158761379433
nan
0.5910395446906795



Degrees of freedom <= 0 for slice


divide by zero encountered in true_divide


invalid value encountered in multiply


Mean of empty slice.


invalid value encountered in true_divide


Degrees of freedom <= 0 for slice


divide by zero encountered in true_divide


invalid value encountered in multiply



In [278]:
# Per-region correlation for 'village' stations.
for region in range(1, 7):
    data = df.loc[(df['region'] == region) & (df['area'] == 'village')]
    print(corr(x=data['tihanovkaja_registered'], y=data['tihanovkaja_officialVotes']))

0.9127870425101231
-0.14318408144204003
0.8313641431588108
0.9665262010563016
0.8243420306870015
-0.06442117899521313


In [280]:
# Correlation over all 'village' stations, regardless of region.
data = df.loc[df['area'] == 'village']
corr(x=data['tihanovkaja_registered'], y=data['tihanovkaja_officialVotes'])

0.8199947461573237