In [581]:
import pandas as pd
import numpy as np
from plotly.graph_objects import Scatter, Figure

In [582]:
# Polling-station ids whose official turnout is not trusted, grouped by the reason
# for exclusion. The concatenation `non_trusted_turnout` at the bottom is what gets
# matched against df['id'] to drop stations, and the per-list lengths are used to
# build the 'reason' column of non_trusted_turnout.csv, so the concatenation order
# must stay: report, no_observer, major_violation, minal_violation, outlier.

# Exported with reason 'zubr-report'.
non_trusted_turnout_by_report = [
    '01-010-0031', '01-010-0035', '01-011-0042', '01-055-0003', '01-055-0004',
    '02-065-0001', '02-077-0001', '02-082-0020',
    '04-022-0107', 
    '03-096-0008', '03-091-0026',
    '04-106-0001',
    '05-040-0019', '05-141-0096', '05-141-0098', '05-046-0022',
    '06-013-0021', '06-013-0048', '06-129-0001',
    '07-001-0005', '07-005-0007', '07-005-0034', '07-004-0005',  
]

# Exported with reason 'no-observer'.
non_trusted_turnout_no_observer = [
    '01-010-0040', '01-011-0032', '01-060-0059', '01-062-0009', '01-053-0021', '01-142-0002',
    '02-072-0020',
    '03-101-0011', '03-085-0009', '03-088-0027', '03-101-0036',
    '04-022-0081', '04-022-0085', '04-022-0121', '04-022-0135', '04-112-0002', '04-112-0070',
    '04-115-0003', '04-119-0007', '04-117-0011',
    # NOTE(review): '05-141-037' has a 3-digit suffix while every other id in this
    # file has 4 digits; it is probably a typo (e.g. for '05-141-0037') and as written
    # it presumably matches no row of df — confirm against the source data.
    '05-026-0066', '05-027-0007', '05-141-037', '05-037-0016', '05-039-0028', 
    '05-042-0041', '05-042-0050', '05-045-0012',
    '06-135-0002',
]

# Exported with reason 'major-violation'.
non_trusted_turnout_by_major_violation = [
    '01-010-0002', 
    '07-003-0094', 
    '04-112-0006', '04-119-0029',
    '05-141-0106', '05-029-0025', '05-043-0005',
]

# Exported with reason 'minor-violation'.
# NOTE(review): "minal" in the variable name looks like a typo for "minor"; the name
# is referenced by a later cell, so rename both places together if it is fixed.
non_trusted_turnout_by_minal_violation = [
    '01-010-0006', '01-010-0015', '01-010-0025', '01-010-0032', '01-010-0034', '01-011-0038',
    '01-011-0047', '01-011-0015', '01-010-0012',
    '01-049-0013', '01-056-0005', '01-056-0007',
    '02-143-0019', 
    '03-096-0023', '03-096-0045', '03-100-0014',
    '04-118-0019', '04-105-0011',
    '05-026-0014', '05-026-0022', '05-038-0004', '05-044-0005', '05-040-0028', '05-141-0076',
    '05-027-0018', '05-034-0014', '05-036-0039', '05-039-0005', '05-040-0063', '05-042-0062', 
    '05-045-0018', '05-141-0031', '05-141-0033', '05-141-0060',
    '06-146-0035', '06-013-0020', '06-013-0038',
    '07-001-0089', '07-003-0024', '07-005-0037', '07-006-0008', '07-009-0014', 
]


# Exported with reason 'outlier'.
non_trusted_turnout_by_outlier = [
    '07-003-0058', '07-002-0072', '07-005-0002', '07-005-0026', '07-001-0016', '07-002-0017',
    '07-004-0002', '07-004-0004', '07-004-0006', '07-004-0007', '07-004-0026',
    '01-010-0010', '01-010-0011', '01-010-0027', '01-011-0049', '01-011-0063', '01-011-0079', '01-010-0008',
    '01-056-0017',
    '02-143-0016', '02-016-0056', '02-014-0005', '02-015-0001', '02-015-0026', '02-143-0022',
    '02-015-0045', '02-016-0059', '02-075-0019', '02-075-0051',
    '03-096-0031', '03-087-0009', '03-101-0015', '03-091-0015', '03-096-0036',
    # NOTE(review): '02-021-0022' sits in a row of '04-' ids next to '04-021-0044';
    # possibly a typo for '04-021-0022' — confirm against the source data.
    '04-112-0029', '04-021-0044', '02-021-0022', '04-022-0134', '04-117-0014', '04-112-0044',
    '05-026-0017', '05-039-0012', '05-027-0023', '05-042-0056',
    '06-024-0010', '06-146-0024',
]

# Flat list of every excluded station id, in the group order listed above.
non_trusted_turnout = (
    non_trusted_turnout_by_report + 
    non_trusted_turnout_no_observer + 
    non_trusted_turnout_by_major_violation +
    non_trusted_turnout_by_minal_violation + 
    non_trusted_turnout_by_outlier
)

In [583]:
# One row per excluded station: its id and the reason it was excluded.
# The (label, list) pairs are listed in the same order in which the lists were
# concatenated into `non_trusted_turnout`, so ids and reasons line up row by row.
non_trust_turnout_df = pd.DataFrame({
    'id': non_trusted_turnout,
    'reason': [
        label
        for label, station_ids in (
            ('zubr-report', non_trusted_turnout_by_report),
            ('no-observer', non_trusted_turnout_no_observer),
            ('major-violation', non_trusted_turnout_by_major_violation),
            ('minor-violation', non_trusted_turnout_by_minal_violation),
            ('outlier', non_trusted_turnout_by_outlier),
        )
        for _ in station_ids
    ],
})

# Persist the exclusion list next to the notebook (no index column).
non_trust_turnout_df.to_csv('non_trusted_turnout.csv', index=False)

In [584]:
# Station-level input table; the charts and aggregates below read columns of `df`.
df = pd.read_csv('trusted-for-alternative-fixed.csv')

# Values of df['area'] that the turnout summary iterates over.
areas = [
    'city',
    'minsk_suburb',
    'minsk_village',
    'capital',
    'village',
    'town_below100',
    'town_over100',
    'embassy',
]

# Candidate keys (not referenced by the cells visible in this notebook section).
candidates = [
    'against',
    'cherechen',
    'corrupted',
    'dmitriyev',
    'kanopatskaja',
    'tihanovkaja',
]

In [585]:
# Keep only the stations whose id is not on the non-trusted turnout list.
df = df.loc[~df['id'].isin(non_trusted_turnout)]

In [586]:
def draw_chart(area):
    """Show a scatter of official turnout per polling station for one area.

    Reads the module-level ``df``. Turnout is ``officialVotes / officialVoters``.
    One marker trace is added for each region code 1 through 8 (a trace is added
    even when the region has no rows in the area), followed by a horizontal line
    at the unweighted mean of the per-station turnout over the whole area.

    Args:
        area: value compared against ``df['area']``.
    """
    def hover_labels(frame):
        # Hover text: "<id> (<zubr_id>)" for each row of `frame`.
        return [
            f'{station_id} ({zubr_id})'
            for station_id, zubr_id in zip(frame['id'], frame['zubr_id'])
        ]

    fig = Figure(layout_title='Turnout by area')
    in_area = df['area'] == area

    for region in range(1, 9):
        stations = df[in_area & (df['region'] == region)]
        per_station = stations['officialVotes'] / stations['officialVoters']
        fig.add_trace(Scatter(
            x=stations.index,
            y=per_station,
            mode='markers',
            name=f'Turnout {region}',
            text=hover_labels(stations),
        ))

    # Flat reference line: the same mean value repeated for every station in the area.
    area_stations = df[in_area]
    area_turnout = area_stations['officialVotes'] / area_stations['officialVoters']
    fig.add_trace(Scatter(
        x=area_stations.index,
        y=[area_turnout.mean()] * len(area_stations),
        mode='lines',
        name='avg',
        text=hover_labels(area_stations),
    ))

    fig.update_xaxes(title='Poll stations')
    fig.update_yaxes(title='Turnout')
    fig.show()


In [587]:
# Per-station turnout scatter for the rows of df where area == 'capital'.
draw_chart('capital')

In [588]:
def add_trace(fig, data, name):
    """Append one marker trace of per-station turnout to ``fig``.

    The y value is ``officialVotes / officialVoters``. Each point's hover text is
    ``"<id> (<zubr_id>) <ratio>"`` where ratio is
    ``(officialVotes - lukashenko_officialVotes) /
    (registered - lukashenko_registered - ignore_registered)``
    formatted with 4 significant digits.

    Args:
        fig: figure object exposing ``add_trace``.
        data: DataFrame slice holding the columns named above plus ``id`` and ``zubr_id``.
        name: legend name for the trace.
    """
    turnout = data['officialVotes'] / data['officialVoters']

    # Ratio shown only in the hover text, not plotted.
    votes_left = data['officialVotes'] - data['lukashenko_officialVotes']
    registered_left = (
        data['registered'] - data['lukashenko_registered'] - data['ignore_registered']
    )
    hover_ratio = votes_left / registered_left

    hover = []
    for station_id, zubr_id, ratio in zip(data['id'], data['zubr_id'], hover_ratio):
        hover.append(f'{station_id} ({zubr_id}) {ratio:.4}')

    fig.add_trace(Scatter(
        x=data.index,
        y=turnout,
        mode='markers',
        name=name,
        text=hover,
    ))
    

def turnout_detailed(area, region):
    """Show turnout for one area/region, split into station categories.

    Reads the module-level ``df`` and adds one trace per category via ``add_trace``:

    * ``wrong voters``  - rows flagged in ``wrong-voters-number``;
    * ``no-observers``  - not flagged, ``observers == 0``;
    * ``major``         - not flagged, observed, ``major-violations`` set;
    * ``minor``         - not flagged, observed, no major but ``minor-violations`` set;
    * ``ok``            - not flagged, observed, neither violation flag set.

    A final line trace marks the unweighted mean of ``officialVotes / officialVoters``
    over every selected station.

    Args:
        area: value compared against ``df['area']``.
        region: value compared against ``df['region']``.
    """
    selected = (df['area'] == area) & (df['region'] == region)

    flagged = df['wrong-voters-number']
    major = df['major-violations']
    minor = df['minor-violations']
    observed = df['observers'] > 0
    unobserved = df['observers'] == 0
    usable = selected & ~flagged

    # (legend name, row mask) in the order the traces are drawn.
    categories = (
        ('wrong voters', selected & flagged),
        ('no-observers', usable & unobserved),
        ('major', usable & major & observed),
        ('minor', usable & ~major & minor & observed),
        ('ok', usable & ~major & ~minor & observed),
    )

    fig = Figure(layout_title='Turnout by area')
    for legend, rows in categories:
        add_trace(fig, df[rows], legend)

    # Reference line: the same mean turnout repeated for every selected station.
    stations = df[selected]
    mean_turnout = (stations['officialVotes'] / stations['officialVoters']).mean()
    labels = [
        f'{station_id} ({zubr_id})'
        for station_id, zubr_id in zip(stations['id'], stations['zubr_id'])
    ]
    fig.add_trace(Scatter(
        x=stations.index,
        y=[mean_turnout] * len(stations),
        mode='lines',
        name='avg',
        text=labels,
    ))

    fig.update_xaxes(title='Poll stations')
    fig.update_yaxes(title='Turnout')
    fig.show()

In [589]:
# Category breakdown of turnout for area 'city', region 6.
turnout_detailed('city', 6)

In [590]:
# Category breakdown of turnout for area 'capital', region 7.
turnout_detailed('capital', 7)

In [591]:
# Category breakdown of turnout for area 'town_below100', region 3.
turnout_detailed('town_below100', 3)

In [592]:
def tih_to_lu(area):
    """Show, per polling station in ``area``, the T/L official-vote ratio.

    Reads the module-level ``df``. For each region code 1 through 8 a marker trace
    is added whose y value is
    ``tihanovkaja_officialVotes / lukashenko_officialVotes`` and whose x value is
    the station's row index in ``df``. Hover text is ``"<id> (<zubr_id>)"``.

    Args:
        area: value compared against ``df['area']``.
    """
    fig = Figure(layout_title='Tih to Lu')
    mask = df['area'] == area

    for region in range(1, 9):
        data = df[mask & (df['region'] == region)]

        fig.add_trace(Scatter(
            x=data.index,
            y=data['tihanovkaja_officialVotes'] / data['lukashenko_officialVotes'],
            mode='markers',
            name=f'T/L {region}',
            text=[f'{a} ({b})' for a, b in zip(data['id'], data['zubr_id'])],
        ))

    # The x axis carries the station index and the y axis the T/L ratio, so the
    # axis titles must be assigned that way round.
    fig.update_xaxes(title='Poll station')
    fig.update_yaxes(title='T/L')

    fig.show()

In [593]:
# T/L vote-ratio scatter for the rows of df where area == 'city'.
tih_to_lu('city')

In [594]:
# Mean per-station official turnout (officialVotes / officialVoters, unweighted),
# collected as records of the form {'area', 'turnout', 'region'}.
turnouts = []

# Every area except 'city' gets a single record with an empty region.
# Iterate the `areas` list itself rather than a set difference: the iteration order
# of a set of strings changes with hash randomization between interpreter sessions,
# which made the row order of the exported CSV differ from run to run.
for area in areas:
    if area == 'city':
        continue

    data = df[df['area'] == area]
    turnout = (data['officialVotes'] / data['officialVoters']).mean()
    turnouts.append({
        'area': area,
        'turnout': turnout,
        'region': '',
    })

# 'city' is broken down by region: codes 1-6 with 5 skipped.
# NOTE(review): the reason region 5 is skipped (and 7+ not covered) is not recorded
# in this notebook — confirm.
for region in range(1, 7):
    if region == 5:
        continue

    data = df[(df['area'] == 'city') & (df['region'] == region)]
    turnout = (data['officialVotes'] / data['officialVoters']).mean()
    turnouts.append({
        'area': 'city',
        'turnout': turnout,
        'region': region,
    })

In [595]:
# Tabulate the turnout records with a fixed column order: area, region, turnout.
ts_df = pd.DataFrame(turnouts, columns=['area', 'region', 'turnout'])

In [596]:
# Replace the 'city' turnout of region 3 with the mean of the 'city' turnouts of
# regions 2, 3 and 6.
# The mean is computed from the raw `turnouts` records rather than from `ts_df`, so
# re-running this cell does not feed the already-overwritten region-3 value back
# into the average. On a first run the result is the same as averaging from `ts_df`.
# NOTE(review): why regions 2/3/6 are pooled for region 3 is not recorded here — confirm.
ts_df.loc[
    (ts_df['region'] == 3) & (ts_df['area'] == 'city'),
    'turnout'
] = pd.Series(
    [
        record['turnout']
        for record in turnouts
        if record['area'] == 'city' and record['region'] in (2, 3, 6)
    ],
    dtype='float64',
).mean()

In [597]:
# Export the turnout table; floats are written with 3 decimal places, no index column.
ts_df.to_csv('turnout.csv', float_format='%.3f', index=False)