In [706]:
from plotly.graph_objects import Scatter, Figure
import pandas as pd
import numpy as np

In [707]:
# Polling-station table; 'outlier-reason' starts out empty (None) for every row.
df = pd.read_csv('trusted-for-alternative-fixed.csv')
df['outlier-reason'] = None

# Lookup built from the 'id' and 'observers' columns of the Zubr export: id -> observers.
zubr_observers = dict(
    pd.read_csv('zubr-observers.csv').loc[:, ['id', 'observers']].values
)

# Coefficient tables; both are queried by area/region for 'coefficient' and 'source'.
protest_coeff = pd.read_csv('protest_registered_coefficients.csv')
tih_coeff = pd.read_csv('tihanovkaja_registered_coefficients.csv')

In [708]:
# Area labels used by this notebook.
areas = [
    'city',
    'minsk_suburb',
    'capital',
    'village',
    'town_below100',
    'town_over100',
    'embassy',
]

# Column prefixes whose '<prefix>_officialVotes' columns are summed into the
# official protest vote count.
candidates = [
    'against',
    'cherechen',
    'corrupted',
    'dmitriyev',
    'kanopatskaja',
    'tihanovkaja',
]

In [709]:
def add_exp_off_trace(fig, data, coeff, name):
    """Add one marker trace of expected vs. official protest votes to ``fig``.

    Args:
        fig: figure that receives the new ``Scatter`` trace.
        data: rows to plot; read columns are ``registered``, ``id``,
            ``zubr_id`` and ``<candidate>_officialVotes`` for every entry of
            the module-level ``candidates`` list.
        coeff: multiplier applied to ``registered`` to get the x values.
        name: legend name of the trace.

    Raises:
        KeyError: if an ``id`` in ``data`` is missing from ``zubr_observers``.
    """
    expected_votes = data['registered'] * coeff

    # Accumulate the per-candidate official columns, starting from 0.
    official_votes = 0
    for candidate in candidates:
        official_votes = official_votes + data[f'{candidate}_officialVotes']

    # Hover label: station id, Zubr id in parentheses, then the observers value.
    hover_labels = [
        f'{ps_id} ({zubr_id}) {zubr_observers[ps_id]}'
        for ps_id, zubr_id in zip(data['id'], data['zubr_id'])
    ]

    trace = Scatter(
        x=expected_votes,
        y=official_votes,
        mode='markers',
        name=name,
        text=hover_labels,
    )
    fig.add_trace(trace)


def expected_vs_official_detailed(area, region):
    """Show expected protest votes against official votes for one area/region.

    Reads the coefficient for ``(area, region)`` from ``protest_coeff`` and
    prints it with its ``source``. Rows of ``df`` matching the area and region
    are drawn as six marker traces (excluded, wrong voters, no-observers,
    major, minor, ok), followed by an ``Expected`` line where y equals x.
    The figure is displayed with ``fig.show()``; nothing is returned.

    Args:
        area: value compared with the ``area`` column.
        region: value compared with the ``region`` column.

    Raises:
        IndexError: if ``protest_coeff`` has no row for this area/region.
    """
    figure = Figure(layout_title=f'Expected protest vs Official ({area} - {region})')

    in_scope = (df['area'] == area) & (df['region'] == region)

    coeff_rows = protest_coeff.loc[
        (protest_coeff['area'] == area) & (protest_coeff['region'] == region),
        ['coefficient', 'source'],
    ]
    coeff, source = coeff_rows.values[0]
    print(coeff, source)

    # Shared prefixes of the per-trace filters. The flag columns are negated
    # with ``~``, so they are presumably boolean -- verify against the CSV.
    kept = in_scope & df['outlier-reason'].isna()
    voters_ok = kept & ~df['wrong-voters-number']
    no_major = voters_ok & ~df['major-violations']
    observed = df['observers'] > 0

    # (legend name, row filter), in the order the traces are added.
    groups = (
        ('excluded', in_scope & df['outlier-reason'].notna()),
        ('wrong voters', kept & df['wrong-voters-number']),
        ('no-observers', voters_ok & (df['observers'] == 0)),
        ('major', voters_ok & df['major-violations'] & observed),
        ('minor', no_major & df['minor-violations'] & observed),
        ('ok', no_major & ~df['minor-violations'] & observed),
    )
    for label, rows in groups:
        add_exp_off_trace(figure, df[rows], coeff, label)

    # Reference line: the expected value plotted against itself.
    diagonal = df[in_scope]['registered'] * coeff
    figure.add_trace(Scatter(x=diagonal, y=diagonal, mode='lines', name='Expected'))

    figure.update_xaxes(title='Expected')
    figure.update_yaxes(title='Official')

    figure.show()

In [713]:
# Expected-vs-official protest plot for area 'village', region 1.
expected_vs_official_detailed(area='village', region=1)

3.784 region


In [711]:
def add_tih_trace(fig, data, coeff, name):
    """Add one marker trace of expected vs. official Tihanovkaja votes to ``fig``.

    Args:
        fig: figure that receives the new ``Scatter`` trace.
        data: rows to plot; read columns are ``tihanovkaja_registered``,
            ``tihanovkaja_officialVotes``, ``id`` and ``zubr_id``.
        coeff: multiplier applied to ``tihanovkaja_registered`` to get the
            x values.
        name: legend name of the trace.

    Raises:
        KeyError: if an ``id`` in ``data`` is missing from ``zubr_observers``.
    """
    expected_votes = data['tihanovkaja_registered'] * coeff
    official_votes = data['tihanovkaja_officialVotes']

    # Hover label: station id, Zubr id in parentheses, then the observers value.
    hover_labels = [
        f'{ps_id} ({zubr_id}) {zubr_observers[ps_id]}'
        for ps_id, zubr_id in zip(data['id'], data['zubr_id'])
    ]

    trace = Scatter(
        x=expected_votes,
        y=official_votes,
        mode='markers',
        name=name,
        text=hover_labels,
    )
    fig.add_trace(trace)


def tih_detailed(area, region):
    """Plot expected vs. official Tihanovkaja votes for one area/region.

    Looks up the coefficient for ``(area, region)`` in ``tih_coeff`` and prints
    it with its ``source``. Rows of ``df`` matching the area and region are
    drawn as six marker traces (excluded, wrong voters, no-observers, major,
    minor, ok), followed by an ``Expected`` line where y equals x. The figure
    is displayed with ``fig.show()``; nothing is returned.

    Args:
        area: value compared with the ``area`` column.
        region: value compared with the ``region`` column.

    Raises:
        IndexError: if ``tih_coeff`` has no row for this area/region.
    """
    fig = Figure(layout_title=f'Tihanovkaja Expected vs Official ({area} - {region})')

    # The row filter must come from tih_coeff itself. Filtering tih_coeff with
    # a mask computed on protest_coeff only works when both tables happen to
    # share index and row order; otherwise it picks the wrong coefficient or
    # fails to align.
    # NOTE(review): assumes tih_coeff has 'area' and 'region' columns like
    # protest_coeff -- confirm against the CSV.
    coeff_rows = tih_coeff.loc[
        (tih_coeff['area'] == area) & (tih_coeff['region'] == region), ['coefficient', 'source']
    ].values
    if len(coeff_rows) == 0:
        raise IndexError(
            f'no Tihanovkaja coefficient for area={area!r}, region={region!r}'
        )
    coeff, source = coeff_rows[0]
    print(coeff, source)

    mask = (df['area'] == area) & (df['region'] == region)

    # Shared prefixes of the per-trace filters. The flag columns are negated
    # with ``~``, so they are presumably boolean -- verify against the CSV.
    kept = mask & df['outlier-reason'].isna()
    voters_ok = kept & ~df['wrong-voters-number']
    no_major = voters_ok & ~df['major-violations']
    observed = df['observers'] > 0

    # (legend name, row filter), in the order the traces are added.
    groups = (
        ('excluded', mask & df['outlier-reason'].notna()),
        ('wrong voters', kept & df['wrong-voters-number']),
        ('no-observers', voters_ok & (df['observers'] == 0)),
        ('major', voters_ok & df['major-violations'] & observed),
        ('minor', no_major & df['minor-violations'] & observed),
        ('ok', no_major & ~df['minor-violations'] & observed),
    )
    for label, rows in groups:
        add_tih_trace(fig, df[rows], coeff, label)

    # Reference line: the expected value plotted against itself.
    tih_expected = df[mask]['tihanovkaja_registered'] * coeff
    fig.add_trace(Scatter(
        x=tih_expected,
        y=tih_expected,
        mode='lines',
        name='Expected',
    ))

    fig.update_xaxes(title='Expected')
    fig.update_yaxes(title='Official')

    fig.show()

In [712]:
# Tihanovkaja expected-vs-official plot for area 'town_below100', region 5.
tih_detailed(area='town_below100', region=5)

2.818 total
