In [1]:
import pandas as pd
import numpy as np

from plotly.graph_objects import Scatter, Figure

In [2]:
# Main table used by every chart below. Later cells read its 'id', 'area',
# 'region', 'registered', 'outlier-reason', 'tihanovkaja_registered' and
# '<candidate>_officialVotes' columns.
# NOTE(review): presumably one row per polling station - confirm against the CSV.
df = pd.read_csv('trusted_by_voice_extended.csv')

In [3]:
# Show the distinct 'outlier-reason' codes present in the data. Rows where the
# value is NaN are the ones the chart functions plot as 'Trusted'.
set(df['outlier-reason'])

{'luhashenko-report',
 'lukashenko-high-turnout',
 'lukashenko-high-turnout-one-observer',
 'lukashenko-no-observers',
 'lukashenko-one-observer',
 'lukashenko-report-and-high-turnout',
 'mess-in-report',
 nan,
 'report',
 'tihanovkaja-no-observers',
 'tihanovkaja-report',
 'trusted',
 'trusted-high-turnout-many-for-lukashenko',
 'trusted-many-for-lukashenko'}

In [4]:
# Prefixes of the '<prefix>_officialVotes' columns that protest_chart adds up
# to get the y value of each point.
candidates = [
    'against',
    'cherechen',
    'corrupted',
    'dmitriyev',
    'kanopatskaja',
    'tihanovkaja',
]

In [5]:
# Legend groups for the charts: each entry maps a set of raw 'outlier-reason'
# codes to one display name and one marker colour. The spelling
# 'luhashenko-report' is kept as-is because that is the value present in the data.
reasons = [
    {'name': label, 'codes': codes, 'colour': colour}
    for label, codes, colour in (
        (
            'Violation reported',
            ['luhashenko-report', 'lukashenko-report-and-high-turnout', 'report', 'tihanovkaja-report'],
            'red',
        ),
        (
            'No observers',
            ['tihanovkaja-no-observers', 'lukashenko-no-observers'],
            'orange',
        ),
        (
            'One observer',
            ['lukashenko-high-turnout-one-observer', 'lukashenko-one-observer'],
            'darkkhaki',
        ),
        (
            'High turnout',
            ['lukashenko-high-turnout'],
            'violet',
        ),
        (
            'Mess in official data',
            ['mess-in-report'],
            'grey',
        ),
        (
            'Trusted outlier',
            ['trusted', 'trusted-high-turnout-many-for-lukashenko', 'trusted-many-for-lukashenko'],
            'blue',
        ),
    )
]

In [6]:
# Pre-computed coefficient tables, loaded from CSV files that are not produced in this notebook.
# protest_coeff / tih_coeff: looked up by ('area', 'region') in the chart functions;
# they provide 'coefficient' and 'source' (protest_coeff also 'correlation').
protest_coeff = pd.read_csv('protest_registered_coefficients.csv')
tih_coeff = pd.read_csv('tihanovkaja_registered_coefficients.csv')
# protest_areas: looked up by 'area', by 'region', or by source == 'total' in protest_chart.
protest_areas = pd.read_csv('protest_registered_areas_coefficients.csv')
# tih_areas and tih_original_df are not referenced by any cell visible in this notebook.
tih_areas = pd.read_csv('tihanovkaja_registered_areas_coefficients.csv')
tih_original_df = pd.read_csv('tihanovkaja_registered_original_coefficients.csv')
# protest_original_df: 'coefficient' and 'correlation' shown as the "original" values in chart titles.
protest_original_df = pd.read_csv('protest_registered_original_coefficients.csv')


In [93]:
# Display names for the numeric region codes; codes are consecutive and start at 1.
regions = dict(enumerate(
    ['Brest', 'Viciebsk', 'Homeĺ', 'Hrodna', 'Minsk', 'Mahilioŭ'],
    start=1,
))

def build_chart_info(area, region, source):
    """Build the comma-separated subtitle describing which slice a chart shows.

    Args:
        area: area code, or None when the chart is not restricted to an area.
        region: key into the module-level ``regions`` mapping, or None.
        source: coefficient source label, or None. The value 'area-region'
            is treated like None and is not shown.

    Returns:
        The parts joined with ', ', in the order area, region name,
        'source=<source>', 'Total'. 'Total' is added whenever both ``area``
        and ``region`` are None.

    Raises:
        KeyError: if ``region`` is not None and is not a key of ``regions``.
    """
    parts = [] if area is None else [area]

    if region is not None:
        # The ' region' suffix is dropped only when the area is 'city'.
        suffix = '' if area == 'city' else ' region'
        parts.append(regions[region] + suffix)

    if not (source is None or source == 'area-region'):
        parts.append('source=%s' % source)

    if area is None and region is None:
        parts.append('Total')

    return ', '.join(parts)

def build_chart_data(source, coeff, orig_coeff, corr, orig_corr):
    """Format the coefficient (k) and correlation (r) line of a chart title.

    Args:
        source: coefficient source label. None or 'area-region' selects the
            short form that shows only ``coeff`` and ``corr``.
        coeff: coefficient to display.
        orig_coeff: value shown in front of ``coeff`` in the long form;
            None or NaN is rendered as '?'.
        corr: correlation to display.
        orig_corr: value shown in front of ``corr`` in the long form;
            None or NaN is rendered as '?'.

    Returns:
        'k=<coeff>, r=<corr>' in the short form, otherwise
        'k=<orig_coeff> (<coeff>), r=<orig_corr> (<corr>)'. All numbers use
        three decimal places.
    """
    if source is None or source == 'area-region':
        return 'k={:.3f}, r={:.3f}'.format(coeff, corr)

    def _with_original(value, original):
        # A missing original is shown as '?' rather than 'nan'.
        if original is None or np.isnan(original):
            shown = '?'
        else:
            shown = '%.3f' % original
        return '{} ({:.3f})'.format(shown, value)

    k_text = _with_original(coeff, orig_coeff)
    r_text = _with_original(corr, orig_corr)
    return 'k={}, r={}'.format(k_text, r_text)

In [50]:
def tih_chart(area=None, region=None):
    """Scatter 'tihanovkaja_officialVotes' against 'tihanovkaja_registered'.

    Rows of the module-level ``df`` are optionally filtered by ``area`` and
    ``region`` and then drawn as one marker trace per entry of ``reasons``
    (matched on 'outlier-reason'), plus a 'Trusted' trace for rows whose
    'outlier-reason' is NaN. When both ``area`` and ``region`` are given, the
    matching coefficient is looked up in ``tih_coeff``, its source is printed,
    and an 'Expected' line y = registered * coefficient is added.

    Args:
        area: value of df['area'] to keep; a falsy value means no area filter.
        region: value of df['region'] to keep; a falsy value means no region filter.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        IndexError: if both filters are given and ``tih_coeff`` has no row
            for that (area, region) pair.
    """
    # Keep the mask a plain boolean ndarray so the in-place updates are
    # ndarray-on-ndarray operations.
    mask = np.ones(len(df), dtype=bool)
    if area:
        mask &= (df['area'] == area).values
    if region:
        mask &= (df['region'] == region).values

    fig = Figure(layout_title='Outliers')

    for reason in reasons:
        data = df[mask & df['outlier-reason'].isin(reason['codes']).values]

        fig.add_trace(Scatter(
            x=data['tihanovkaja_registered'],
            y=data['tihanovkaja_officialVotes'],
            mode='markers',
            name=reason['name'],
            text=data['id'],
            marker={
                'color': reason['colour'],
            },
        ))

    # Rows without any outlier reason are drawn larger and outlined.
    data = df[mask & df['outlier-reason'].isna().values]

    fig.add_trace(Scatter(
        x=data['tihanovkaja_registered'],
        y=data['tihanovkaja_officialVotes'],
        mode='markers',
        name='Trusted',
        text=data['id'],
        marker={
            'color': 'greenyellow',
            'size': 10,
            'line': {'width': 2},
        }
    ))

    if area and region:
        coeff, source = tih_coeff.loc[
            (tih_coeff['area'] == area) & (tih_coeff['region'] == region),
            ['coefficient', 'source']
        ].values[0]

        # Shows where the coefficient for this slice came from.
        print(source)

        data = df[mask]
        registered = data['tihanovkaja_registered']

        fig.add_trace(Scatter(
            x=[0] + list(registered),
            y=[0] + list(registered * coeff),
            mode='lines',
            name='Expected',
            # x and y start with an extra origin point, so the hover labels
            # need a matching placeholder; without it every id is attached to
            # the point before the one it belongs to.
            text=[''] + list(data['id']),
        ))

    fig.update_xaxes(rangemode='tozero')
    fig.update_yaxes(rangemode='tozero')
    fig.show()
        
        
            
    
    
    

In [113]:
# Area 'town_below100' within region 1 ('Brest' in the regions mapping).
tih_chart(area='town_below100', region=1)

region


In [134]:
def protest_chart(area=None, region=None):
    """Scatter the summed non-Lukashenko official votes against 'registered'.

    The y value of every point is the sum of the '<c>_officialVotes' columns
    for each ``c`` in the module-level ``candidates``. Rows of ``df`` are
    optionally filtered by ``area`` and ``region``, drawn as one marker trace
    per entry of ``reasons`` plus a 'Trusted' trace for rows whose
    'outlier-reason' is NaN, and overlaid with an 'Expected' line
    y = registered * coefficient. The title shows the slice description and
    the coefficient / correlation used.

    The coefficient is taken from:
      * ``protest_coeff`` (with the original values from
        ``protest_original_df``) when both ``area`` and ``region`` are given;
      * ``protest_areas`` matched on 'area' when only ``area`` is given;
      * ``protest_areas`` matched on 'region' when only ``region`` is given;
      * the ``protest_areas`` row with source == 'total' otherwise.

    Args:
        area: value of df['area'] to keep; a falsy value means no area filter.
        region: value of df['region'] to keep; a falsy value means no region
            filter. With ``area=None`` and ``region=5`` the rows whose area is
            'minsk_suburb' are excluded as well.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        IndexError: if the coefficient table has no row for the requested slice.
    """
    def _votes(frame):
        # Element-wise sum of the per-candidate columns (NaN propagates).
        return sum(frame[f'{c}_officialVotes'] for c in candidates)

    # Keep the mask a plain boolean ndarray so the in-place updates are
    # ndarray-on-ndarray operations.
    mask = np.ones(len(df), dtype=bool)
    if area:
        mask &= (df['area'] == area).values

    if region:
        mask &= (df['region'] == region).values

    if area is None and region == 5:
        mask &= (df['area'] != 'minsk_suburb').values

    fig = Figure()

    # Work on a copy so the shared df is not modified. Missing 'corrupted'
    # counts are replaced so they do not turn the vote sum into NaN.
    # NOTE(review): the replacement value 10 is not explained in this notebook - confirm.
    fixed_df = df.copy()
    fixed_df.loc[fixed_df['corrupted_officialVotes'].isna(), 'corrupted_officialVotes'] = 10

    for reason in reasons:
        data = fixed_df[mask & df['outlier-reason'].isin(reason['codes']).values]

        fig.add_trace(Scatter(
            x=data['registered'],
            y=_votes(data),
            mode='markers',
            name=reason['name'],
            text=data['id'],
            marker={'color': reason['colour']}
        ))

    data = fixed_df[mask & df['outlier-reason'].isna().values]

    fig.add_trace(Scatter(
        x=data['registered'],
        y=_votes(data),
        mode='markers',
        name='Trusted',
        text=data['id'],
        marker={
            'color': 'limegreen',
            # Smaller markers when the whole data set is shown.
            'size': 10 if area or region else 6,
            'line': {'width': 1},
        }
    ))

    source = None
    orig_coeff = None
    orig_corr = None

    if area and region:
        coeff, source, corr = protest_coeff.loc[
            (protest_coeff['area'] == area) & (protest_coeff['region'] == region),
            ['coefficient', 'source', 'correlation']
        ].values[0]
        orig_coeff, orig_corr = protest_original_df[
            (protest_original_df['area'] == area) &
            (protest_original_df['region'] == region)
        ][['coefficient', 'correlation']].values[0]

    elif area is not None:
        coeff, corr = protest_areas[protest_areas['area'] == area][['coefficient', 'correlation']].values[0]

    elif region is not None:
        coeff, corr = protest_areas[protest_areas['region'] == region][['coefficient', 'correlation']].values[0]

    else:
        coeff, corr = protest_areas[protest_areas['source'] == 'total'][['coefficient', 'correlation']].values[0]

    data = fixed_df[mask]

    fig.add_trace(Scatter(
        x=[0] + list(data['registered']),
        y=[0] + list(data['registered'] * coeff),
        mode='lines',
        name='Expected',
        # x and y start with an extra origin point, so the hover labels need a
        # matching placeholder; without it every id is attached to the point
        # before the one it belongs to.
        text=[''] + list(data['id']),
        line={'color': 'lightgreen'},
    ))

    fig.update_xaxes(rangemode='tozero', title='registered')
    fig.update_yaxes(rangemode='tozero', title='official votes')

    fig.update_layout(
        title='Against Lukashenko<br>{chart_info}<br>{data}'.format(
            data=build_chart_data(source, coeff, orig_coeff, corr, orig_corr),
            chart_info=build_chart_info(area, region, source)
        )
    )
    fig.show()
        
        
            
    
    
    

In [125]:
# Area 'village' within region 6 ('Mahilioŭ' in the regions mapping).
protest_chart(area='village', region=6)

In [142]:
# Area 'embassy' across all regions (no region filter).
protest_chart(area='embassy', region=None)