In [59]:
import pandas as pd
import numpy as np

from plotly.graph_objects import Scatter, Figure

In [60]:
# Load the main table used by every chart below. Later cells read its
# 'outlier-reason', 'area', 'region', 'id', 'registered' and
# '<name>_registered' / '<name>_officialVotes' columns.
df = pd.read_csv('trusted_by_voice_extended.csv')

In [61]:
# Show the distinct values of the 'outlier-reason' column. NaN is part of the
# result (rows with no reason recorded); the chart functions plot those rows
# as 'Trusted'. Note the code 'luhashenko-report' is spelled that way in the
# data itself, so the `reasons` mapping has to use the same spelling.
set(df['outlier-reason'])

{'alternative-candidates',
 'luhashenko-report',
 'lukashenko-high-turnout',
 'lukashenko-high-turnout-one-observer',
 'lukashenko-no-observers',
 'lukashenko-one-observer',
 'lukashenko-report-and-high-turnout',
 'mess-in-report',
 nan,
 'report',
 'tihanovkaja-no-observers',
 'tihanovkaja-report',
 'trusted',
 'trusted-high-turnout-many-for-lukashenko',
 'trusted-many-for-lukashenko'}

In [125]:
# Column prefixes whose `<prefix>_officialVotes` columns are added together
# by protest_chart to form the y value of each point.
candidates = [
    'against',
    'cherechen',
    'corrupted',
    'dmitriyev',
    'kanopatskaja',
    'tihanovkaja',
]

In [62]:
# Legend label -> list of raw 'outlier-reason' codes drawn under that label.
# Insertion order is the order in which the chart functions add their traces.
# Rows whose 'outlier-reason' is NaN are not listed here; the chart functions
# plot them separately as 'Trusted'.
reasons = {
    'Alternative candidates related': [
        'alternative-candidates',
    ],
    'Violation reported': [
        'luhashenko-report',  # spelled this way in the data
        'lukashenko-report-and-high-turnout',
        'report',
        'tihanovkaja-report',
    ],
    'No observers': [
        'tihanovkaja-no-observers',
        'lukashenko-no-observers',
    ],
    'Many for Lukashenko and one observer': [
        'lukashenko-high-turnout-one-observer',
        'lukashenko-one-observer',
    ],
    'Many for Lukashenko and high turnout': [
        'lukashenko-high-turnout',
    ],
    'Mess in official data': [
        'mess-in-report',
    ],
    'Trusted outlier': [
        'trusted',
        'trusted-high-turnout-many-for-lukashenko',
        'trusted-many-for-lukashenko',
    ],
}

In [63]:
# Coefficient tables used for the 'Expected' line of each chart. The chart
# functions select a row by its 'area' and 'region' columns and read the
# 'coefficient' and 'source' columns from it.
protest_coeff = pd.read_csv('protest_registered_coefficients.csv')
tih_coeff = pd.read_csv('tihanovkaja_registered_coefficients.csv')


In [144]:
def tih_chart(area=None, region=None):
    """Scatter `tihanovkaja_officialVotes` against `tihanovkaja_registered`.

    Reads the module-level `df`, `reasons` and `tih_coeff`.

    Parameters
    ----------
    area : optional
        When truthy, only rows whose 'area' column equals this value are drawn.
    region : optional
        When truthy, only rows whose 'region' column equals this value are drawn.

    Traces
    ------
    * One marker trace per entry of `reasons`, holding the rows whose
      'outlier-reason' is one of that entry's codes.
    * A 'Trusted' marker trace holding the rows whose 'outlier-reason' is NaN.
    * When both `area` and `region` are given, an 'Expected' line
      ``y = x * coefficient`` using the matching row of `tih_coeff`; that
      row's 'source' value is printed. If `tih_coeff` has no matching row, a
      message is printed and the line is skipped instead of raising
      ``IndexError``.

    Returns
    -------
    None. The figure is displayed with ``fig.show()``.
    """
    x_col = 'tihanovkaja_registered'
    y_col = 'tihanovkaja_officialVotes'

    # Boolean Series on df's own index, so every later `&` is index-aligned.
    mask = pd.Series(True, index=df.index)
    if area:
        mask &= (df['area'] == area)
    if region:
        mask &= (df['region'] == region)

    def marker_trace(rows, name, **style):
        # One point per row; the row id is shown as hover text.
        return Scatter(
            x=rows[x_col],
            y=rows[y_col],
            mode='markers',
            name=name,
            text=rows['id'],
            **style,
        )

    fig = Figure(layout_title='Outliers')

    for reason, codes in reasons.items():
        fig.add_trace(marker_trace(df[mask & df['outlier-reason'].isin(codes)], reason))

    # Rows without any outlier reason are highlighted as the trusted set.
    fig.add_trace(marker_trace(
        df[mask & df['outlier-reason'].isna()],
        'Trusted',
        marker={
            'color': 'orange',
            'size': 10,
            'line': {'width': 2},
        },
    ))

    if area and region:
        coeff_rows = tih_coeff.loc[
            (tih_coeff['area'] == area) & (tih_coeff['region'] == region),
            ['coefficient', 'source'],
        ]

        if coeff_rows.empty:
            print(f'No coefficient for area={area!r}, region={region!r}; expected line skipped')
        else:
            coeff, source = coeff_rows.values[0]
            print(source)

            selected = df[mask]
            # No `fillcolor` here: it only applies when `fill` is set, which a
            # plain line trace does not use.
            fig.add_trace(Scatter(
                x=selected[x_col],
                y=selected[x_col] * coeff,
                mode='lines',
                name='Expected',
                text=selected['id'],
            ))

    # Label the axes with the plotted column names so the figure stands alone.
    fig.update_layout(xaxis_title=x_col, yaxis_title=y_col)
    fig.show()
        
        
            
    
    
    

In [152]:
# Chart for area 'town_over100', region 3. Because both filters are given,
# tih_chart also prints the 'source' value of the coefficient row it used.
tih_chart('town_over100', 3)

region


In [87]:
# review: 
# town_below100: 01-054-0001, 03-091-0024, 04-112-0070, 04-106-0011, 04-119-0007, 04-118-019
# village - 1: 01-042-0002, 01-056-0021, 01-063-0049, 01-054-0026, 01-055-0017
# village - 2: 02-065-0015, 02-077-0016, 02-069-0006, 02-069-0005 
# village - 3: 03-088-0027, 03-091-0048, 03-100-0057, 03-091-0036
# village - 5: 05-045-0014, 05-041-0028, 05-039-0028
# too many for tih: 06-012-0081, 06-013-0021
# too few for tih: 01-056-0017, 02-074-0004, 06-135-0025
# too few for tih: 02-076-0008 & 02-077-0008
# -------------Revised-------
# city: 01-010-0036, 02-016-0012, 02-016-0059
# capital: 07-006-0071, 07-003-0061, 07-004-0018
# town_over100: 02-074-0014, 02-074-0015, 02-074-0023, 03-096-0026, 03-096-0041, 03-096-0009


# --- Suspected ---
# 02-021-0044, 04-021-0008, 04-022-0067
# 02-015-0001, 

In [123]:
def protest_chart(area=None, region=None):
    """Scatter the summed `<candidate>_officialVotes` against `registered`.

    Reads the module-level `df`, `reasons`, `candidates` and `protest_coeff`.

    Parameters
    ----------
    area : optional
        When truthy, only rows whose 'area' column equals this value are drawn.
    region : optional
        When truthy, only rows whose 'region' column equals this value are drawn.

    Traces
    ------
    * One marker trace per entry of `reasons`, holding the rows whose
      'outlier-reason' is one of that entry's codes.
    * A 'Trusted' marker trace holding the rows whose 'outlier-reason' is NaN.
    * When both `area` and `region` are given, an 'Expected' line
      ``y = registered * coefficient`` using the matching row of
      `protest_coeff`; that row's 'source' value is printed. If no row
      matches, a message is printed and the line is skipped instead of
      raising ``IndexError``.

    Notes
    -----
    The y value of a point is the sum of `<c>_officialVotes` over every `c` in
    `candidates`. Missing 'corrupted_officialVotes' values are replaced by 10
    on a copy of `df` before summing, so `df` itself is not modified.
    NOTE(review): the reason for the value 10 is not visible here — confirm.

    Returns
    -------
    None. The figure is displayed with ``fig.show()``.
    """
    x_col = 'registered'
    vote_cols = [f'{c}_officialVotes' for c in candidates]

    # Boolean Series on df's own index, so every later `&` is index-aligned.
    mask = pd.Series(True, index=df.index)
    if area:
        mask &= (df['area'] == area)
    if region:
        mask &= (df['region'] == region)

    # Work on a copy so the placeholder value never leaks into the shared df.
    fixed_df = df.copy()
    fixed_df.loc[fixed_df['corrupted_officialVotes'].isna(), 'corrupted_officialVotes'] = 10

    def marker_trace(rows, name, **style):
        # One point per row; the row id is shown as hover text.
        return Scatter(
            x=rows[x_col],
            y=sum(rows[col] for col in vote_cols),
            mode='markers',
            name=name,
            text=rows['id'],
            **style,
        )

    fig = Figure(layout_title='Outliers')

    for reason, codes in reasons.items():
        fig.add_trace(marker_trace(fixed_df[mask & df['outlier-reason'].isin(codes)], reason))

    # Rows without any outlier reason are highlighted as the trusted set.
    fig.add_trace(marker_trace(
        fixed_df[mask & df['outlier-reason'].isna()],
        'Trusted',
        marker={
            'color': 'orange',
            'size': 10,
            'line': {'width': 2},
        },
    ))

    if area and region:
        # Select from protest_coeff with a mask built from protest_coeff.
        # Indexing tih_coeff with this mask would return the wrong table's
        # coefficient (or fail when the two tables' indexes differ).
        coeff_rows = protest_coeff.loc[
            (protest_coeff['area'] == area) & (protest_coeff['region'] == region),
            ['coefficient', 'source'],
        ]

        if coeff_rows.empty:
            print(f'No coefficient for area={area!r}, region={region!r}; expected line skipped')
        else:
            coeff, source = coeff_rows.values[0]
            print(source)

            selected = fixed_df[mask]
            # No `fillcolor` here: it only applies when `fill` is set, which a
            # plain line trace does not use.
            fig.add_trace(Scatter(
                x=selected[x_col],
                y=selected[x_col] * coeff,
                mode='lines',
                name='Expected',
                text=selected['id'],
            ))

    # Label the axes so the figure stands alone.
    fig.update_layout(
        xaxis_title=x_col,
        yaxis_title='sum of <candidate>_officialVotes',
    )
    fig.show()
        
        
            
    
    
    

In [153]:
# Chart for area 'town_over100', region 4. Because both filters are given,
# protest_chart also prints the 'source' value of the coefficient row it used.
protest_chart('town_over100', 4)

region
