In [38]:
import pandas as pd
import numpy as np
from decimal import Decimal

from plotly.graph_objects import Scatter, Figure

In [39]:
# Load the station table; 'latitude' and 'longitude' are parsed with Decimal
# (through `converters`) rather than as binary floats.
df = pd.read_csv(
    'trusted-for-alternative.csv',
    converters=dict.fromkeys(['latitude', 'longitude'], Decimal),
)

In [40]:
# Values of the 'area' column used to split the rows into groups.
# NOTE(review): the saved output of `corrupted_coefficients` further down shows
# the key 'suburb' rather than 'minsk_suburb' — confirm which spelling the CSV uses.
areas = [
    'city',
    'minsk_suburb',
    'capital',
    'village',
    'town_below100',
    'town_over100',
    'embassy',
]

# Column-name prefixes summed by the *_candidates helpers.
candidates = [
    'against',
    'cherechen',
    'corrupted',
    'dmitriyev',
    'kanopatskaja',
    'tihanovkaja',
]

# The same prefixes, in the same order, without 'tihanovkaja'.
alt_candidates = [name for name in candidates if name != 'tihanovkaja']

In [41]:
def official_candidates(data):
    """Sum the ``<name>_officialVotes`` columns of ``data`` over every name in ``candidates``."""
    return _generic_candidates_sum(
        data,
        source='officialVotes',
        candidates_to_sum=candidates,
    )


def photo_candidates(data):
    """Sum the ``<name>_photoVoices`` columns of ``data`` over every name in ``candidates``."""
    return _generic_candidates_sum(
        data,
        source='photoVoices',
        candidates_to_sum=candidates,
    )


def official_alt_candidates(data):
    """Sum the ``<name>_officialVotes`` columns of ``data`` over every name in ``alt_candidates``."""
    return _generic_candidates_sum(
        data,
        source='officialVotes',
        candidates_to_sum=alt_candidates,
    )


def photo_alt_candidates(data):
    """Sum the ``<name>_photoVoices`` columns of ``data`` over every name in ``alt_candidates``."""
    return _generic_candidates_sum(
        data,
        source='photoVoices',
        candidates_to_sum=alt_candidates,
    )

def registered_alt_candidates(data):
    """Sum the ``<name>_registered`` columns of ``data`` over every name in ``alt_candidates``."""
    return _generic_candidates_sum(
        data,
        source='registered',
        candidates_to_sum=alt_candidates,
    )


def _generic_candidates_sum(data, source, candidates_to_sum):
    return sum([data[f'{candidate}_{source}'] for candidate in candidates_to_sum])

In [42]:
# One scatter trace per area:
#   x = 'registered' minus the 'lukashenko_registered' and 'ignore_registered' columns
#   y = official votes summed over `candidates`
fig = Figure()

for area in areas:
    data = df.loc[df['area'] == area]

    fig.add_trace(
        Scatter(
            x=data['registered'] - data['lukashenko_registered'] - data['ignore_registered'],
            y=official_candidates(data),
            mode='markers',
            name=area,
            text=data['id'],  # row id attached to each marker
        )
    )

fig.update_xaxes(title='Registered against Lukashenko')
fig.update_yaxes(title='Voted against Lukashenko')
fig.show()

In [43]:
# One scatter trace per area:
#   x = 'photoVoices' minus 'lukashenko_photoVoices'
#   y = official votes summed over `candidates`
fig = Figure()

for area in areas:
    data = df.loc[df['area'] == area]

    fig.add_trace(
        Scatter(
            x=data['photoVoices'] - data['lukashenko_photoVoices'],
            # Use the shared helper instead of repeating its sum inline, so this
            # chart and the 'registered' chart stay in sync.
            y=official_candidates(data),
            mode='markers',
            name=area,
            text=data['id'],  # row id attached to each marker
        )
    )

fig.update_xaxes(title='Photos against Lukashenko')
fig.update_yaxes(title='Voted against Lukashenko')
fig.show()

In [44]:
def draw_part_of_alt_reduced(candidate):
    """Draw the share chart for ``candidate`` from the global ``df``."""
    _draw_part_of_alt_chart(candidate, df=df)
    
def draw_part_of_alt_fixed(candidate):
    """Draw the share chart for ``candidate`` from the global ``fixed_df``.

    ``fixed_df`` is not created by this function; it must already exist in the
    notebook namespace when the function is called.

    Raises:
        NameError: if ``fixed_df`` has not been defined yet.
    """
    # Make the hidden dependency explicit: after a kernel restart the cells that
    # build `fixed_df` may not have run, so fail with an actionable message.
    if 'fixed_df' not in globals():
        raise NameError(
            "name 'fixed_df' is not defined: run the cells that build fixed_df "
            "before calling draw_part_of_alt_fixed"
        )
    _draw_part_of_alt_chart(candidate, fixed_df)
    
def _draw_part_of_alt_chart(candidate, df):
    """Plot, per area, ``candidate``'s share of the alternative candidates' official votes.

    Rows whose ``corrupted_officialVotes`` is NaN are excluded. Within each
    area the x coordinate is simply the row's position (0, 1, 2, ...).

    Args:
        candidate: column-name prefix; ``f'{candidate}_officialVotes'`` is the numerator.
        df: frame to plot from (shadows the notebook-level ``df`` on purpose).
    """
    has_corrupted = df['corrupted_officialVotes'].notna()
    fig = Figure()

    for area in areas:
        data = df.loc[(df['area'] == area) & has_corrupted]
        share = data[f'{candidate}_officialVotes'] / official_alt_candidates(data)

        fig.add_trace(
            Scatter(
                x=np.arange(len(data)),
                y=share,
                mode='markers',
                name=area,
                text=data['id'],
            )
        )

    fig.update_xaxes(title='poll stations')
    fig.update_yaxes(title=candidate)
    fig.show()

In [45]:
# Share of 'dmitriyev' among the alternative candidates' official votes, per area, from `df`.
draw_part_of_alt_reduced('dmitriyev')

In [49]:
# Share of 'against' among the alternative candidates' official votes, per area, from `df`.
draw_part_of_alt_reduced('against')

In [50]:
# Share of 'kanopatskaja' among the alternative candidates' official votes, per area, from `df`.
draw_part_of_alt_reduced('kanopatskaja')

In [48]:
# Share of 'cherechen' among the alternative candidates' official votes, drawn from `fixed_df`.
# NOTE(review): `fixed_df` is only assigned in later cells of this notebook, so on a
# fresh kernel run top-to-bottom this call raises NameError — run those cells first.
draw_part_of_alt_fixed('cherechen')

In [204]:
# Share of 'corrupted' among the alternative candidates' official votes, per area, from `df`.
draw_part_of_alt_reduced('corrupted')

In [205]:
# For each area print std / mean of the 'corrupted' share taken relative to the
# alternative candidates' official votes (rows with a NaN 'corrupted' count skipped).
mask = df['corrupted_officialVotes'].notna()

for area in areas:
    data = df.loc[mask & (df['area'] == area)]
    corrupted_distribution_alt = data['corrupted_officialVotes'] / official_alt_candidates(data)
    spread = np.std(corrupted_distribution_alt)
    centre = np.mean(corrupted_distribution_alt)
    print(spread / centre)

0.6070670734350884
0.5445337957637728
0.790368206365839
0.566727561176249
0.7489734691949281
0.7839281405370749
0.9985504073344642


In [206]:
# For each area print std / mean of the 'corrupted' share taken relative to the
# official votes of all `candidates` (rows with a NaN 'corrupted' count skipped).
mask = df['corrupted_officialVotes'].notna()

for area in areas:
    data = df.loc[mask & (df['area'] == area)]
    corrupted_distribution_cand = data['corrupted_officialVotes'] / official_candidates(data)
    spread = np.std(corrupted_distribution_cand)
    centre = np.mean(corrupted_distribution_cand)
    print(spread / centre)

0.865348684222977
0.6202694181846742
0.9655897918084013
0.819291503335662
1.2545298100242326
1.0008101303694217
0.9300051618832743


In [207]:
# area -> mean 'corrupted' share of the alternative candidates' official votes,
# computed only over rows that have a 'corrupted' count.
corrupted_coefficients = {}

for area in areas:
    usable_rows = df['corrupted_officialVotes'].notna() & (df['area'] == area)
    data = df.loc[usable_rows]
    share = data['corrupted_officialVotes'] / official_alt_candidates(data)
    corrupted_coefficients[area] = np.mean(share)

In [208]:
# Display the area -> mean 'corrupted' share mapping.
corrupted_coefficients

{'city': 0.07681783584114212,
 'suburb': 0.09964163286690868,
 'capital': 0.13774958531776516,
 'village': 0.10273329205116738,
 'town_below100': 0.08865894186789253,
 'town_over100': 0.09839192306590416,
 'embassy': 0.2553197480026749}

In [209]:
# Write the coefficients as a two-column CSV (no index, floats with 3 decimals).
coefficient_rows = list(corrupted_coefficients.items())
pd.DataFrame(
    coefficient_rows,
    columns=['area', 'corrupted_coefficient'],
).to_csv(
    'corrupted_coefficients.csv',
    float_format='%.3f',
    index=False,
)

In [210]:
# Independent (deep) copy of df; missing 'corrupted' counts are filled in on this copy.
fixed_df = df.copy(deep=True)

In [211]:
def calculate_corrupted(coeff, alt_no_corrupted):
    """Scale ``alt_no_corrupted`` by ``coeff / (1 - coeff)``.

    If ``coeff`` is the fraction ``c / (c + rest)``, then ``coeff / (1 - coeff)``
    equals ``c / rest``, so the result is the ``c`` that matches ``rest = alt_no_corrupted``.

    Args:
        coeff: fraction to convert into a ratio.
        alt_no_corrupted: value the ratio is applied to.
    """
    ratio = coeff / (1 - coeff)
    return alt_no_corrupted * ratio

In [212]:
# True for rows of fixed_df that have no 'corrupted' official count.
corrupted_mask = pd.isna(fixed_df['corrupted_officialVotes'])

In [213]:
# Rows that still need a 'corrupted' value.
rows_to_fill = fixed_df.loc[corrupted_mask]
# Area coefficient for each of those rows, in row order (KeyError on an unknown area).
coeffs = [corrupted_coefficients[area_name] for area_name in rows_to_fill['area']]
# Official votes of the alternative options other than 'corrupted' for the same rows.
alt_no_corrs = _generic_candidates_sum(
    rows_to_fill,
    'officialVotes',
    [name for name in alt_candidates if name != 'corrupted'],
)

In [214]:
# Fill the missing 'corrupted' counts with the value derived from each row's
# area coefficient and its other alternative votes.
fixed_df.loc[corrupted_mask, 'corrupted_officialVotes'] = list(
    map(calculate_corrupted, coeffs, alt_no_corrs)
)

In [215]:
# Inspect the 'corrupted' column of fixed_df after the fill.
fixed_df['corrupted_officialVotes']

0       5.000000
1      13.000000
2      15.000000
3      16.974806
4      25.212580
         ...    
805    57.000000
806    16.000000
807    12.780471
808     9.000000
809    17.000000
Name: corrupted_officialVotes, Length: 810, dtype: float64

In [216]:
# Save fixed_df to CSV without writing the index column.
fixed_df.to_csv('trusted-for-alternative-fixed.csv', index=False)

In [19]:
# Number of rows with id '02-078-0008' whose 'corrupted' official count is missing.
missing_for_station = df['corrupted_officialVotes'].isna() & (df['id'] == '02-078-0008')
int(missing_for_station.sum())

1

In [54]:
# Fresh copy of df in which a missing 'corrupted' count is treated as 0.
# NOTE(review): this rebinds `fixed_df`, which earlier cells use for a differently filled frame.
fixed_df = df.copy()
fixed_df['corrupted_officialVotes'] = fixed_df['corrupted_officialVotes'].fillna(0)

# Official-vote columns of every entry in `candidates` plus 'lukashenko'.
vote_columns = [f'{c}_officialVotes' for c in (candidates + ['lukashenko'])]
overall_sum = sum(fixed_df[column] for column in vote_columns)

# Rows whose 'officialVotes' total differs from the sum of the per-option columns.
irregular_report_mask = fixed_df['officialVotes'] != overall_sum

fixed_df.loc[irregular_report_mask, ['id', 'officialVotes'] + vote_columns]

Unnamed: 0,id,officialVotes,against_officialVotes,cherechen_officialVotes,corrupted_officialVotes,dmitriyev_officialVotes,kanopatskaja_officialVotes,tihanovkaja_officialVotes,lukashenko_officialVotes
10,01-010-0015,911.0,52.0,21.0,4.0,21.0,9.0,410.0,393.0
41,01-011-0049,1919.0,121.0,35.0,17.0,28.0,19.0,1055.0,661.0
49,01-011-0079,2152.0,107.0,25.0,11.0,49.0,45.0,571.0,1342.0
51,01-011-0087,1940.0,164.0,34.0,22.0,43.0,22.0,1155.0,499.0
113,02-016-0056,2079.0,77.0,23.0,5.0,28.0,15.0,504.0,1414.0
129,02-071-0015,459.0,11.0,5.0,4.0,9.0,8.0,41.0,383.0
146,02-075-0013,1488.0,30.0,14.0,12.0,25.0,22.0,135.0,1249.0
181,03-091-0018,711.0,52.0,12.0,6.0,23.0,5.0,389.0,227.0
197,03-096-0004,2373.0,133.0,22.0,25.0,9.0,3.0,597.0,1583.0
208,03-096-0058,1846.0,29.0,14.0,17.0,11.0,20.0,573.0,1184.0


In [55]:
# Reported total minus the sum of per-option counts (positive: total exceeds the sum).
diff = fixed_df['officialVotes'].sub(overall_sum)

In [61]:
# 'corrupted' and 'against' counts for the rows whose totals do not add up.
fixed_df.loc[diff != 0, ['corrupted_officialVotes', 'against_officialVotes']]

Unnamed: 0,corrupted_officialVotes,against_officialVotes
10,4.0,52.0
41,17.0,121.0
49,11.0,107.0
51,22.0,164.0
113,5.0,77.0
129,4.0,11.0
146,12.0,30.0
181,6.0,52.0
197,25.0,133.0
208,17.0,29.0


In [64]:
# Ids of mismatching rows whose 'corrupted' count is 0.
mismatch_with_zero_corrupted = (diff != 0) & (fixed_df['corrupted_officialVotes'] == 0)
fixed_df.loc[mismatch_with_zero_corrupted, 'id']

382    05-036-0022
389    05-036-0043
465    05-141-0030
485    05-141-0056
539    06-013-0029
545    06-121-0006
552    06-135-0007
563    07-001-0010
Name: id, dtype: object

In [66]:
# All non-zero differences between the reported total and the per-option sum.
diff.loc[diff != 0]

10      1.0
41    -17.0
49      2.0
51      1.0
113    13.0
129    -2.0
146     1.0
181    -3.0
197     1.0
208    -2.0
231     3.0
256     2.0
268    -4.0
382     7.0
389    15.0
408    14.0
424     1.0
444   -12.0
461     2.0
465    25.0
485     2.0
499     9.0
510     2.0
514     6.0
539     3.0
544   -15.0
545     5.0
552     3.0
563    12.0
568    -5.0
569    -8.0
571    10.0
573    42.0
595     1.0
598     1.0
605   -24.0
612     2.0
630    -5.0
631   -60.0
633   -20.0
dtype: float64

In [70]:
# Total, 'against' and 'corrupted' official counts for the row with id '06-121-0006'.
fixed_df.loc[
    fixed_df['id'] == '06-121-0006',
    ['officialVotes', 'against_officialVotes', 'corrupted_officialVotes'],
]

Unnamed: 0,officialVotes,against_officialVotes,corrupted_officialVotes
545,1409.0,51.0,0.0
