# **Israel Elections Data Analysis**

An example notebook showing how to read and display the results of the 2021 Israeli elections. This is the notebook for the first week.

In [2]:
# Detect the runtime: the string form of the active IPython shell is checked
# for 'google.colab'; anything else is treated as a local Jupyter kernel.
run_in_colab = 'google.colab' in str(get_ipython())
print('Running on CoLab' if run_in_colab else 'Running locally on Jupyter')



Running on CoLab


In [3]:
# First install the needed packages using conda in the current Jupyter kernel
# Run once - this may take a long time!
import sys
#if not(run_in_colab):
#  !conda install --yes --prefix {sys.prefix} numpy
#  !conda install --yes --prefix {sys.prefix} pandas
#  !conda install --yes --prefix {sys.prefix} matplotlib
#  !conda install --yes --prefix {sys.prefix} statsmodels  # statistical modelling 
#  !conda install --yes --prefix {sys.prefix} xlrd  # read excel 



# Alternatively, if your python wasn't installed using anaconda, we can use pip install: 
# !{sys.executable} -m pip install numpy
# !{sys.executable} -m pip install pandas
# !{sys.executable} -m pip install matplotlib


In [4]:
# Import modules 
import numpy as np  # a module for working with numerical array 
import pandas as pd  # a module for working with data-frames
import statsmodels.api as sm  # a module for statistical modelling (e.g. regression analysis)
from matplotlib import pyplot as plt




  import pandas.util.testing as tm


First, we need to mount our Google Drive in the Colab runtime (when running locally, we set the path to the local data folder instead).

In [5]:
# Local run: point data_path at the folder that holds the elections CSV files.
# Colab run: mount Google Drive under /content/drive instead.
if not run_in_colab:
    data_path = "C:/Users/Or Zuk/Google Drive/HUJI/Teaching/Lab_52568/Data/Elections/"
else:
    from google.colab import drive
    drive.mount('/content/drive')


Mounted at /content/drive


In [6]:
# Load the 2021 elections data.
# On Colab the CSV files are uploaded by the user; the next cell reads
# 'votes per city 2021.csv' and 'votes per ballot 2021.csv' from `uploaded`,
# so both files must be provided here. Nothing happens on a local run.
if run_in_colab:
    from google.colab import files
    uploaded = files.upload()

Saving votes per ballot 2021.csv to votes per ballot 2021.csv
Saving votes per city 2021.csv to votes per city 2021.csv


In [38]:
import io

# Which table the local branch exposes as df_2021_raw / df_2021: "city" or "ballot".
# The Colab branch does not consult this switch: it always loads the per-city
# file into df_2021_raw and the per-ballot file into df_2021_raw_ballot.
data_type = "city" # "ballot"  # city
if run_in_colab:
    # `uploaded` is indexed by file name and holds each file's raw bytes;
    # BytesIO wraps them so pandas can read them like a file.
    df_2021_raw = pd.read_csv(io.BytesIO(uploaded['votes per city 2021.csv']),  encoding = 'iso-8859-8', index_col='שם ישוב')
    df_2021_raw_ballot = pd.read_csv(io.BytesIO(uploaded['votes per ballot 2021.csv']),  encoding = 'iso-8859-8', index_col='ברזל')
else:  # read local file
    # Later cells read df_2021_raw_ballot regardless of data_type. It used to be
    # defined only on Colab, so a local run failed with a NameError. Load it here
    # with the same settings as the Colab branch.
    df_2021_raw_ballot = pd.read_csv(data_path + 'votes per ballot 2021.csv',  encoding = 'iso-8859-8', index_col='ברזל')
    if data_type == "ballot":
        df_2021_raw = pd.read_csv(data_path + 'votes per ballot 2021.csv',  encoding = 'iso-8859-8', index_col='שם ישוב')
        # preprocessing of ballot
        df_2021 = df_2021_raw.drop('סמל ועדה', axis=1) # new column added in Sep 2019
        df_2021 = df_2021[df_2021.columns[8:-1]] # removing "metadata" columns
    else:
        df_2021_raw = pd.read_csv(data_path + 'votes per city 2021.csv',  encoding = 'iso-8859-8', index_col='שם ישוב')
        # preprocessing of cities
        df_2021 = df_2021_raw.drop('סמל ועדה', axis=1) # new column added in Sep 2019
        df_2021 = df_2021[df_2021.columns[5:-1]] # removing "metadata" columns
# Dataset is now stored in a Pandas Dataframe
print(df_2021_raw.columns)
df_2021_raw.head()

Index(['סמל ועדה', 'סמל ישוב', 'בזב', 'מצביעים', 'פסולים', 'כשרים', 'אמת', 'ב',
       'ג', 'ודעם', 'ז', 'זץ', 'ט', 'י', 'יז', 'ינ', 'יף', 'יק', 'יר', 'כ',
       'כך', 'כן', 'ל', 'מחל', 'מרצ', 'נ', 'ני', 'נר', 'עם', 'פה', 'ףז', 'צי',
       'צכ', 'צף', 'ץ', 'ק', 'קי', 'קך', 'קץ', 'ר', 'רנ', 'רף', 'רק', 'שס',
       'ת', 'Unnamed: 46'],
      dtype='object')


Unnamed: 0_level_0,סמל ועדה,סמל ישוב,בזב,מצביעים,פסולים,כשרים,אמת,ב,ג,ודעם,ז,זץ,ט,י,יז,ינ,יף,יק,יר,כ,כך,כן,ל,מחל,מרצ,נ,ני,נר,עם,פה,ףז,צי,צכ,צף,ץ,ק,קי,קך,קץ,ר,רנ,רף,רק,שס,ת,Unnamed: 46
שם ישוב,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
אבירים,6,1220,221,151,1,150,47,0,0,3,0,0,2,0,3,0,0,0,0,0,0,16,2,5,31,0,0,0,0,27,0,0,0,0,0,0,0,0,0,5,0,0,0,1,8,
אדירים,5,113,234,180,0,180,1,7,4,0,0,0,9,0,1,0,0,0,0,0,0,3,0,84,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,61,6,
אושה,8,278,382,276,0,276,72,7,0,3,0,0,0,0,2,0,0,1,0,0,0,37,8,24,20,0,0,0,0,86,0,0,0,0,0,0,0,0,0,1,0,0,0,0,15,
אחווה,17,1157,183,145,0,145,14,4,0,0,0,0,4,0,0,0,0,0,0,0,0,36,3,27,10,0,0,0,0,37,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,
איבים,17,338,138,40,0,40,0,4,0,0,0,1,10,0,0,0,0,0,0,0,0,5,0,17,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,


In [39]:
# Per-city table: drop the committee-code column, then slice away the leading
# "metadata" columns and the trailing unnamed column.
city_without_committee = df_2021_raw.drop(columns='סמל ועדה')  # new column added in Sep 2019
df_2021 = city_without_committee[city_without_committee.columns[5:-1]]

# Report the value of the 'כשרים' column for the 'מעטפות חיצוניות' row.
external_envelopes_valid = df_2021_raw['כשרים']['מעטפות חיצוניות']
print("{} votes in March 2021".format(external_envelopes_valid) + ' מעטפות חיצוניות ')

# Per-ballot table: same treatment, with a different number of leading "metadata" columns.
ballot_without_committee = df_2021_raw_ballot.drop(columns='סמל ועדה')  # new column added in Sep 2019
df_2021_ballot = ballot_without_committee[ballot_without_committee.columns[9:-1]]


421619 votes in March 2021 מעטפות חיצוניות 


Question 1:

In [40]:
# Function that simulates the votes:

def simol_votes(n_tilda, v):
    """Simulate vote counts with one binomial draw per cell.

    Parameters
    ----------
    n_tilda : array-like
        Number of trials for each cell; forwarded as ``n`` to
        ``np.random.binomial``.
    v : float or array-like
        Success probability for each cell; forwarded as ``p``. It is
        broadcast against ``n_tilda`` by NumPy.

    Returns
    -------
    pandas.DataFrame
        The sampled counts wrapped in a new DataFrame (default integer
        row and column labels).
    """
    return pd.DataFrame(np.random.binomial(n=n_tilda, p=v))



Question 2:

In [None]:
import random

# Candidate probabilities used by calculate_v2 / calculate_v3.
lst_prob = [0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8]
# NOTE(review): parties_votes_percents is not defined in the visible cells;
# presumably it keeps the parties above the 3.25% threshold - confirm.
votes = parties_votes_percents(df_2021_ballot, 0.0325)  # total votes for each party
above_block_df = df_2021_raw_ballot[votes.index.values]
# Number of ballot rows. Derived from the data instead of the hard-coded 12926:
# the probability matrices built from it are broadcast against the per-ballot
# table, so the two row counts must match exactly.
NUMBER_OF_KALPI = len(df_2021_raw_ballot)


def calculate_v1(df, n_parties=13):
    """Build a per-row probability matrix from the 'כשרים' / 'בזב' ratio.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'כשרים' and 'בזב'. It is not modified.
    n_parties : int, optional
        Number of identical columns in the result. Defaults to 13, the
        value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), n_parties)``; every column holds the
        same per-row ratio, capped at 1.
    """
    # Ratios above 1 are capped at 1 so they remain valid probabilities.
    ratio = (df['כשרים'] / df['בזב']).clip(upper=1)
    as_column = np.asarray(ratio).reshape(-1, 1)
    return np.tile(as_column, (1, n_parties))

def calculate_v2(n_ballots=None, probs=None):
    """Repeat one row of probabilities for every ballot.

    Parameters
    ----------
    n_ballots : int, optional
        Number of rows in the result. Defaults to the module-level
        ``NUMBER_OF_KALPI``.
    probs : sequence of float, optional
        The row to repeat. Defaults to the module-level ``lst_prob``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_ballots, len(probs))`` whose rows all equal
        ``probs``.
    """
    # Resolve defaults at call time so the function tracks the current globals.
    if n_ballots is None:
        n_ballots = NUMBER_OF_KALPI
    if probs is None:
        probs = lst_prob
    return np.tile(probs, (n_ballots, 1))

def calculate_v3(n_ballots=None, n_parties=13, probs=None):
    """Draw a random probability for every (ballot, party) cell.

    Each entry is sampled with ``np.random.choice`` from ``probs``. The
    whole matrix is drawn in one call rather than one call per row, so for
    a fixed seed the individual values differ from a row-by-row loop even
    though every entry is still an independent draw from ``probs``.

    Parameters
    ----------
    n_ballots : int, optional
        Number of rows. Defaults to the module-level ``NUMBER_OF_KALPI``.
    n_parties : int, optional
        Number of columns. Defaults to 13, the value that used to be
        hard-coded.
    probs : sequence of float, optional
        Values to sample from. Defaults to the module-level ``lst_prob``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_ballots, n_parties)``.
    """
    if n_ballots is None:
        n_ballots = NUMBER_OF_KALPI
    if probs is None:
        probs = lst_prob
    return np.random.choice(probs, size=(n_ballots, n_parties))

def calculate_n_tilda(df, v):
    """Scale every row of ``df`` by ``1 / v`` and round the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose rows are divided.
    v : pandas.Series or array-like
        One divisor per row of ``df`` (matched along the row index).

    Returns
    -------
    pandas.DataFrame
        ``df`` divided row-wise by ``v``, rounded to whole numbers.
    """
    scaled = df.divide(v, axis="index")
    return np.round(scaled)


# Per-ballot ratio of the 'כשרים' column to the 'בזב' column, capped at 1.
v = (df_2021_raw_ballot['כשרים'] / df_2021_raw_ballot['בזב']).clip(upper=1)
n_tilda = calculate_n_tilda(df_2021_ballot, v)
n_tilda = n_tilda[votes.index.values]

N_SIMULATIONS = 50


def _vote_shares(simulated):
    """Return each column's share of the total simulated votes as a 1-D array."""
    column_totals = np.asarray(simulated).sum(axis=0)
    return column_totals / column_totals.sum()


# These two matrices are deterministic, so build them once rather than on
# every iteration. calculate_v3() is random and is redrawn per iteration.
v1_probs = calculate_v1(df_2021_raw_ballot)
v2_probs = calculate_v2()

all_q1 = []
all_q2 = []
all_q3 = []
for _ in range(N_SIMULATIONS):
    all_q1.append(_vote_shares(simol_votes(n_tilda, v1_probs)))
    all_q2.append(_vote_shares(simol_votes(n_tilda, v2_probs)))
    all_q3.append(_vote_shares(simol_votes(n_tilda, calculate_v3())))

all_q1 = np.asarray(all_q1)
all_q2 = np.asarray(all_q2)
all_q3 = np.asarray(all_q3)

# Sum per column with an explicit axis: np.sum(DataFrame) passes axis=None to
# pandas, whose behaviour for DataFrame reductions is deprecated and will
# collapse to a single scalar in a future version.
s = n_tilda.sum(axis=0)
s = s / s.sum()

s_raw = above_block_df.sum(axis=0)
s_raw = s_raw / s_raw.sum()


# Reference shares as a plain array, so the subtractions below broadcast
# row-wise against the (n_simulations, n_parties) result matrices.
true_shares = np.asarray(s_raw)

# Per-party mean of the simulated shares, one vector per simulation scheme.
mean1 = np.mean(all_q1, axis=0)
mean2 = np.mean(all_q2, axis=0)
mean3 = np.mean(all_q3, axis=0)

# Per-party variance, rounded to 10 decimals.
var1 = np.round(np.var(all_q1, axis=0), decimals=10)
var2 = np.round(np.var(all_q2, axis=0), decimals=10)
var3 = np.round(np.var(all_q3, axis=0), decimals=10)

# One MSE per scheme. Previously a single `MSE` name was overwritten three
# times, and the last assignment used all_q1 again instead of all_q3.
mse1 = np.square(all_q1 - true_shares).mean()
mse2 = np.square(all_q2 - true_shares).mean()
mse3 = np.square(all_q3 - true_shares).mean()

# One bias vector per scheme.
# NOTE(review): the original subtracted q1/q2/q3, which are not defined in the
# visible cells; the raw-data shares used for the MSE are assumed to be the
# intended reference - confirm.
bias1 = mean1 - true_shares
bias2 = mean2 - true_shares
bias3 = mean3 - true_shares


# NOTE(review): `tabulate` is not imported in the visible cells; it must be in
# scope (e.g. `from tabulate import tabulate`) for this cell to run - confirm.

data_top = above_block_df.columns
# Each row is [party, simol1, simol2, simol3], so the party header comes first.
table_headers = ['Miflaga', 'simol1', 'simol2', 'simol3']

print("=======================================================")
print("                 Tabel of mean estimateor              ")
print("=======================================================")

principal_lst_mean = [
    [party, m1, m2, m3]
    for party, m1, m2, m3 in zip(data_top, mean1, mean2, mean3)
]

# Print the list built above (the original referenced an undefined `principal_lst`).
print(tabulate(principal_lst_mean, headers=table_headers, tablefmt='orgtbl'))

print("=======================================================")
print("                 Tabel of variance estimateor              ")
print("=======================================================")

principal_lst_var = [
    [party, v1, v2, v3]
    for party, v1, v2, v3 in zip(data_top, var1, var2, var3)
]

print(tabulate(principal_lst_var, headers=table_headers, tablefmt='orgtbl'))


width =0.22


def _plot_estimator(mean, err, label, shares_adjusted, shares_raw, party_names, bar_width):
    """Draw one grouped bar chart: a simulation's mean shares next to the data.

    mean / err      -- per-party mean shares of one simulation and their error bars
    label           -- legend label for the simulation bars
    shares_adjusted -- per-party shares computed from n_tilda
    shares_raw      -- per-party shares computed from the raw ballot counts
    party_names     -- tick labels for the x axis
    bar_width       -- width of each bar; also the offset between the three groups
    """
    fig, ax = plt.subplots(figsize = (20,4))
    positions = np.arange(len(mean))
    ax.bar(positions, mean, yerr=err, width=bar_width, label=label)
    # The same error bars drawn again as thick black markers so they stay visible.
    ax.errorbar(positions, mean, yerr=err, fmt='o', color='Black', elinewidth=3,
                capthick=3, errorevery=1, alpha=1, ms=4, capsize = 5)
    ax.bar(np.arange(len(shares_adjusted)) + bar_width, shares_adjusted,
           width=bar_width, label="data after adding votes")
    ax.bar(np.arange(len(shares_raw)) + (2*bar_width), shares_raw,
           width=bar_width, label="raw data")
    ax.set_xticks(np.arange(len(party_names)))
    ax.set_xticklabels(party_names)
    ax.legend()
    ax.set_xlabel("party name")
    ax.set_ylabel("voting precent")
    ax.set_title("graph of 3 different estimators to the election result")
    plt.show()


# One figure per simulation scheme; previously the same block was pasted three times.
# NOTE(review): the error bars show the variance, not the standard deviation -
# confirm this is intended.
for sim_mean, sim_var, sim_label in (
    (mean1, var1, "simulation 1"),
    (mean2, var2, "simulation 2"),
    (mean3, var3, "simulation 3"),
):
    _plot_estimator(sim_mean, sim_var, sim_label, s, s_raw, votes.index.values, width)


In [None]:
# Scratch cell: recomputes the per-ballot ratio and n-tilde, then runs 50 simulations.
# NOTE(review): this cell reads `p` and `alpha`, which are not assigned in this
# cell (and `alpha` is not assigned in any visible cell), so it relies on
# leftover kernel state and will not run on a fresh kernel.
v = df_2021_raw_ballot['כשרים'] / df_2021_raw_ballot['בזב']
v[v > 1] = 1 # cap ratios greater than 1 at 1
# NOTE(review): `lst` is not read again in this cell.
lst = [0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8]
# NOTE(review): parties_votes_percents is not defined in the visible cells.
votes = parties_votes_percents(df_2021_ballot, 0.0325)  # total votes for each party
above_block_df = df_2021_raw_ballot[votes.index.values]
# NOTE(review): this value of `q` is overwritten below before it is read.
q = above_block_df.div(v, axis=1) # (votes in kalpi / percent kosher)

# calculate n-tilde: divide each ballot row by its ratio, then round

q = df_2021_ballot.div(v, axis=0)
n_tilde  = np.round(q)

# NOTE(review): the only definition of `p` in this cell is the commented-out line below.
#p = np.tile(np.transpose(np.asarray([v])),(1,13))
print(p)

# NOTE(review): simol_a / simol_b are overwritten on every iteration; only the last draw survives.
for i in range(50):
    simol_a = simol_votes(n_tilde, alpha)
    simol_b = simol_votes(n_tilde, p)


[[0.67873303 0.67873303 0.67873303 ... 0.67873303 0.67873303 0.67873303]
 [0.76923077 0.76923077 0.76923077 ... 0.76923077 0.76923077 0.76923077]
 [0.72251309 0.72251309 0.72251309 ... 0.72251309 0.72251309 0.72251309]
 ...
 [1.         1.         1.         ... 1.         1.         1.        ]
 [1.         1.         1.         ... 1.         1.         1.        ]
 [1.         1.         1.         ... 1.         1.         1.        ]]
