In [1]:
# Importing the needed the libraries
library(MatchIt)
library(Zelig)
library(rbounds)
library ("Matching")

Loading required package: survival

Loading required package: Matching

Loading required package: MASS

## 
##  Matching (Version 4.9-7, Build Date: 2020-02-05)
##  See http://sekhon.berkeley.edu/matching for additional documentation.
##  Please cite software as:
##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
##   Software with Automated Balance Optimization: The Matching package for R.''
##   Journal of Statistical Software, 42(7): 1-52. 
##




In [0]:
#Data preprocessing
####################################################
# File name: "recoding_script.R"
# Goal: Recode data for matching analysis
# Dependency: "salta_data.Rdata"
# Output: "datamatch.Rdata"
#################################################### 
#Note: the file "salta_data.RData" should be in the same directory of the code. "salta_data.RData" data file obtains the orginal dataset before any modification
load ("salta_data.RData")
attach(salta.data)

#___________________________________________________#
# Polling place 
#___________________________________________________#

polling.place <- escuela

#___________________________________________________#
# Voting System
#___________________________________________________#

# electronic (VE) or traditional (VT) voting?

system <- sistema

EV <- NULL
EV[system == "VE"] <- 1
EV[system == "VT"] <- 0

#___________________________________________________# 
# Recode outcome variables 
#___________________________________________________#

# poll workers qualified enough?

#prop.table(table(capaci_autoridades))

capable.auth <- NULL
capable.auth[capaci_autoridades == "Nada Capacitadas"] <- 0
capable.auth[capaci_autoridades == "Poco Capacitadas"] <- 0
capable.auth[capaci_autoridades == "Bastante Capacitadas"] <- 1
capable.auth[capaci_autoridades == "Muy Capacitadas"] <- 1

# quality of voting experience?

#prop.table(table(calif_votac))

eval.voting <- NULL
eval.voting[calif_votac == "Muy Malo"] <- 0
eval.voting[calif_votac == "Malo"] <- 0
eval.voting[calif_votac == "Bueno"] <- 0
eval.voting[calif_votac == "Muy Bueno"] <- 1

# difficulty of voting experience?

#prop.table(table(facil))

easy.voting <- NULL
easy.voting[as.numeric(facil) == 2] <- 1
easy.voting[as.numeric(facil) == 3] <- 0
easy.voting[as.numeric(facil) == 4] <- 0
easy.voting[as.numeric(facil) == 5] <- 0

# how sure vote counted?

#prop.table(table(cuàn_seguro))

sure.counted <- NULL
sure.counted[as.numeric(cuàn_seguro) == 2] <- 1
sure.counted[as.numeric(cuàn_seguro) == 3] <- 1
sure.counted[as.numeric(cuàn_seguro) == 4] <- 0
sure.counted[as.numeric(cuàn_seguro) == 5] <- 0

# how confident vote secret?

#prop.table(table(cuàn_confiado))

conf.secret <- NULL
conf.secret[as.numeric(cuàn_confiado) == 2] <- 1
conf.secret[as.numeric(cuàn_confiado) == 3] <- 1
conf.secret[as.numeric(cuàn_confiado) == 4] <- 0
conf.secret[as.numeric(cuàn_confiado) == 5] <- 0

# believe provincial elections are clean?

#prop.table(table(elecc_limpias))

how.clean <- NULL
how.clean[as.numeric(elecc_limpias) == 2] <- 1
how.clean[as.numeric(elecc_limpias) == 3] <- 1
how.clean[as.numeric(elecc_limpias) == 4] <- 0
how.clean[as.numeric(elecc_limpias) == 5] <- 0

# how quick was process?

#prop.table(table(rapidez_proceso))

speed <- NULL
speed[as.numeric(rapidez_proceso) == 2] <- 1
speed[as.numeric(rapidez_proceso) == 3] <- 1
speed[as.numeric(rapidez_proceso) == 4] <- 0
speed[as.numeric(rapidez_proceso) == 5] <- 0

# agree replacing VT by VE?

#prop.table(table(reemplazoVTxVE))

agree.evoting <- NULL
agree.evoting[as.numeric(reemplazoVTxVE) == 2] <- 1
agree.evoting[as.numeric(reemplazoVTxVE) == 3] <- 1
agree.evoting[as.numeric(reemplazoVTxVE) == 4] <- 0
agree.evoting[as.numeric(reemplazoVTxVE) == 5] <- 0

# select candidates electronically?

#prop.table(table(sist_voto_categ))

eselect.cand <- NULL
eselect.cand[as.numeric(sist_voto_categ) == 2] <- 0
eselect.cand[as.numeric(sist_voto_categ) == 3] <- 1

#___________________________________________________#
# Recode covariates 
#___________________________________________________#

age <- edad
age.group <- NULL
age.group[age < 30] <- 1
age.group[age > 29 & age < 40] <- 2
age.group[age > 39 & age < 50] <- 3
age.group[age > 49 & age < 65] <- 4
age.group[age > 64] <- 5

male <- NULL
male[sexo == "MASCULINO"] <- 1
male[sexo == "FEMENINO"] <- 0

educ <- NULL
educ[educ_enc == "Sin Estudios" | educ_enc == "Primario Incompleto"] <- 1
educ[educ_enc == "Primario Completo"] <- 2
educ[educ_enc == "Secundario Incompleto"] <- 3
educ[educ_enc == "Secundario Completo"] <- 4
educ[educ_enc == "Terciario Incompleto"] <- 5
educ[educ_enc == "Terciario Completo"] <- 6
educ[educ_enc == "Universitario Incompleto"] <- 7
educ[educ_enc == "Universitario Completo" | educ_enc == "Posgrado"] <- 9

white.collar <- NULL
white.collar[ocupac == "EMPLEADO PUBLICO"|ocupac == "COMERCIANTE SIN EMPLEADOS" | ocupac == "EMPLEADO SECTOR PRIVADO" | ocupac == "PROF/COMERCIANTE EMPLEADOS A CARGO"] <- 1
white.collar[ocupac != "EMPLEADO PUBLICO"&ocupac != "COMERCIANTE SIN EMPLEADOS"&ocupac != "EMPLEADO SECTOR PRIVADO"&ocupac != "PROF/COMERCIANTE EMPLEADOS A CARGO"] <- 0

not.full.time <- NULL
not.full.time[ocupac == "ESTUDIANTE" | ocupac == "AMA DE CASA" | ocupac == "DESOCUPADO" | ocupac == "SUBSIDIADO/PLANES/ASIGNACIONES" | ocupac == "TRABAJOS TEMPORARIOS" | ocupac == "EMPLEADO INFORMAL" | ocupac == "JUBILADO/PENSIONADO" | ocupac == "RENTISTA"] <- 1
not.full.time[ocupac == "ESTUDIANTE" | ocupac != "AMA DE CASA" & ocupac != "DESOCUPADO" & ocupac != "SUBSIDIADO/PLANES/ASIGNACIONES" & ocupac != "TRABAJOS TEMPORARIOS" & ocupac != "EMPLEADO INFORMAL" & ocupac != "JUBILADO/PENSIONADO" & ocupac != "RENTISTA"] <- 0

internet.work <- NULL
internet.work[internet_trabajar == "No"] <- 0
internet.work[internet_trabajar == "Si"] <- 1

internet.play <- NULL
internet.play[internet_jugar == "No"] <- 0
internet.play[internet_jugar == "Si"] <- 1

atm <- NULL
atm[cajeros == "No"] <- 0
atm[cajeros == "Si"] <- 1

cell <- NULL
cell[celular == "No"] <- 0
cell[celular == "Si"] <- 1

pc.own <- NULL
pc.own[PC_propia == "No"] <- 0
pc.own[PC_propia == "Si"] <- 1

tech <- internet.work+internet.play+atm+cell+pc.own+1

# table(randazzo)
# table(figueroa)
# table(alperovich)

info1 <- NULL
info1[as.numeric(randazzo) == 1 | as.numeric(randazzo) == 2 | as.numeric(randazzo) == 3] <- 1
info1[as.numeric(randazzo) == 4 | as.numeric(randazzo) == 5] <- 0

info2 <- NULL
info2[as.numeric(figueroa) == 1 | as.numeric(figueroa) == 2 | as.numeric(figueroa) == 3] <- 1
info2[as.numeric(figueroa) == 4 | as.numeric(figueroa) == 5] <- 0


info3 <- NULL
info3[as.numeric(alperovich) == 1 | as.numeric(alperovich) == 2 | as.numeric(alperovich) == 3] <- 1
info3[as.numeric(alperovich) == 4 | as.numeric(alperovich) == 5] <- 0

pol.info <- 1 + info1 + info2 + info3

#table(pol.info)

#___________________________________________________#
# Create and save dataframe for matching analysis 
#___________________________________________________#

datamatch <- data.frame(polling.place, EV, age.group, educ, male, tech, pol.info, white.collar, not.full.time, capable.auth, eval.voting, easy.voting, sure.counted, conf.secret, how.clean, speed, agree.evoting, eselect.cand)

In [3]:
########################################################################
# File name: "matching_script.R"
# Goal: Estimate effect of e-voting using matching
# Dependency: "datamatch.Rdata" 
########################################################################

attach (datamatch)

#Defining the outcome
outcomes <- datamatch[10:18]

#Defining the column names
outcomes.lbls <- names(outcomes)

#Finding the outcomes dimentions
n.outcomes <- dim(outcomes)[2]


# Drop observations with missing values in covariates
datamatch[, 10:18][is.na(datamatch[, 10:18]) == "TRUE"] <- 99999
datamatch <- na.omit(datamatch)

#__________________________ Replicating Table 2, pre-matching section __________________________#

EV <- datamatch[2]

covariates <- datamatch[c("age.group", "educ", "white.collar", "not.full.time", "male", "tech", "pol.info")]
covariate.lbls <- names(covariates)

n.covariates <- dim(covariates)[2]

tab2.pre <- matrix(NA, nrow = n.covariates, ncol = 4)
rownames(tab2.pre) <- covariate.lbls
colnames(tab2.pre) <- c("ev", "tv", "diff", "pvalue")

tab2.pre[, 1:2] <- cbind(apply(covariates[EV == 1,], 2, mean), apply(covariates[EV == 0,], 2, mean))
tab2.pre[, 3] <- tab2.pre[, 1] - tab2.pre[, 2]

for (i in c(1, 2, 6, 7)){
  tab2.pre[i, 4] <- ks.boot(covariates[, i][EV == 1], covariates[, i][EV == 0], nboots = 500)$ks.boot.pvalue
}
for (i in c(3, 4, 5)){
  tab2.pre[i, 4] <- prop.test(table(covariates[, i], EV$EV), n = apply(table(covariates[,i],EV$EV),2, sum))$p.value
}

#__________________________ Replicating Table 3, pre-matching section__________________________#

datamatch[datamatch == 99999] <- NA

outcomes.pre <- datamatch[10:18]

tab3.pre <- matrix(NA,nrow = n.outcomes,ncol = 5)
rownames(tab3.pre) <- outcomes.lbls
colnames(tab3.pre) <- c("N", "prop.ev", "prop.tv", "diff", "pvalue")

for (i in 1:n.outcomes) {
  tab3.pre[i, 1] <- length(na.omit(outcomes.pre[, i]))
  tab3.pre[i, 2:3] <- rev(prop.table(table(outcomes.pre[,i],datamatch$EV),2)[2,])*100
  tab3.pre[i, 4] <- tab3.pre[i, 2] - tab3.pre[i, 3]	
  tab3.pre[i, 5] <- prop.test(table(outcomes.pre[, i], datamatch$EV)[2, ], n = apply(table(outcomes.pre[, i], datamatch$EV), 2, sum))$p.value
}

datamatch[, 10:18][is.na(datamatch[, 10:18]) == "TRUE"] <- 99999

#__________________________ Matching (with MatchIt as impelemented in the original paper) ________________________#

print("Matching")

set.seed(36466)

m.out <- matchit(EV ~ age.group + I(age.group^2) + I(age.group^3) + age.group:educ + age.group:tech + educ + I(educ^2) +
                 tech + I(tech^2) + pol.info + educ:pol.info + age.group:pol.info + tech:pol.info + white.collar + not.full.time + male,
                  caliper = 0.05, data = datamatch, method = "nearest", verbose = "TRUE")


#___________________________________________________________________________#

# matched sample

datamatched <- match.data(m.out)
datamatched[datamatched == 99999] <- NA

#__________________________ Replicating Table 2, post-matching section _________________________#

EV.post <- datamatched[2]

covariates.post <- datamatched[, covariate.lbls]

tab2.post <- matrix(NA, nrow = n.covariates, ncol = 4)
rownames(tab2.post) <- covariate.lbls
colnames(tab2.post) <- c("ev", "tv", "diff", "pvalue")

tab2.post[, 1:2] <- cbind(apply(covariates.post[EV.post == 1, ], 2, mean), apply(covariates.post[EV.post == 0,], 2, mean))
tab2.post[, 3] <- tab2.post[, 1] - tab2.post[, 2]
for (i in c(1, 2, 6 , 7)){
  tab2.post[i, 4]<-ks.boot(covariates.post[,i][EV.post==1],covariates.post[,i][EV.post==0], nboots = 500)$ks.boot.pvalue
}
for (i in c(3, 4, 5)){
  tab2.post[i, 4] <- prop.test(table(covariates.post[, i], EV.post$EV), n = apply(table(covariates.post[, i], EV.post$EV),2 , sum))$p.value
}

tab2 <- cbind(tab2.pre, tab2.post)
tab2[3:5, c(1:3, 5:7)] <- tab2[3:5, c(1:3, 5:7)] * 100

### Table 2 ###
print ("#__________________________ Replicating Table 2_____________________________#")
print(tab2, digits = 4)

#__________________________ Replicating Table 3, post-matching section _________________________#

outcomes.post <- datamatched[10:18]

tab3.post <- matrix(NA, nrow = n.outcomes, ncol = 5)
rownames(tab3.post) <- outcomes.lbls
colnames(tab3.post) <- c("N", "prop.ev", "prop.tv", "diff", "pvalue")

for (i in 1:n.outcomes) {
  tab3.post[i, 1] <- length(na.omit(outcomes.post[, i]))
  tab3.post[i, 2:3] <- rev(prop.table(table(outcomes.post[, i], datamatched$EV), 2)[2, ]) * 100
  tab3.post[i, 4] <- tab3.post[i, 2] - tab3.post[i, 3]	
  tab3.post[i, 5] <- prop.test(table(outcomes.post[, i], datamatched$EV)[2, ], n = apply(table(outcomes.post[, i], datamatched$EV), 2, sum))$p.value
}

tab3 <- cbind(tab3.pre, tab3.post)

tab3 <- tab3[rev(order(tab3[, 9])), ]

### Table 3 ###
print ("#__________________________ Replicating Table 3_______________________________#")
print(tab3, digits = 4)


The following objects are masked _by_ .GlobalEnv:

    age.group, agree.evoting, capable.auth, conf.secret, easy.voting,
    educ, eselect.cand, EV, eval.voting, how.clean, male,
    not.full.time, pol.info, polling.place, speed, sure.counted, tech,
    white.collar




[1] "Matching"
Nearest neighbor matching... 


“Fewer control than treated units and matching without replacement.  Not all treated units will receive a match.  Treated units will be matched in the order specified by m.order: largest”


Matching Treated: 10%...20%...30%...40%...50%...60%...70%...80%...90%...100%...Done
[1] "#__________________________ Replicating Table 2_____________________________#"
                  ev     tv    diff  pvalue     ev     tv      diff pvalue
age.group      2.476  2.443  0.0324 0.56400  2.455  2.457 -0.001718 0.9980
educ           4.771  4.143  0.6285 0.00000  4.225  4.196  0.029210 0.4900
white.collar  30.254 27.586  2.6678 0.29288 29.381 27.320  2.061856 0.4744
not.full.time 27.714 33.498 -5.7839 0.01998 30.584 32.131 -1.546392 0.6133
male          49.654 49.097  0.5567 0.87472 48.969 49.828 -0.859107 0.8146
tech           4.184  3.910  0.2739 0.00000  4.012  3.931  0.080756 0.3140
pol.info       1.475  1.310  0.1643 0.00000  1.361  1.325  0.036082 0.6300
[1] "#__________________________ Replicating Table 3_______________________________#"
                 N prop.ev prop.tv   diff    pvalue    N prop.ev prop.tv   diff
eselect.cand  1388   83.84   53.42 30.428 1.237e-34 1102   81.69  

In [4]:
#__________________________ EXTENSION: GENETIC MATCHING _________________________#

#Define the treatment which is E-voting
Tr <- datamatch$EV

#Adding the covariants, we are matching on in one vector. 
#These are same covariants used in the original match, including the interactions between the variables.
X <- cbind(datamatch$age.group, I(datamatch$age.group^2), I(datamatch$age.group^3), I(datamatch$age.group:datamatch$educ), I(datamatch$age.group:datamatch$tech), datamatch$educ,
           I(datamatch$educ^2), datamatch$tech, I(datamatch$tech^2), datamatch$pol.info, I (datamatch$educ:datamatch$pol.info), I(datamatch$age.group:datamatch$pol.info),
            I(datamatch$tech:datamatch$pol.info), datamatch$white.collar, datamatch$not.full.time, datamatch$male)

#Run GENETIC MATCHING
genout <- GenMatch(Tr = Tr, X = X,
                   pop.size = 3000, wait.generations=40, max.generations=60,
                   ties = FALSE, print.level=0)

#Match based on the results of the GenMatch Weight.matrix=
match.output <- Match(X = X, Tr = Tr, Weight.matrix=genout, ties=FALSE)

#Create dataframe for the matched Data
mout <- rbind(datamatch[  match.output$index.treated,],datamatch[  match.output$index.control,])

#Check the Match Balance for the covariants without the interactions
print ("#__________________________ MatchBalance of  GENETIC MATCHING_______________________________#")
MatchBalance(EV ~ age.group + educ + tech + pol.info + white.collar + not.full.time + male, data=datamatch, match.out = match.output , nboots=500)

“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“numerical expression has 1475 elements: only the first used”
“number of rows of result is not a multiple of vector length (arg 5)”
Loading required namespace: rgenoud



[1] "#__________________________ MatchBalance of  GENETIC MATCHING_______________________________#"

***** (V1) age.group *****
                       Before Matching 	 	 After Matching
mean treatment........     2.4758 	 	     2.4758 
mean control..........     2.4433 	 	     2.4619 
std mean diff.........     2.4035 	 	     1.0279 

mean raw eQQ diff.....   0.065681 	 	   0.013857 
med  raw eQQ diff.....          0 	 	          0 
max  raw eQQ diff.....          1 	 	          1 

mean eCDF diff........   0.013296 	 	  0.0027714 
med  eCDF diff........   0.017038 	 	  0.0023095 
max  eCDF diff........   0.026754 	 	  0.0069284 

var ratio (Tr/Co).....      1.048 	 	     1.0043 
T-test p-value........    0.64508 	 	    0.21083 
KS Bootstrap p-value..      0.556 	 	      0.982 
KS Naive p-value......    0.96005 	 	          1 
KS Statistic..........   0.026754 	 	  0.0069284 


***** (V2) educ *****
                       Before Matching 	 	 After Matching
mean treatment........     4.

In [5]:
#__________________________ Table 2 After Using GenMatch_________________________#

#Renaming the matched data and removing empty values
gen_datamatched <-mout
gen_datamatched[gen_datamatched == 99999] <- NA

#Defining the treatment
EV.post <- gen_datamatched[2]

#Defining the covariates
covariates.post <- gen_datamatched[, covariate.lbls]

#Creating the table
tab2.post.gen <- matrix(NA, nrow = n.covariates, ncol = 4)
rownames(tab2.post.gen) <- covariate.lbls
colnames(tab2.post.gen) <- c("ev", "tv", "diff", "pvalue")

tab2.post.gen[, 1:2] <- cbind(apply(covariates.post[EV.post == 1, ], 2, mean), apply(covariates.post[EV.post == 0,], 2, mean))
tab2.post.gen[, 3] <- tab2.post.gen[, 1] - tab2.post.gen[, 2]
for (i in c(1, 2, 6 , 7)){
  tab2.post.gen[i, 4]<-ks.boot(covariates.post[,i][EV.post==1],covariates.post[,i][EV.post==0], nboots = 500)$ks.boot.pvalue
}
for (i in c(3, 4, 5)){
  tab2.post.gen[i, 4] <- prop.test(table(covariates.post[, i], EV.post$EV), n = apply(table(covariates.post[, i], EV.post$EV),2 , sum))$p.value
}

tab2.gen <- cbind(tab2.pre, tab2.post.gen)
tab2.gen[3:5, c(1:3, 5:7)] <- tab2.gen[3:5, c(1:3, 5:7)] * 100

### Table 2 GenMatch###
print ("#__________________________ Table 2 After Using GenMatch _______________________________#")
print(tab2.gen, digits = 4)

[1] "#__________________________ Table 2 After Using GenMatch _______________________________#"
                  ev     tv    diff  pvalue     ev     tv      diff pvalue
age.group      2.476  2.443  0.0324 0.56400  2.476  2.462  0.013857 0.9940
educ           4.771  4.143  0.6285 0.00000  4.771  4.751  0.020785 0.5240
white.collar  30.254 27.586  2.6678 0.29288 30.254 31.409 -1.154734 0.6396
not.full.time 27.714 33.498 -5.7839 0.01998 27.714 28.176 -0.461894 0.8724
male          49.654 49.097  0.5567 0.87472 49.654 51.732 -2.078522 0.4139
tech           4.184  3.910  0.2739 0.00000  4.184  4.191 -0.006928 0.9380
pol.info       1.475  1.310  0.1643 0.00000  1.475  1.461  0.013857 0.8820


In [6]:
#__________________________ Table 3 After Using GenMatch _________________________#

#Defining the outcome vars
outcomes.post <- gen_datamatched[10:18]

tab3.post.gen <- matrix(NA, nrow = n.outcomes, ncol = 5)
rownames(tab3.post.gen) <- outcomes.lbls
colnames(tab3.post.gen) <- c("N", "prop.ev", "prop.tv", "diff", "pvalue")

for (i in 1:n.outcomes) {
  tab3.post.gen[i, 1] <- length(na.omit(outcomes.post[, i]))
  tab3.post.gen[i, 2:3] <- rev(prop.table(table(outcomes.post[, i], gen_datamatched$EV), 2)[2, ]) * 100
  tab3.post.gen[i, 4] <- tab3.post.gen[i, 2] - tab3.post.gen[i, 3]	
  tab3.post.gen[i, 5] <- prop.test(table(outcomes.post[, i], gen_datamatched$EV)[2, ], n = apply(table(outcomes.post[, i], gen_datamatched$EV), 2, sum))$p.value
}

tab3.gen <- cbind(tab3.pre, tab3.post.gen)

tab3.gen <- tab3.gen[rev(order(tab3.gen[, 9])), ]

### Table 3  After Using GenMatch###
print ("#__________________________ Table 3 After Using GenMatch _______________________________#")
print(tab3.gen, digits = 4)


[1] "#__________________________ Table 3 After Using GenMatch _______________________________#"
                 N prop.ev prop.tv   diff    pvalue    N prop.ev prop.tv
eselect.cand  1388   83.84   53.42 30.428 1.237e-34 1631   83.84   57.49
eval.voting   1460   46.33   21.30 25.035 1.833e-22 1713   46.33   22.83
easy.voting   1469   33.64   11.53 22.111 5.420e-22 1726   33.64   10.30
agree.evoting 1409   84.14   62.44 21.705 2.864e-20 1657   84.14   68.47
how.clean     1284   57.97   40.99 16.980 2.561e-09 1512   57.97   45.60
sure.counted  1418   86.35   77.02  9.332 7.444e-06 1660   86.35   76.85
capable.auth  1416   85.14   76.25  8.889 2.954e-05 1670   85.14   76.29
speed         1443   84.06   80.85  3.209 1.298e-01 1708   84.06   78.13
conf.secret   1431   77.11   84.53 -7.417 6.506e-04 1686   77.11   87.38
                 diff    pvalue
eselect.cand   26.349 2.735e-31
eval.voting    23.499 2.675e-24
easy.voting    23.342 2.172e-31
agree.evoting  15.669 1.009e-13
how.clean     