In [1]:
import pandas as pd
import numpy as np

In [3]:
data_string = "broockman_kalla_replication_data.dta"
df = pd.read_stata(data_string)

In [9]:
# bysort hh_id: egen scale_t0_hh_avg = mean(scale_for_blocking_t0)

df[["hh_id","id","scale_for_blocking_t0"]]

Unnamed: 0,hh_id,id,scale_for_blocking_t0
0,,60291,
1,,52944,
2,,27568,
3,,41146,
4,,17510,
...,...,...,...
68373,june6_9943,68102,1.515273
68374,june6_9971,42009,0.626062
68375,june6_9971,42061,0.369057
68376,june6_9976,25963,0.620112


In [None]:
### Estimate the treatment effect for ALL canvassers
t1.all <- est.ate(data$trans.tolerance.dv.t1)
t2.all <- est.ate(data$trans.tolerance.dv.t2)
t3.all <- est.ate(data$trans.tolerance.dv.t3)
t4.all <- est.ate(data$trans.tolerance.dv.t4)

### Estimate the treatment effect by trans and cis canvassers
###At 3 day survey
t1.trans <- est.ate(data$trans.tolerance.dv.t1,
                    data$canvasser_trans == 1)
t1.cis <- est.ate(data$trans.tolerance.dv.t1,
                  data$canvasser_trans == 0)

###At 3 week survey
t2.trans <- est.ate(data$trans.tolerance.dv.t2,
                    data$canvasser_trans == 1)
t2.cis <- est.ate(data$trans.tolerance.dv.t2,
                  data$canvasser_trans == 0)

###At 6 week survey
t3.trans <- est.ate(data$trans.tolerance.dv.t3,
                    data$canvasser_trans == 1)
t3.cis <- est.ate(data$trans.tolerance.dv.t3,
                  data$canvasser_trans == 0)

###At 3 month survey
t4.trans <- est.ate(data$trans.tolerance.dv.t4,
                    data$canvasser_trans == 1)
t4.cis <- est.ate(data$trans.tolerance.dv.t4,
                  data$canvasser_trans == 0)


### Make DF of summary stats
summary.stats.df <- as.data.frame(rbind(t1.all, t2.all,
                          t3.all, t4.all,
                          t1.trans, t1.cis,
                          t2.trans, t2.cis,
                          t3.trans, t3.cis,
                          t4.trans, t4.cis),
                          stringsAsFactors = FALSE)

### Change from strings back to numeric, and remove t- and p-values, which are not used.
summary.stats.df <- summary.stats.df[,1:2]
summary.stats.df[,1] <- as.numeric(summary.stats.df[,1])
summary.stats.df[,2] <- as.numeric(summary.stats.df[,2])

### Better variable names
names(summary.stats.df) <- c("point.estimate", "se")

### Map row names of summary.stats.df into days
unique.days <- c(3, 3*7, 6*7, 12*7)
summary.stats.df$days <- unique.days[as.numeric(substr(row.names(summary.stats.df), 2, 2))]

### Read canvasser group from row names
canvasser.label.map <- list(all = "All",
                            tra = "Transgender/Gender\nNon-Conforming Only",
                            cis = "Non-Transgender Only")
summary.stats.df$Canvasser <- factor(as.character(
  canvasser.label.map[substr(row.names(summary.stats.df), 4, 6)]
  ))

### X position of different canvasser groups
summary.stats.df$xpos <- with(summary.stats.df, days + as.numeric(Canvasser) -
                                mean(as.numeric(Canvasser)))

### Point estimate Y
summary.stats.df$point.estimate.y <- summary.stats.df$point.estimate
  ### Fix text overlap of point estimate labels.
  summary.stats.df$point.estimate.y[9] <- summary.stats.df$point.estimate.y[9] + .003
  summary.stats.df$point.estimate.y[3] <- summary.stats.df$point.estimate.y[3] - .003

### Compute CIs
summary.stats.df$se.high <- summary.stats.df$point.estimate + summary.stats.df$se
summary.stats.df$se.low <- summary.stats.df$point.estimate - summary.stats.df$se
summary.stats.df$ci.high <- summary.stats.df$point.estimate + summary.stats.df$se * 1.96
summary.stats.df$ci.low <- summary.stats.df$point.estimate - summary.stats.df$se * 1.96

summary.stats.df$point.estimate.label <- paste0(round(summary.stats.df$point.estimate,
                                                      2)," SDs")

g <- ggplot(summary.stats.df,
            aes(x=xpos, y=point.estimate,
                group=Canvasser, color=Canvasser)) +
  theme_classic() +
  ### CIs
  geom_linerange(aes(ymin=se.low, ymax=se.high), lwd=1) +
  geom_linerange(aes(ymin=ci.low, ymax=ci.high)) +
  ### Point estimate points
  geom_point(color="black") +
  ### Point estimate markers
  annotate("text", label=summary.stats.df$point.estimate.label,
           x = summary.stats.df$xpos + 6.3,
           y = summary.stats.df$point.estimate.y,
           size = 3) +
  ### Canvassing treatment line / label
  geom_vline(xintercept = 0, linetype = "dashed") +
  annotate("text", label = "Canvassing Treatment",
           x = -1.5, y = .3, size = 3.5, angle = 90) +
  ### Day labels
  annotate("text",
           label = c("+3 Days", "+3 Weeks", "+6 Weeks", "+3 Months"),
           x = unique.days, y = -.04,
           colour = "black", size = 3) +
  ### Y axis
  ylab("Effect on Transgender Tolerance Scale, in Standard Deviations") + 
  scale_y_continuous() +
  ### X axis
  xlab("Days After Canvassing Treatment") +
  geom_hline(yintercept = 0) +
  ### Overall Title and Legend
  ggtitle("Differences Between Treatment and Placebo") +
  guides(fill=FALSE) +
  theme(legend.position = "bottom")