In [1]:
# Clear the R environment and set working directory
rm(list = ls())
setwd("/Users/jan/Dropbox/UP_EPQM/2222/MA/powerlinemonsters")
path <- "/Users/jan/Dropbox/UP_EPQM/2222/MA/powerlinemonsters"

In [8]:
# Install and load additional packages needed
install.packages("did")
library(did)
library(rio)
library(ggplot2)
library(ggpubr)
library(cowplot)
library(hash)
.libPaths()


The downloaded binary packages are in
	/var/folders/wm/mtrphj0s0msgrmyshf3hgk740000gn/T//RtmpoK4EOv/downloaded_packages


In [9]:
# read in the data set
btw <-import(sprintf("%s/data/btw_treat.csv", path))

In [20]:
colnames(btw)

In [10]:
# Filter for first and second vote
erst <- subset(btw, first_vote == 1)
zweit <- subset(btw, second_vote == 1)

In [13]:
parties <- c('Union', 'SPD', 'FDP', 'Linke', 'Grüne', 'Andere')
treatments <- hash('treatment_0'='Direct Line', 'treatment_15'='Within 15km', 'treatment_30'='Within 30km', 'treatment_50'='Within 50km')
keys <- keys(treatments)
results  <- list()
for(party in parties){
  figures  <- list()
  for(treatment in keys){
    result <- att_gt(yname = party,
                  gname = treatment,
                  idname = 'AGS',
                  panel = TRUE,
                  tname = 'year',
                  #xformla = ~ 1, #east, # + pop_density, #unemployed + avg_age + pop_density + female,
                  data = erst,
                  est_method = 'reg',
                  anticipation = 0,
                  control_group = 'nevertreated',
                  clustervars = c('AGS', 'state_id'),
                  bstrap = TRUE,
                  cband = TRUE,
                  allow_unbalanced_panel = TRUE,
                  
    )
    att <- aggte(result, type = "simple", bstrap = TRUE, clustervars = c('AGS', 'state_id'))

    figures[[length(figures)+1]] <- ggdid(result, 
              ylim = c(floor(min(result$att - result$se * 2.345 - 1)), ceiling(max(result$att + result$se * 2.345 + 1))),
              title = (sprintf('%s', treatments[[treatment]])),
              ncol = 3, lab_size = 10
              ) +
              labs(caption = sprintf('Overall ATT (SE): %.3f (%.3f) \n 
              P-value for pre-test of parallel trends assumption: %.3f'
              , att$overall.att, att$overall.se, result$Wpval)) +
              theme(plot.caption = element_text(hjust=0.5))
    
    results[[length(results)+1]] <- result
  }
  arranged_fig <- ggarrange(plotlist=figures, nrow = 4, ncol = 1, common.legend = TRUE, legend = 'bottom')
  final_fig <- annotate_figure(arranged_fig,
                  top = text_grob(sprintf("Effect on %s's vote share", party), face = "bold", size = 14),
                  bottom = text_grob('Control Group: Never Treated. Estimation Method: Doubly Robust', size = 10),)
  ggsave(sprintf('%s.png', party), plot = final_fig, path = sprintf('%s/figures/R/Group/', path), units = 'cm', width = 21, height = 29.7, dpi="print")
  print(party)
}

[1] "Union"
[1] "SPD"
[1] "FDP"
[1] "Linke"
[1] "Grüne"
[1] "Andere"


In [20]:
('ATT(SE): %.3f (%.3f) ATT(SE): %.3f (%.3f) ATT(SE): %.3f (%.3f) \n +
              P-value for pre-test of parallel trends assumption: %.3f', + 
              att$att.egt[1], att$se.egt[1], att$att.egt[2], att$att.se[2], att$att.egt[3], att$se.egt[3], result$Wpval)


Call:
aggte(MP = result, type = "group", bstrap = TRUE, clustervars = c("AGS", 
    "state_id"))

Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 


Overall summary of ATT's based on group/cohort aggregation:  
    ATT    Std. Error     [ 95%  Conf. Int.] 
 -1.583        2.0292    -5.5601      2.3941 


Group Effects:
 Group Estimate Std. Error [95% Simult.  Conf. Band] 
  2005  -1.5059     1.7572       -5.0305      2.0188 
  2010  -2.1039     1.6440       -5.4016      1.1937 
  2014  -1.0626     3.3286       -7.7393      5.6140 
---
Signif. codes: `*' confidence band does not cover 0

Control Group:  Never Treated,  Anticipation Periods:  0
Estimation Method:  Outcome Regression

In [25]:
print(results)
    # p = out$Wpval
    # summary(out)

    #es <- aggte(out, type = 'dynamic')
    # summary(es)
    #ggdid(es, ylim = c(floor(min(es$att.egt - es$se.egt * 2.345 - 1)), ceiling(max(es$att.egt + es$se.egt * 2.345 + 1))))
    #ggsave(sprintf('%s/figures/R/ES/ES_%s_%s.png', path, party, treatment))

[[1]]

Call:
att_gt(yname = party, tname = "year", idname = "AGS", gname = treatment, 
    xformla = ~1, data = erst, panel = TRUE, allow_unbalanced_panel = TRUE, 
    control_group = "nevertreated", anticipation = 0, bstrap = TRUE, 
    cband = TRUE, clustervars = c("AGS", "state_id"), est_method = "dr")

Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 

Group-Time Average Treatment Effects:
 Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
  2005 1998  -0.7387     1.2891       -3.8918      2.4144  
  2005 2002  -1.4835     0.8076       -3.4590      0.4919  
  2005 2005   0.7307     0.9250       -1.5319      2.9934  
  2005 2009   2.5589     0.9255        0.2952      4.8225 *
  2005 2013   1.9219     1.1646       -0.9266      4.7704  
  2005 2017   3.8963     1.2602    