<a href="https://colab.research.google.com/github/gabpiazza/Big-Bang/blob/main/Analysis_full_panel_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
## 1.1 Install & Load packages --------------------------------------------------------

# Packages this notebook depends on.
need <- c("tidyverse", "did", "progress")

# system.file() returns "" for a package that is not installed. This is the
# check recommended by the installed.packages() help page, which warns against
# scanning the whole library just to test for a named package.
have <- vapply(
  need,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1),
  USE.NAMES = FALSE
)
if (any(!have)) install.packages(need[!have]) # install only what is missing
invisible(lapply(need, library, character.only = TRUE)) # attach every package

# Strongly prefer fixed notation over scientific notation when printing numbers.
options(scipen = 999)

In [9]:
# Read the serialized panel from the Colab working directory and list its columns.
full_panel <- readRDS("/content/full_panel")
names(full_panel)


In [2]:
# Infix complement of %in%: TRUE for each element of `x` that is absent from `table`.
`%notin%` <- function(x, table) !(x %in% table)

#' Estimate group-time ATTs for one outcome and aggregate them
#'
#' Drops the excluded first-order years from `dataset`, runs `did::att_gt()`
#' with inverse-probability weighting and a 3000-iteration bootstrap, and then
#' computes the dynamic, simple, group and calendar aggregations with
#' `did::aggte()`.
#'
#' @param y_var Name (string) of the outcome column.
#' @param control_group_type Passed to `att_gt(control_group = )`; the callers
#'   in this file use "nevertreated" and "notyettreated".
#' @param dataset Data frame holding `bvd_id_numeric`, `year`, `first_order`,
#'   `first_order_2`, the outcome column and every covariate.
#' @param covariates Optional character vector of right-hand-side terms for the
#'   covariate formula. `NULL` or an empty vector means no covariates (`~ 1`).
#' @param exclude_years Values of `first_order` whose rows are dropped before
#'   estimation. Defaults to the global `years_few_observations`, which is the
#'   value the function previously read implicitly.
#' @return A list with elements `main` (the `att_gt` result), `dynamic`,
#'   `simple`, `group`, `calendar` (the `aggte` results), `sim_ratio` (overall
#'   simple ATT divided by its standard error) and `dynamic_plot_data` (the
#'   data behind the dynamic plot, with `lower`/`upper` bounds added).
perform_analysis <- function(y_var, control_group_type, dataset, covariates = NULL,
                             exclude_years = years_few_observations) {
  if (!is.null(covariates) && length(covariates) > 0) {
    xformla <- as.formula(paste("~", paste(covariates, collapse = " + ")))
  } else {
    xformla <- as.formula("~ 1")
  }

  # Validate before touching the data: the filter below already needs
  # `first_order`, att_gt() needs `first_order_2` as the group variable, and the
  # covariate formula needs its variables. all.vars() is used so that covariate
  # terms such as "log(x)" are checked by their underlying column name.
  expected_columns <- unique(c(
    "bvd_id_numeric", "year", y_var, "first_order", "first_order_2",
    all.vars(xformla)
  ))
  missing_columns <- setdiff(expected_columns, colnames(dataset))
  if (length(missing_columns) > 0) {
    stop(paste("The following columns are missing:", paste(missing_columns, collapse = ", ")))
  }

  data_filtered <- dataset %>%
    filter(!(first_order %in% exclude_years))

  cs_results <- att_gt(
    yname = y_var,
    tname = "year",
    idname = "bvd_id_numeric",
    gname = "first_order_2",
    xformla = xformla,
    data = data_filtered,
    control_group = control_group_type,
    est_method = "ipw",
    clustervars = "bvd_id_numeric",
    pl = TRUE,
    cores = 8,
    bstrap = TRUE,
    panel = TRUE,
    biters = 3000,
    allow_unbalanced_panel = TRUE
  )

  # Aggregate the group-time effects in the four ways offered by aggte().
  # The dynamic aggregation is restricted to event times -10..10.
  cs_results.dyn <- aggte(cs_results, type = "dynamic", na.rm = TRUE, max_e = 10, min_e = -10, cband = FALSE)
  cs_results.sim <- aggte(cs_results, type = "simple", na.rm = TRUE)
  cs_results.grp <- aggte(cs_results, type = "group", na.rm = TRUE)
  cs_results.cal <- aggte(cs_results, type = "calendar", na.rm = TRUE)

  # Overall simple ATT relative to its standard error.
  sim_ratio <- cs_results.sim$overall.att / cs_results.sim$overall.se

  # Pull the data behind the dynamic plot and add interval bounds built from the
  # `c` column it carries (presumably the critical value; confirm against ggdid).
  cs_results_plot_dyn <- ggdid(cs_results.dyn)
  cs_results_data_dyn <- cs_results_plot_dyn$data
  cs_results_data_dyn$lower <- cs_results_data_dyn$att - cs_results_data_dyn$c * cs_results_data_dyn$att.se
  cs_results_data_dyn$upper <- cs_results_data_dyn$att + cs_results_data_dyn$c * cs_results_data_dyn$att.se

  # Bundle everything so callers can pick the pieces they need.
  results_list <- list(
    main = cs_results,
    dynamic = cs_results.dyn,
    simple = cs_results.sim,
    group = cs_results.grp,
    calendar = cs_results.cal,
    sim_ratio = sim_ratio,
    dynamic_plot_data = cs_results_data_dyn
  )

  return(results_list)
}


#' Run perform_analysis() over every outcome, control group and covariate set
#'
#' For each outcome in `y_vars`, both control-group definitions, and every
#' subset of the four candidate covariates (including the empty set, labelled
#' "none"), calls `perform_analysis()` and stores the result under the key
#' "<outcome>_<control group>_<covariate label>".
#'
#' @param y_vars Character vector of outcome column names.
#' @param dataset Data frame forwarded unchanged to `perform_analysis()`.
#' @return A named list with one `perform_analysis()` result per combination.
run_all_analyses_multiple_combinations <- function(y_vars, dataset) {
  comparison_groups <- c("nevertreated", "notyettreated")

  candidate_covariates <- c(
    "pre_log_operating_revenue_turnover",
    "pre_log_ebitda",
    "pre_log_fixed_assets",
    "age"
  )

  # Every non-empty subset, ordered by size and then in combn() order.
  subsets_by_size <- lapply(
    seq_along(candidate_covariates),
    function(k) combn(candidate_covariates, k, simplify = FALSE)
  )
  covariate_subsets <- unlist(subsets_by_size, recursive = FALSE)
  names(covariate_subsets) <- vapply(
    covariate_subsets, paste, character(1), collapse = "_"
  )

  # The no-covariate specification comes first; its value stays NULL.
  covariate_sets <- c(list("none" = NULL), covariate_subsets)
  set_labels <- names(covariate_sets)

  n_runs <- length(y_vars) * length(comparison_groups) * length(covariate_sets)
  pb <- progress_bar$new(total = n_runs, format = "[:bar] :percent :elapsedfull")

  fitted <- list()
  for (outcome in y_vars) {
    for (group_type in comparison_groups) {
      for (set_idx in seq_along(covariate_sets)) {
        key <- paste(outcome, group_type, set_labels[[set_idx]], sep = "_")
        fitted[[key]] <- perform_analysis(
          outcome, group_type, dataset, covariate_sets[[set_idx]]
        )
        pb$tick() # one tick per completed specification
      }
    }
  }

  fitted
}

In [10]:
# att_gt() in perform_analysis() uses `bvd_id_numeric` as the unit id, so map
# each distinct bvd_id_number to the numeric code of its factor level.
full_panel$bvd_id_numeric <- as.numeric(as.factor(full_panel$bvd_id_number))

# Subsamples by the first_order_tech flag (presumably high- vs low-tech; confirm).
full_panel_ht <- filter(full_panel, first_order_tech == 1)
full_panel_lt <- filter(full_panel, first_order_tech == 0)

# Subsamples by number of orders: exactly one vs more than one.
full_panel_one <- filter(full_panel, total_orders == 1)
full_panel_multiple <- filter(full_panel, total_orders > 1)

# Rows whose first-year subproject is "LHC" or "HL".
full_panel_large_projects <- filter(full_panel, subproject_1_first_year %in% c("LHC", "HL"))

# Subsamples split at a first-order amount of 100000.
full_panel_less_than_100k <- filter(full_panel, first_order_amount <= 100000)
full_panel_greater_than_100k <- filter(full_panel, first_order_amount > 100000)

# Rows flagged with SME_status equal to 1.
full_panel_SME <- filter(full_panel, SME_status == 1)

In [11]:
# Outcome columns to analyse. `vars` holds the same vector and is kept because
# the original cell defined both names.
vars <- c(
  "probability_publications",
  "probability_applications",
  "log_applications",
  "log_application_stock",
  "log_weighted_patent_apps",
  "log_publications",
  "log_publication_stock"
)
y_vars <- vars

# first_order values that perform_analysis() removes before estimation
# (going by the name, years with too few observations).
years_few_observations <- c(1995, 1996, 2008, 2020)

In [None]:
# Run every outcome / control-group / covariate-set combination on the SME
# subsample (rows with SME_status == 1) and keep the named list of results.
cs_SME_results <- run_all_analyses_multiple_combinations(y_vars, full_panel_SME)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
“No units in group 1998 in time period 1992”
“No units in group 1998 in time period 1991”
“No units in group 1998 in time period 1993”
“No units in group 1998 in time period 1992”
“No units in group 1998 in time period 1993”
“No units in group 1999 in time period 1991”
“No units in group 1999 in time period 1990”
“No units in group 1999 in time period 1991”
“No units in group 2000 in time period 1991”
“No units in group 2000 in time period 1990”
“No units in group 2000 in time period 1992”
“No units in group 2000 in time period 1991”
“No units in group 2000 in time period 1993”
“No units in group 2000 in time period 1992”
“No units in group 2000 in time period 1994”
“No units in group 2000 in time period 1993”
“No units in group 2000 in time period 1994”
“No units in group 2001 in time period 1991”
“No units in group 2001 in time period 1990”
“No units in group 2001 in time period 1991”
“No units in group 2002 in time per