In [1]:
from IPython.core.interactiveshell import InteractiveShell
import georasters as gr
import rasterio.mask
import rasterio
import geopandas as gpd
import os
import sys
import glob
import re
import itertools
import collections
import multiprocessing
import requests
import pprint
import pickle
from pathlib import Path
from joblib import delayed, Parallel

# pyscience imports
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from plotnine import *
import rioxarray as rxr
from rasterio.merge import merge
import os

  @jit
  @jit
  @jit
  @jit


In [2]:
# Location of the by-state median-filter programs on the cluster scratch space.
# `root` is the pathlib handle; `main_path` is the same location as a plain
# string, which is what the f-string templates below interpolate.
root = Path("/scratch/gpfs/ar8787/groupdata2/india_forest_land/C_Programs/_vcf_10_augsyth/median_filter/by_state")
main_path = root.as_posix()

In [3]:
# Number of per-state jobs to generate; the generator loop iterates over
# np.arange(0, total_files) and uses each value as the state id.
total_files: int = 35

In [8]:
# Village-level job generator: renders one standalone R script per state id.
# Each script loads the village panel, keeps one state, splits it at the state's
# 2010 median of `per_treecover`, fits augsynth::multisynth separately on the
# above- and below-median subsamples, and saves the fit, its summary and an
# event-study plot for each subsample.
# NOTE(review): np.arange(0, total_files) emits state_id_code 0 .. total_files - 1;
# confirm that the state ids in the data really start at 0 rather than 1.
n_sample = 1  # 1-based job number used in the generated file names (sate_<n>)
for i in np.arange(0, total_files):

    # f-string template: `{i}` is the only interpolated field, so the R code
    # below must not contain any other curly braces.
    # The observation counts drawn as grey bars (n_df) are taken from the same
    # subsample that was estimated (df2_above / df2_below), matching the
    # block-level templates, instead of from the full state panel df2.
    text1 = f"""
library(augsynth)
library(data.table)
library(dplyr)
library(fixest)
library(ggplot2)


# Getting the workign directory 
shell_root <- "/scratch/gpfs/ar8787/groupdata2/india_forest_land" 
dbox_root <- "~/Dropbox/india_forest_land" 
root <- shell_root
setwd( root )

### Geting State names
state_names <- fread( "A_MicroData/state_names_shrug.csv" )
state_id_code <- {i}
st_name <- state_names[ state_names$state_id == state_id_code ]$state_name


# Getting the num of files
df1 <- fread("A_MicroData/data_sysdif.csv")
df1 <- df1[df1$pc11_state_id == state_id_code ]
df_year_2010 <- df1[df1$year == 2010]


# Calculate the median of the 'value' column
median_value <- median(df_year_2010$per_treecover)
# Generate a new variable 'above_median'; it will be TRUE if 'value' is greater than the median
df_year_2010[, above_median_all := (per_treecover > median_value)*1 ]
df_year_2010 <- df_year_2010[, .(shrid, above_median_all )]

df2 <- df1 %>% left_join( df_year_2010 )
df2_above <- df2[df2$above_median_all == 1]
df2_below <- df2[df2$above_median_all == 0]



#-------------------------------------------------------------------------------
# above the median
#-------------------------------------------------------------------------------

# Estimatio Result
ppool_syn <- multisynth(per_treecover ~ post_ror_data_entry, 
                        vill_id, year, df2_above, time_cohort = TRUE, n_leads = 9 )

filename <- paste0( "above_median_", state_id_code, ".RDS")
path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
saveRDS( ppool_syn, path )


# Get Summary result
ppool_syn_time_summ <- summary( ppool_syn )
filename <- paste0( "above_median_", state_id_code, "_sum.RDS")
path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
saveRDS( ppool_syn_time_summ,  path )




# Get the plot
est_df <- ppool_syn_time_summ$att
est_df <- est_df[est_df$Level == "Average", ]
est_df <- est_df[apply(!is.na(est_df), 1, all),]


n_df <- df2_above[t2ev_ror_data_entry %in% est_df$Time][, .N, by = t2ev_ror_data_entry]
n_df$Time <- n_df$t2ev_ror_data_entry
setorder(n_df,Time )
setorder(est_df,Time )



# Getting factors
rect.length <- (max(est_df[,"Estimate"], na.rm = TRUE) - 
                  min(est_df[,"Estimate"], na.rm = TRUE))/2
scale_fac <- 0.8 * rect.length / ( max(n_df[,"N"]) )
min_y_lim <- round(min(est_df$lower_bound), 2) * 1.05
max_y_lim <- round( max(est_df$upper_bound), 2) * 0.95

est_df[,"xmin"] <- est_df[,"Time"] - 0.2
est_df[,"xmax"] <- est_df[,"Time"] + 0.2
est_df[,"ymin"] <- min_y_lim
est_df[,"ymax"] <- est_df[,"ymin"] + ( n_df[,"N"] * scale_fac )


# Getting the plot
p <- ggplot(est_df, aes(x = Time, y = Estimate)) + 
  geom_line(color = "black") +
  geom_point(color = "blue") +
  geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), 
                width = 0.2, color = "red") +
  geom_rect(data = est_df, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), 
            fill = "grey70", colour = "grey69", alpha = 0.4, size = 0.2) + 
  coord_cartesian(ylim = c( min_y_lim, max_y_lim ) ) +
  scale_y_continuous(name = "Estimate", 
                     sec.axis = sec_axis(~(.+ (-1*min_y_lim)) * (1/scale_fac), 
                                         name = "Number of Observations")) + 
  labs(y = "ATT", x = "Time to Event", title = "Ror Data Entry at Block Level if more than 50% Villages are Treated") + 
  geom_vline(aes(xintercept = 0), linetype = "dashed", color = "gray") +  # Dashed vertical line at x = 5
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "gray") + 
  ggtitle( st_name ) +
  theme_minimal()


filename <- paste0( "above_median_", state_id_code, "_plot.png")
path <- file.path("F_Figures/_vcf_10_augsyth/median_filter/by_state", filename)
ggsave(filename = path, plot = p, width = 6, height = 4, dpi = 300)




#-------------------------------------------------------------------------------
# Below the median
#-------------------------------------------------------------------------------


# Estimatio Result
ppool_syn <- multisynth(per_treecover ~ post_ror_data_entry, 
                        vill_id, year, df2_below, time_cohort = TRUE, n_leads = 9 )

filename <- paste0( "below_median_", state_id_code, ".RDS")
path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
saveRDS( ppool_syn, path )


# Get Summary result
ppool_syn_time_summ <- summary( ppool_syn )
filename <- paste0( "below_median_", state_id_code, "_sum.RDS")
path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
saveRDS( ppool_syn_time_summ,  path )




# Get the plot
est_df <- ppool_syn_time_summ$att
est_df <- est_df[est_df$Level == "Average", ]
est_df <- est_df[apply(!is.na(est_df), 1, all),]


n_df <- df2_below[t2ev_ror_data_entry %in% est_df$Time][, .N, by = t2ev_ror_data_entry]
n_df$Time <- n_df$t2ev_ror_data_entry
setorder(n_df,Time )
setorder(est_df,Time )



# Getting factors
rect.length <- (max(est_df[,"Estimate"], na.rm = TRUE) - 
                  min(est_df[,"Estimate"], na.rm = TRUE))/2
scale_fac <- 0.8 * rect.length / ( max(n_df[,"N"]) )
min_y_lim <- round(min(est_df$lower_bound), 2) * 1.05
max_y_lim <- round( max(est_df$upper_bound), 2) * 0.95

est_df[,"xmin"] <- est_df[,"Time"] - 0.2
est_df[,"xmax"] <- est_df[,"Time"] + 0.2
est_df[,"ymin"] <- min_y_lim
est_df[,"ymax"] <- est_df[,"ymin"] + ( n_df[,"N"] * scale_fac )


# Getting the plot
p <- ggplot(est_df, aes(x = Time, y = Estimate)) + 
  geom_line(color = "black") +
  geom_point(color = "blue") +
  geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), 
                width = 0.2, color = "red") +
  geom_rect(data = est_df, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), 
            fill = "grey70", colour = "grey69", alpha = 0.4, size = 0.2) + 
  coord_cartesian(ylim = c( min_y_lim, max_y_lim ) ) +
  scale_y_continuous(name = "Estimate", 
                     sec.axis = sec_axis(~(.+ (-1*min_y_lim)) * (1/scale_fac), 
                                         name = "Number of Observations")) + 
  labs(y = "ATT", x = "Time to Event", title = "Ror Data Entry at Block Level if more than 50% Villages are Treated") + 
  geom_vline(aes(xintercept = 0), linetype = "dashed", color = "gray") +  # Dashed vertical line at x = 5
  geom_hline(aes(yintercept = 0), linetype = "dashed", color = "gray") + 
  ggtitle( st_name ) +
  theme_minimal()


filename <- paste0( "below_median_", state_id_code, "_plot.png")
path <- file.path("F_Figures/_vcf_10_augsyth/median_filter/by_state", filename)
ggsave(filename = path, plot = p, width = 6, height = 4, dpi = 300)



"""
    
    # Saving R scripts
    final_code = text1
    filename = f"sate_{n_sample}"
    with open( fr"{main_path}/batch/{filename}.R", "w") as f:
        f.write( f"{final_code}")
    
    
    text = f"""#!/bin/bash
#SBATCH --nodes=1                # node count
#SBATCH --ntasks=1               # total number of tasks across all nodes
#SBATCH --cpus-per-task=1        # cpu-cores per task (>1 if multi-threaded tasks)
#SBATCH --mem-per-cpu=150G         # memory per cpu-core (4G per cpu-core is default)
#SBATCH --time=08:00:00          # total run time limit (HH:MM:SS)
#SBATCH --mail-user=futurolos9@gmail.com
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --output={filename}.log

module purge
R CMD BATCH {main_path}/batch/{filename}.R
"""
    
    with open( f"{main_path}/batch/{filename}.sbatch", "w") as f:
            f.write( f"{text}")

    n_sample = n_sample + 1

In [5]:
# Build anzony.sh: a shell snippet that changes into the batch directory and
# submits every generated village-level job with `sbatch`.
submit_header = f"""
# submit
cd {main_path}/batch
"""

# One `sbatch` line per job number 1 .. n_sample - 1 (n_sample is one past the
# last job written by the generator loop). The whitespace-only line that
# follows each command is kept so the written file is unchanged.
submit_body = "".join(
    f"""
sbatch sate_{job_no}.sbatch
                    """
    for job_no in range(1, n_sample)
)

with open(f"{main_path}/anzony.sh", "w") as submit_file:
    submit_file.write(submit_header + submit_body)


## Block Any


In [4]:
# Location of the by-state median-filter programs on the cluster scratch space.
# `root` is the pathlib handle; `main_path` is the same location as a plain
# string, which is what the f-string templates below interpolate.
root = Path("/scratch/gpfs/ar8787/groupdata2/india_forest_land/C_Programs/_vcf_10_augsyth/median_filter/by_state")
main_path = root.as_posix()

In [5]:
# Number of per-state jobs to generate; the generator loop iterates over
# np.arange(0, total_files) and uses each value as the state id.
total_files: int = 35

In [7]:
# "Block any" job generator: builds, for each state id, the two halves of an R
# script (text1 + text2) that runs the above/below-median augsynth::multisynth
# analysis at block level with the `post_ror_data_entry_block_any` treatment.
# NOTE(review): np.arange(0, total_files) emits state_id_code 0 .. total_files - 1;
# confirm that the state ids in the data really start at 0 rather than 1.
# n_sample is the 1-based job number used in the generated file names.
n_sample = 1
for i in np.arange(0, total_files):

    # text1 (f-string): data preparation. `{i}` is the only interpolated field,
    # so this part must not contain any other curly braces. The R code loads
    # the block-level panel, keeps one state, flags blocks above the state's
    # 2010 median of per_treecover, derives the time-to-event variable from
    # the first year with post_ror_data_entry_block_any == 1, and splits the
    # panel into df2_above / df2_below.
    text1 = f"""

rm(list = ls())
library(augsynth)
library(data.table)
library(dplyr)
library(fixest)
library(ggplot2)


# Getting the workign directory 
shell_root <- "/scratch/gpfs/ar8787/groupdata2/india_forest_land" 
dbox_root <- "~/Dropbox/india_forest_land" 
root <- shell_root
setwd( root )

### Geting State names
state_names <- fread( "A_MicroData/state_names_shrug.csv" )
state_id_code <- {i}
st_name <- state_names[ state_names$state_id == state_id_code ]$state_name


# Getting the num of files
# Import data
df1 <- fread( "A_MicroData/data_sysdif_block_lvl.csv" )
names(df1)
df1 <- df1[df1$state_id == state_id_code ]
df_year_2010 <- df1[df1$year == 2010]


# Calculate the median of the 'value' column
median_value <- median(df_year_2010$per_treecover)
# Generate a new variable 'above_median'; it will be TRUE if 'value' is greater than the median
df_year_2010[, above_median_block_any_all := (per_treecover > median_value)*1 ]
df_year_2010 <- df_year_2010[, .(block_id, above_median_block_any_all )]

df2 <- df1 %>% left_join( df_year_2010 )

# Generation of t2event
# Create a data.table with minimum year by block_id
min_years_data <- df2[
  post_ror_data_entry_block_any == 1,
  .(min_year = min(year)), # Replace 'year_column' with your actual year column name
  by = .(block_id)
]

# Merge the min_years_data back to the original dataset
df2 <- merge(df2, min_years_data, by = "block_id", all.x = TRUE)
df2$t2ev_ror_data_entry_block_any <- df2$year - df2$min_year


df2_above <- df2[df2$above_median_block_any_all == 1]
df2_below <- df2[df2$above_median_block_any_all == 0]

"""
    # text2 (plain string, not an f-string): estimation and plotting. It holds
    # literal R braces (tryCatch({ ... })) that an f-string would try to
    # interpolate. Each subsample is fitted inside its own tryCatch, so an
    # error in one is printed and returns 'NA' instead of aborting the script.
    text2 = """

#-------------------------------------------------------------------------------
# above the median
#-------------------------------------------------------------------------------


result <- tryCatch({
  
  # Estimatio Result
  ppool_syn <- multisynth(per_treecover ~ post_ror_data_entry_block_any, 
                          block_id, year, df2_above, time_cohort = TRUE, n_leads = 9 )
  
  filename <- paste0( "above_median_block_any_", state_id_code, ".RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn, path )
  
  
  # Get Summary result
  ppool_syn_time_summ <- summary( ppool_syn )
  filename <- paste0( "above_median_block_any_", state_id_code, "_sum.RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn_time_summ,  path )
  
  
  
  
  # Get the plot
  est_df <- ppool_syn_time_summ$att
  est_df <- est_df[est_df$Level == "Average", ]
  est_df <- est_df[apply(!is.na(est_df), 1, all),]
  
  
  n_df <- df2_above[t2ev_ror_data_entry_block_any %in% est_df$Time][, .N, by = t2ev_ror_data_entry_block_any]
  n_df$Time <- n_df$t2ev_ror_data_entry_block_any
  setorder(n_df,Time )
  setorder(est_df,Time )
  
  
  
  # Getting factors
  rect.length <- (max(est_df[,"Estimate"], na.rm = TRUE) - 
                    min(est_df[,"Estimate"], na.rm = TRUE))/2
  scale_fac <- 0.8 * rect.length / ( max(n_df[,"N"]) )
  min_y_lim <- round(min(est_df$lower_bound), 2) * 1.05
  max_y_lim <- round( max(est_df$upper_bound), 2) * 0.95
  
  est_df[,"xmin"] <- est_df[,"Time"] - 0.2
  est_df[,"xmax"] <- est_df[,"Time"] + 0.2
  est_df[,"ymin"] <- min_y_lim
  est_df[,"ymax"] <- est_df[,"ymin"] + ( n_df[,"N"] * scale_fac )
  
  
  # Getting the plot
  p <- ggplot(est_df, aes(x = Time, y = Estimate)) + 
    geom_line(color = "black") +
    geom_point(color = "blue") +
    geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), 
                  width = 0.2, color = "red") +
    geom_rect(data = est_df, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), 
              fill = "grey70", colour = "grey69", alpha = 0.4, size = 0.2) + 
    coord_cartesian(ylim = c( min_y_lim, max_y_lim ) ) +
    scale_y_continuous(name = "Estimate", 
                       sec.axis = sec_axis(~(.+ (-1*min_y_lim)) * (1/scale_fac), 
                                           name = "Number of Observations")) + 
    labs(y = "ATT", x = "Time to Event", title = "Ror Data Entry at Block Level if any Village is Treated") + 
    geom_vline(aes(xintercept = 0), linetype = "dashed", color = "gray") +  # Dashed vertical line at x = 5
    geom_hline(aes(yintercept = 0), linetype = "dashed", color = "gray") + 
    ggtitle( st_name ) +
    theme_minimal()
  
  
  filename <- paste0( "above_median_block_any_", state_id_code, "_plot.png")
  path <- file.path("F_Figures/_vcf_10_augsyth/median_filter/by_state", filename)
  ggsave(filename = path, plot = p, width = 6, height = 4, dpi = 300)
  
  
}, error = function(err) {
  
  # Code to handle the error
  # You can access information about the error in 'err'
  cat("An error occurred:", conditionMessage(err), "\n")
  
  return( 'NA')
})
print(result)






#-------------------------------------------------------------------------------
# Below the median
#-------------------------------------------------------------------------------

result <- tryCatch({
  
  # Estimatio Result
  ppool_syn <- multisynth(per_treecover ~ post_ror_data_entry_block_any, 
                          block_id, year, df2_below, time_cohort = TRUE, n_leads = 9 )
  
  filename <- paste0( "below_median_block_any_", state_id_code, ".RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn, path )
  
  
  # Get Summary result
  ppool_syn_time_summ <- summary( ppool_syn )
  filename <- paste0( "below_median_block_any_", state_id_code, "_sum.RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn_time_summ,  path )
  
  
  
  
  # Get the plot
  est_df <- ppool_syn_time_summ$att
  est_df <- est_df[est_df$Level == "Average", ]
  est_df <- est_df[apply(!is.na(est_df), 1, all),]
  
  
  n_df <- df2_below[t2ev_ror_data_entry_block_any %in% est_df$Time][, .N, by = t2ev_ror_data_entry_block_any]
  n_df$Time <- n_df$t2ev_ror_data_entry_block_any
  setorder(n_df,Time )
  setorder(est_df,Time )
  
  
  
  # Getting factors
  rect.length <- (max(est_df[,"Estimate"], na.rm = TRUE) - 
                    min(est_df[,"Estimate"], na.rm = TRUE))/2
  scale_fac <- 0.8 * rect.length / ( max(n_df[,"N"]) )
  min_y_lim <- round(min(est_df$lower_bound), 2) * 1.05
  max_y_lim <- round( max(est_df$upper_bound), 2) * 0.95
  
  est_df[,"xmin"] <- est_df[,"Time"] - 0.2
  est_df[,"xmax"] <- est_df[,"Time"] + 0.2
  est_df[,"ymin"] <- min_y_lim
  est_df[,"ymax"] <- est_df[,"ymin"] + ( n_df[,"N"] * scale_fac )
  
  
  # Getting the plot
  p <- ggplot(est_df, aes(x = Time, y = Estimate)) + 
    geom_line(color = "black") +
    geom_point(color = "blue") +
    geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), 
                  width = 0.2, color = "red") +
    geom_rect(data = est_df, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), 
              fill = "grey70", colour = "grey69", alpha = 0.4, size = 0.2) + 
    coord_cartesian(ylim = c( min_y_lim, max_y_lim ) ) +
    scale_y_continuous(name = "Estimate", 
                       sec.axis = sec_axis(~(.+ (-1*min_y_lim)) * (1/scale_fac), 
                                           name = "Number of Observations")) + 
    labs(y = "ATT", x = "Time to Event", title = "Ror Data Entry at Block Level if any Village is Treated") + 
    geom_vline(aes(xintercept = 0), linetype = "dashed", color = "gray") +  # Dashed vertical line at x = 5
    geom_hline(aes(yintercept = 0), linetype = "dashed", color = "gray") + 
    ggtitle( st_name ) +
    theme_minimal()
  
  
  filename <- paste0( "below_median_block_any_", state_id_code, "_plot.png")
  path <- file.path("F_Figures/_vcf_10_augsyth/median_filter/by_state", filename)
  ggsave(filename = path, plot = p, width = 6, height = 4, dpi = 300)
  
  
}, error = function(err) {
  
  # Code to handle the error
  # You can access information about the error in 'err'
  cat("An error occurred:", conditionMessage(err), "\n")
  
  return( 'NA')
})
print(result)






"""
    
    # Saving R scripts
    final_code = text1 + text2
    filename = f"sate_{n_sample}_block_any"
    with open( fr"{main_path}/batch/{filename}.R", "w") as f:
        f.write( f"{final_code}")
    
    
    text = f"""#!/bin/bash
#SBATCH --nodes=1                # node count
#SBATCH --ntasks=1               # total number of tasks across all nodes
#SBATCH --cpus-per-task=1        # cpu-cores per task (>1 if multi-threaded tasks)
#SBATCH --mem-per-cpu=10G         # memory per cpu-core (4G per cpu-core is default)
#SBATCH --time=08:00:00          # total run time limit (HH:MM:SS)
#SBATCH --mail-user=futurolos9@gmail.com
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --output={filename}.log

module purge
R CMD BATCH {main_path}/batch/{filename}.R
"""
    
    with open( f"{main_path}/batch/{filename}.sbatch", "w") as f:
            f.write( f"{text}")

    n_sample = n_sample + 1

In [8]:
# Build anzony_block_any.sh: a shell snippet that changes into the batch
# directory and submits every generated "block any" job with `sbatch`.
submit_header = f"""
# submit
cd {main_path}/batch
"""

# One `sbatch` line per job number 1 .. n_sample - 1 (n_sample is one past the
# last job written by the generator loop). The whitespace-only line that
# follows each command is kept so the written file is unchanged.
submit_body = "".join(
    f"""
sbatch sate_{job_no}_block_any.sbatch
                    """
    for job_no in range(1, n_sample)
)

with open(f"{main_path}/anzony_block_any.sh", "w") as submit_file:
    submit_file.write(submit_header + submit_body)


## Block 50


In [4]:
# Location of the by-state median-filter programs on the cluster scratch space.
# `root` is the pathlib handle; `main_path` is the same location as a plain
# string, which is what the f-string templates below interpolate.
root = Path("/scratch/gpfs/ar8787/groupdata2/india_forest_land/C_Programs/_vcf_10_augsyth/median_filter/by_state")
main_path = root.as_posix()

In [5]:
# Number of per-state jobs to generate; the generator loop iterates over
# np.arange(0, total_files) and uses each value as the state id.
total_files: int = 35

In [9]:
# "Block 50" job generator: builds, for each state id, the two halves of an R
# script (text1 + text2) that runs the above/below-median augsynth::multisynth
# analysis at block level with the `post_ror_data_entry_block_50` treatment.
# NOTE(review): np.arange(0, total_files) emits state_id_code 0 .. total_files - 1;
# confirm that the state ids in the data really start at 0 rather than 1.
# n_sample is the 1-based job number used in the generated file names.
n_sample = 1
for i in np.arange(0, total_files):

    # text1 (f-string): data preparation. `{i}` is the only interpolated field,
    # so this part must not contain any other curly braces. The R code loads
    # the block-level panel, keeps one state, flags blocks above the state's
    # 2010 median of per_treecover, derives the time-to-event variable from
    # the first year with post_ror_data_entry_block_50 == 1, and splits the
    # panel into df2_above / df2_below.
    text1 = f"""
rm(list = ls())
library(augsynth)
library(data.table)
library(dplyr)
library(fixest)
library(ggplot2)


# Getting the workign directory 
shell_root <- "/scratch/gpfs/ar8787/groupdata2/india_forest_land" 
dbox_root <- "~/Dropbox/india_forest_land" 
root <- shell_root
setwd( root )

### Geting State names
state_names <- fread( "A_MicroData/state_names_shrug.csv" )
state_id_code <- {i}
st_name <- state_names[ state_names$state_id == state_id_code ]$state_name


# Getting the num of files
# Import data
df1 <- fread( "A_MicroData/data_sysdif_block_lvl.csv" )
names(df1)
df1 <- df1[df1$state_id == state_id_code ]
df_year_2010 <- df1[df1$year == 2010]


# Calculate the median of the 'value' column
median_value <- median(df_year_2010$per_treecover)
# Generate a new variable 'above_median'; it will be TRUE if 'value' is greater than the median
df_year_2010[, above_median_block_50_all := (per_treecover > median_value)*1 ]
df_year_2010 <- df_year_2010[, .(block_id, above_median_block_50_all )]

df2 <- df1 %>% left_join( df_year_2010 )

# Generation of t2event
# Create a data.table with minimum year by block_id
min_years_data <- df2[
  post_ror_data_entry_block_50 == 1,
  .(min_year = min(year)), # Replace 'year_column' with your actual year column name
  by = .(block_id)
]

# Merge the min_years_data back to the original dataset
df2 <- merge(df2, min_years_data, by = "block_id", all.x = TRUE)
df2$t2ev_ror_data_entry_block_50 <- df2$year - df2$min_year


df2_above <- df2[df2$above_median_block_50_all == 1]
df2_below <- df2[df2$above_median_block_50_all == 0]

"""
    # text2 (plain string, not an f-string): estimation and plotting. It holds
    # literal R braces (tryCatch({ ... })) that an f-string would try to
    # interpolate. Each subsample is fitted inside its own tryCatch, so an
    # error in one is printed and returns 'NA' instead of aborting the script.
    text2 = """

#-------------------------------------------------------------------------------
# above the median
#-------------------------------------------------------------------------------


result <- tryCatch({
  
  # Estimatio Result
  ppool_syn <- multisynth(per_treecover ~ post_ror_data_entry_block_50, 
                          block_id, year, df2_above, time_cohort = TRUE, n_leads = 9 )
  
  filename <- paste0( "above_median_block_50_", state_id_code, ".RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn, path )
  
  
  # Get Summary result
  ppool_syn_time_summ <- summary( ppool_syn )
  filename <- paste0( "above_median_block_50_", state_id_code, "_sum.RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn_time_summ,  path )
  
  
  
  
  # Get the plot
  est_df <- ppool_syn_time_summ$att
  est_df <- est_df[est_df$Level == "Average", ]
  est_df <- est_df[apply(!is.na(est_df), 1, all),]
  
  
  n_df <- df2_above[t2ev_ror_data_entry_block_50 %in% est_df$Time][, .N, by = t2ev_ror_data_entry_block_50]
  n_df$Time <- n_df$t2ev_ror_data_entry_block_50
  setorder(n_df,Time )
  setorder(est_df,Time )
  
  
  
  # Getting factors
  rect.length <- (max(est_df[,"Estimate"], na.rm = TRUE) - 
                    min(est_df[,"Estimate"], na.rm = TRUE))/2
  scale_fac <- 0.8 * rect.length / ( max(n_df[,"N"]) )
  min_y_lim <- round(min(est_df$lower_bound), 2) * 1.05
  max_y_lim <- round( max(est_df$upper_bound), 2) * 0.95
  
  est_df[,"xmin"] <- est_df[,"Time"] - 0.2
  est_df[,"xmax"] <- est_df[,"Time"] + 0.2
  est_df[,"ymin"] <- min_y_lim
  est_df[,"ymax"] <- est_df[,"ymin"] + ( n_df[,"N"] * scale_fac )
  
  
  # Getting the plot
  p <- ggplot(est_df, aes(x = Time, y = Estimate)) + 
    geom_line(color = "black") +
    geom_point(color = "blue") +
    geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), 
                  width = 0.2, color = "red") +
    geom_rect(data = est_df, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), 
              fill = "grey70", colour = "grey69", alpha = 0.4, size = 0.2) + 
    coord_cartesian(ylim = c( min_y_lim, max_y_lim ) ) +
    scale_y_continuous(name = "Estimate", 
                       sec.axis = sec_axis(~(.+ (-1*min_y_lim)) * (1/scale_fac), 
                                           name = "Number of Observations")) + 
    labs(y = "ATT", x = "Time to Event", title = "Ror Data Entry at Block Level if more than 50% Villages are Treated") + 
    geom_vline(aes(xintercept = 0), linetype = "dashed", color = "gray") +  # Dashed vertical line at x = 5
    geom_hline(aes(yintercept = 0), linetype = "dashed", color = "gray") + 
    ggtitle( st_name ) +
    theme_minimal()
  
  
  filename <- paste0( "above_median_block_50_", state_id_code, "_plot.png")
  path <- file.path("F_Figures/_vcf_10_augsyth/median_filter/by_state", filename)
  ggsave(filename = path, plot = p, width = 6, height = 4, dpi = 300)
  
  
}, error = function(err) {
  
  # Code to handle the error
  # You can access information about the error in 'err'
  cat("An error occurred:", conditionMessage(err), "\n")
  
  return( 'NA')
})
print(result)






#-------------------------------------------------------------------------------
# Below the median
#-------------------------------------------------------------------------------

result <- tryCatch({
  
  # Estimatio Result
  ppool_syn <- multisynth(per_treecover ~ post_ror_data_entry_block_50, 
                          block_id, year, df2_below, time_cohort = TRUE, n_leads = 9 )
  
  filename <- paste0( "below_median_block_50_", state_id_code, ".RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn, path )
  
  
  # Get Summary result
  ppool_syn_time_summ <- summary( ppool_syn )
  filename <- paste0( "below_median_block_50_", state_id_code, "_sum.RDS")
  path <- file.path("E_Estimates/_vcf_10_augsyth/median_filter/by_state", filename)
  saveRDS( ppool_syn_time_summ,  path )
  
  
  
  
  # Get the plot
  est_df <- ppool_syn_time_summ$att
  est_df <- est_df[est_df$Level == "Average", ]
  est_df <- est_df[apply(!is.na(est_df), 1, all),]
  
  
  n_df <- df2_below[t2ev_ror_data_entry_block_50 %in% est_df$Time][, .N, by = t2ev_ror_data_entry_block_50]
  n_df$Time <- n_df$t2ev_ror_data_entry_block_50
  setorder(n_df,Time )
  setorder(est_df,Time )
  
  
  
  # Getting factors
  rect.length <- (max(est_df[,"Estimate"], na.rm = TRUE) - 
                    min(est_df[,"Estimate"], na.rm = TRUE))/2
  scale_fac <- 0.8 * rect.length / ( max(n_df[,"N"]) )
  min_y_lim <- round(min(est_df$lower_bound), 2) * 1.05
  max_y_lim <- round( max(est_df$upper_bound), 2) * 0.95
  
  est_df[,"xmin"] <- est_df[,"Time"] - 0.2
  est_df[,"xmax"] <- est_df[,"Time"] + 0.2
  est_df[,"ymin"] <- min_y_lim
  est_df[,"ymax"] <- est_df[,"ymin"] + ( n_df[,"N"] * scale_fac )
  
  
  # Getting the plot
  p <- ggplot(est_df, aes(x = Time, y = Estimate)) + 
    geom_line(color = "black") +
    geom_point(color = "blue") +
    geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), 
                  width = 0.2, color = "red") +
    geom_rect(data = est_df, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), 
              fill = "grey70", colour = "grey69", alpha = 0.4, size = 0.2) + 
    coord_cartesian(ylim = c( min_y_lim, max_y_lim ) ) +
    scale_y_continuous(name = "Estimate", 
                       sec.axis = sec_axis(~(.+ (-1*min_y_lim)) * (1/scale_fac), 
                                           name = "Number of Observations")) + 
    labs(y = "ATT", x = "Time to Event", title = "Ror Data Entry at Block Level if more than 50% Villages are Treated") + 
    geom_vline(aes(xintercept = 0), linetype = "dashed", color = "gray") +  # Dashed vertical line at x = 5
    geom_hline(aes(yintercept = 0), linetype = "dashed", color = "gray") + 
    ggtitle( st_name ) +
    theme_minimal()
  
  
  filename <- paste0( "below_median_block_50_", state_id_code, "_plot.png")
  path <- file.path("F_Figures/_vcf_10_augsyth/median_filter/by_state", filename)
  ggsave(filename = path, plot = p, width = 6, height = 4, dpi = 300)
  
  
}, error = function(err) {
  
  # Code to handle the error
  # You can access information about the error in 'err'
  cat("An error occurred:", conditionMessage(err), "\n")
  
  return( 'NA')
})
print(result)






"""
    
    # Saving R scripts
    final_code = text1 + text2
    filename = f"sate_{n_sample}_block_50"
    with open( fr"{main_path}/batch/{filename}.R", "w") as f:
        f.write( f"{final_code}")
    
    
    text = f"""#!/bin/bash
#SBATCH --nodes=1                # node count
#SBATCH --ntasks=1               # total number of tasks across all nodes
#SBATCH --cpus-per-task=1        # cpu-cores per task (>1 if multi-threaded tasks)
#SBATCH --mem-per-cpu=10G         # memory per cpu-core (4G per cpu-core is default)
#SBATCH --time=08:00:00          # total run time limit (HH:MM:SS)
#SBATCH --mail-user=futurolos9@gmail.com
#SBATCH --mail-type=BEGIN
#SBATCH --mail-type=END
#SBATCH --mail-type=FAIL
#SBATCH --output={filename}.log

module purge
R CMD BATCH {main_path}/batch/{filename}.R
"""
    
    with open( f"{main_path}/batch/{filename}.sbatch", "w") as f:
            f.write( f"{text}")

    n_sample = n_sample + 1

In [10]:
# Build anzony_block_50.sh: a shell snippet that changes into the batch
# directory and submits every generated "block 50" job with `sbatch`.
submit_header = f"""
# submit
cd {main_path}/batch
"""

# One `sbatch` line per job number 1 .. n_sample - 1 (n_sample is one past the
# last job written by the generator loop). The whitespace-only line that
# follows each command is kept so the written file is unchanged.
submit_body = "".join(
    f"""
sbatch sate_{job_no}_block_50.sbatch
                    """
    for job_no in range(1, n_sample)
)

with open(f"{main_path}/anzony_block_50.sh", "w") as submit_file:
    submit_file.write(submit_header + submit_body)
