# Create data frames for each mine for matching

In [None]:
##load packages
library(tidyverse)
library(sf)
library(raster)

In [None]:
## Root directory that holds the input and output datasets
data_path <- "PATH"
## Directory the raster package uses for its temporary files
rasterOptions(tmpdir = "~/scratch/")

In [None]:
## Load the dataset of confounders.
## file.path() supplies the "/" between data_path and "dfs" that paste0()
## left out, so the path resolves whether or not data_path ends in a slash.
conf <- read_csv(file.path(data_path, "dfs", "confounders.csv"), show_col_types = FALSE)

In [None]:
## Keep only the final matching confounders.
## select() is qualified with the public `::` operator (not `:::`, which is
## meant for unexported objects); raster is attached after tidyverse, so an
## unqualified select() may not resolve to dplyr's.
conf <- conf %>%
        dplyr::select(ID, year, x, y, province, tc, tc_pre_yr, tc_sd, loss, loss_tcw, loss_2yr_bin, mine_dist,
                      road_dist, elevation, slope, pop_den, burn_pre_2yr, pa, agri_zone)

In [None]:
## Reference raster grid; its single layer is named 'ID' so that data frames
## built from it carry an ID column
ref_m <- raster(file.path(data_path, "zam_ref_m.tif"))
names(ref_m) <- "ID"

 ## Assign treatment and controls to pixels

In [None]:
## Read in the large scale mining leases (LML).
## file.path() supplies the "/" between data_path and "mine_leases" that
## paste0() left out (the LEL/SML reads build the same path with sep = "/").
lmls <- st_read(dsn = file.path(data_path, "mine_leases", "LML"), layer = "LML")

In [None]:
## Read in the 'active from' details for the leases.
## The file name is relative, so it is read from the current working directory.
active <- read_csv("mines_active_from.csv")

In [None]:
## Join the 'active from' information onto the leases by Code and remove the
## leases recorded as 'Never' active (filter() also drops rows where
## active_from is NA, i.e. leases without a match in `active`)
lmls <- lmls %>%
  left_join(active, by = "Code") %>%
  filter(active_from != "Never")

In [None]:
## Reproject the leases onto the coordinate system of the reference grid
## (described in the original notes as metre based)
lmls <- lmls %>% st_transform(crs = crs(ref_m))

In [12]:
## Create the vector of distinct mine identifiers, dropping the value 0 and
## sorting by identifier (not by active year)
mines <- lmls$mine %>% unique()
mines <- sort(mines[mines != 0])
mines

In [None]:
## Select the treatment mines and merge the adjacent leases that are part of
## the same mine: one feature per (mine, active_from) combination
trt_mines <- lmls %>%
  filter(mine %in% mines) %>%
  dplyr::select(mine, active_from) %>%
  group_by(mine, active_from) %>%
  summarize() %>%
  st_union(by_feature = TRUE)

## Mine / active-from table: the same rows without the geometry
ma <- st_drop_geometry(trt_mines)

## Split into a list holding one object per mine
split_tm <- split(trt_mines, trt_mines$mine)

In [None]:
## Save the mine / active-from table as a CSV
write.csv(ma, file.path(data_path, "dfs/mine_tyr.csv"), row.names = FALSE)

In [None]:
## Create a list of treatment rasters, one per mine: cells inside the mine
## buffered by dist = 25000 (25 km per the original notes) are 1, all other
## cells are NA.
## The list keeps the name `t` because later cells refer to it by that name.
t <- vector("list", length(split_tm))
## seq_along() gives an empty loop for an empty list, where 1:length() would
## iterate over c(1, 0)
for (i in seq_along(split_tm)) {
    ## buffer the mine and burn it into the reference grid (1 inside, 0 outside)
    buffered <- rasterize(st_buffer(split_tm[[i]], dist = 25000), ref_m, field = 1, background = 0)
    ## convert the 0 background to NA
    buffered[buffered < 1] <- NA
    t[[i]] <- buffered
}

In [None]:
## Calculate a distance layer for each treatment mine: rasterize the mine
## itself (no buffer) and compute the grid distance to its cells
d <- vector("list", length(split_tm))
## seq_along() gives an empty loop for an empty list, where 1:length() would
## iterate over c(1, 0)
for (i in seq_along(split_tm)) {
    ## cells covered by the mine get the value 1, which is the distance origin
    mine_cells <- rasterize(split_tm[[i]], ref_m, field = 1)
    d[[i]] <- gridDistance(mine_cells, origin = 1)
}

In [None]:
## Create the activation-year variable: leases flagged 'pre-2000' or "Unclear"
## are treated as active in 2000, all others keep their active_from value
active_by_2000 <- lmls$active_from == 'pre-2000' | lmls$active_from == "Unclear"
lmls$active2 <- ifelse(active_by_2000, 2000, lmls$active_from)
## Split the leases into a list with one element per activation year
split_ym <- split(lmls, lmls$active2)

In [17]:
## Activation years present in the data, taken from the names of the split
## list and converted to numbers
mine_years <- names(split_ym) %>% as.numeric()
mine_years

In [None]:
## Years 2000 to 2019, one per yearly distance layer built below
years <- 2000:2019

In [None]:
## Calculate a cumulative distance-to-lease layer for each activation year:
## the first element is the distance to that year's leases, and each later
## element is the cell-wise minimum of its own distance and the previous one.
## Relies on split_ym being in ascending year order - TODO confirm.
dy <- vector("list", length(split_ym))

## seq_along() gives an empty loop for an empty list, where 1:length() would
## iterate over c(1, 0)
for (i in seq_along(split_ym)) {
    ## distance from every cell to the leases in this activation-year group
    year_dist <- gridDistance(rasterize(split_ym[[i]], ref_m, field = 1), origin = 1)
    if (i == 1) {
        ## first year: just the distance
        dy[[i]] <- year_dist
    } else {
        ## later years: running minimum with the previous cumulative layer
        dy[[i]] <- min(year_dist, dy[[i - 1]])
    }
}

In [None]:
## Create a distance-to-nearest-mine layer for every year in `years`: each
## year uses the cumulative layer of the latest activation year that is not
## after it, so years without a newly active mine repeat the previous layer.
## Layers are looked up by year value rather than by a running counter, so
## the result stays aligned even if mine_years holds a year outside `years`.
## Relies on mine_years being in ascending order, matching dy - TODO confirm.
dy_all <- vector("list", length(years))

for (i in seq_along(years)) {
    ## positions in dy of all activation years up to and including this year
    active_idx <- which(mine_years <= years[i])
    if (length(active_idx) == 0) {
        ## the original code failed here with an obscure subscript error
        stop("No mine is active by ", years[i],
             "; cannot build a distance layer for that year", call. = FALSE)
    }
    dy_all[[i]] <- dy[[max(active_idx)]]
}

In [None]:
## Convert each list of rasters into a raster stack
trt <- t %>% unlist() %>% stack()
trt_dist <- d %>% unlist() %>% stack()
nm_stack <- dy_all %>% unlist() %>% stack()

In [None]:
## Add names to the layers of each stack.
## The number of repeats comes from the stack itself instead of a hard-coded
## 22, so the names still fit when the number of mines changes.
names(trt) <- rep("treatment", nlayers(trt))
names(trt_dist) <- rep("trt_dist", nlayers(trt_dist))
## yearly layers are named near_m_<year>
names(nm_stack) <- paste("near_m", years, sep = "_")

In [None]:
## Read in the LEL leases, used as the control dataset
lels <- st_read(dsn = file.path(data_path, "mine_leases/LEL"), layer = "LEL")

## Read in the small scale mining leases (SML) - these are excluded as controls
smls <- st_read(dsn = file.path(data_path, "mine_leases/SML"), layer = "SML")

In [None]:
## Re-project both lease layers to the coordinate system of the LML leases
lels <- lels %>% st_transform(crs = crs(lmls))
smls <- smls %>% st_transform(crs = crs(lmls))

In [None]:
## Create a mask from the per-mine treatment rasters, used to exclude cells
## from the controls: NA where a cell lies inside at least one mine's buffer,
## 0 everywhere else
lml_mask <- trt
## treatment layers are 1 inside the buffer and NA outside; set the outside to
## 0 so that the layers can be added up
lml_mask[is.na(lml_mask)] <- 0
## collapse the stack into a single layer by summing across the mines
lml_mask <- sum(lml_mask)
## any cell covered by one or more buffers becomes NA
lml_mask[lml_mask>0] <- NA

In [None]:
## Rasterize the SMLs, buffered by dist = 5 to ensure no controls fall within
## mines: cells under a buffered lease are NA (field = NA), all others are 0
sml_mask <- smls %>%
  st_buffer(dist = 5) %>%
  rasterize(ref_m, field = NA, background = 0)

In [None]:
## Combine the two masks: NA propagates through the addition, so a cell is NA
## if it is masked by either the LML buffers or the SMLs, and 0 otherwise
mine_mask <- lml_mask + sml_mask
names(mine_mask) <- 'mine_mask'

In [None]:
## Rasterize the LELs (value 1 inside a lease) and add the mask to create the
## LEL controls: adding mine_mask (0 or NA) keeps the 1 on unmasked cells and
## turns masked cells into NA
lel_cons <- rasterize(lels, ref_m, field = 1) + mine_mask
names(lel_cons) <- 'lel_controls'

In [None]:
## Create the non-LEL controls - every cell that is not in the mask.
## Start from mine_mask (0 = unmasked, NA = masked) ...
nlel_cons <- mine_mask
## ... and flag the unmasked cells with 1; masked cells stay NA
nlel_cons[nlel_cons==0] <- 1
names(nlel_cons) <- 'nlel_controls'

In [None]:
##export the raster stacks
#writeRaster(trt, paste(data_path, "treatments.tif",  sep = "/"), overwrite=TRUE)

#writeRaster(lel_cons, paste(data_path, "lel_controls.tif",  sep = "/"), overwrite=TRUE)
#writeRaster(nlel_cons, paste(data_path, "nlel_controls.tif",  sep = "/"), overwrite=TRUE)
#writeRaster(mine_mask, paste(data_path, "mine_mask.tif",  sep = "/"), overwrite=TRUE)

#writeRaster(trt_dist, paste(data_path, "mine_dist.tif",  sep = "/"), overwrite=TRUE)
#writeRaster(nm_stack, paste(data_path, "yr_mine_dist.tif",  sep = "/"), overwrite=TRUE)

In [31]:
## Create the list of cell IDs that had greater than 10% forest cover (tc) in
## 2000. The year is compared as a number rather than as the string "2000",
## and select() uses the public `::` operator instead of `:::`.
tc_df10 <- conf %>%
  filter(year == 2000, tc > 10) %>%
  dplyr::select(ID)

In [32]:
## Number of distinct cell IDs in the confounder table
n_distinct(conf$ID)
## Number of cell IDs that pass the forest-cover filter
nrow(tc_df10)

## a dataset for each mine for mine level matching - Controls from LELs

In [33]:
## Create a data frame of LEL controls keyed by cell ID, limited to the cells
## that pass the forest-cover filter. NAs are replaced with 0, so lel_controls
## is 1 for a possible control and 0 otherwise.
## across() replaces the superseded mutate_all().
lel_cons_df <- as.data.frame(stack(ref_m, lel_cons)) %>%
  filter(ID %in% tc_df10$ID) %>%
  mutate(across(everything(), ~ replace(.x, is.na(.x), 0)))

In [34]:
## One data frame per mine for matching, with controls drawn from the LELs
m <- vector("list", length(mines))

## Loop over the mines; seq_along() gives an empty loop when there are none
for (i in seq_along(mines)) {

    ## active_from value recorded for this mine
    ## NOTE(review): if this is not a year present in conf (active_from can hold
    ## values such as 'pre-2000'), df is empty and the joined confounders are
    ## all NA - confirm this cannot happen for the mines in `mines`
    active_yr <- filter(ma, mine == paste(mines[i]))$active_from

    ## confounder values for that year only
    df <- filter(conf, year == active_yr)

    m[[i]] <- as.data.frame(stack(ref_m, trt[[i]], trt_dist[[i]])) %>%  # ID, treatment (1 or NA) and distance to the mine
                filter(ID %in% tc_df10$ID) %>%
                rename("treatment" = names(.)[2], 'trt_dist'= names(.)[3]) %>%  # layers are renamed by position
                left_join(lel_cons_df, by = "ID") %>%  # join the control dataset
                ## single treatment/control variable: 0 = control, 1 = treated,
                ## -1 = neither (NA when treatment is NA and not a control)
                mutate(trt_con = ifelse(lel_controls == 1, 0, ifelse(treatment == 1, 1, -1))) %>%
                dplyr::select(-treatment, -lel_controls) %>%
                filter(trt_con >= 0) %>%  # keeps controls and treated cells; drops -1 and NA
                left_join(df, by = "ID")    # join the confounders for matching
}

In [None]:
## Save one CSV per mine, named after the mine, into dfs/um_mines.
## seq_along() gives an empty loop when there are no mines.
for (i in seq_along(mines)) {
    write.csv(m[[i]], paste0(data_path, "/dfs/um_mines/", mines[i], ".csv"), row.names = FALSE)
}

## a dataset for each mine for mine level matching - Controls from anywhere not mined

In [None]:
## Create a data frame of non-LEL controls (any cell outside the mine mask)
## keyed by cell ID, limited to the cells that pass the forest-cover filter.
## NAs are replaced with 0, so nlel_controls is 1 for a possible control and
## 0 otherwise. across() replaces the superseded mutate_all().
nlel_cons_df <- as.data.frame(stack(ref_m, nlel_cons)) %>%
  filter(ID %in% tc_df10$ID) %>%
  mutate(across(everything(), ~ replace(.x, is.na(.x), 0)))

In [None]:
## Dataset - unmatched non-LEL controls: one data frame per mine
mn <- vector("list", length(mines))
## Loop over the mines; seq_along() gives an empty loop when there are none
for (i in seq_along(mines)) {
    ## active_from value recorded for this mine, then that year's confounders
    active_yr <- filter(ma, mine == paste(mines[i]))$active_from
    df <- filter(conf, year == active_yr)

    mn[[i]] <- as.data.frame(stack(ref_m, trt[[i]], trt_dist[[i]])) %>%
                filter(ID %in% tc_df10$ID) %>%
                rename("treatment" = names(.)[2], 'trt_dist'= names(.)[3]) %>%  # layers are renamed by position
                left_join(nlel_cons_df, by = "ID") %>%
                ## 0 = control, 1 = treated, -1 = neither (NA when treatment is
                ## NA and the cell is not a control)
                mutate(trt_con = ifelse(nlel_controls == 1, 0, ifelse(treatment == 1, 1, -1))) %>%
                dplyr::select(-treatment, -nlel_controls) %>%
                filter(trt_con >= 0) %>%  # keeps controls and treated cells only
                left_join(df, by = "ID")
}

In [None]:
## Save one CSV per mine, named after the mine, into dfs/um_mines_nlel.
## seq_along() gives an empty loop when there are no mines.
for (i in seq_along(mines)) {
    write.csv(mn[[i]], paste0(data_path, "/dfs/um_mines_nlel/", mines[i], ".csv"), row.names = FALSE)
}

In [None]:
## Dataset of just the mines: one data frame per mine with its treatment
## indicator, distance layer and confounders, without any control column.
## NOTE(review): no filter is applied, so every cell of the reference grid is
## kept (treatment is 1 inside the buffer and NA elsewhere) - confirm that
## downstream code filters on treatment if only treated cells are wanted.
mt <- vector("list", length(mines))

## Loop over the mines; seq_along() gives an empty loop when there are none
for (i in seq_along(mines)) {
    ## active_from value recorded for this mine, then that year's confounders
    active_yr <- filter(ma, mine == paste(mines[i]))$active_from
    df <- filter(conf, year == active_yr)
    mt[[i]] <- as.data.frame(stack(ref_m, trt[[i]], trt_dist[[i]])) %>%
                rename("treatment" = names(.)[2], 'trt_dist'= names(.)[3]) %>%  # layers are renamed by position
                left_join(df, by = "ID")
}

In [None]:
## Save one CSV per mine, named after the mine, into dfs/only_mines.
## seq_along() gives an empty loop when there are no mines.
for (i in seq_along(mines)) {
    write.csv(mt[[i]], paste0(data_path, "/dfs/only_mines/", mines[i], ".csv"), row.names = FALSE)
}

## Regular sampling

In [None]:
## 25% sampling - one cell of every 2 x 2 square is kept: the cell in the
## second row and second column of the square (even row, even column, counting
## from 1). Kept cells are 0 and dropped cells NA, so adding the pattern to
## another raster leaves kept cells unchanged and blanks the rest.
## The pattern is derived from each cell's row and column index, so it works
## for any grid size (the original needed an odd row count and an even column
## count to produce a vector of the right length).
n_rows <- dim(ref_m)[1]
n_cols <- dim(ref_m)[2]
## row and column index of every cell, with values laid out row by row
cell_row <- rep(seq_len(n_rows), each = n_cols)
cell_col <- rep(seq_len(n_cols), times = n_rows)

samples <- ifelse(cell_row %% 2 == 0 & cell_col %% 2 == 0, 0, NA)

In [None]:
## Put the pattern of NAs and 0s onto an empty copy of the reference raster
sample_rast25 <- raster(ref_m) %>% setValues(samples)

In [None]:
## 11% sampling - one cell of every 3 x 3 square is kept: the middle cell of
## the square's third row (every third row, second column of each group of
## three). Kept cells are 0 and dropped cells NA.
## The pattern is derived from each cell's row and column index, so it works
## for any grid size (the original needed both dimensions to be divisible by 3
## to produce a vector of the right length).
n_rows <- dim(ref_m)[1]
n_cols <- dim(ref_m)[2]
## row and column index of every cell, with values laid out row by row
cell_row <- rep(seq_len(n_rows), each = n_cols)
cell_col <- rep(seq_len(n_cols), times = n_rows)

samples <- ifelse(cell_row %% 3 == 0 & cell_col %% 3 == 2, 0, NA)

## put the pattern onto an empty copy of the reference raster
sample_rast11 <- setValues(raster(ref_m), samples)

In [None]:
## Save both sampling rasters, replacing any existing files
writeRaster(sample_rast25, file.path(data_path, "sample_rast25.tif"), overwrite = TRUE)
writeRaster(sample_rast11, file.path(data_path, "sample_rast11.tif"), overwrite = TRUE)

## Repeat with regular sampling to 25%

In [None]:
## Create the data frame of LEL controls restricted to the 25% regular sample:
## adding the sampling raster (0 = kept, NA = dropped) turns every control
## cell outside the sample into NA, which is then recoded to 0.
## across() replaces the superseded mutate_all().

lel_cons25 <- lel_cons + sample_rast25
names(lel_cons25) <- 'lel_controls'
lel_cons25_df <- as.data.frame(stack(ref_m, lel_cons25)) %>%
  filter(ID %in% tc_df10$ID) %>%
  mutate(across(everything(), ~ replace(.x, is.na(.x), 0)))

In [None]:
## One data frame per mine with LEL controls, restricted to the 25% sample
ms25 <- vector("list", length(mines))

## Loop over the mines; seq_along() gives an empty loop when there are none
for (i in seq_along(mines)) {
    ## active_from value recorded for this mine, then that year's confounders
    active_yr <- filter(ma, mine == paste(mines[i]))$active_from
    df <- filter(conf, year == active_yr)
    ## adding the sampling raster blanks treated cells outside the sample
    ms25[[i]] <- as.data.frame(stack(ref_m, (trt[[i]] + sample_rast25), trt_dist[[i]])) %>%
                filter(ID %in% tc_df10$ID) %>%
                ## NOTE(review): this also sets a missing trt_dist to 0, which
                ## the unsampled loop does not do - confirm that is intended
                mutate(across(everything(), ~ replace(.x, is.na(.x), 0))) %>%
                rename("treatment" = names(.)[2], 'trt_dist'= names(.)[3]) %>%  # layers are renamed by position
                left_join(lel_cons25_df, by = "ID") %>%
                ## 0 = control, 1 = treated, -1 = neither
                mutate(trt_con = ifelse(lel_controls == 1, 0, ifelse(treatment == 1, 1, -1))) %>%
                dplyr::select(-treatment, -lel_controls) %>%
                filter(trt_con >= 0) %>%  # keeps controls and treated cells only
                left_join(df, by = "ID")
}

In [None]:
## Save one CSV per mine, named after the mine, into dfs/um_mines_rs_25.
## seq_along() gives an empty loop when there are no mines.
for (i in seq_along(mines)) {
    write.csv(ms25[[i]], paste0(data_path, "/dfs/um_mines_rs_25/", mines[i], ".csv"), row.names = FALSE)
}

## Repeat with regular sampling to 11%

In [None]:
## Create the data frame of LEL controls restricted to the 11% regular sample:
## adding the sampling raster (0 = kept, NA = dropped) turns every control
## cell outside the sample into NA, which is then recoded to 0.
## across() replaces the superseded mutate_all().

lel_cons11 <- lel_cons + sample_rast11
names(lel_cons11) <- 'lel_controls'
lel_cons11_df <- as.data.frame(stack(ref_m, lel_cons11)) %>%
  filter(ID %in% tc_df10$ID) %>%
  mutate(across(everything(), ~ replace(.x, is.na(.x), 0)))

In [None]:

## One data frame per mine with LEL controls, restricted to the 11% sample
ms11 <- vector("list", length(mines))

## Loop over the mines; seq_along() gives an empty loop when there are none
for (i in seq_along(mines)) {
    ## active_from value recorded for this mine, then that year's confounders
    active_yr <- filter(ma, mine == paste(mines[i]))$active_from
    df <- filter(conf, year == active_yr)
    ## adding the sampling raster blanks treated cells outside the sample
    ms11[[i]] <- as.data.frame(stack(ref_m, (trt[[i]] + sample_rast11), trt_dist[[i]])) %>%
                ## the ID > 0 condition is kept from the original; the other
                ## loops filter on tc_df10 membership only
                filter(ID > 0 & ID %in% tc_df10$ID) %>%
                ## NOTE(review): this also sets a missing trt_dist to 0, which
                ## the unsampled loop does not do - confirm that is intended
                mutate(across(everything(), ~ replace(.x, is.na(.x), 0))) %>%
                rename("treatment" = names(.)[2], 'trt_dist'= names(.)[3]) %>%  # layers are renamed by position
                left_join(lel_cons11_df, by = "ID") %>%
                ## 0 = control, 1 = treated, -1 = neither
                mutate(trt_con = ifelse(lel_controls == 1, 0, ifelse(treatment == 1, 1, -1))) %>%
                dplyr::select(-treatment, -lel_controls) %>%
                filter(trt_con >= 0) %>%  # keeps controls and treated cells only
                left_join(df, by = "ID")
}

In [None]:
## Save one CSV per mine, named after the mine, into dfs/um_mines_rs_11.
## seq_along() gives an empty loop when there are no mines.
for (i in seq_along(mines)) {
    write.csv(ms11[[i]], paste0(data_path, "/dfs/um_mines_rs_11/", mines[i], ".csv"), row.names = FALSE)
}

## Non LEL controls at 25% sampling

In [221]:
## Repeat for the non-LEL dataset: controls restricted to the 25% sample.
## The result is stored as nlel_cons25_df (mirroring lel_cons25_df) rather than
## overwriting nlel_cons_df, so re-running the unsampled non-LEL cell cannot
## silently pick up sampled controls.
nlel_cons25 <- nlel_cons + sample_rast25
names(nlel_cons25) <- 'nlel_controls'
nlel_cons25_df <- as.data.frame(stack(ref_m, nlel_cons25)) %>%
  filter(ID %in% tc_df10$ID) %>%
  mutate(across(everything(), ~ replace(.x, is.na(.x), 0)))

In [222]:

mn25 <- vector("list", length(mines))
## Loop over the mines; seq_along() gives an empty loop when there are none
for (i in seq_along(mines)) {
    ## active_from value recorded for this mine, then that year's confounders
    mine_year <- filter(ma, mine == paste(mines[i]))$active_from
    df <- filter(conf, year == mine_year)
    ## adding the sampling raster blanks treated cells outside the sample
    mn25[[i]] <- as.data.frame(stack(ref_m, (trt[[i]] + sample_rast25), trt_dist[[i]])) %>%
                filter(ID %in% tc_df10$ID) %>%
                ## NOTE(review): this also sets a missing trt_dist to 0, which
                ## the unsampled loop does not do - confirm that is intended
                mutate(across(everything(), ~ replace(.x, is.na(.x), 0))) %>%
                rename("treatment" = names(.)[2], 'trt_dist'= names(.)[3]) %>%  # layers are renamed by position
                left_join(nlel_cons25_df, by = "ID") %>%
                ## 0 = control, 1 = treated, -1 = neither
                mutate(trt_con = ifelse(nlel_controls == 1, 0, ifelse(treatment == 1, 1, -1))) %>%
                dplyr::select(-treatment, -nlel_controls) %>%
                filter(trt_con >= 0) %>%  # keeps controls and treated cells only
                left_join(df, by = "ID")
}

In [223]:
## Save one CSV per mine, named after the mine, into dfs/um_mines_nlel_rs_25.
## seq_along() gives an empty loop when there are no mines.
for (i in seq_along(mines)) {
    write.csv(mn25[[i]], paste0(data_path, "/dfs/um_mines_nlel_rs_25/", mines[i], ".csv"), row.names = FALSE)
}

## non LEL controls at 11% sampling

In [224]:
## Repeat for the non-LEL dataset: controls restricted to the 11% sample.
## The result is stored as nlel_cons11_df (mirroring lel_cons11_df) rather than
## overwriting nlel_cons_df, so re-running the unsampled non-LEL cell cannot
## silently pick up sampled controls.
nlel_cons11 <- nlel_cons + sample_rast11
names(nlel_cons11) <- 'nlel_controls'
nlel_cons11_df <- as.data.frame(stack(ref_m, nlel_cons11)) %>%
  filter(ID %in% tc_df10$ID) %>%
  mutate(across(everything(), ~ replace(.x, is.na(.x), 0)))

In [225]:
## Dataset - unmatched non-LEL controls at 11% sampling
mn11 <- vector("list", length(mines))
## Loop over the mines; seq_along() gives an empty loop when there are none
for (i in seq_along(mines)) {
    ## active_from value recorded for this mine, then that year's confounders
    mine_year <- filter(ma, mine == paste(mines[i]))$active_from
    df <- filter(conf, year == mine_year)
    ## adding the sampling raster blanks treated cells outside the sample
    mn11[[i]] <- as.data.frame(stack(ref_m, (trt[[i]] + sample_rast11), trt_dist[[i]])) %>%
                filter(ID %in% tc_df10$ID) %>%
                ## NOTE(review): this also sets a missing trt_dist to 0, which
                ## the unsampled loop does not do - confirm that is intended
                mutate(across(everything(), ~ replace(.x, is.na(.x), 0))) %>%
                rename("treatment" = names(.)[2], 'trt_dist'= names(.)[3]) %>%  # layers are renamed by position
                left_join(nlel_cons11_df, by = "ID") %>%
                ## 0 = control, 1 = treated, -1 = neither
                mutate(trt_con = ifelse(nlel_controls == 1, 0, ifelse(treatment == 1, 1, -1))) %>%
                dplyr::select(-treatment, -nlel_controls) %>%
                filter(trt_con >= 0) %>%  # keeps controls and treated cells only
                left_join(df, by = "ID")
}

In [226]:
## Save one CSV per mine, named after the mine, into dfs/um_mines_nlel_rs_11.
## seq_along() gives an empty loop when there are no mines.
for (i in seq_along(mines)) {
    write.csv(mn11[[i]], paste0(data_path, "/dfs/um_mines_nlel_rs_11/", mines[i], ".csv"), row.names = FALSE)
}

In [209]:
## Sanity check: counts of controls (0) and treated cells (1) for the first
## mine, LEL controls without sampling
table(m[[1]]$trt_con)


     0      1 
186671   6365 

In [210]:
## Sanity check: counts of controls (0) and treated cells (1) for the first
## mine, LEL controls at 25% sampling
table(ms25[[1]]$trt_con)


    0     1 
46681  1592 

In [211]:
## Sanity check: counts of controls (0) and treated cells (1) for the first
## mine, LEL controls at 11% sampling
table(ms11[[1]]$trt_con)


    0     1 
20694   715 

In [212]:
## Sanity check: counts of controls (0) and treated cells (1) for the first
## mine, non-LEL controls without sampling
table(mn[[1]]$trt_con)


     0      1 
584479   6365 

In [219]:
## Sanity check: counts of controls (0) and treated cells (1) for the first
## mine, non-LEL controls at 25% sampling
table(mn25[[1]]$trt_con)


     0      1 
146120   1592 

In [227]:
## Sanity check: counts of controls (0) and treated cells (1) for the first
## mine, non-LEL controls at 11% sampling
table(mn11[[1]]$trt_con)


    0     1 
64902   715 