## 1. Cropping and Masking Data

In [None]:
library(raster)
library(sf)

# Load the detrended growing-season stacks (SM, KDD, GDD) and the detrended
# corn yield stack.
sm <- readRDS('./analysis_data/corn_gs_detrended_mean_sm_stack_1960_2019.rds')
kd <- readRDS('./analysis_data/corn_gs_detrended_kdd_stack_1960_2019.rds')
gd <- readRDS('./analysis_data/corn_gs_detrended_gdd_stack_1960_2019.rds')
yd <- readRDS('./analysis_data/corn_detrended_yield_stack_1960_2020.rds')

# Going by the file names, the yield stack runs to 2020 while the predictors
# stop at 2019. Keep the first 60 layers (same subset the state-level sections
# use) so that every pixel has one yield value per predictor year; otherwise
# the per-pixel regressions receive vectors of different lengths.
yd <- yd[[1:60]]

# Resample the predictors onto the yield grid so all stacks share one CRS
# (NAD83) and one set of rows/columns.
sm <- projectRaster(sm, yd)
kd <- projectRaster(kd, yd)
gd <- projectRaster(gd, yd)

east <- st_read('./raw_data/east_usa_nad83.shp') # area-of-interest polygons

# Crop each stack to the extent of the area of interest, then mask out the
# cells that fall outside its polygons.
ydc <- crop(yd, east)
yde <- mask(ydc, east)

smc <- crop(sm, east)
sme <- mask(smc, east)

kdc <- crop(kd, east)
kde <- mask(kdc, east)

gdc <- crop(gd, east)
gde <- mask(gdc, east)

# Convert the rasters to [row, col, layer] arrays for faster per-pixel access.
yda <- as.array(yde)
sma <- as.array(sme)
kda <- as.array(kde)
gda <- as.array(gde)


## 2. Multiple Linear Regression (without accounting for collinearity)

In [None]:
# Per-pixel slope grids for SM, KDD and GDD. The grid size is taken from the
# data array instead of being hard-coded. Every cell starts as NA and is only
# filled in when the pixel's model passes all checks below.
n.row <- dim(yda)[1]
n.col <- dim(yda)[2]
ms <- array(NA_real_, c(n.row, n.col))
mk <- array(NA_real_, c(n.row, n.col))
mg <- array(NA_real_, c(n.row, n.col))

# multiple linear regression iterated over pixels
for (x in seq_len(n.row)) {
    for (y in seq_len(n.col)) {
        v.y <- yda[x, y, ]
        v.k <- kda[x, y, ]
        v.g <- gda[x, y, ]
        v.s <- sma[x, y, ]

        # lm() stops with an error when no observation is complete in all
        # four variables (this also covers a variable that is entirely NA).
        if (!any(complete.cases(v.y, v.k, v.g, v.s))) {
            next
        }

        fit <- lm(v.y ~ v.k + v.g + v.s) # form of the model
        s <- summary(fit)$coefficients

        # Aliased terms are omitted from the coefficient table, so fewer than
        # four rows means at least one slope could not be estimated.
        if (nrow(s) < 4) {
            next
        }

        # Keep the pixel only if all three slopes are significant (p < 0.05).
        p <- s[2:4, 4]
        if (anyNA(p) || any(p >= 0.05)) {
            next
        }

        mk[x, y] <- s[2, 1]
        mg[x, y] <- s[3, 1]
        ms[x, y] <- s[4, 1]
    }
}

# write all slope arrays into rasters for visualizing
ext <- extent(yde)
mk.ras <- raster(mk, xmn = ext[1], xmx = ext[2], ymn = ext[3], ymx = ext[4], crs = crs(yde))
mg.ras <- raster(mg, xmn = ext[1], xmx = ext[2], ymn = ext[3], ymx = ext[4], crs = crs(yde))
ms.ras <- raster(ms, xmn = ext[1], xmx = ext[2], ymn = ext[3], ymx = ext[4], crs = crs(yde))

## 3. Ridge Regression (L2 regularization)

In [None]:
library(glmnet)

# Per-pixel ridge slope grids for SM, KDD and GDD, sized from the data array.
# Cells stay NA unless a model is fitted for the pixel.
n.row <- dim(yda)[1]
n.col <- dim(yda)[2]
ms.r <- array(NA_real_, c(n.row, n.col))
mk.r <- array(NA_real_, c(n.row, n.col))
mg.r <- array(NA_real_, c(n.row, n.col))

n.folds <- 10 # folds used to cross-validate lambda

# ridge regression iterated over pixels
for (x in seq_len(n.row)) {
    for (y in seq_len(n.col)) {
        v.y <- yda[x, y, ]
        v.k <- kda[x, y, ]
        v.g <- gda[x, y, ]
        v.s <- sma[x, y, ]

        df <- na.omit(data.frame(v.y, v.k, v.g, v.s)) # remove rows with NA

        # With fewer complete years than folds the cross-validation is not
        # meaningful (and with none there is nothing to fit), so the pixel is
        # left as NA. This also covers a variable that is entirely NA.
        if (nrow(df) < n.folds) {
            next
        }

        # NOTE: these must not be called `x` and `y`; those names are the
        # loop indices used to address the pixel in the output grids.
        pred <- data.matrix(df[, 2:4]) # predictors
        resp <- df[, 1] # response variable

        # cross validation to find the optimal value for lambda
        cv.ridge <- cv.glmnet(pred, resp, alpha = 0, nfolds = n.folds)
        best_lambda <- cv.ridge$lambda.min # optimal lambda

        # penalized (alpha = 0 for ridge) regression with the optimal lambda
        ridge.fit <- glmnet(pred, resp, alpha = 0, lambda = best_lambda)
        b <- coef(ridge.fit) # rows: intercept, v.k, v.g, v.s
        mk.r[x, y] <- b[2]
        mg.r[x, y] <- b[3]
        ms.r[x, y] <- b[4]
    }
}

# write all slope arrays into rasters for visualizing
ext <- extent(yde)
mk.r.ras <- raster(mk.r, xmn = ext[1], xmx = ext[2], ymn = ext[3], ymx = ext[4], crs = crs(yde))
mg.r.ras <- raster(mg.r, xmn = ext[1], xmx = ext[2], ymn = ext[3], ymx = ext[4], crs = crs(yde))
ms.r.ras <- raster(ms.r, xmn = ext[1], xmx = ext[2], ymn = ext[3], ymx = ext[4], crs = crs(yde))

## 4. Multiple Regression - State Level

In [None]:
# model over state - KDD, GDD and SM
#
# For every feature in `east`, pool all pixel-years inside that state and fit
#     yield ~ KDD * SM + GDD
# The four slopes are stored per state and written to 'state_model.csv'.

state <- data.frame(STATEFP = east$STATEFP)
state$kdd <- rep(NA_real_, nrow(state))
state$gdd <- rep(NA_real_, nrow(state))
state$sm <- rep(NA_real_, nrow(state))
state$kdd_sm <- rep(NA_real_, nrow(state))

# Read the stacks once; only the crop/mask step depends on the state.
yd.all <- readRDS('./analysis_data/corn_detrended_yield_stack_1960_2020.rds')
yd.all <- yd.all[[1:60]] # keep the 60 layers that match the predictor stacks
kd.all <- readRDS('./analysis_data/corn_gs_detrended_kdd_stack_1960_2019.rds')
gd.all <- readRDS('./analysis_data/corn_gs_detrended_gdd_stack_1960_2019.rds')
sm.all <- readRDS('./analysis_data/corn_gs_detrended_mean_sm_stack_1960_2019.rds')

# Flatten a [row, col, layer] array into one vector, layer varying fastest,
# then column, then row. Applying the same order to every variable keeps the
# pixel-years aligned across the columns of the model data frame.
flatten <- function(a) as.vector(aperm(a, c(3, 2, 1)))

for (i in seq_len(nrow(state))) {
    st <- state[['STATEFP']][i]
    shp <- east[east$STATEFP %in% st, ]

    yd <- mask(crop(yd.all, shp), shp)
    yda <- as.array(yd)

    # Predictors are cropped/masked first and then resampled onto the
    # state's yield grid so that all arrays share the same rows and columns.
    kd <- projectRaster(mask(crop(kd.all, shp), shp), yd)
    kda <- as.array(kd)

    gd <- projectRaster(mask(crop(gd.all, shp), shp), yd)
    gda <- as.array(gd)

    sm <- projectRaster(mask(crop(sm.all, shp), shp), yd)
    sma <- as.array(sm)

    df <- data.frame(
        ydv = flatten(yda),
        kdv = flatten(kda),
        gdv = flatten(gda),
        smv = flatten(sma)
    )
    # lm() would drop incomplete rows anyway; doing it here lets us detect a
    # state with no usable pixel-years and leave its row as NA.
    df <- df[complete.cases(df), ]
    if (nrow(df) == 0) {
        next
    }

    mod <- lm(ydv ~ kdv * smv + gdv, data = df)
    # coef() keeps an NA entry for aliased terms, so look-up by name cannot
    # go out of bounds the way positional indexing of the summary table can.
    b <- coef(mod)
    state[['kdd']][i] <- b[['kdv']]
    state[['sm']][i] <- b[['smv']]
    state[['gdd']][i] <- b[['gdv']]
    state[['kdd_sm']][i] <- b[['kdv:smv']]
}

write.csv(state, 'state_model.csv', row.names = FALSE)

## 5. Trends in Sensitivities - 30-Year Windows

In [None]:
# model over state - KDD, GDD and SM - 30-year windows
#
# For every feature in `east`, fit  yield ~ KDD * SM + GDD  on the pooled
# pixel-years of each 30-layer moving window, then regress each window-wise
# slope on the window index. The trend slope is stored per state only when it
# is significant (p < 0.05); results go to 'state_model_window.csv'.

state <- data.frame(STATEFP = east$STATEFP)
state$kdd <- rep(NA_real_, nrow(state))
state$gdd <- rep(NA_real_, nrow(state))
state$sm <- rep(NA_real_, nrow(state))
state$kdd_sm <- rep(NA_real_, nrow(state))

# Read the stacks once; only the crop/mask step depends on the state.
yd.all <- readRDS('./analysis_data/corn_detrended_yield_stack_1960_2020.rds')
yd.all <- yd.all[[1:60]] # keep the 60 layers that match the predictor stacks
kd.all <- readRDS('./analysis_data/corn_gs_detrended_kdd_stack_1960_2019.rds')
gd.all <- readRDS('./analysis_data/corn_gs_detrended_gdd_stack_1960_2019.rds')
sm.all <- readRDS('./analysis_data/corn_gs_detrended_mean_sm_stack_1960_2019.rds')

win.len <- 30 # number of layers (years) per moving window

# Flatten a [row, col, layer] array into one vector, layer varying fastest,
# then column, then row. Applying the same order to every variable keeps the
# pixel-years aligned across the columns of the model data frame.
flatten <- function(a) as.vector(aperm(a, c(3, 2, 1)))

# Slope of a linear trend of `coefs` against the window index.
# Returns NA when there is nothing to fit, when the slope cannot be estimated
# (its row is missing from the coefficient table), or when its p-value is
# missing or >= 0.05.
trend_slope <- function(coefs) {
    if (all(is.na(coefs))) {
        return(NA_real_)
    }
    win <- seq_along(coefs)
    tab <- summary(lm(coefs ~ win))$coefficients
    if (nrow(tab) < 2) {
        return(NA_real_)
    }
    p <- tab[2, 4]
    if (is.na(p) || p >= 0.05) {
        return(NA_real_)
    }
    tab[2, 1]
}

for (i in seq_len(nrow(state))) {
    st <- state[['STATEFP']][i]
    shp <- east[east$STATEFP %in% st, ]

    yd <- mask(crop(yd.all, shp), shp)
    yda <- as.array(yd)

    # Predictors are cropped/masked first and then resampled onto the
    # state's yield grid so that all arrays share the same rows and columns.
    kd <- projectRaster(mask(crop(kd.all, shp), shp), yd)
    kda <- as.array(kd)

    gd <- projectRaster(mask(crop(gd.all, shp), shp), yd)
    gda <- as.array(gd)

    sm <- projectRaster(mask(crop(sm.all, shp), shp), yd)
    sma <- as.array(sm)

    # 60 layers with 30-layer windows gives 31 windows.
    n.win <- max(dim(yda)[3] - win.len + 1, 0)

    # Window-wise slopes; entries stay NA for windows without usable data.
    ak <- rep(NA_real_, n.win)  # KDD
    ag <- rep(NA_real_, n.win)  # GDD
    a.sm <- rep(NA_real_, n.win) # SM
    aks <- rep(NA_real_, n.win) # KDD x SM interaction

    for (w in seq_len(n.win)) {
        yrs <- w:(w + win.len - 1)

        # The data frame is rebuilt from scratch for each window so that a
        # window only ever contains its own layers.
        df <- data.frame(
            ydv = flatten(yda[, , yrs, drop = FALSE]),
            kdv = flatten(kda[, , yrs, drop = FALSE]),
            gdv = flatten(gda[, , yrs, drop = FALSE]),
            smv = flatten(sma[, , yrs, drop = FALSE])
        )
        df <- df[complete.cases(df), ]
        if (nrow(df) == 0) {
            next
        }

        # coef() keeps an NA entry for aliased terms, so look-up by name
        # cannot go out of bounds.
        b <- coef(lm(ydv ~ kdv * smv + gdv, data = df))
        ak[w] <- b[['kdv']]
        ag[w] <- b[['gdv']]
        a.sm[w] <- b[['smv']]
        aks[w] <- b[['kdv:smv']]
    }

    state[['kdd']][i] <- trend_slope(ak)
    state[['gdd']][i] <- trend_slope(ag)
    state[['sm']][i] <- trend_slope(a.sm)
    state[['kdd_sm']][i] <- trend_slope(aks)
}

write.csv(state, 'state_model_window.csv', row.names = FALSE)