In [1]:
library(readxl)
library(tidyverse)
library(writexl)
library(plm)
library(dplyr)
library(tsibble)
library(fpp3)
library(panelvar)
library(lpirfs)

-- [1mAttaching core tidyverse packages[22m ------------------------ tidyverse 2.0.0 --
[32mv[39m [34mdplyr    [39m 1.1.4     [32mv[39m [34mreadr    [39m 2.1.5
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mstringr  [39m 1.5.1
[32mv[39m [34mggplot2  [39m 3.5.1     [32mv[39m [34mtibble   [39m 3.2.1
[32mv[39m [34mlubridate[39m 1.9.3     [32mv[39m [34mtidyr    [39m 1.3.1
[32mv[39m [34mpurrr    [39m 1.0.2     
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mi[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: 'plm'


The following objects are masked from 'package:dplyr':

    between, lag, lead


Registered S3 method overwritten by 'tsibble':
  method     

In [37]:
library(arrow)

# Read the Parquet file
quart_df <- read_parquet("/Users/dhruvnovaims/Library/Mobile Documents/com~apple~CloudDocs/Documents/GitHub/climate-econometrics/countries/portugal/purchases_paper/datasets/final_data_mun.parquet")



In [15]:
df_ts_series <- quart_df  %>% 
    mutate(time = yearquarter(time)) 
# %>% 
#     select(time, concelho, hpi_log_L1, ur_L1, purchases_log_L1,
#        fwi, high_fire_days,  historical_dev_tp,
#        tp_std_mm_hour, 
#        historical_dev_t2m, t2m_std, tp_2,
#        tp_3, t2m_2, t2m_3, Medit_factor_tp, Medit_factor_t2m,
#        CCA_2)

# df_ts_count <- quart_df  %>% 
#     mutate(time = yearquarter(time)) %>% 
#     select(time, concelho, hpi_log_L1, ur_L1, purchases_log_L1,
#        fwi, high_fire_days, cdd, extreme_precipitation_mm,
#        frequency_of_extreme_precipitation,num_heatwaves,
#        hot_days, num_tropical_nights, frost_days,
#        tp_2,
#        tp_3, t2m_2, t2m_3, Medit_factor_tp, Medit_factor_t2m,
#        CCA_2)


In [16]:
# Step 1: Create linear trends that are specific to each municipality
df_ts_series <- df_ts_series %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# # Step 2: Create quadratic trends that are specific to each municipality
# df_ts_series <- df_ts_series %>%
#   mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend


In [5]:
# # Step 1: Create linear trends that are specific to each municipality
# df_ts_count <- df_ts_count %>%
#   group_by(CCA_2) %>%
#   mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# # Step 2: Create quadratic trends that are specific to each municipality
# df_ts_count <- df_ts_count %>%
#   mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend


In [6]:
df_ts_series_as_dataframe <- as.data.frame(df_ts_series)

## AIC Lag Selection

In [22]:
library(dplyr)
library(tidyr)
library(stringr)
library(zoo)
library(tibble)

## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1"
)
max_lag <- 12                            # highest lag order to test

## ---------------- 2. helper: one‐shot AIC at lag‐order p -------------------
calc_aic <- function(p, data) {
  

  exo_with_lags <- exo_base

  for (lag in 1:p) {
    col_lags <- paste0(exo_base, "_L", lag)
    exo_with_lags <- c(exo_with_lags, col_lags)
  }

  ## keep only the variables needed for this model, drop incomplete rows
  df_tmp <-  data  %>% 
    mutate(time = yearquarter(time)) %>%                             
    select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 
  df_tmp_df <- as.data.frame(df_tmp)
  ## fit PVAR-FE
  model <- pvarfeols(
    dependent_vars   = endog,
    lags             = p,
    exog_vars        = exo_with_lags,
    data             = df_tmp_df,
    panel_identifier = c("CCA_2", "time")
  )
  
  # ## compute concentrated log-likelihood and AIC
  # E         <- as.matrix(model$residuals)         # n × m
  # n_obs     <- nrow(E)
  # m_eq      <- ncol(E)
  # Sigma_hat <- crossprod(E) / n_obs               # m × m
  # log_det   <- as.numeric(determinant(Sigma_hat, logarithm = TRUE)$modulus)
  
  # q_exog <- length(model$exo_with_lags)
  # k_par  <- m_eq * m_eq * p + m_eq * q_exog       # (m²)p + m·q
  # N_pan  <- model$nof_groups
  # T_tot  <- n_obs / N_pan + p                     # total periods
  
  # tibble(lags = p,
  #        AIC  = 2 * k_par + N_pan * (T_tot - p) * log_det)
  ## --- residual-based AIC (scalar RSS version) -------------------------------
    E_mat   <- as.matrix(model$residuals)           # n_obs × m_eq
    n_obs   <- nrow(E_mat)                          # stacked time observations
    m_eq    <- ncol(E_mat)                          # equations

    RSS     <- sum(E_mat^2)                         # ∑ ε² over every cell
    N_total <- n_obs * m_eq                         # total scalar residuals

    q_exog  <- length(model$exog_vars)              # lag-0 + selected lags
    k_par   <- m_eq * m_eq * p + m_eq * q_exog      # (m²)p + m·q

    AIC_val <- N_total * log(RSS / N_total) + 2 * k_par

    tibble(lags = p,
          AIC  = AIC_val)

}

## ---------------- 3. prepare the master data frame -------------------------
df_panel <- quart_df %>% 
  arrange(CCA_2, time)

## ---------------- 4. loop over lag orders 1 … max_lag ----------------------
aic_table <- map_dfr(1:max_lag, ~calc_aic(.x, df_panel))  %>%      # old step
  mutate(Delta = AIC - min(AIC))                   # new Δᵢ column
print(aic_table)


[90m# A tibble: 12 x 3[39m
    lags      AIC  Delta
   [3m[90m<int>[39m[23m    [3m[90m<dbl>[39m[23m  [3m[90m<dbl>[39m[23m
[90m 1[39m     1 -[31m[4m1[24m[4m5[24m[4m3[24m[39m[31m317[39m[31m.[39m     0 
[90m 2[39m     2 -[31m[4m1[24m[4m5[24m[4m0[24m[39m[31m832[39m[31m.[39m  [4m2[24m484.
[90m 3[39m     3 -[31m[4m1[24m[4m4[24m[4m7[24m[39m[31m936[39m[31m.[39m  [4m5[24m380.
[90m 4[39m     4 -[31m[4m1[24m[4m4[24m[4m2[24m[39m[31m739[39m[31m.[39m [4m1[24m[4m0[24m578.
[90m 5[39m     5 -[31m[4m1[24m[4m3[24m[4m5[24m[39m[31m074[39m[31m.[39m [4m1[24m[4m8[24m243.
[90m 6[39m     6 -[31m[4m1[24m[4m2[24m[4m8[24m[39m[31m231[39m[31m.[39m [4m2[24m[4m5[24m086.
[90m 7[39m     7 -[31m[4m1[24m[4m2[24m[4m1[24m[39m[31m682[39m[31m.[39m [4m3[24m[4m1[24m635.
[90m 8[39m     8 -[31m[4m1[24m[4m1[24m[4m6[24m[39m[31m087[39m[31m.[39m [4m3[24m[4m7[24m229.
[90m 9[39m     9 -

In [37]:
library(dplyr)
library(tidyr)
library(stringr)
library(zoo)
library(tibble)

## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c('fwi_D1', 'high_fire_days', 'cdd', 'extreme_precipitation_mm',
       'frequency_of_extreme_precipitation',
       'hot_days', 'num_tropical_nights', 
max_lag <- 12                            # highest lag order to test

## ---------------- 2. helper: one‐shot AIC at lag‐order p -------------------
calc_aic <- function(p, data) {
  

  exo_with_lags <- exo_base

  for (lag in 1:p) {
    col_lags <- paste0(exo_base, "_L", lag)
    exo_with_lags <- c(exo_with_lags, col_lags)
  }

  ## keep only the variables needed for this model, drop incomplete rows
  df_tmp <-  data  %>% 
    mutate(time = yearquarter(time)) %>%                             
    select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 
  df_tmp_df <- as.data.frame(df_tmp)
  ## fit PVAR-FE
  model <- pvarfeols(
    dependent_vars   = endog,
    lags             = p,
    exog_vars        = exo_with_lags,
    data             = df_tmp_df,
    panel_identifier = c("CCA_2", "time")
  )
  
  # ## compute concentrated log-likelihood and AIC
  # E         <- as.matrix(model$residuals)         # n × m
  # n_obs     <- nrow(E)
  # m_eq      <- ncol(E)
  # Sigma_hat <- crossprod(E) / n_obs               # m × m
  # log_det   <- as.numeric(determinant(Sigma_hat, logarithm = TRUE)$modulus)
  
  # q_exog <- length(model$exo_with_lags)
  # k_par  <- m_eq * m_eq * p + m_eq * q_exog       # (m²)p + m·q
  # N_pan  <- model$nof_groups
  # T_tot  <- n_obs / N_pan + p                     # total periods
  
  # tibble(lags = p,
  #        AIC  = 2 * k_par + N_pan * (T_tot - p) * log_det)
  ## --- residual-based AIC (scalar RSS version) -------------------------------
    E_mat   <- as.matrix(model$residuals)           # n_obs × m_eq
    n_obs   <- nrow(E_mat)                          # stacked time observations
    m_eq    <- ncol(E_mat)                          # equations

    RSS     <- sum(E_mat^2)                         # ∑ ε² over every cell
    N_total <- n_obs * m_eq                         # total scalar residuals

    q_exog  <- length(model$exog_vars)              # lag-0 + selected lags
    k_par   <- m_eq * m_eq * p + m_eq * q_exog      # (m²)p + m·q

    AIC_val <- N_total * log(RSS / N_total) + 2 * k_par

    tibble(lags = p,
          AIC  = AIC_val)

}

## ---------------- 3. prepare the master data frame -------------------------
df_panel <- quart_df %>% 
  arrange(CCA_2, time)

## ---------------- 4. loop over lag orders 1 … max_lag ----------------------
aic_table <- map_dfr(1:max_lag, ~calc_aic(.x, df_panel))  %>%      # old step
  mutate(Delta = AIC - min(AIC))                   # new Δᵢ column
print(aic_table)


ERROR: Error in parse(text = input): <text>:15:1: unexpected symbol
14: ## ---------------- 2. helper: one<U+2010>shot AIC at lag<U+2010>order p -------------------
15: calc_aic
    ^


## Panel Var Modelling

### Basic Model

In [38]:
df_panel <- quart_df %>% 
  arrange(CCA_2, time) %>% 
    mutate(time = yearquarter(time)) %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# # Step 2: Create quadratic trends that are specific to each municipality
df_panel <- df_panel %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend


In [32]:
# Main Model for Panel Var FE!
## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1"
)
p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_series_basic <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)
  


In [19]:
summary(mun_fe_series_basic)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 11954 
Number of groups = 278 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1  -0.2001 ***                 0.0047 ***      0.0049 ***        
                                (0.0096)                   (0.0005)        (0.0013)           
demeaned_lag1_ur_D1             -2.1240 ***                -0.0958 ***     -0.1653 ***        
                                (0.1842)                   (0.0099)        (0.0251)           
demeaned_lag1_hpi_log_D1        -0.2497 ***                -0.0239 ***      0.0132    

#### Tests

In [34]:
install.packages('lmtest')
library(lmtest)


The downloaded binary packages are in
	/var/folders/nt/03y4p9md50gblp_0svv74zb80000gn/T//RtmpsAyp7J/downloaded_packages


Loading required package: zoo


Attaching package: 'zoo'


The following object is masked from 'package:tsibble':

    index


The following objects are masked from 'package:base':

    as.Date, as.Date.numeric




In [35]:


# Extract residuals
residuals_df <- as.data.frame(mun_fe_series_basic$residuals)

# Perform Durbin-Watson test for each column
dw_purchases <- dwtest(residuals_df$demeaned_purchases_log_D1 ~ 1)
dw_ur <- dwtest(residuals_df$demeaned_ur_D1 ~ 1)
dw_hpi <- dwtest(residuals_df$demeaned_hpi_log_D1 ~ 1)

# Print results
print(dw_purchases)
print(dw_ur)
print(dw_hpi)


	Durbin-Watson test

data:  residuals_df$demeaned_purchases_log_D1 ~ 1
DW = 2.1063, p-value = 1
alternative hypothesis: true autocorrelation is greater than 0


	Durbin-Watson test

data:  residuals_df$demeaned_ur_D1 ~ 1
DW = 2.1009, p-value = 1
alternative hypothesis: true autocorrelation is greater than 0


	Durbin-Watson test

data:  residuals_df$demeaned_hpi_log_D1 ~ 1
DW = 2.0484, p-value = 0.9951
alternative hypothesis: true autocorrelation is greater than 0



### Increased Lags (4)

In [20]:
# Main Model for Panel Var FE!
## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1"
)
p = 4

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_series_lags_4 <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)
  


In [21]:
summary(mun_fe_series_lags_4)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 11954 
Number of groups = 278 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1  -0.1967 ***                 0.0008          0.0054 ***        
                                (0.0100)                   (0.0006)        (0.0015)           
demeaned_lag1_ur_D1             -0.4480 *                  -0.1871 ***     -0.1614 ***        
                                (0.1825)                   (0.0101)        (0.0280)           
demeaned_lag1_hpi_log_D1         0.0756                    -0.0395 ***      0.0479 ***

### With NonLinear Terms and 1 Lags

In [8]:
# Main Model for Panel Var FE!
## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1", "tp_2", "tp_3", "t2m_2", "t2m_3"
)
p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_series_lags_1_nonlin2 <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)
  


In [9]:
summary(mun_fe_series_lags_1_nonlin2)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 11954 
Number of groups = 278 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1  -0.1882 ***                 0.0040 ***      0.0033 *          
                                (0.0096)                   (0.0005)        (0.0013)           
demeaned_lag1_ur_D1             -2.2575 ***                -0.0837 ***     -0.1390 ***        
                                (0.1847)                   (0.0099)        (0.0251)           
demeaned_lag1_hpi_log_D1        -0.2785 ***                -0.0285 ***      0.0104    

### With Medit Factor and Lag 1

In [32]:

endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1", "Medit_factor_tp",
"Medit_factor_t2m_D1"
)
p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_series_lags_1_medit <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)
  


In [33]:
summary(mun_fe_series_lags_1_medit)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 11954 
Number of groups = 278 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                 demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
-----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1   -0.1748 ***                 0.0038 ***      0.0031 *          
                                 (0.0097)                   (0.0005)        (0.0013)           
demeaned_lag1_ur_D1              -2.2155 ***                -0.0915 ***     -0.1431 ***        
                                 (0.1815)                   (0.0098)        (0.0249)           
demeaned_lag1_hpi_log_D1         -0.1413 *                  -0.0307 ***      0.0

### Replacing with MM Day

In [12]:
# Main Model for Panel Var FE!
## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_D1", "t2m_hd", "t2m_std_D1"
)

p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_series_lags_1_medit_mmday <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)


In [13]:
summary(mun_fe_series_lags_1_medit_mmday)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 11954 
Number of groups = 278 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1  -0.2001 ***                 0.0047 ***      0.0049 ***        
                                (0.0096)                   (0.0005)        (0.0013)           
demeaned_lag1_ur_D1             -2.1240 ***                -0.0958 ***     -0.1653 ***        
                                (0.1842)                   (0.0099)        (0.0251)           
demeaned_lag1_hpi_log_D1        -0.2497 ***                -0.0239 ***      0.0132    

### Replacing with Count

In [None]:
# Main Model for Panel Var FE!
## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c('fwi_D1', 'high_fire_days', 'cdd', 'extreme_precipitation_mm',
       'frequency_of_extreme_precipitation','num_heatwaves',
       'hot_days', 'num_tropical_nights', 'frost_days',  "Medit_factor_tp",
"Medit_factor_t2m_D1")

p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_l

ags_1_medit_count <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)


In [11]:
summary(mun_fe_lags_1_medit_count)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 11954 
Number of groups = 278 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                                demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
--------------------------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1                  -0.1725 ***                 0.0035 ***      0.0021            
                                                (0.0097)                   (0.0005)        (0.0014)           
demeaned_lag1_ur_D1                             -1.9462 ***                -0.1134 ***     -0.1507 ***        
                                                (0.1811)                   (0.0097)        (0.0253)  

### Time Slice

In [43]:

# Main Model for Panel Var FE!
## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1", "Medit_factor_tp",
"Medit_factor_t2m_D1"
)
p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values



# Assuming medit_df is your data frame and 'time' is a Date column
df_tmp <- df_tmp %>%
  filter(time >= as.Date('2010-01-01') & time <= as.Date('2019-12-31'))

df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_series_lags_4_medit_timeslice <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)
  


[1m[22m[36mi[39m In argument: `time >= as.Date("2010-01-01") & time <= as.Date("2019-12-31")`.
[36mi[39m In group 1: `CCA_2 = "0101"`.
[33m![39m Incompatible methods (">=.vctrs_vctr", ">=.Date") for ">="


In [44]:
summary(mun_fe_series_lags_4_medit_timeslice)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 9730 
Number of groups = 278 
Obs per group: min = 35 
               avg = 35 
               max = 35 

                                 demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
-----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1   -0.1470 ***                 0.0043 ***      0.0046 **         
                                 (0.0107)                   (0.0006)        (0.0014)           
demeaned_lag1_ur_D1              -2.2743 ***                -0.1003 ***     -0.1922 ***        
                                 (0.1996)                   (0.0107)        (0.0266)           
demeaned_lag1_hpi_log_D1         -0.1909 *                  -0.0382 ***      0.02

In [119]:

# Main Model for Panel Var FE!
## ---------------- 1. core variable sets (edit if names change) -------------
endog        <- c("purchases_log", "ur", "hpi_log")
exo_base     <- c('fwi', 'high_fire_days', 'cdd', 'extreme_precipitation_mm',
       'frequency_of_extreme_precipitation','num_heatwaves',
       'hot_days', 'num_tropical_nights', 'frost_days',  "Medit_factor_tp",
"Medit_factor_t2m")
p = 4

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_tmp <-  df_panel  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 

# Step 1: Create linear trends that are specific to each municipality
df_tmp <- df_tmp %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# Step 2: Create quadratic trends that are specific to each municipality
df_tmp <- df_tmp %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values



# Assuming medit_df is your data frame and 'time' is a Date column
df_tmp <- df_tmp %>%
  filter(time >= as.Date('2010-01-01') & time <= as.Date('2019-12-31'))

df_tmp_df <- as.data.frame(df_tmp)

## fit PVAR-FE
mun_fe_series_lags_4_medit_timeslice <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_tmp_df,
  panel_identifier = c("CCA_2", "time")
)
  


[1m[22m[36mi[39m In argument: `time >= as.Date("2010-01-01") & time <= as.Date("2019-12-31")`.
[36mi[39m In group 1: `CCA_2 = "0101"`.
[33m![39m Incompatible methods (">=.vctrs_vctr", ">=.Date") for ">="


In [120]:
summary(mun_fe_series_lags_4_medit_timeslice)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 8896 
Number of groups = 278 
Obs per group: min = 32 
               avg = 32 
               max = 32 

                                                demeaned_purchases_log  demeaned_ur  demeaned_hpi_log
-----------------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log                      0.6520 ***              0.0018 **   -0.0026         
                                                (0.0120)                (0.0006)     (0.0016)        
demeaned_lag1_ur                                 0.1703                  0.5404 ***  -0.0168         
                                                (0.2483)                (0.0122)     (0.0335)        
demeaned_lag1_hpi_log                        

### Spatial Slice

In [39]:
unique_values <- unique(df_panel$NUTS_NAME)
print(unique_values)

[1] "centro"                       "norte"                       
[3] "alentejo"                     "algarve"                     
[5] "area metropolitana de lisboa"


#### Lisboa

In [40]:


endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1", "Medit_factor_tp",
"Medit_factor_t2m_D1"
)
p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_lisboa <-  df_panel %>%
  filter(NUTS_NAME %in% c("area metropolitana de lisboa"))  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 


# Step 1: Create linear trends that are specific to each municipality
df_lisboa <- df_lisboa %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# df_lisboa 2: Create quadratic trends that are specific to each municipality
df_lisboa <- df_lisboa %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_lisboa_df <- as.data.frame(df_lisboa)

## fit PVAR-FE
mun_fe_series_lags_1_lisb <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_lisboa_df,
  panel_identifier = c("CCA_2", "time")
)
  


In [24]:
summary(mun_fe_series_lags_1_lisb)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 387 
Number of groups = 9 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                 demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
-----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1    0.0872                     0.0006         -0.0051            
                                 (0.0549)                   (0.0007)        (0.0036)           
demeaned_lag1_ur_D1               6.1846                     0.1607 **      -0.2965            
                                 (4.3363)                   (0.0587)        (0.2862)           
demeaned_lag1_hpi_log_D1         -0.1981                     0.0025          0.1468 

In [25]:
unique_values <- unique(df_panel$NUTS_NAME)
print(unique_values)

[1] "Centro (PT)"          "Norte"                "Alentejo"            
[4] "Algarve"              "Oeste e Vale do Tejo" "Grande Lisboa"       
[7] "Pen<U+00ED>nsula de Set<U+00FA>bal"


#### Porto

In [43]:


endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1", "Medit_factor_tp",
"Medit_factor_t2m_D1"
)
p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_porto <-  df_panel %>%
  filter(NUTS_NAME %in% c("norte"))  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 


# Step 1: Create linear trends that are specific to each municipality
df_porto <- df_porto %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# df_lisboa 2: Create quadratic trends that are specific to each municipality
df_porto <- df_porto %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_lisboa_df <- as.data.frame(df_porto)

## fit PVAR-FE
mun_fe_series_lags_1_port <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_lisboa_df,
  panel_identifier = c("CCA_2", "time")
)
  


In [44]:
summary(mun_fe_series_lags_1_port)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 3698 
Number of groups = 86 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                 demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
-----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1   -0.2530 ***                -0.0023 *        0.0066 *          
                                 (0.0168)                   (0.0010)        (0.0030)           
demeaned_lag1_ur_D1              -0.7718 **                 -0.1250 ***     -0.0089            
                                 (0.2862)                   (0.0176)        (0.0513)           
demeaned_lag1_hpi_log_D1         -0.0349                    -0.0335 ***      0.008

#### Algarve

In [45]:


endog        <- c("purchases_log_D1", "ur_D1", "hpi_log_D1")
exo_base     <- c(                       # contemporaneous exogenous terms
  "fwi_D1", "high_fire_days", "tp_hd",
  "tp_std_mm_hour_D1", "t2m_hd", "t2m_std_D1", "Medit_factor_tp",
"Medit_factor_t2m_D1"
)
p = 1

exo_with_lags <- exo_base

for (lag in 1:p) {
  col_lags <- paste0(exo_base, "_L", lag)
  exo_with_lags <- c(exo_with_lags, col_lags)
}
df_alg <-  df_panel %>%
  filter(NUTS_NAME %in% c("algarve"))  %>% 
  mutate(time = yearquarter(time)) %>%  select(CCA_2, time, all_of(endog), all_of(exo_with_lags)) 


# Step 1: Create linear trends that are specific to each municipality
df_alg <- df_alg %>%
  group_by(CCA_2) %>%
  mutate(Linear_Trend = row_number())  # Creates a linear trend for each municipality

# df_lisboa 2: Create quadratic trends that are specific to each municipality
df_alg <- df_alg %>%
  mutate(Quadratic_Trend = Linear_Trend^2)  # Quadratic trend is square of linear trend

                                  # no missing values
df_lisboa_df <- as.data.frame(df_alg)

## fit PVAR-FE
mun_fe_series_lags_1_alg <- pvarfeols(
  dependent_vars   = endog,
  lags             = p,
  exog_vars        = c('Linear_Trend', 'Quadratic_Trend', exo_with_lags),
  data             = df_lisboa_df,
  panel_identifier = c("CCA_2", "time")
)
  


In [46]:
summary(mun_fe_series_lags_1_alg)

---------------------------------------------------
Fixed Effects OLS Panel VAR estimation 
---------------------------------------------------
Transformation: demean 
Group variable: CCA_2 
Time variable: time 
Number of observations = 688 
Number of groups = 16 
Obs per group: min = 43 
               avg = 43 
               max = 43 

                                 demeaned_purchases_log_D1  demeaned_ur_D1  demeaned_hpi_log_D1
-----------------------------------------------------------------------------------------------
demeaned_lag1_purchases_log_D1   -0.2054 ***                 0.0020         -0.0021            
                                 (0.0401)                   (0.0030)        (0.0059)           
demeaned_lag1_ur_D1              -2.6857 ***                -0.0349         -0.1818 *          
                                 (0.5513)                   (0.0411)        (0.0811)           
demeaned_lag1_hpi_log_D1          0.3837                    -0.0539 **       0.0777