In [None]:
# haven supplies read_dta(), which this notebook uses to read the Stata file
library(haven)
# NOTE(review): no tidyverse function is called in the cells visible here;
# presumably loaded for interactive exploration -- confirm before removing
library(tidyverse)
# Large scipen penalty: print numbers in fixed rather than scientific notation
options(scipen = 999)

In [None]:
# Read the county-level annual crime panel from the Stata file
crimedata <- read_dta("CRIME4.DTA")
# Preview the first ten rows
head(crimedata, n = 10)
# Row count divided by 7 -- presumably the number of years per county, which
# would make this the number of counties (TODO confirm the panel is balanced)
nrow(crimedata) / 7

In [None]:
# The data already contain the columns needed for first-difference estimation
# (e.g. clcrmrte alongside lcrmrte); show a few of them, then list all columns
head(crimedata[c("lcrmrte", "clcrmrte", "year", "d83", "d84")])
names(crimedata)

In [None]:
# Does being "hard on crime" reduce the crime rate?
# Pooled regression in logs. Variables, in formula order:
#   lcrmrte  - number of crimes per capita
#   lprbarr  - "probability" that a crime results in an arrest
#   lprbconv - "probability" that an arrest results in a conviction
#   lprbpris - "probability" that a conviction results in a prison sentence
#   lavgsen  - average prison sentence in days
#   lpolpc   - number of police officers per capita
#   d82..d87 - year dummies
reg0 <- lm(
  formula = lcrmrte ~ lprbarr + lprbconv + lprbpris + lavgsen + lpolpc +
    d82 + d83 + d84 + d85 + d86 + d87,
  data = crimedata
)
summary(reg0)

In [None]:
# Interpret coefficient on lprbpris. Does it make sense?
# Interpret coefficient on lpolpc. Does it make sense?
# Interpret coefficients on year dummies (ref year 1981)

In [None]:
# First-differenced regression: outcome and regressors are the "c"-prefixed
# (changed) versions of the log variables
reg1 <- lm(
  formula = clcrmrte ~ clprbarr + clprbcon + clprbpri + clavgsen + clpolpc +
    d83 + d84 + d85 + d86 + d87,
  data = crimedata
)
# d82 is left out on purpose: the 1981-to-1982 change is the reference period
summary(reg1)

In [None]:
# Interpret coefficient on clprbpri. Did it change? Does it make sense?
# Interpret coefficient on clpolpc. Did it change? Does it make sense?

In [None]:
# Back to lecture

In [None]:
# Fixed-effects regression via dummy variables: turning county into a factor
# makes lm() estimate a separate intercept shift for every county
crimedata[["county"]] <- as.factor(crimedata[["county"]])
reg2 <- lm(
  formula = lcrmrte ~ lprbarr + lprbconv + lprbpris + lavgsen + lpolpc +
    d82 + d83 + d84 + d85 + d86 + d87 + county,
  data = crimedata
)
summary(reg2)

In [None]:
# Are the results similar to the FD results?
# Interpret the coefficient on d83 (ref category is d81)
# Interpret the coefficient on county3 (ref category is county1)

In [None]:
# Fixed-effects regression using felm: the factor named after "|" (county)
# is the fixed effect, so no county dummies appear in the formula itself
library(lfe)
reg3 <- felm(
  lcrmrte ~ lprbarr + lprbconv + lprbpris + lavgsen + lpolpc +
    d82 + d83 + d84 + d85 + d86 + d87 | county,
  data = crimedata
)
summary(reg3)

In [None]:
# Back to lecture

In [None]:
# Clustered standard errors with vcovCL -- usable for first differences or
# fixed effects alike.
library(sandwich)
# vcovCL() returns the variance-covariance matrix of a fitted regression,
# allowing for clustering. Its diagonal holds the variances of the beta
# coefficients; their square roots are the standard errors.
# Here: SEs from the dummy-variable FE estimation, clustered at the county level.
reg2$clus_se <- sqrt(
  diag(
    vcovCL(reg2, cluster = crimedata$county)
  )
)
# Put the coefficients next to their clustered SEs. Rows 1-12 are the
# intercept, the five regressors and the six year dummies, so the county FE
# estimates are not shown.
cbind(coef(reg2), reg2$clus_se)[seq_len(12), ]

In [None]:
# How do these clustered SEs compare to the unadjusted SEs?
# What are the implications for inference?

In [None]:
# Compute clustered standard errors in the fixed-effects regression directly
# with felm. The formula has four "|"-separated parts (see ?felm):
#   outcome ~ regressors | fixed effects | IV specification (0 = none) | cluster
# The regressors match reg3 exactly -- including d82 -- so that the two
# summaries below differ only in how the standard errors are computed.
reg4 <- felm(lcrmrte ~ lprbarr + lprbconv + lprbpris + lavgsen + lpolpc
             + d82 + d83 + d84 + d85 + d86 + d87 | county | 0 | county,
             data = crimedata)
# Clustered SEs (reg4) followed by the unadjusted SEs (reg3) for comparison
summary(reg4)
summary(reg3)