In [None]:
#load tidyverse for some needed packages
library(tidyverse)
# Read the CO2/GDP table from the CSV file (path is relative to the working
# directory) into the data frame `co2`.
co2 <- read.csv(file = "co2gdp_all.csv")

In [None]:
# Quick look at the raw data: show the first ten rows, then the row count.
head(co2, n = 10)
nrow(co2)

In [None]:
# Drop every row that has a missing value in either required column.
# complete.cases() is TRUE only for rows with no NA in the selected columns,
# so a single filter gives the same rows as filtering on each column in turn.
# This also avoids comparing against `F`, which is an ordinary variable that
# can be reassigned (unlike the reserved constant FALSE).
# Selecting the columns with `[, c(...)]` still raises an error if either
# column is absent from the data.
# NOTE(review): the filter uses "gdp_dollars", but the regressions in this
# file use gdppercapitapppconstant2005inter -- confirm this is the intended
# column.
co2 <- co2[complete.cases(co2[, c("co2percaptons", "gdp_dollars")]), ]

In [None]:
# Look at the data again: first ten rows and the current row count of `co2`.
head(co2, n = 10)
nrow(co2)

In [None]:
# Draw two random samples of roughly 10% of the rows each.
# runif() returns one Uniform(0, 1) draw per row, and a draw falls below 0.1
# with probability 0.1, so every row is flagged independently and the realised
# sample size varies from run to run.
# Sample 1 is flagged before sample 2; both flags are stored as columns of co2.
co2[["sample1"]] <- runif(nrow(co2)) < 0.1
co2[["sample2"]] <- runif(nrow(co2)) < 0.1
# Keep the flagged rows of each sample in their own data frame.
co2_sample1 <- co2[co2[["sample1"]], ]
co2_sample2 <- co2[co2[["sample2"]], ]

In [None]:
# Display sample 1: the rows of co2 whose `sample1` flag is TRUE.
co2_sample1

In [None]:
# Display sample 2: the rows of co2 whose `sample2` flag is TRUE.
co2_sample2

In [None]:
# Scatter plot for sample 1: gdppercapitapppconstant2005inter on the
# horizontal axis, co2percaptons on the vertical axis.
plot(
  x = co2_sample1$gdppercapitapppconstant2005inter,
  y = co2_sample1$co2percaptons
)

In [None]:
# Scatter plot for sample 2: gdppercapitapppconstant2005inter on the
# horizontal axis, co2percaptons on the vertical axis.
plot(
  x = co2_sample2$gdppercapitapppconstant2005inter,
  y = co2_sample2$co2percaptons
)

In [None]:
# Sample 1: linear regression of co2percaptons on
# gdppercapitapppconstant2005inter, followed by the fitted model's summary.
s1_reg <- lm(
  formula = co2percaptons ~ gdppercapitapppconstant2005inter,
  data = co2_sample1
)
summary(s1_reg)

In [None]:
# Sample 2: linear regression of co2percaptons on
# gdppercapitapppconstant2005inter, followed by the fitted model's summary.
s2_reg <- lm(
  formula = co2percaptons ~ gdppercapitapppconstant2005inter,
  data = co2_sample2
)
summary(s2_reg)

In [None]:
#Back to lecture notes

In [None]:
# Standard errors for the sample 1 regression, with and without an adjustment
# for heteroskedasticity.
# sandwich supplies vcovHC(); lmtest supplies coeftest().
library(sandwich)
library(lmtest)
# Adjusted: re-test the coefficients using the covariance matrix from vcovHC
# (called with its default settings).
coeftest(x = s1_reg, vcov. = vcovHC)
# Unadjusted: summary() reports the conventional standard errors, which are
# only valid if assumption SLR5 holds.
summary(s1_reg)

In [None]:
#Back to lecture notes

In [None]:
# Compute the "true" beta_1 for the population: the regression is fitted on
# every row of co2 and its summary is kept in `reg`.
reg <- summary(lm(
  formula = co2percaptons ~ gdppercapitapppconstant2005inter,
  data = co2
))
# Look the slope estimate up by row and column name. Linear indexing with [2]
# only hits the slope while the coefficient table has both rows; if the slope
# cannot be estimated, summary() drops its row and [2] would silently return
# the intercept's standard error. Name-based lookup fails loudly instead.
beta1 <- reg$coefficients["gdppercapitapppconstant2005inter", "Estimate"]
# Display the full regression summary.
reg

In [None]:
# Draw a ~10% sample 100 times and collect the estimated slope from each draw.
betas1 <- numeric(100)
for (s in seq_along(betas1)) {
  # Flag each row independently with probability 0.1. The flag is stored in
  # the `sample` column of co2 and overwritten on every iteration.
  co2[["sample"]] <- runif(nrow(co2)) < 0.1
  draw_fit <- lm(
    formula = co2percaptons ~ gdppercapitapppconstant2005inter,
    data = co2[co2[["sample"]], ]
  )
  # Extract the slope by name from coef(): this gives NA when the slope cannot
  # be estimated in a draw, whereas indexing summary()'s coefficient table
  # with [2] would return the intercept's standard error in that case.
  betas1[s] <- coef(draw_fit)[["gdppercapitapppconstant2005inter"]]
}
# Histogram of the estimated slopes (20 breaks suggested, x axis fixed to 0-1).
hist(betas1, breaks = 20, xlim = c(0, 1))
# Red line: mean of the sampled slopes. Blue line: population slope beta1.
abline(v = mean(betas1), col = "red", lwd = 2)
abline(v = beta1, col = "blue", lwd = 2)

In [None]:
# Repeat the experiment with a ~25% sample: 100 draws, one slope per draw.
betas2 <- numeric(100)
for (s in seq_along(betas2)) {
  # Flag each row independently with probability 0.25. The flag is stored in
  # the `sample` column of co2 and overwritten on every iteration.
  co2[["sample"]] <- runif(nrow(co2)) < 0.25
  draw_fit <- lm(
    formula = co2percaptons ~ gdppercapitapppconstant2005inter,
    data = co2[co2[["sample"]], ]
  )
  # Extract the slope by name from coef(): this gives NA when the slope cannot
  # be estimated in a draw, whereas indexing summary()'s coefficient table
  # with [2] would return the intercept's standard error in that case.
  betas2[s] <- coef(draw_fit)[["gdppercapitapppconstant2005inter"]]
}
# Histogram of the estimated slopes (20 breaks suggested, x axis fixed to 0-1).
hist(betas2, breaks = 20, xlim = c(0, 1))
# Red line: mean of the sampled slopes. Blue line: population slope beta1.
abline(v = mean(betas2), col = "red", lwd = 2)
abline(v = beta1, col = "blue", lwd = 2)