Linear regression

In [None]:
# Exercise 1
# Keep only the United States rows of the by-year, by-country summary
US_by_year <- filter(by_year_country, country == "United States")

# Show the US subset
US_by_year

# Model percent_yes as a linear function of year for the US
US_fit <- lm(formula = percent_yes ~ year, data = US_by_year)

# Inspect coefficients, standard errors and fit statistics
US_fit %>%
  summary()

Tidying models with broom

In [None]:
# Exercise 1
# broom provides tidy(), which turns a fitted model into a data frame
library(broom)

# One row per model term for the US regression
tidy(x = US_fit)

In [None]:
# Exercise 2
# Subset the yearly series for each of the two countries
US_by_year <- filter(by_year_country, country == "United States")
UK_by_year <- filter(by_year_country, country == "United Kingdom")

# Fit percent_yes ~ year separately for the US and the UK
US_fit <- lm(percent_yes ~ year, data = US_by_year)
UK_fit <- lm(percent_yes ~ year, data = UK_by_year)

# Tidy each fit into a coefficient table
US_tidied <- US_fit %>% tidy()
UK_tidied <- UK_fit %>% tidy()

# Stack the two coefficient tables (US rows first, then UK rows)
US_tidied %>%
  bind_rows(UK_tidied)

Nesting for multiple models

In [None]:
# Exercise 1
# Load the tidyr package
library(tidyr)

# Nest all columns besides country into a list-column named `data`.
# The selection is passed as a named argument (`data = -country`) because
# an unnamed selection such as nest(-country) is deprecated in tidyr >= 1.0.0.
by_year_country %>%
  nest(data = -country)

In [None]:
# Exercise 2
# One row per country; every other column is nested into the `data`
# list-column. The named form replaces the deprecated nest(-country).
nested <- by_year_country %>%
  nest(data = -country)

# First nested data frame, and its percent_yes column
nested$data[[1]]
nested$data[[1]]$percent_yes

# Print the nested data for Brazil (single-bracket subsetting, so the result
# is a list holding the matching data frame)
nested$data[nested$country == "Brazil"]

In [None]:
# Exercise 3
# One row per country; every other column is nested into the `data`
# list-column. The named form replaces the deprecated nest(-country).
nested <- by_year_country %>%
  nest(data = -country)

# Unnest the data column to return it to its original form.
# `cols` is given explicitly: calling unnest() without it is deprecated
# in tidyr >= 1.0.0.
nested %>%
  unnest(cols = data)

Fitting multiple models

In [None]:
# Exercise 1
# Load tidyr and purrr
library(tidyr)
library(purrr)

# Perform a linear regression on each item in the data column.
# nest(data = -country) is the non-deprecated spelling of nest(-country);
# map() then fits percent_yes ~ year once per nested data frame and stores
# the fitted lm objects in the `model` list-column.
by_year_country %>%
  nest(data = -country) %>%
  mutate(model = map(data, ~ lm(percent_yes ~ year, data = .)))

In [None]:
# Exercise 2
# Load the broom package
library(broom)

# Fit one regression per country, then apply tidy() to each model.
# nest(data = -country) is the non-deprecated spelling of nest(-country).
# `model` holds the fitted lm objects; `tidied` holds one coefficient
# table per model.
by_year_country %>%
  nest(data = -country) %>%
  mutate(model = map(data, ~ lm(percent_yes ~ year, data = .)),
         tidied = map(model, tidy))

In [None]:
# Exercise 3
# Fit one regression per country, tidy each model, and unnest the tidied
# coefficient tables so every row is one (country, term) pair.
#
# group_by() %>% nest() can return a data frame that is still grouped by
# country (tidyr >= 1.0.0). The ungroup() drops that grouping so that later
# column-wise operations on country_coefficients (for example p.adjust()
# inside mutate()) see all countries at once instead of one country at a
# time. It is a no-op when the data is already ungrouped.
country_coefficients <- by_year_country %>%
  group_by(country) %>%
  nest() %>%
  ungroup() %>%
  mutate(model = map(data, ~ lm(percent_yes ~ year, data = .)),
         tidied = map(model, tidy)) %>%
  unnest(tidied)

# Print the resulting country_coefficients variable
country_coefficients

Working with many tidy models

In [None]:
# Exercise 1
# Show the full per-country coefficient table
country_coefficients

# Keep only the rows for the `year` term, i.e. the slopes
filter(country_coefficients, term == "year")

In [None]:
# Exercise 2
# Filter for only the slope terms.
# ungroup() first: if country_coefficients is still grouped by country,
# every group would contain a single slope and p.adjust() below would
# return each p-value unchanged, i.e. no multiple-testing correction.
slope_terms <- country_coefficients %>%
  ungroup() %>%
  filter(term == "year")

# Add the p.adjusted column (p.adjust() defaults to the Holm method) and
# preview the first rows as a plain data frame
slope_terms %>%
  mutate(p.adjusted = p.adjust(p.value)) %>%
  data.frame() %>%
  head()

# Keep only the slopes that remain significant after adjustment
slope_terms %>%
  mutate(p.adjusted = p.adjust(p.value)) %>%
  filter(p.adjusted < 0.05)

In [None]:
# Exercise 3
# Keep the slope terms whose adjusted p-value is below 0.05.
# ungroup() makes sure p.adjust() is applied across all countries at once;
# on data grouped by country each group holds a single p-value and the
# adjustment would leave it unchanged. p.adjust() defaults to Holm.
filtered_countries <- country_coefficients %>%
  ungroup() %>%
  filter(term == "year") %>%
  mutate(p.adjusted = p.adjust(p.value)) %>%
  filter(p.adjusted < 0.05)

# Sort for the countries increasing most quickly (largest slope first)
filtered_countries %>%
  arrange(desc(estimate))

# Sort for the countries decreasing most quickly (most negative slope first)
filtered_countries %>%
  arrange(estimate)