In [None]:
library('tidyverse')
library('tidymodels')

In [None]:
# Notebook-wide defaults for rendered plots: figure width, height, and resolution.
options(
    repr.plot.width = 5,
    repr.plot.height = 3,
    repr.plot.res = 250
)

In [None]:
# Location of the episodes CSV in the TidyTuesday repository (2021-11-23 data set).
data_url = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-11-23/episodes.csv"

# Read the raw episode table. Supplying an empty cols() specification lets readr
# guess every column type without printing the column-specification message.
dr_who_raw = read_csv(file = data_url, col_types = cols())

# Preview the first few rows.
head(dr_who_raw)


In [None]:
# Largest first_aired value present in the raw data.
summarize(dr_who_raw, max(first_aired))

## Line chart

1. Point out encodings
2. Point out scales
3. Change the x-axis breaks using `scale_x_date(date_breaks = "2 months")`
4. Each row is a “vertex” in the line chart

## `dplyr`

1. Grab top_n
2. Calculate mean viewership by year
3. Grab top_n by year

## Visualize model

In [None]:
# Preprocessing recipe: model uk_viewers as a function of first_aired.
#   * step_naomit  - drop rows whose uk_viewers value is missing
#   * step_date    - derive a year feature from first_aired
#   * step_holiday - add indicator columns for New Year's Day and Christmas Day;
#                    keep_original_cols = FALSE then removes first_aired itself
# prep() estimates the recipe on dr_who_raw so it can later be applied to new data.
rec = recipe(uk_viewers ~ first_aired, data = dr_who_raw) |>
    step_naomit(uk_viewers) |>
    step_date(first_aired, features = 'year') |>
    step_holiday(first_aired, holidays = c('NewYearsDay', 'ChristmasDay'), keep_original_cols = FALSE) |>
    prep()

# Preview the processed training data. bake(new_data = NULL) is the current
# replacement for the superseded juice() and returns the same table.
rec |> bake(new_data = NULL) |> head()

In [None]:
# Model specification: linear regression fitted with the 'lm' engine.
mod = linear_reg() |> set_engine('lm')

# Fit on the processed training data held by the prepped recipe, using every
# remaining column as a predictor. bake(new_data = NULL) replaces the
# superseded juice() and yields the same training table.
mod_fit = mod |> fit(uk_viewers ~ ., data = rec |> bake(new_data = NULL))

In [None]:
# Daily grid of dates (2005-03-01 through 2021-12-01) on which to evaluate the model.
dummy_data = tibble(
    first_aired = seq(
        from = as.Date("2005-03-01"),
        to = as.Date("2021-12-01"),
        by = "days"
    )
)

# Preview the first few dates.
head(dummy_data)

In [None]:
# Run the daily grid through the prepped recipe, predict with the fitted model,
# and place each date back beside its prediction (.pred first, then first_aired).
predictions = bind_cols(
    predict(mod_fit, new_data = bake(rec, new_data = dummy_data)),
    dummy_data
)

# Preview the first few predictions.
head(predictions)

In [None]:
# Observed UK viewership (blue) with the linear model's daily predictions
# overlaid (coral), so the fitted model is actually shown in this section.
ggplot(dr_who_raw, aes(x = first_aired, y = uk_viewers)) +
    geom_line(color = 'steelblue') +
    # The prediction layer supplies its own data and y; x = first_aired is
    # inherited from the plot-level aes().
    geom_line(data = predictions, mapping = aes(y = .pred), color = 'coral') +
    scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
    # Labels so the figure can be read on its own when the notebook is skimmed.
    labs(
        title = "Doctor Who: UK viewers by air date",
        subtitle = "Observed (blue) vs. linear model predictions (coral)",
        x = "First aired",
        y = "UK viewers"
    )