# Depth of snow cover in Kaisaniemi Helsinki

This notebook investigates the depth of snow in Kaisaniemi, Helsinki over the last 60 years. In particular, we look into the probability of the snow depth being more than 0 cm on a single day each year.



The data is originally from FMI: https://ilmatieteenlaitos.fi/havaintojen-lataus#!/

In [None]:
# Using `suppressMessages` to disable the noisy package attach log
suppressMessages(library(readr)) # For reading data files
suppressMessages(library(dplyr)) # de-facto library for data preprocessing
suppressMessages(library(ggplot2)) # de-facto library for visualization
suppressMessages(library(rstan)) # Stan interface: stan_model(), sampling(), extract(), traceplot()
suppressMessages(library(bayesplot)) # NOTE(review): no bayesplot function is called in the cells visible here
suppressMessages(library(boot)) # provides inv.logit() used for the 2019 prediction

In [None]:
# Session-wide settings used by rstan.
# mc.cores = 1: presumably makes rstan run the chains sequentially on one core -- confirm against the rstan docs
options(mc.cores = 1)
# auto_write = TRUE: per the rstan documentation, compiled models are cached on disk
# so that an unchanged model does not have to be recompiled
rstan_options(auto_write = TRUE)

In [None]:
# Load the observations and tidy them up:
#   * overwrite the original column names with short ones
#   * build a single date-time column from the year/month/day columns
#   * clean up negative snow depths by raising them to zero
# The snow depth column is read as text on purpose; entries equal to FALSE
# (compared as text) are turned into 0 and everything else is converted to a
# number. NOTE(review): presumably "Lumensyvyys (cm)" is the column that is
# renamed to `snow` -- confirm against the CSV header.
d <- read_csv(
  "https://raw.githubusercontent.com/dins/snow-depth/master/kaisaniemi.csv",
  col_types = cols(`Lumensyvyys (cm)` = col_character())
) %>%
  setNames(c("year", "month", "day", "clock", "tzone", "snow", "temp")) %>%
  # text -> numeric snow depth
  mutate(snow = ifelse(snow == FALSE, 0, as.numeric(snow))) %>%
  # negative depths are treated as "no snow"
  mutate(snow = ifelse(snow < 0, 0, snow)) %>%
  # derived columns: observation date and snow / no-snow indicator
  mutate(
    date = ISOdate(year, month, day),
    is_snow = snow > 0
  ) %>%
  select(date, year, month, day, snow, is_snow, temp)

In [None]:
# Quick overview of the cleaned data frame: one line per column
glimpse(d)

In [None]:
# Look into one specific day of the year (24 December) and
# drop the years without a snow depth measurement
christmas <- d %>%
  filter(!is.na(snow), month == 12, day == 24)
# Store the subset as a CSV file in the working directory
readr::write_csv(christmas, "kaisaniemi_christmas.csv")

In [None]:
# Scatter plot of the snow depth on 24 December against the year.
# The point shape encodes `is_snow`; the manual scale hands out shape 19 and
# shape 1 to its values in order.
ggplot(data = christmas, mapping = aes(x = year, y = snow)) +
  geom_point(mapping = aes(shape = is_snow)) +
  scale_shape_manual(values = c(19, 1)) +
  # geom_smooth(method = "lm") +
  labs(title = "Snow depth in Kaisaniemi on 24.12. each year")

In [None]:
# Data handed to Stan:
#   decade  - year expressed as decades relative to 2000
#   is_snow - snow / no-snow indicator for each year
#   N       - number of observations
stan_data <- list(
  decade = (christmas$year - 2000) / 10,
  is_snow = christmas$is_snow,
  N = length(christmas$is_snow)
)

In [None]:
# Stan program of the first model, kept as a string.
# Every observation is_snow[i] is Bernoulli distributed with one shared
# parameter b on the logit scale; there are no predictors and no explicit
# prior on b is declared.
first_model_code <- "
data {
   int N;
   int<lower=0, upper=1> is_snow[N];
}
parameters {
   real b;
}
model {
  for (i in 1:N) {
    is_snow[i] ~ bernoulli_logit(b);
  }
}
"

In [None]:
# Compile the first Stan program
m0 <- stan_model(model_code = first_model_code)
# Draw posterior samples; only the data is passed, all sampler settings are left at their defaults.
# NOTE(review): stan_data also carries `decade`, which this model does not declare -- presumably ignored by rstan
fit0 <- sampling(m0, data=stan_data)
# Display the fitted object
fit0

In [None]:
# Trace plot of the first model's draws, as a visual check of the sampling
traceplot(fit0)

In [None]:
# Histogram of the posterior draws of b (logit scale).
# `breaks = 100` spells out the suggested number of bins; the former `n=100`
# only worked through partial matching to the `nclass` argument.
hist(extract(fit0, "b")[[1]], breaks = 100)

In [None]:
# Histogram of the posterior draws of b mapped to the probability scale with
# the inverse logit 1 / (1 + exp(-b)).
# `breaks = 100` spells out the suggested number of bins; the former `n=100`
# only worked through partial matching to the `nclass` argument.
hist(1/(1+exp(-extract(fit0, "b")[[1]])), breaks = 100)

In [None]:
# Stan program of the second model, kept as a string.
# Logistic regression of is_snow on decade: the logit of the snow probability
# is k * decade[i] + b. No explicit priors are declared for b and k.
# The generated quantities block stores the fitted probability prob[i] for
# every observation.
second_model_code <- "
data {
  int N;
  int<lower=0, upper=1> is_snow[N];
  real decade[N]; 
}
parameters {
  real b;
  real k; 
}
model {
  for (i in 1:N) {
    is_snow[i] ~ bernoulli_logit(k * decade[i] + b);
  }
}
generated quantities {
  real prob[N];
  for (i in 1:N) {
    prob[i] = inv_logit(k* decade[i] + b);
  }
}
"

In [None]:
# Compile the second Stan program
m <- stan_model(model_code = second_model_code)
# Draw posterior samples; only the data is passed, all sampler settings are left at their defaults
fit <- sampling(m, data=stan_data)
# Display the fitted object
fit

In [None]:
# Trace plot of the second model's draws, as a visual check of the sampling
traceplot(fit)

In [None]:
# Plot the posterior of the per-observation snow probabilities `prob`
# (disabled alternative below: plot the regression parameters k and b instead)
#plot(fit, pars=c("k", "b"))
plot(fit, pars="prob")

In [None]:
# Has snow cover decreased significantly?
# Compare, draw by draw, the fitted snow probability of the earliest observed
# year with that of the latest one. The two observations are located through
# the `decade` predictor passed to Stan rather than the fixed positions 1 and
# 60, so the comparison stays correct when the number of years with a
# measurement is not exactly 60 or the rows are not in chronological order.
prob_draws <- extract(fit, "prob")[[1]] # one row per draw, one column per observation
diff_samples <- prob_draws[, which.min(stan_data$decade)] -
  prob_draws[, which.max(stan_data$decade)]
# posterior probability of a decrease
mean(diff_samples > 0)

In [None]:
# Now estimate the snow probability for 2019
# 2019 on the scale of the `decade` predictor (decades relative to 2000)
decade_2019 <- (2019 - 2000) / 10
# All posterior draws of the second model, one list entry per parameter
post_draws <- extract(fit)
# Inverse logit of the linear predictor, evaluated for every posterior draw
predictions <- with(post_draws, inv.logit(b + k * decade_2019))

In [None]:
# Average of `predictions` over all draws
mean(predictions)

In [None]:
# Histogram showing how `predictions` is spread across the draws
hist(predictions)