# Depth of snow cover in Kaisaniemi Helsinki

This notebook investigates the depth of snow in Kaisaniemi, Helsinki, over the last 60 years. In particular, we look into the probability of the snow depth being more than 0 cm on one fixed day of each year (here 24 December).



The data originally comes from the Finnish Meteorological Institute (FMI): https://ilmatieteenlaitos.fi/havaintojen-lataus#!/

In [None]:
import pandas as pd
import pystan
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.special as ss

%matplotlib inline

In [None]:
# Download the Kaisaniemi observations and tidy them in a single chain:
#   - replace the CSV's own header row with short column names
#   - build one datetime column out of the year / month / day columns
#   - raise negative snow depths to zero
#   - flag the days on which the snow depth is positive
#   - keep only the columns that are used further down
df = (
    pd.read_csv(
        "https://raw.githubusercontent.com/dins/snow-depth/master/kaisaniemi.csv",
        header=0,
        names=['year', 'month', 'day', 'clock', 'tzone', 'snow', 'temp'],
    )
    .assign(
        date=lambda frame: pd.to_datetime(frame[['year', 'month', 'day']]),
        snow=lambda frame: frame['snow'].clip(lower=0),
        is_snow=lambda frame: frame['snow'].gt(0),
    )
    .loc[:, ['date', 'snow', 'is_snow', 'temp']]
)

In [None]:
# Show the last rows of the cleaned frame as a quick sanity check
df.tail()

In [None]:
# Pick one calendar day (24 December) out of every year,
# leaving out the years whose snow depth measurement is missing
christmas = df.loc[
    lambda frame: frame['snow'].notnull()
    & frame['date'].dt.month.eq(12)
    & frame['date'].dt.day.eq(24)
]

In [None]:
# Show the first rows of the 24 December subset
christmas.head()

In [None]:
# Snow depth on 24 December for every year that has a measurement.
# Axis labels and a title are set so the figure can be read on its own;
# the trailing semicolon keeps the artist repr out of the cell output.
fig, ax = plt.subplots()
ax.scatter(christmas['date'].dt.year, christmas['snow'])
ax.set(xlabel='Year', ylabel='Snow depth (cm)', title='Snow depth in Kaisaniemi on 24 December');

In [None]:
# Inputs for the Stan models: the year expressed in decades relative to 2000
# and the snow indicator converted from bool to 0/1 integers
stan_data = christmas.assign(
    decade=lambda frame: frame['date'].dt.year.sub(2000).div(10),
    is_snow=lambda frame: frame['is_snow'].astype(int),
).loc[:, ['decade', 'is_snow']]

In [None]:
# Stan program for an intercept-only logistic model:
# every is_snow[i] is Bernoulli-distributed with the same log-odds b.
# The program declares no prior for b.
first_model_code = '''
data {
   int N;
   int<lower=0, upper=1> is_snow[N];
}
parameters {
   real b;
}
model {
  for (i in 1:N) {
    is_snow[i] ~ bernoulli_logit(b);
  }
}
'''

In [None]:
# Compile the intercept-only model and draw posterior samples from it.
# Only the two data entries the model declares (N and is_snow) are passed in.
# The fixed seed (an arbitrary value) makes the draws, and every number or
# figure derived from them, reproducible after a kernel restart.
model = pystan.StanModel(model_code=first_model_code)
fit = model.sampling(
    data={'N': len(stan_data), 'is_snow': stan_data['is_snow'].tolist()},
    iter=1000,
    chains=4,
    seed=2019,
)
fit

In [None]:
# Posterior draws of the intercept b, i.e. the log-odds of snow on 24 December.
# The figure is labelled so it can be read on its own; the trailing semicolon
# keeps the return value of the last call out of the cell output.
b_param = fit.extract('b')['b']
fig, ax = plt.subplots()
ax.hist(b_param)
ax.set(xlabel='b (log-odds of snow)', ylabel='Number of posterior draws', title='Posterior of the intercept b');

In [None]:
# The same posterior on the probability scale.
# ss.expit is the logistic function 1 / (1 + exp(-x)); unlike the hand-written
# formula it does not overflow for large negative inputs, and it is the same
# transform the 2019 prediction cell further down applies.
fig, ax = plt.subplots()
ax.hist(ss.expit(b_param))
ax.set(xlabel='P(snow on 24 December)', ylabel='Number of posterior draws', title='Posterior probability of snow');

In [None]:
# Stan program for a logistic regression of the snow indicator on time:
# the log-odds of is_snow[i] are k * decade[i] + b (slope k, intercept b).
# The generated quantities block stores, for every observation, the fitted
# probability prob[i] = inv_logit(k * decade[i] + b).
# The program declares no priors for b or k.
second_model_code = '''
data {
  int N;
  int<lower=0, upper=1> is_snow[N];
  real decade[N]; 
}
parameters {
  real b;
  real k; 
}
model {
  for (i in 1:N) {
    is_snow[i] ~ bernoulli_logit(k * decade[i] + b);
  }
}
generated quantities {
  real prob[N];
  for (i in 1:N) {
    prob[i] = inv_logit(k* decade[i] + b);
  }
}
'''

In [None]:
# Compile the logistic-regression model and draw posterior samples from it.
# The data dict carries N plus the decade and is_snow columns as plain lists.
# The fixed seed (an arbitrary value) makes the draws, and every number or
# figure derived from them, reproducible after a kernel restart.
model2 = pystan.StanModel(model_code=second_model_code)
fit2 = model2.sampling(
    data={'N': len(stan_data), **stan_data.to_dict(orient='list')},
    iter=1000,
    chains=4,
    seed=2019,
)
fit2

In [None]:
# Has snow cover decreased significantly?
# Compare, draw by draw, the fitted snow probability of the first observation
# with that of the last one. The whole `prob` array is extracted and indexed
# by position, so nothing depends on a hard-coded number of observations.
# NOTE(review): assumes the rows of stan_data are in chronological order.
params = fit2.extract('prob')
prob_draws = params['prob']  # one row per posterior draw, one column per observation
diff_samples = prob_draws[:, 0] - prob_draws[:, -1]
# probability of decrease: share of draws where the first year is more likely to have snow
np.mean(diff_samples > 0)

In [None]:
# Snow probability for 2019, evaluated separately for every posterior draw.
# 2019 is put on the same "decades since 2000" scale the model was fitted on.
decade_2019 = (2019 - 2000) / 10
post_draws = fit2.extract(['b', 'k'])
intercept, slope = post_draws['b'], post_draws['k']
predictions = ss.expit(slope * decade_2019 + intercept)

In [None]:
# Posterior mean of the 2019 snow probability
predictions.mean()

In [None]:
# Posterior distribution of the 2019 snow probability.
# The figure is labelled so it can be read on its own; the trailing semicolon
# keeps the return value of the last call out of the cell output.
fig, ax = plt.subplots()
ax.hist(predictions)
ax.set(xlabel='P(snow on 24 December 2019)', ylabel='Number of posterior draws', title='Predicted snow probability for 2019');