Fix credible intervals references (#12)
- Removed incorrect references to "confidence intervals".

- Updated the computation of lower and upper predictions, which is now based on percentile extraction (a standalone sketch of the change follows the file summary below).
WillianFuks committed Feb 27, 2021
1 parent 9fc9e85 commit 2bf3a26
Showing 6 changed files with 67 additions and 97 deletions.
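For reference, here is a minimal standalone sketch of what the interval computation changes from and to. This is not the library's code: the toy sample array, the loc/scale values, and the SciPy call are placeholders; in the library the draws come from the fitted one-step and posterior forecast distributions, and the z-score previously came from `causalimpact.misc.get_z_score`.

```python
import numpy as np
from scipy import stats

alpha = 0.05

# Toy posterior draws with shape (niter, n_forecasts); in the library these come
# from `one_step_dist.sample(niter)` / `posterior_dist.sample(niter)`.
simulated_ys = np.random.default_rng(0).normal(loc=5.0, scale=1.0, size=(1000, 10))

# Before this commit: symmetric Gaussian bounds built from mean and stddev.
z_score = stats.norm.ppf(1 - alpha / 2)  # ~1.96 for alpha=0.05
old_lower = simulated_ys.mean(axis=0) - z_score * simulated_ys.std(axis=0)
old_upper = simulated_ys.mean(axis=0) + z_score * simulated_ys.std(axis=0)

# After this commit: credible interval bounds read directly off the percentiles
# of the sampled trajectories.
lower_percen, upper_percen = 100 * alpha / 2, 100 - 100 * alpha / 2  # 2.5, 97.5
new_lower, new_upper = np.percentile(simulated_ys, [lower_percen, upper_percen], axis=0)
```

Since the bounds are now taken from the posterior samples themselves rather than from a Gaussian mean ± z·std approximation, the "credible interval" wording matches what is actually computed.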
3 changes: 1 addition & 2 deletions README.md
@@ -17,7 +17,6 @@ Please refer to this medium [post](https://towardsdatascience.com/implementing-c
## Requirements

- python{3.6, 3.7, 3.8}
- numpy
- matplotlib
- jinja2
- tensorflow>=2.3.0
@@ -41,7 +40,7 @@ import pandas as pd
from causalimpact import CausalImpact


data = pd.read_csv('https://raw.githubusercontent.com/WillianFuks/tfcausalimpact/master/tests/fixtures/arma_data.csv')
data = pd.read_csv('https://raw.githubusercontent.com/WillianFuks/tfcausalimpact/master/tests/fixtures/arma_data.csv')[['y', 'X']]
data.iloc[70:, 0] += 5

pre_period = [0, 69]
2 changes: 1 addition & 1 deletion causalimpact/__version__.py
@@ -13,4 +13,4 @@
# limitations under the License.


__version__ = '0.0.2'
__version__ = '0.0.3'
84 changes: 29 additions & 55 deletions causalimpact/inferences.py
@@ -26,7 +26,7 @@
import tensorflow as tf
import tensorflow_probability as tfp

from causalimpact.misc import get_z_score, maybe_unstandardize
from causalimpact.misc import maybe_unstandardize

tfd = tfp.distributions

@@ -38,8 +38,8 @@ def get_lower_upper_percentiles(alpha: float) -> List[float]:
Args
----
alpha: float
Sets the size of the confidence interval. If `alpha=0.05` then extracts the
95% confidence interval for forecasts.
Sets the size of the credible interval. If `alpha=0.05` then extracts the
95% credible interval for forecasts.
Returns
-------
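To make the `alpha` to percentiles mapping concrete, a small usage sketch; the expected output assumes the formula the tests later in this diff check against, `[100 * alpha / 2, 100 - 100 * alpha / 2]`.

```python
from causalimpact.inferences import get_lower_upper_percentiles

lower_percen, upper_percen = get_lower_upper_percentiles(0.05)
print(lower_percen, upper_percen)  # expected: 2.5 97.5, the bounds of the 95% credible interval
```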
@@ -78,7 +78,7 @@ def compile_posterior_inferences(
First value is the mean used for standardization and second value is the
standard deviation.
alpha: float
Sets confidence interval size.
Sets credible interval size.
niter: int
Total mcmc samples to sample from the posterior structural model.
@@ -88,78 +88,52 @@
Final dataframe with all data related to one-step predictions and forecasts.
"""
lower_percen, upper_percen = get_lower_upper_percentiles(alpha)
z_score = get_z_score(1 - alpha / 2)
# Integrates pre and post index for cumulative index data.
cum_index = build_cum_index(pre_data.index, post_data.index)
# We create a pd.Series with a single 0 (zero) value to work as the initial value
# when computing the cumulative inferences. Without this value the plotting of
# cumulative data breaks at the initial point.
zero_series = pd.Series([0])
simulated_ys = posterior_dist.sample(niter) # shape (niter, n_forecasts, 1)
simulated_ys = maybe_unstandardize(
np.squeeze(simulated_ys.numpy()),
simulated_pre_ys = one_step_dist.sample(niter) # shape (niter, n_train_timestamps, 1)
simulated_pre_ys = maybe_unstandardize(
np.squeeze(simulated_pre_ys.numpy()),
mu_sig
) # shape (niter, n_forecasts)
simulated_post_ys = posterior_dist.sample(niter) # shape (niter, n_forecasts, 1)
simulated_post_ys = maybe_unstandardize(
np.squeeze(simulated_post_ys.numpy()),
mu_sig
) # shape (niter, n_forecasts)
# Pre inference
pre_preds_means = one_step_dist.mean()
pre_preds_stds = one_step_dist.stddev()
# First points in predictions of pre-data can be quite noisy due to the lack of observed
# data coming before these points. We try to remove those by applying a filter that
# removes all points that fall above 3 standard deviations from the 50% quantile of
# the array of standard deviations for predictions, replacing those with `np.nan`.
pre_preds_stds = tf.where(
tf.math.greater(
tf.abs(pre_preds_stds),
np.quantile(pre_preds_stds, 0.5) + 3 * tf.math.reduce_std(pre_preds_stds)
),
np.nan,
pre_preds_stds
)
pre_preds_lower = pd.Series(
np.squeeze(
maybe_unstandardize(pre_preds_means - z_score * pre_preds_stds, mu_sig)
),
index=pre_data.index
)
pre_preds_upper = pd.Series(
np.squeeze(
maybe_unstandardize(pre_preds_means + z_score * pre_preds_stds, mu_sig)
),
index=pre_data.index
)
pre_preds_means = pd.Series(
np.squeeze(
maybe_unstandardize(pre_preds_means, mu_sig)
),
index=pre_data.index
)
pre_preds_lower, pre_preds_upper = np.percentile(
simulated_pre_ys,
[lower_percen, upper_percen],
axis=0
)
pre_preds_lower = pd.Series(pre_preds_lower, index=pre_data.index)
pre_preds_upper = pd.Series(pre_preds_upper, index=pre_data.index)
# Post inference
post_preds_means = posterior_dist.mean()
post_preds_stds = posterior_dist.stddev()
post_preds_lower = pd.Series(
np.squeeze(
maybe_unstandardize(
post_preds_means - z_score * post_preds_stds,
mu_sig
)
),
index=post_data.index
)
post_preds_upper = pd.Series(
np.squeeze(
maybe_unstandardize(
post_preds_means + z_score * post_preds_stds,
mu_sig
)
),
index=post_data.index
)
post_preds_means = pd.Series(
np.squeeze(
maybe_unstandardize(post_preds_means, mu_sig)
),
index=post_data.index
)
post_preds_lower, post_preds_upper = np.percentile(
simulated_post_ys,
[lower_percen, upper_percen],
axis=0
)
post_preds_lower = pd.Series(post_preds_lower, index=post_data.index)
post_preds_upper = pd.Series(post_preds_upper, index=post_data.index)
# Concatenations
complete_preds_means = pd.concat([pre_preds_means, post_preds_means])
complete_preds_lower = pd.concat([pre_preds_lower, post_preds_lower])
@@ -172,7 +146,7 @@ def compile_posterior_inferences(
post_cum_preds_means = pd.concat([zero_series, post_cum_preds_means])
post_cum_preds_means.index = cum_index
post_cum_preds_lower, post_cum_preds_upper = np.percentile(
np.cumsum(simulated_ys, axis=1),
np.cumsum(simulated_post_ys, axis=1),
[lower_percen, upper_percen],
axis=0
)
@@ -202,7 +176,7 @@ def compile_posterior_inferences(
post_cum_effects_means = pd.concat([zero_series, post_cum_effects_means])
post_cum_effects_means.index = cum_index
post_cum_effects_lower, post_cum_effects_upper = np.percentile(
np.cumsum(post_data.iloc[:, 0].values - simulated_ys, axis=1),
np.cumsum(post_data.iloc[:, 0].values - simulated_post_ys, axis=1),
[lower_percen, upper_percen],
axis=0
)
@@ -304,7 +278,7 @@ def summarize_posterior_inferences(
"""
After running the posterior inferences compilation, this function aggregates the
results and gets the final interpretation for the Causal Impact results, such as
the expected absolute impact of the given intervention and its confidence interval.
the expected absolute impact of the given intervention and its credible interval.
Args
----
2 changes: 1 addition & 1 deletion causalimpact/main.py
@@ -232,7 +232,7 @@ def plot(
----
panels: List[str]
Which graphics to plot. 'original' plots the original data, forecasts means
and confidence intervals related to the fitted model.
and credible intervals related to the fitted model.
'pointwise' plots the point wise differences between observed data and
predictions. Finally, 'cumulative' is a cumulative summation over real
data and its forecasts.
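A hedged usage sketch for the `panels` argument. The constructor call and the fact that `plot` is exposed as a method on the fitted object are assumptions based on the README example earlier in this diff; the post period is inferred from where that example shifts the data.

```python
import pandas as pd
from causalimpact import CausalImpact

# Data setup copied from the README example in this diff.
data = pd.read_csv(
    'https://raw.githubusercontent.com/WillianFuks/tfcausalimpact/master/tests/fixtures/arma_data.csv'
)[['y', 'X']]
data.iloc[70:, 0] += 5

pre_period = [0, 69]
post_period = [70, len(data) - 1]  # assumption: the post period starts where the data is shifted

ci = CausalImpact(data, pre_period, post_period)
ci.plot(panels=['original'])                             # observed data, forecast means, credible intervals
ci.plot(panels=['original', 'pointwise', 'cumulative'])  # all three panels
```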
2 changes: 1 addition & 1 deletion causalimpact/summary.py
@@ -52,7 +52,7 @@ def summary(
p_value: float
p-value test for testing presence of signal in data.
alpha: float
Sets confidence interval width.
Sets credible interval width.
output: str
Can be either "summary" or "report". The first is a simpler output just
informing general metrics such as expected absolute or relative effect.
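A matching sketch for `summary`, again assuming it is exposed as a method on the fitted `CausalImpact` object from the previous sketch; the two output modes are the ones this docstring names.

```python
print(ci.summary())                 # condensed metrics such as the expected absolute/relative effect
print(ci.summary(output='report'))  # longer natural-language report of the same results
```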
71 changes: 34 additions & 37 deletions tests/test_inferences.py
@@ -18,7 +18,7 @@
import tensorflow as tf

import causalimpact.inferences as inferrer
from causalimpact.misc import get_z_score, maybe_unstandardize
from causalimpact.misc import maybe_unstandardize


def test_get_lower_upper_percentiles():
@@ -59,6 +59,15 @@ def test_compile_posterior_inferences():
niter = 10

class OneStepDist:
def sample(self, niter):
tmp = tf.convert_to_tensor(
np.tile(np.arange(start=3.1, stop=6.1, step=1), (niter, 1)) +
np.arange(niter).reshape(-1, 1),
dtype=np.float32
)
tmp = tmp[..., tf.newaxis]
return tmp

def mean(self):
return np.ones((len(pre_data), 1)) * one_step_mean

@@ -105,17 +114,26 @@ def stddev(self):
expec_complete_preds_means['complete_preds_means'],
inferences['complete_preds_means']
)
simulated_pre_ys = maybe_unstandardize(
np.squeeze(one_step_dist.sample(niter)),
mu_sig
)
simulated_post_ys = maybe_unstandardize(
np.squeeze(posterior_dist.sample(niter)),
mu_sig
)
lower_percen, upper_percen = inferrer.get_lower_upper_percentiles(alpha)
# test complete_preds_lower
pre_preds_lower = (
np.array([1, 1, 1]) * one_step_mean -
get_z_score(1 - alpha / 2) * one_step_stddev
) * sig + mu
pre_preds_lower[np.abs(pre_preds_lower) > np.quantile(pre_preds_lower, 0.5) +
3 * np.std(pre_preds_lower)] = np.nan
post_preds_lower = (
np.array([1, 1, 1]) * posterior_mean -
get_z_score(1 - alpha / 2) * posterior_stddev
) * sig + mu
pre_preds_lower, pre_preds_upper = np.percentile(
simulated_pre_ys,
[lower_percen, upper_percen],
axis=0
)
post_preds_lower, post_preds_upper = np.percentile(
simulated_post_ys,
[lower_percen, upper_percen],
axis=0
)
expec_complete_preds_lower = np.concatenate([pre_preds_lower, post_preds_lower])
expec_complete_preds_lower = pd.DataFrame(
data=expec_complete_preds_lower,
@@ -128,16 +146,6 @@ def stddev(self):
inferences['complete_preds_lower']
)
# test complete_preds_upper
pre_preds_upper = (
np.array([1, 1, 1]) * one_step_mean +
get_z_score(1 - alpha / 2) * one_step_stddev
) * sig + mu
pre_preds_upper[np.abs(pre_preds_upper) > np.quantile(pre_preds_upper, 0.5) +
3 * np.std(pre_preds_upper)] = np.nan
post_preds_upper = (
np.array([1, 1, 1]) * posterior_mean +
get_z_score(1 - alpha / 2) * posterior_stddev
) * sig + mu
expec_complete_preds_upper = np.concatenate([pre_preds_upper, post_preds_upper])
expec_complete_preds_upper = pd.DataFrame(
data=expec_complete_preds_upper,
@@ -149,7 +157,7 @@ def stddev(self):
expec_complete_preds_upper['complete_preds_upper'],
inferences['complete_preds_upper']
)
# test post_preds_means
# test pre and post_preds_means
expec_post_preds_means = pd.DataFrame(
data=np.array([np.nan] * 3 + [posterior_mean * sig + mu] * len(pre_data)),
index=expected_index,
@@ -161,12 +169,8 @@ def stddev(self):
inferences['post_preds_means']
)
# test post_preds_lower
post_preds_lower = (
np.array([np.nan] * 3 + [1, 1, 1]) * posterior_mean -
get_z_score(1 - alpha / 2) * posterior_stddev
) * sig + mu
expec_post_preds_lower = pd.DataFrame(
data=post_preds_lower,
data=np.concatenate([[np.nan] * len(pre_data), post_preds_lower]),
index=expected_index,
dtype=np.float64,
columns=['post_preds_lower']
@@ -176,12 +180,8 @@ def stddev(self):
inferences['post_preds_lower']
)
# test post_preds_upper
post_preds_upper = (
np.array([np.nan] * 3 + [1, 1, 1]) * posterior_mean +
get_z_score(1 - alpha / 2) * posterior_stddev
) * sig + mu
expec_post_preds_upper = pd.DataFrame(
data=post_preds_upper,
data=np.concatenate([[np.nan] * len(pre_data), post_preds_upper]),
index=expected_index,
dtype=np.float64,
columns=['post_preds_upper']
@@ -216,8 +216,7 @@ def stddev(self):
)
# test post_cum_preds_lower
post_cum_preds_lower, post_cum_preds_upper = np.percentile(
np.cumsum(maybe_unstandardize(np.squeeze(posterior_dist.sample(niter)), mu_sig),
axis=1),
np.cumsum(simulated_post_ys, axis=1),
[100 * alpha / 2, 100 - 100 * alpha / 2],
axis=0
)
@@ -303,9 +302,7 @@ def stddev(self):
)
# test post_cum_effects_lower
post_cum_effects_lower, post_cum_effects_upper = np.percentile(
np.cumsum(post_data.iloc[:, 0].values - maybe_unstandardize(np.squeeze(
posterior_dist.sample(niter)), mu_sig),
axis=1),
np.cumsum(post_data.iloc[:, 0].values - simulated_post_ys, axis=1),
[100 * alpha / 2, 100 - 100 * alpha / 2],
axis=0
)
