In [25]:
import numpy as np
from fredapi import Fred
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, curdoc
from bokeh.models import DatetimeTickFormatter, HoverTool, Band, ColumnDataSource
from statsmodels.tsa import stattools
import pandas as pd
import config

In [2]:
settings = config.Settings()
output_notebook()
# apply theme to current document
curdoc().theme = "dark_minimal"

In [3]:
fred = Fred(api_key=settings.fred_api_key)
y = fred.get_series('CPILFENS')

In [4]:
print(f"Range is from {y.index.min().strftime('%B %d, %Y')} to {y.index.max().strftime('%B %d, %Y')}")

Range is from January 01, 1957 to September 01, 2021


In [5]:
p = figure(
    title="Core CPI for All Urban Consumers", 
    y_axis_label="Index, 1982-84=100", 
    #x_axis_label="t",
    sizing_mode="stretch_width",
    height=300
    )
p.line(x = y.index, y=y, legend_label="Core CPI", line_width=2)
p.xaxis.formatter = DatetimeTickFormatter(
    days = ['%b %d, %Y'],
    months = ['%b %Y'],
    years = ['%Y']
    )
p.add_tools(HoverTool(
    tooltips="@x{%b %d, %Y}: @y",
    formatters={'@x': 'datetime'},
    mode='vline'
))

show(p)

In [6]:
hist, edges = np.histogram(np.log(y).diff().dropna())
p = figure(title="Histogram")
p.quad(
    top=hist,
    bottom=0,
    left=edges[:-1],
    right=edges[1:],
    line_color="white",
    alpha=0.6
    )

mu = 

show(p)

SyntaxError: invalid syntax (2160686316.py, line 12)

In [28]:
def hist_plot(y):
    p = figure(title="Histogram")
    
def make_plot(title, hist, edges, x, pdf, cdf):
    p = figure(title=title, tools='', background_fill_color="#fafafa")
    p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
           fill_color="navy", line_color="white", alpha=0.5)
    p.line(x, pdf, line_color="#ff8888", line_width=4, alpha=0.7, legend_label="PDF")
    p.line(x, cdf, line_color="orange", line_width=2, alpha=0.7, legend_label="CDF")

    p.y_range.start = 0
    p.legend.location = "center_right"
    p.legend.background_fill_color = "#fefefe"
    p.xaxis.axis_label = 'x'
    p.yaxis.axis_label = 'Pr(x)'
    p.grid.grid_line_color="white"
    return p

# Normal Distribution

mu, sigma = 0, 0.5

measured = np.random.normal(mu, sigma, 1000)
hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(-2, 2, 1000)
pdf = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(x-mu)**2 / (2*sigma**2))
cdf = (1+scipy.special.erf((x-mu)/np.sqrt(2*sigma**2)))/2

p1 = make_plot("Normal Distribution (μ=0, σ=0.5)", hist, edges, x, pdf, cdf)

# Log-Normal Distribution

mu, sigma = 0, 0.5

measured = np.random.lognormal(mu, sigma, 1000)
hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(0.0001, 8.0, 1000)
pdf = 1/(x* sigma * np.sqrt(2*np.pi)) * np.exp(-(np.log(x)-mu)**2 / (2*sigma**2))
cdf = (1+scipy.special.erf((np.log(x)-mu)/(np.sqrt(2)*sigma)))/2

p2 = make_plot("Log Normal Distribution (μ=0, σ=0.5)", hist, edges, x, pdf, cdf)

# Gamma Distribution

k, theta = 7.5, 1.0

measured = np.random.gamma(k, theta, 1000)
hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(0.0001, 20.0, 1000)
pdf = x**(k-1) * np.exp(-x/theta) / (theta**k * scipy.special.gamma(k))
cdf = scipy.special.gammainc(k, x/theta)

p3 = make_plot("Gamma Distribution (k=7.5, θ=1)", hist, edges, x, pdf, cdf)

# Weibull Distribution

lam, k = 1, 1.25
measured = lam*(-np.log(np.random.uniform(0, 1, 1000)))**(1/k)
hist, edges = np.histogram(measured, density=True, bins=50)

x = np.linspace(0.0001, 8, 1000)
pdf = (k/lam)*(x/lam)**(k-1) * np.exp(-(x/lam)**k)
cdf = 1 - np.exp(-(x/lam)**k)

p4 = make_plot("Weibull Distribution (λ=1, k=1.25)", hist, edges, x, pdf, cdf)

show(gridplot([p1,p2,p3,p4], ncols=2, width=400, height=400, toolbar_location=None))

DatetimeIndex(['1957-01-01', '1957-02-01', '1957-03-01', '1957-04-01',
               '1957-05-01', '1957-06-01', '1957-07-01', '1957-08-01',
               '1957-09-01', '1957-10-01',
               ...
               '2020-12-01', '2021-01-01', '2021-02-01', '2021-03-01',
               '2021-04-01', '2021-05-01', '2021-06-01', '2021-07-01',
               '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', length=777, freq=None)

In [61]:
y_t = np.log(y).diff().dropna()
nlags = 30
alpha = 0.5
is_wn = True
acf, acf_confint, qstat, qstat_pvalue= stattools.acf(y_t, nlags = nlags, qstat=True, alpha=alpha, bartlett_confint=not is_wn)
pacf, pacf_confint = stattools.pacf(y_t, nlags=nlags, alpha=alpha)
df = pd.DataFrame(dict(
    lag = [str(l) for l in range(1,nlags+1)],
    acf = acf[1:],
    acf_confint_lower = (acf_confint[:,0] - acf)[1:],
    acf_confint_upper = (acf_confint[:,1]-acf)[1:],
    pacf = pacf[1:],
    pacf_confint_lower = pacf_confint[1:, 0],
    pacf_confint_upper = pacf_confint[1:, 1],
    qstat = qstats,
    qstat_pvalue = qstat_pvalue   
    ))
df.head()

Unnamed: 0,lag,acf,acf_confint_lower,acf_confint_upper,pacf,pacf_confint_lower,pacf_confint_upper,qstat,qstat_pvalue
0,1,0.617634,-0.024213,0.024213,0.618431,0.594219,0.642644,297.168312,1.363674e-66
1,2,0.396676,-0.024213,0.024213,0.024684,0.000471,0.048896,419.904083,6.591539e-92
2,3,0.303817,-0.024213,0.024213,0.080775,0.056562,0.104988,491.995945,2.589636e-106
3,4,0.367892,-0.024213,0.024213,0.238199,0.213986,0.262411,597.839742,4.5473120000000005e-128
4,5,0.496008,-0.024213,0.024213,0.288973,0.26476,0.313186,790.487272,1.321838e-168


In [62]:
source = ColumnDataSource(df)
p = figure(title="ACF", x_range=df.lag, sizing_mode="stretch_width", height=300)
p.vbar(x="lag", top="acf", width = 0.6, source = df)
confint = Band(base = "lag", lower = "acf_confint_lower", upper= "acf_confint_upper", source = source, level='underlay',fill_alpha=1.0, line_width=1, line_color='black')
p.add_layout(confint)
p.add_tools(HoverTool(
    tooltips="Lag @lag: @acf",
    mode='vline'
))
show(p)

In [33]:
df.lag.astype(str)

0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
20    21
21    22
22    23
23    24
24    25
25    26
26    27
27    28
28    29
29    30
Name: lag, dtype: object