In [58]:
import numpy as np
import scipy.special
from bokeh.io import output_notebook, curdoc
from bokeh.layouts import gridplot
from bokeh.models import DatetimeTickFormatter, HoverTool
from bokeh.plotting import figure, show
from fredapi import Fred

import config

In [15]:
# Notebook-wide setup.
# Route Bokeh output to inline notebook cells and style the current document.
output_notebook()
curdoc().theme = "dark_minimal"

# Project-local settings object; its `fred_api_key` attribute is read when the
# FRED client is created.
settings = config.Settings()

In [8]:
# Download the series from FRED, authenticating with the key held in `settings`.
SERIES_ID = 'CPILFENS'
fred = Fred(api_key=settings.fred_api_key)
y = fred.get_series(SERIES_ID)

In [9]:
# Report the first and last observation dates of the downloaded series.
date_format = '%B %d, %Y'
first_obs = y.index.min().strftime(date_format)
last_obs = y.index.max().strftime(date_format)
print(f"Range is from {first_obs} to {last_obs}")

Range is from January 01, 1957 to September 01, 2021


In [57]:
# Line chart of the index level against its date index.
p = figure(
    title="Core CPI for All Urban Consumers",
    y_axis_label="Index, 1982-84=100",
    sizing_mode="stretch_width",
    height=300,
)
p.line(x=y.index, y=y, legend_label="Core CPI", line_width=2)

# Tick label format for each zoom level of the datetime axis.
tick_formats = {
    "days": ['%b %d, %Y'],
    "months": ['%b %Y'],
    "years": ['%Y'],
}
p.xaxis.formatter = DatetimeTickFormatter(**tick_formats)

# Hover read-out: format @x as a date; 'vline' mode triggers on the vertical
# line through the cursor rather than requiring a direct hit on the glyph.
hover = HoverTool(
    tooltips="@x{%b %d, %Y}: @y",
    formatters={'@x': 'datetime'},
    mode='vline',
)
p.add_tools(hover)

show(p)

In [74]:
# Histogram of the period-over-period log changes of the series.
# The first difference is NaN (no predecessor), so it is dropped before binning.
log_changes = np.log(y).diff().dropna()
hist, edges = np.histogram(log_changes)

p = figure(title="Histogram")
# One rectangle per bin: consecutive edges are the left/right sides, the count is the top.
left_edges, right_edges = edges[:-1], edges[1:]
p.quad(top=hist, bottom=0, left=left_edges, right=right_edges,
       line_color="white", alpha=0.6)

show(p)

In [28]:
def hist_plot(y, bins=10, title="Histogram"):
    """Build a histogram figure for a one-dimensional sample.

    Parameters
    ----------
    y : array-like
        Sample values (list, NumPy array, pandas Series, ...). Non-finite
        entries (NaN, +/-inf) are discarded before binning because
        ``np.histogram`` cannot derive a bin range from them.
    bins : int or sequence, optional
        Passed straight to ``np.histogram``. Defaults to 10, which is
        ``np.histogram``'s own default.
    title : str, optional
        Figure title. Defaults to "Histogram".

    Returns
    -------
    The figure with one quad glyph per bin. The caller decides whether to
    ``show`` it or place it in a layout.
    """
    values = np.asarray(y, dtype=float)
    values = values[np.isfinite(values)]
    hist, edges = np.histogram(values, bins=bins)

    p = figure(title=title)
    # Consecutive bin edges are the left/right sides of each bar.
    p.quad(
        top=hist,
        bottom=0,
        left=edges[:-1],
        right=edges[1:],
        line_color="white",
        alpha=0.6,
    )
    return p
    
def make_plot(title, hist, edges, x, pdf, cdf):
    """Return a figure showing a histogram with PDF and CDF curves on top.

    Parameters
    ----------
    title : figure title.
    hist : bar heights, one per bin.
    edges : bin edges; ``edges[:-1]`` and ``edges[1:]`` give each bar's
        left and right side.
    x : x-coordinates shared by both curves.
    pdf, cdf : y-values of the curves labelled "PDF" and "CDF".
    """
    fig = figure(title=title, tools='', background_fill_color="#fafafa")

    # Bars first, so the curves are drawn on top of them.
    fig.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
             fill_color="navy", line_color="white", alpha=0.5)

    curve_specs = (
        (pdf, "#ff8888", 4, "PDF"),
        (cdf, "orange", 2, "CDF"),
    )
    for curve, color, width, label in curve_specs:
        fig.line(x, curve, line_color=color, line_width=width, alpha=0.7,
                 legend_label=label)

    # Axes and grid.
    fig.xaxis.axis_label = 'x'
    fig.yaxis.axis_label = 'Pr(x)'
    fig.y_range.start = 0
    fig.grid.grid_line_color = "white"

    # Legend.
    fig.legend.location = "center_right"
    fig.legend.background_fill_color = "#fefefe"
    return fig

# Gallery of four distributions. For each one: draw N_SAMPLES random values,
# bin them into a density-normalised histogram, evaluate the closed-form PDF
# and CDF on a grid, and hand everything to make_plot.
# Uses `scipy.special` and `gridplot`, both imported in the notebook's import cell.

N_SAMPLES = 1000
N_BINS = 50
# Seeded generator so a fresh "Restart & Run All" reproduces the same figures.
rng = np.random.default_rng(42)

# Normal Distribution

mu, sigma = 0, 0.5

measured = rng.normal(mu, sigma, N_SAMPLES)
hist, edges = np.histogram(measured, density=True, bins=N_BINS)

x = np.linspace(-2, 2, 1000)
pdf = 1/(sigma * np.sqrt(2*np.pi)) * np.exp(-(x-mu)**2 / (2*sigma**2))
cdf = (1+scipy.special.erf((x-mu)/np.sqrt(2*sigma**2)))/2

p1 = make_plot("Normal Distribution (μ=0, σ=0.5)", hist, edges, x, pdf, cdf)

# Log-Normal Distribution

mu, sigma = 0, 0.5

measured = rng.lognormal(mu, sigma, N_SAMPLES)
hist, edges = np.histogram(measured, density=True, bins=N_BINS)

# Grid starts just above zero because the formulas below take log(x) and divide by x.
x = np.linspace(0.0001, 8.0, 1000)
pdf = 1/(x* sigma * np.sqrt(2*np.pi)) * np.exp(-(np.log(x)-mu)**2 / (2*sigma**2))
cdf = (1+scipy.special.erf((np.log(x)-mu)/(np.sqrt(2)*sigma)))/2

p2 = make_plot("Log Normal Distribution (μ=0, σ=0.5)", hist, edges, x, pdf, cdf)

# Gamma Distribution

k, theta = 7.5, 1.0

measured = rng.gamma(k, theta, N_SAMPLES)
hist, edges = np.histogram(measured, density=True, bins=N_BINS)

x = np.linspace(0.0001, 20.0, 1000)
pdf = x**(k-1) * np.exp(-x/theta) / (theta**k * scipy.special.gamma(k))
# gammainc is the regularised lower incomplete gamma function, i.e. the gamma CDF.
cdf = scipy.special.gammainc(k, x/theta)

p3 = make_plot("Gamma Distribution (k=7.5, θ=1)", hist, edges, x, pdf, cdf)

# Weibull Distribution

lam, k = 1, 1.25
# Generator.weibull draws with unit scale; multiply by lam to set the scale.
# This replaces the hand-rolled inverse-CDF draw, which could evaluate log(0).
measured = lam * rng.weibull(k, N_SAMPLES)
hist, edges = np.histogram(measured, density=True, bins=N_BINS)

x = np.linspace(0.0001, 8, 1000)
pdf = (k/lam)*(x/lam)**(k-1) * np.exp(-(x/lam)**k)
cdf = 1 - np.exp(-(x/lam)**k)

p4 = make_plot("Weibull Distribution (λ=1, k=1.25)", hist, edges, x, pdf, cdf)

show(gridplot([p1,p2,p3,p4], ncols=2, width=400, height=400, toolbar_location=None))

DatetimeIndex(['1957-01-01', '1957-02-01', '1957-03-01', '1957-04-01',
               '1957-05-01', '1957-06-01', '1957-07-01', '1957-08-01',
               '1957-09-01', '1957-10-01',
               ...
               '2020-12-01', '2021-01-01', '2021-02-01', '2021-03-01',
               '2021-04-01', '2021-05-01', '2021-06-01', '2021-07-01',
               '2021-08-01', '2021-09-01'],
              dtype='datetime64[ns]', length=777, freq=None)

In [68]:
# Display the first differences of the log series; the first entry is NaN
# because it has no predecessor.
log_level = np.log(y)
log_level.diff()

1957-01-01         NaN
1957-02-01    0.000000
1957-03-01    0.006993
1957-04-01    0.003478
1957-05-01    0.000000
                ...   
2021-05-01    0.007002
2021-06-01    0.008392
2021-07-01    0.003330
2021-08-01    0.001292
2021-09-01    0.001348
Length: 777, dtype: float64