In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Test for discrete array

In [8]:
import numpy as np

def to_linear_cdf(x, f):
    """Build a piecewise-linear CDF on the grid ``x`` from the samples ``f``.

    On every interval ``[x[i], x[i+1]]`` the density is taken to be constant
    and equal to the average of the two end-point samples,
    ``0.5 * (f[i] + f[i+1])``.  These interval densities are normalised so
    that they integrate to one over the grid and are then accumulated, which
    makes the resulting CDF linear inside each interval.

    Parameters
    ----------
    x : array_like, shape (n,)
        Grid points, ``n >= 2``.
    f : array_like, shape (n,)
        Samples of the (not necessarily normalised) density at ``x``.

    Returns
    -------
    numpy.ndarray, shape (n,)
        CDF values at the grid points.  The first entry is exactly ``0.0``
        and the last one equals ``1.0`` up to floating-point rounding.

    Raises
    ------
    ValueError
        If ``x`` and ``f`` are not one-dimensional with the same length, if
        fewer than two grid points are given, or if the integral of ``f``
        over ``x`` is zero or not finite (normalisation would be undefined).
    """
    x = np.asarray(x)
    f = np.asarray(f)

    if x.ndim != 1 or f.shape != x.shape:
        raise ValueError(
            "x and f must be one-dimensional arrays of the same length, "
            "got shapes {} and {}".format(x.shape, f.shape))
    if x.size < 2:
        raise ValueError("at least two grid points are required")

    _dx = np.diff(x)
    _mid_arr = 0.5 * (f[1:] + f[:-1])
    _norm = np.sum(_mid_arr * _dx)
    # Dividing by a zero / non-finite integral would silently fill the CDF
    # with NaN or inf, so fail loudly instead.
    if not np.isfinite(_norm) or _norm == 0:
        raise ValueError(
            "the integral of f over x must be finite and non-zero, "
            "got {}".format(_norm))
    _pdf = _mid_arr / _norm

    # Probability mass of each interval, preceded by a zero so that the
    # running sum starts at CDF(x[0]) = 0.
    _pdf_dx = np.empty(x.size)
    _pdf_dx[0] = 0.0
    _pdf_dx[1:] = _pdf * _dx
    _cdf = np.cumsum(_pdf_dx)

    return _cdf


def quantile(x, f, q, return_cdf=False):
    """Return the grid positions at which the CDF of ``(x, f)`` reaches ``q``.

    The CDF is obtained from ``to_linear_cdf(x, f)`` and treated as linear
    between neighbouring grid points.  For every requested level the first
    grid point whose CDF value is strictly greater than the level is located;
    the quantile is then found by linear interpolation between that point and
    its left neighbour.

    Parameters
    ----------
    x : array_like
        Grid points, forwarded unchanged to ``to_linear_cdf``.
    f : array_like
        Density samples at ``x``, forwarded unchanged to ``to_linear_cdf``.
    q : float or array_like
        Probability level(s) in the closed interval ``[0, 1]``.
    return_cdf : bool, optional
        When true, also return the CDF sampled on ``x``.

    Returns
    -------
    numpy.ndarray or tuple
        Float array with the same shape as ``q`` (0-d for a scalar ``q``)
        holding the quantile positions; ``(positions, cdf)`` when
        ``return_cdf`` is true.

    Raises
    ------
    AssertionError
        If any level lies outside ``[0, 1]`` (NaN included).
    ValueError
        If the CDF does not end at 1, i.e. ``f`` could not be normalised.
    """
    # Always work in float so that integer-typed levels (e.g. ``np.array([0])``)
    # cannot truncate the interpolated positions.
    _q = np.asarray(q, dtype=float)
    assert np.all((0 <= _q) & (_q <= 1.0)), "q must lie in [0, 1]"

    _cdf = to_linear_cdf(x, f)
    if not np.isclose(_cdf[-1], 1.0):
        raise ValueError(
            "the CDF built from (x, f) does not reach 1; "
            "f must have a finite, non-zero integral over x")
    _x = np.asarray(x)

    _xq_list = []
    for _q0 in np.atleast_1d(_q).ravel():
        _above = np.flatnonzero(_cdf > _q0)
        if _above.size == 0:
            # No grid point lies strictly above the level: q == 1, or a level
            # within rounding distance of the accumulated total.  The CDF
            # attains its maximum at the last grid point.
            _xq_list.append(_x[-1])
            continue
        # _cdf[0] is 0 and _q0 >= 0, so _hi >= 1 and _lo is a valid index.
        _hi = _above[0]
        _lo = _hi - 1
        # _cdf[_lo] <= _q0 < _cdf[_hi], hence the denominator is positive.
        _p = (_q0 - _cdf[_lo]) / (_cdf[_hi] - _cdf[_lo])
        _xq_list.append(_p * _x[_hi] + (1 - _p) * _x[_lo])

    _xq_arr = np.array(_xq_list, dtype=float).reshape(_q.shape)

    if return_cdf:
        return (_xq_arr, _cdf)
    return _xq_arr

In [9]:
# Sample grid and an unnormalised two-bump profile: a Gaussian-shaped bump
# centred at 0 plus a narrower, half-height bump centred at 3.
x_arr = np.linspace(-2, 5, num=30)
f = np.exp(-x_arr * x_arr) + 0.5 * np.exp(-3 * (x_arr - 3) * (x_arr - 3))

# Probability levels 0.1, 0.2, ..., 0.9 at which the quantiles are requested.
q_arr = np.linspace(0.1, 0.9, num=9)

In [10]:
# Evaluate the quantile positions for every level in `q_arr`; asking for the
# CDF as well gives the curve sampled on `x_arr` for the plots that follow.
xq_arr, cdf = quantile(x_arr, f, q=q_arr, return_cdf=True)

In [None]:
# Overlay the CDF (left y-axis) and the input samples (right y-axis).
figs, axs = plt.subplots()

# CDF on the grid, plus the interpolated quantile positions drawn as dots at
# their probability levels.
axs.plot(x_arr, cdf, color='darksalmon')
axs.plot(xq_arr, q_arr, '.')
axs.set_ylabel("CDF")

# A twin axis shares the x-axis but gives `f` its own vertical scale.
ax2 = axs.twinx()
ax2.plot(x_arr, f)
ax2.set_ylabel("original data")

Text(0, 0.5, 'original data')

In [2]:
fig, (ax, axn) = plt.subplots(nrows=2, figsize=(7,8))

# Height of the piecewise-constant profile on each grid interval: the average
# of the two end-point samples (the same midpoint rule `to_linear_cdf` applies
# before normalising).  Defined here so the cell runs on a fresh kernel.
mid_arr = 0.5 * (f[1:] + f[:-1])

# Top panel: the discrete samples as stems, with the interval-wise constant
# profile drawn as bars spanning each grid interval.
lf, = ax.plot(x_arr, f, 'o', color='darksalmon')
ax.vlines(x_arr, 0, f, linewidth=2.0, colors=lf.get_color())
baa = ax.bar(x_arr[:-1], mid_arr, align='edge', width=np.diff(x_arr))
ax.legend(
    (lf, baa), 
    ("original discrete data", "corresponding continuous distribution"))

# Bottom panel: the CDF, the computed quantile positions, and one horizontal
# guide line per requested probability level.
axn.plot(x_arr, cdf)
axn.plot(xq_arr, q_arr, '.')
axn.hlines(q_arr, x_arr[0], x_arr[-1])

In [5]:
# fig.savefig("data-to-cdf-two-gaussian-test-1.png")