This notebook checks the approximation quality of a distribution derived from quantile values.

In [1]:
import numpy as np
import math
import scipy.stats as stats
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns
from naslib.optimizers.bananas.distribution import PointwiseInterpolatedDist

In [2]:
# Global plot styling: apply the seaborn theme first, then override the
# individual matplotlib settings on top of it.
sns.set_style("darkgrid")
rcParams.update({
    "axes.titlepad": 20,  # padding between the axes and the title
    "font.size": 9,
})

In [4]:
def fit_distribution(samples, num_quantiles) -> PointwiseInterpolatedDist:
    """Build a ``PointwiseInterpolatedDist`` from empirical quantiles.

    ``num_quantiles + 1`` evenly spaced probability levels on [0, 1] are
    generated. For each level ``p`` the sample quantile is taken at the
    adjusted level ``min(ceil((n + 1) * p) / n, 1.0)``, where ``n`` is the
    number of samples.

    Args:
        samples: Sequence of observed values.
        num_quantiles: Number of segments the [0, 1] probability range is
            split into; one more quantile than this is computed.

    Returns:
        A ``PointwiseInterpolatedDist`` constructed with
        ``values=(levels, quantiles)`` (the *unadjusted* levels) and ``std``
        set to ``np.std(samples)``.
    """
    n = len(samples)
    spread = np.std(samples)
    levels = np.linspace(0, 1, num_quantiles + 1)

    def _adjusted(level):
        # Shift the level upwards as a function of the sample size and cap it
        # at 1.0 so it remains a valid argument for np.quantile.
        return min(math.ceil((n + 1) * level) / n, 1.0)

    quantiles = [np.quantile(samples, _adjusted(level)) for level in levels]

    return PointwiseInterpolatedDist(values=(levels, quantiles), std=spread)


In [5]:
# Experiment configuration.
sample_size = 100
num_quantiles = 20

# Reference ("true") distribution the approximation is compared against.
true_dist = stats.skewnorm(a=5, loc=0, scale=1)
true_dist.name = "Skewed Gaussian Distribution"

# Draw the observations and fit the quantile-based approximation to them.
samples = true_dist.rvs(size=sample_size)
pointwise_dist = fit_distribution(samples=samples, num_quantiles=num_quantiles)

# Discrete distribution that puts equal probability mass on every observation.
discrete_dist = stats.rv_discrete(
    values=(samples, np.full(len(samples), 1 / len(samples)))
)

In [None]:
# Show the `densities` attribute of the fitted distribution as the cell output.
pointwise_dist.densities

In [None]:
# Show the `intervals` attribute of the fitted distribution as the cell output.
pointwise_dist.intervals

In [8]:
def _mid(interval):
    """Return the midpoint of a two-element ``(left, right)`` interval."""
    lower, upper = interval
    return (upper + lower) / 2


# One x-position per interval of the fitted distribution, used to place the
# estimated densities in the density plot.
interval_centers = [_mid(interval) for interval in pointwise_dist.intervals]

### Density Plot

In [9]:
# Moments and display name of the reference distribution; the plot cells use
# them for the titles and for the plotted x-range.
mean = true_dist.mean()
std = true_dist.std()
dist_name = true_dist.name

In [None]:
# Density plot: pdf of the reference distribution vs. the estimated densities.
title = f"{dist_name} (mean={round(mean, 2)}, std={round(std, 2)})"

# Evaluate the reference pdf on a grid spanning three standard deviations
# around the mean.
x = np.linspace(mean - 3 * std, mean + 3 * std, 100)

fig, ax = plt.subplots()
ax.set_title(title)

# Reference distribution.
ax.plot(x, true_dist.pdf(x), label="Actual", alpha=0.8)

# Estimated distribution: one step per interval, centred on its midpoint.
ax.step(interval_centers, pointwise_dist.densities, where="mid", label="Estimated", alpha=0.8)

ax.legend(loc="upper right")
ax.set_xlabel("x")
ax.set_ylabel("pdf(x)")
fig.tight_layout()

In [11]:
# save
# fig.savefig(output_dir + f"/{title}_pdf_plot.pdf")

### CDF Plot

In [None]:
# CDF plot: cdf of the reference distribution vs. the estimated cdf.
title = f"{dist_name} (mean={round(mean, 2)}, std={round(std, 2)})"

# Evaluate both cdfs on a grid spanning three standard deviations around the
# mean.
x = np.linspace(mean - 3 * std, mean + 3 * std, 100)

fig, ax = plt.subplots()
ax.set_title(title)

# Reference distribution.
ax.plot(x, true_dist.cdf(x), label="Actual", alpha=0.8)

# Estimated distribution, evaluated one grid point at a time.
cdfs = [pointwise_dist.cdf(point) for point in x]
ax.plot(x, cdfs, label="Estimated", alpha=0.8)

ax.legend(loc="lower right")
ax.set_xlabel("x")
ax.set_ylabel("cdf(x)")
fig.tight_layout()

In [53]:
# save
# fig.savefig(output_dir + f"/{title}_cdf_plot.pdf")

### Sampling Histogram Plot

In [None]:
# Sampling histogram: fresh draws from the reference distribution vs. draws
# from the fitted distribution, binned on the same edges.
n_samples = len(samples)
true_samples = true_dist.rvs(size=n_samples)

# Bin edges are derived from the reference draws and shared by both
# histograms so the bars are directly comparable.
bins = np.histogram_bin_edges(true_samples, bins=num_quantiles)

title = f"{dist_name} (mean={round(mean, 2)}, std={round(std, 2)})"

fig, ax = plt.subplots()
ax.set_title(title)

# Reference distribution.
ax.hist(true_samples, bins=bins, alpha=0.6, label="Actual")

# Estimated distribution.
ax.hist(pointwise_dist.rvs(size=n_samples), bins=bins, alpha=0.6, label="Estimated")

ax.legend(loc="upper right")
ax.set_xlabel("value")
ax.set_ylabel("Frequency")
fig.tight_layout()

In [60]:
# save
# fig.savefig(output_dir + f"/{title}_rvs_plot.pdf")