In [None]:
import numpy as np
import scipy as sp
from math import pi

In [None]:
# Evenly spaced evaluation grid starting at -6, step 0.01, stopping before 6.
xs = np.arange(start=-6, stop=6, step=0.01)


def phi(x):
    """Standard normal density ``(2*pi)**(-1/2) * exp(-x**2 / 2)`` evaluated at ``x``.

    ``x`` may be a scalar or a NumPy array; the result follows NumPy
    broadcasting of ``np.exp``.
    """
    normalizer = (2 * pi) ** (-1 / 2)
    exponent = -1 / 2 * x**2
    return normalizer * np.exp(exponent)


def ind(x, lb=-1 / 2, ub=1 / 2):
    """Uniform density on the half-open interval ``[lb, ub)``.

    Returns ``1 / (ub - lb)`` where ``lb <= x < ub`` and ``0`` elsewhere,
    element-wise over ``x``.
    """
    inside = (x >= lb) & (x < ub)
    height = 1 / (ub - lb)
    return np.where(inside, height, 0)

In [None]:
from matplotlib import pyplot as plt


# Draw the Gaussian density and a uniform density on [-1.2, 1.2) on the same axes.
# Each curve gets a label and a legend so the figure can be read on its own, and
# plt.show() keeps the Line2D repr out of the cell output.
plt.plot(xs, phi(xs), label="phi(x)")
plt.plot(xs, ind(xs, -1.2, 1.2), label="ind(x, -1.2, 1.2)")
plt.legend()
plt.show()

In [None]:
# Sorted sample of 200 standard-normal draws, reshaped to a (200, 1) column.
# The fixed seed makes Restart & Run All reproduce the same sample and figures.
# NOTE(review): this rebinds `xs`, which earlier held the evaluation grid.
xs = np.sort(sp.stats.norm().rvs(200, random_state=0)).reshape(-1, 1)

In [None]:
from sklearn.neighbors import KernelDensity
import numpy as np
# Gaussian KDE of the sample for several bandwidths h, drawn on a common grid.
# The grid does not depend on the bandwidth, so it is built once before the loop.
grid = np.arange(-5, 5, 0.01).reshape(-1, 1)
for bw in [0.03, 0.1, 0.3, 1]:
    kde = KernelDensity(kernel="gaussian", bandwidth=bw).fit(xs)
    # score_samples yields log-densities; exponentiate to plot the density itself.
    dens = np.exp(kde.score_samples(grid))
    plt.plot(grid, dens, label=f"h = {bw}")
plt.legend()
plt.show()

In [None]:
# Largest value of the standard normal pdf over the evaluation grid.
np.max(sp.stats.norm().pdf(grid))

In [None]:
from sklearn.neighbors import KernelDensity
import numpy as np

# One panel per kernel: KDEs of the sample for several bandwidths h, drawn over
# the standard normal pdf as a dashed reference curve.
grid = np.arange(-5, 5, 0.01).reshape(-1, 1)
reference_pdf = sp.stats.norm().pdf(grid)  # identical on both panels, so computed once
fig, axs = plt.subplots(1, 2, figsize=(16, 6))
for kernel, ax in zip(["gaussian", "tophat"], axs):
    ax.plot(
        grid,
        reference_pdf,
        alpha=0.5,
        color="gray",
        linestyle="dashed",
        label="N(0, 1)",
    )
    for bw in [0.1, 0.3, 1, 3]:
        kde = KernelDensity(kernel=kernel, bandwidth=bw).fit(xs)
        dens = np.exp(kde.score_samples(grid))
        ax.plot(grid, dens, label=f"h = {bw}", alpha=0.5)
    # Title and legend belong to the panel, so they are set once after all curves
    # are drawn instead of on every bandwidth iteration.
    ax.set_title(f"Kernel = {kernel}")
    ax.legend()
plt.show()

In [None]:
import numpy as np
import pandas as pd

# Side lengths h and dimensions d for which h**d is tabulated.
hs = np.array([0.1, 0.5, 0.9, 0.98])
ds = np.array([1, 2, 5, 10, 20, 25])
# Long format: one row per (h, d) pair, with h varying slowest.
df = pd.DataFrame(
    data=[(side, dim, side**dim) for side in hs for dim in ds],
    columns=["h", "d", "h**d"],
)

In [None]:
# Wide view of the table: one row per h, one column per d, cells hold h**d.
df.set_index(["h", "d"])["h**d"].unstack("d")

In [None]:
# Endpoints of the support of scipy's uniform distribution with loc=-1, scale=2.
# `sp.stats` is used because the bare name `stats` is only imported in a later
# cell, so `stats.uniform(...)` raised NameError here on a fresh kernel.
sp.stats.uniform(-1, 2).support()

In [None]:
from scipy import stats
# Draw N points with d independent coordinates, each uniform between lb and ub.
# scipy's uniform is parametrised by (loc, scale), i.e. lower bound and width.
lb, ub = -1, 1
span = ub - lb
N = 100_000
d = 3
# Fixed seed so that Restart & Run All reproduces the same sample.
X = stats.uniform(lb, span).rvs((N, d), random_state=0)

In [None]:
# Element-wise mask over X: True where the coordinate's absolute value is below 0.5.
np.less(np.abs(X), 0.5)

In [None]:
def fhat_0(X, h):
    """Box-kernel density estimate at the origin.

    Counts the rows of ``X`` whose coordinates all satisfy ``|x_j| < h`` and
    divides that count by ``N * (2 * h) ** d``, i.e. the number of rows times
    the volume of the cube ``(-h, h)^d``.

    Parameters
    ----------
    X : numpy.ndarray of shape (N, d)
        One observation per row.
    h : float
        Half side length of the cube centred at the origin.

    Returns
    -------
    float
        The estimated density at the origin.
    """
    N, d = X.shape
    # Vectorised row-wise test; avoids one Python-level all() call per row.
    inside = np.all(np.abs(X) < h, axis=1)
    return inside.sum() / (N * (2 * h) ** d)

In [None]:
# fhat_0 at the origin for 201 half-widths h in [0.01, 1], indexed by h.
# NOTE(review): the `_1k` suffix does not match N = 100_000 set where X is
# sampled -- confirm which sample size this series is meant to represent.
expected_fhat_0_1k = pd.Series(
    data=[fhat_0(X, h) for h in np.linspace(0.01, 1, 201)],
    index=np.linspace(0.01, 1, 201),
)

In [None]:
# Density estimate at the origin as a function of the half-width h, for three
# sample sizes.
# NOTE(review): this cell originally plotted `expected_fhat_0` and
# `expected_fhat_0_10k`, which no cell in the notebook defines, so it raised
# NameError on a fresh kernel. The curves are rebuilt here from the first n rows
# of X; the sample sizes are inferred from the variable-name suffixes -- confirm
# they match the intended experiment.
half_widths = np.linspace(0.01, 1, 201)
fhat_0_curves = pd.DataFrame(
    {
        f"N = {n:,}": [fhat_0(X[:n], h) for h in half_widths]
        for n in (len(X), 10_000, 1_000)
    },
    index=half_widths,
)
ax = fhat_0_curves.plot()
ax.set_xlabel("h")
ax.set_ylabel("fhat_0(X, h)")
plt.show()

In [None]:
# Density estimate at the origin using a cube of half-width 0.3.
fhat_0(X, h=0.3)

In [None]:
# Rows of X whose coordinates all lie strictly within (-h, h).
# NOTE(review): `h` was not defined before this cell on a fresh kernel (it is
# first assigned in a later cell); 0.3 is taken from the fhat_0(X, 0.3) call in
# the previous cell -- confirm the intended value.
h = 0.3
X[np.all(np.abs(X) < h, axis=1)]

In [None]:
from sklearn.neighbors import KernelDensity

# Compare scikit-learn's top-hat KDE with fhat_0 at the origin for one bandwidth.
# NOTE(review): fhat_0 counts points inside a cube; the window used by
# KernelDensity's "tophat" kernel may have a different shape -- confirm before
# expecting the two numbers to agree.
h = 0.88

sk_fhat = KernelDensity(bandwidth=h, kernel="tophat").fit(X)
# score() is given a single query point (the origin); exponentiate its
# log-likelihood to put it on the density scale next to fhat_0.
np.exp(sk_fhat.score([np.zeros_like(X[0])])), fhat_0(X, h)

In [None]:
import numpy as np
import pandas as pd

# Tabulate h**d for a few side lengths h over every dimension d = 1..50.
# (The earlier short list of dimensions was assigned and immediately overwritten,
# so only the full range is kept.)
hs = [0.25, 0.5, 0.9, 0.95]
ds = np.arange(1, 51, 1)
df = pd.DataFrame([(h, d, h**d) for h in hs for d in ds], columns=["h", "d", "h**d"])

In [None]:
from matplotlib import pyplot as plt

# Wide table: one row per dimension d, one column per side length h, cells h**d.
data = df.set_index(["d", "h"])["h**d"].unstack()
data.plot(figsize=(12, 4))
plt.title("Proporción de las X dentro de un $d$-cubo de lado $h$")
# plt.xscale("log")  # optional: log-scaled d axis

In [None]:
# Row of the wide table for d = 50: h**50 for every tabulated h.
data.loc[50, :]

In [None]:
# Direct evaluation of 0.95 raised to the 50th power.
0.95 ** 50