# DDP detention time: histogram and density (<185 days)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# SciPy is optional: record whether gaussian_kde can be imported so later cells can pick a KDE path
try:
    from scipy.stats import gaussian_kde
    HAVE_SCIPY = True
except Exception:
    HAVE_SCIPY = False

# 1) Load data (the file is tab-delimited despite its .csv extension)
df = pd.read_csv("DDPdata.csv", sep="\t")

# 2) Filter to nonnegative and <185 days (missing values are dropped first)
x = df["detentiontime"].dropna()
x = x[(x >= 0) & (x < 185)].astype(float)
print(f"N in range [0, 185): {x.shape[0]:,}")

# Quick look at the raw (unfiltered) table.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would add a stray "None" line to the output.
df.info()


In [None]:
# 3) Histogram of the filtered detention times (object-oriented Matplotlib API)
hist_fig, hist_ax = plt.subplots(figsize=(8, 6))
hist_ax.hist(x.to_numpy(), bins=45, edgecolor="black")
hist_ax.set_xlabel("Detention time (days)")
hist_ax.set_ylabel("Number of individuals")
hist_ax.set_title("Histogram of Detention Time (<185 days, 45 bins)")
# Horizontal guide lines only, kept faint so the bars stay dominant.
hist_ax.grid(axis="y", alpha=0.5)
plt.show()


In [None]:
# 4) Boundary-corrected KDE (reflection at 0)
import numpy as np


def _gaussian_kde_numpy(samples, points, max_cells=2_000_000):
    """Evaluate a 1-D Gaussian KDE of *samples* at *points* using NumPy only.

    The bandwidth follows Scott's rule, ``std(ddof=1) * n ** (-1/5)``, which
    is the default of ``scipy.stats.gaussian_kde`` for 1-D data.  The points
    are processed in slices so the temporary (points x samples) array never
    holds more than roughly *max_cells* elements.
    """
    n = samples.size
    bandwidth = samples.std(ddof=1) * n ** (-1.0 / 5.0)
    norm = n * bandwidth * np.sqrt(2.0 * np.pi)
    out = np.empty(points.shape, dtype=float)
    step = max(1, max_cells // n)
    for start in range(0, points.size, step):
        z = (points[start:start + step, None] - samples[None, :]) / bandwidth
        out[start:start + step] = np.exp(-0.5 * z * z).sum(axis=1) / norm
    return out


samples = x.to_numpy(dtype=float)
# A KDE bandwidth is undefined without spread in the data; fail with a clear
# message instead of an obscure linear-algebra error further down.
if samples.size < 2 or np.ptp(samples) == 0:
    raise ValueError("KDE needs at least two distinct detention times in [0, 185)")

grid = np.linspace(0, 185, 800)

if HAVE_SCIPY:
    kde = gaussian_kde(samples)
else:
    # pandas' own KDE plot is built on SciPy, so it cannot serve as the
    # no-SciPy fallback; use the NumPy implementation above instead.
    def kde(points):
        return _gaussian_kde_numpy(samples, points)

# Reflection at 0: fold the mass the kernel leaks below zero back onto [0, inf).
dens = kde(grid) + kde(-grid)

# np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; prefer the
# new name when it exists and fall back to the old one on NumPy 1.x.
integrate = getattr(np, "trapezoid", None) or np.trapz
area = integrate(dens, grid)               # normalize on [0, 185]
if area > 0:
    dens = dens / area

plt.figure(figsize=(8,6))
plt.plot(grid, dens)
plt.xlabel("Detention time (days)")
plt.ylabel("Density")
plt.title("Kernel Density of Detention Time (<185 days)\nBoundary-corrected at 0 (reflection)")
plt.grid(axis="y", alpha=0.5)
plt.show()
