# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
from sklearn.mixture import GaussianMixture

# Example data: replace the placeholder sample with your own array/Series
# (target, preds, or residuals, e.g. residuals = preds - y).
# `np.array()` with no argument raises TypeError, so a reproducible synthetic
# sample is used here to keep the script runnable as-is.
arr = np.random.default_rng(0).normal(loc=0.0, scale=1.0, size=500)

# Coerce to a flat float ndarray (lists and pandas Series are accepted) and
# drop NaN/inf so non-finite values do not propagate into the statistics
# computed from `arr`.
arr = np.asarray(arr, dtype=float).ravel()
arr = arr[np.isfinite(arr)]

# 1) Visuals: histogram with a KDE overlay (left) and a normal Q-Q plot (right).
fig, (ax_hist, ax_qq) = plt.subplots(1, 2, figsize=(10, 4))

ax_hist.hist(arr, bins=40, density=True, alpha=0.6)
xs = np.linspace(arr.min(), arr.max(), 200)
kde = stats.gaussian_kde(arr)
ax_hist.plot(xs, kde(xs), lw=2, label='KDE')
ax_hist.set_title('Histogram + KDE')
ax_hist.legend()

# Pass `ax` explicitly: without it sm.qqplot opens a new figure, which leaves
# the right-hand panel empty and puts the title on the wrong axes.
# fit=True standardises the sample, so the 45-degree line is the reference.
sm.qqplot(arr, line='45', fit=True, ax=ax_qq)
ax_qq.set_title('Q-Q plot (vs Normal)')
fig.tight_layout()
plt.show()

# 2) Numeric summary: sample size, location, spread, shape and tail percentiles.
# Each row is printed as "label value [label value ...]".
_summary_rows = [
    ("count", len(arr)),
    ("mean", arr.mean(), "median", np.median(arr)),
    ("std", arr.std(ddof=1)),  # ddof=1 -> sample standard deviation
    ("skewness", stats.skew(arr)),
    ("kurtosis (excess)", stats.kurtosis(arr)),  # >0 heavy-tailed
    ("percentiles", np.percentile(arr, [1, 5, 25, 50, 75, 95, 99])),
]
for _row in _summary_rows:
    print(*_row)

# 3) Normality tests (null hypothesis: the sample comes from a normal
# distribution, so a small p-value is evidence against normality).
print("Shapiro-Wilk p:", stats.shapiro(arr).pvalue)        # small samples
print("Jarque-Bera p:", stats.jarque_bera(arr)[1])

# stats.anderson returns a result object rather than a bare number: print the
# statistic itself, then the critical value it must exceed to reject
# normality at each significance level (given in percent).
ad_result = stats.anderson(arr, dist='norm')
print("Anderson-Darling statistic:", ad_result.statistic)
for crit, sig in zip(ad_result.critical_values, ad_result.significance_level):
    print(f"  reject normality at {sig}% if statistic > {crit}")

# 4) Fit simple distributions and compare log-likelihood / AIC
def fit_and_aic(dist_name, data=None):
    """Fit a ``scipy.stats`` distribution by maximum likelihood and score it.

    Args:
        dist_name: Attribute name of a distribution in ``scipy.stats``
            (for example ``'norm'`` or ``'gamma'``).
        data: Optional array-like sample to fit. When omitted, the
            module-level ``arr`` is used, so existing one-argument calls
            keep working.

    Returns:
        A ``(params, ll, aic)`` tuple: the parameters returned by
        ``dist.fit``, the log-likelihood of the sample under those
        parameters, and ``aic = 2 * k - 2 * ll`` with ``k = len(params)``
        (lower AIC is better).

    Raises:
        AttributeError: If ``dist_name`` is not an attribute of ``scipy.stats``.
        ValueError: If the sample is empty.
    """
    sample = arr if data is None else data
    # Flatten to a float ndarray so lists and pandas Series behave alike.
    sample = np.asarray(sample, dtype=float).ravel()
    if sample.size == 0:
        raise ValueError("cannot fit a distribution to an empty sample")

    dist = getattr(stats, dist_name)
    params = dist.fit(sample)
    ll = np.sum(dist.logpdf(sample, *params))
    k = len(params)  # every fitted parameter counts towards the AIC penalty
    aic = 2 * k - 2 * ll
    return params, ll, aic

# Try each candidate family in turn; a failed fit is reported and skipped so
# that one bad family does not stop the comparison.
for dist_name in ('norm', 'lognorm', 'gamma', 'expon', 't'):
    try:
        params, ll, aic = fit_and_aic(dist_name)
    except Exception as e:
        print(dist_name, "failed:", e)
    else:
        print(dist_name, "AIC:", aic, "params:", params)

# 5) If multimodal, fit Gaussian Mixture
# scikit-learn expects a 2-D (n_samples, n_features) array. Going through
# np.asarray also makes this work for pandas Series, which have no .reshape.
samples_2d = np.asarray(arr, dtype=float).reshape(-1, 1)
gmm = GaussianMixture(n_components=2, random_state=0).fit(samples_2d)
print("GMM weights:", gmm.weights_, "means:", gmm.means_.ravel(), "covars:", gmm.covariances_.ravel())

# Compare against a single Gaussian: a lower BIC for two components suggests
# the mixture describes the sample better than one normal distribution.
gmm_single = GaussianMixture(n_components=1, random_state=0).fit(samples_2d)
print("GMM BIC 1 component:", gmm_single.bic(samples_2d), "2 components:", gmm.bic(samples_2d))


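# Recorded output: NameError: name 'your_series' is not defined
# (presumably from a run that referenced an undefined `your_series`; assign real data to the input array before running)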