<a href="https://colab.research.google.com/github/jingjieyuan573-bite/Composite_Distribution_analysis/blob/main/composite_tail_ks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#tail-ks
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.integrate import quad
from scipy.interpolate import interp1d
from scipy import stats
import warnings
warnings.filterwarnings("ignore")

# -----------------------------
# 1. Load the data & compute returns
# -----------------------------
file_path = r"D:\TSLA1.xlsx"  # replace with the path to your own workbook
df = pd.read_excel(file_path)

# The closing price is assumed to be in a column named "close".
# Missing and non-positive prices are discarded before taking logarithms.
close_prices = df["close"].dropna()
close_prices = close_prices.loc[close_prices > 0]

# Log-returns in percent: 100 * ln(P_t / P_{t-1}). NaN entries (such as the
# first one, which has no previous price) are dropped.
price_ratio = close_prices / close_prices.shift(1)
returns = (100 * np.log(price_ratio)).dropna().to_numpy()

# Report the sample size and the descriptive statistics of the returns.
returns_summary = pd.Series(returns).describe()
print("收益率样本大小:", len(returns))
print("基本统计信息:")
print(returns_summary)

# -----------------------------
# 2. Define the Skew-Normal PDF & CDF
# -----------------------------
# Fixed skew-normal parameters used throughout the script:
# xi (location), omega (scale) and alpha (shape).
xi, omega, alpha = 4.8892, 7.4419, -2.2178

def skew_normal_pdf(x, xi, omega, alpha):
    """Evaluate the skew-normal density at ``x``.

    Computes ``(2 / omega) * phi(z) * Phi(alpha * z)`` with
    ``z = (x - xi) / omega``, where ``phi`` and ``Phi`` are the standard
    normal PDF and CDF.

    Parameters
    ----------
    x : point(s) at which to evaluate the density.
    xi : location parameter.
    omega : scale parameter (used as a divisor).
    alpha : shape parameter multiplying the standardized value.
    """
    standardized = (x - xi) / omega
    base_density = norm.pdf(standardized)
    skew_weight = norm.cdf(alpha * standardized)
    return (2 / omega) * base_density * skew_weight

def skew_normal_cdf(x, xi, omega, alpha):
    """Return the skew-normal CDF ``P(X <= x)``.

    The value is obtained from ``scipy.stats.skewnorm`` instead of running an
    adaptive quadrature of the density over ``(-inf, x]`` on every call.
    ``skewnorm`` with shape ``a = alpha``, ``loc = xi`` and ``scale = omega``
    has density ``(2 / omega) * phi(z) * Phi(alpha * z)``,
    ``z = (x - xi) / omega``, i.e. the same distribution as the density
    integrated previously.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the CDF.
    xi : float
        Location parameter.
    omega : float
        Scale parameter.
    alpha : float
        Shape (skewness) parameter.

    Returns
    -------
    float or numpy.ndarray
        A plain Python ``float`` for scalar ``x`` (as before); an array of
        CDF values when ``x`` is array-like.
    """
    value = stats.skewnorm.cdf(x, alpha, loc=xi, scale=omega)
    # Scalar callers historically received a built-in float; keep that.
    if np.ndim(value) == 0:
        return float(value)
    return value

# -----------------------------
# 3. Tabulate the model CDF on a grid (speeds up the tail KS)
# -----------------------------
# The grid spans the 0.1th to 99.9th sample percentiles, padded by 1 on
# each side, and is sampled at 1000 equally spaced points.
x_min = np.percentile(returns, 0.1) - 1
x_max = np.percentile(returns, 99.9) + 1
xs_grid = np.linspace(x_min, x_max, 1000)
ys_grid = np.fromiter(
    (skew_normal_cdf(grid_point, xi, omega, alpha) for grid_point in xs_grid),
    dtype=float,
    count=xs_grid.size,
)
# Linear interpolant of the tabulated CDF. Queries to the left/right of the
# grid do not raise; they return 0.0 and 1.0 respectively.
cdf_interp = interp1d(
    xs_grid,
    ys_grid,
    kind='linear',
    bounds_error=False,
    fill_value=(0.0, 1.0),
)

# -----------------------------
# 4. Tail-KS 函数
# -----------------------------
def tail_ks(sample, model_cdf_interp, tail='right', theta=None):
    """Kolmogorov-Smirnov distance between one tail of a sample and a model.

    The observations beyond ``theta`` are compared with the model CDF
    conditioned on that tail:

    * ``tail='left'``  uses ``sample <= theta`` and ``F(x) / F(theta)``;
    * ``tail='right'`` uses ``sample >= theta`` and
      ``(F(x) - F(theta)) / (1 - F(theta))``.

    The statistic is the two-sided KS distance: at every sorted tail point
    the model value is compared with the empirical CDF both just after the
    jump (``i / n``) and just before it (``(i - 1) / n``). Looking only at
    ``i / n`` underestimates the supremum distance.

    Parameters
    ----------
    sample : array-like
        Full sample; the tail is selected internally.
    model_cdf_interp : callable
        Model CDF ``F``. It is called one scalar at a time and its result is
        converted with ``float``.
    tail : {'left', 'right'}
        Which tail to test.
    theta : float
        Tail threshold (inclusive). Required.

    Returns
    -------
    tuple
        ``(D, x_at_D, sample_tail, ecdf, model_vals)`` where ``D`` is the KS
        distance, ``x_at_D`` the tail observation at which it is attained,
        ``sample_tail`` the sorted tail observations, ``ecdf`` the values
        ``i / n`` and ``model_vals`` the conditional model CDF at
        ``sample_tail``.

    Raises
    ------
    ValueError
        If ``tail`` is not ``'left'``/``'right'`` or no observation falls in
        the requested tail.
    TypeError
        If ``theta`` is not supplied.
    ZeroDivisionError
        If the model assigns zero probability to the requested tail, so the
        conditional CDF cannot be normalised.
    """
    if tail not in ('left', 'right'):
        raise ValueError("tail must be 'left' or 'right'")
    if theta is None:
        raise TypeError("theta (the tail threshold) must be provided")

    sample = np.asarray(sample)
    in_tail = sample <= theta if tail == 'left' else sample >= theta
    sample_tail = np.sort(sample[in_tail])
    n = len(sample_tail)
    if n == 0:
        raise ValueError(
            "no observations in the {} tail for theta={}".format(tail, theta)
        )

    F_theta = float(model_cdf_interp(theta))
    # offset/mass turn the unconditional CDF into the tail-conditional one.
    if tail == 'left':
        offset, mass = 0.0, F_theta
    else:
        offset, mass = F_theta, 1 - F_theta
    if mass == 0:
        raise ZeroDivisionError(
            "model CDF assigns zero probability to the {} tail at theta={}".format(
                tail, theta
            )
        )

    # Evaluate point by point so scalar-only CDF callables keep working.
    raw_cdf = np.array([float(model_cdf_interp(x)) for x in sample_tail])
    model_vals = (raw_cdf - offset) / mass

    steps = np.arange(1, n + 1)
    ecdf = steps / n              # ECDF just after each jump
    ecdf_before = (steps - 1) / n  # ECDF just before each jump
    D_vals = np.maximum(ecdf - model_vals, model_vals - ecdf_before)

    idx = np.argmax(D_vals)
    D = D_vals[idx]
    return D, sample_tail[idx], sample_tail, ecdf, model_vals

# -----------------------------
# 5. Tail-KS evaluation
# -----------------------------
# Tail thresholds (kept identical to the ones used for the composite model)
theta_left, theta_right = -1.4395, 1.171

# Each call returns (D, x at D, sorted tail sample, ECDF values, model values).
left_fit = tail_ks(returns, cdf_interp, tail='left', theta=theta_left)
D_left, x_left, sample_tail_left, ecdf_left, model_left = left_fit

right_fit = tail_ks(returns, cdf_interp, tail='right', theta=theta_right)
D_right, x_right, sample_tail_right, ecdf_right, model_right = right_fit

# Report the statistic, where it is attained and the tail sample size.
n_left = len(sample_tail_left)
n_right = len(sample_tail_right)
print("Left-tail KS D = {:.6f} at x = {:.4f} ({} obs)".format(D_left, x_left, n_left))
print("Right-tail KS D = {:.6f} at x = {:.4f} ({} obs)".format(D_right, x_right, n_right))

# -----------------------------
# 6. Plots
# -----------------------------
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# One panel per tail: (axis, title template, D, tail sample, ECDF, model CDF).
tail_panels = (
    (axes[0], "Left tail: D={:.4f}", D_left,
     sample_tail_left, ecdf_left, model_left),
    (axes[1], "Right tail: D={:.4f}", D_right,
     sample_tail_right, ecdf_right, model_right),
)
for panel_ax, panel_title, panel_D, panel_x, panel_ecdf, panel_model in tail_panels:
    # Empirical step function against the conditional model CDF.
    panel_ax.step(panel_x, panel_ecdf, where='post', label='Empirical ECDF')
    panel_ax.plot(panel_x, panel_model, 'r.-', label='Model CDF')
    panel_ax.set_title(panel_title.format(panel_D))
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()
plt.show()