<a href="https://colab.research.google.com/github/jingjieyuan573-bite/Composite_Distribution_analysis/blob/main/composite_skew_t.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm, t
from scipy.integrate import quad
from scipy.optimize import minimize
from scipy import stats

In [None]:
def skew_t_pdf(x, xi_t, omega_t, alpha_t, nu):
    """Evaluate the skew-t probability density at ``x``.

    Args:
        x: Point(s) at which the density is evaluated.
        xi_t: Location parameter subtracted from ``x``.
        omega_t: Scale parameter that ``x - xi_t`` is divided by.
        alpha_t: Skewness parameter that weights the skewing factor.
        nu: Degrees of freedom of the Student-t density.

    Returns:
        ``(2 / omega_t) * t.pdf(z, nu) * t.cdf(w, nu + 1)`` with
        ``z = (x - xi_t) / omega_t`` and
        ``w = alpha_t * z * sqrt((nu + 1) / (nu + z**2))``.
    """
    # Standardized variable
    standardized = (x - xi_t) / omega_t
    # Argument of the skewing factor
    skew_arg = alpha_t * standardized * np.sqrt((nu + 1) / (nu + standardized**2))
    # Symmetric Student-t density and the skewing weight applied to it
    base_density = t.pdf(standardized, df=nu)
    skew_weight = t.cdf(skew_arg, df=nu + 1)
    return (2 / omega_t) * base_density * skew_weight

# Skew-t CDF
def skew_t_cdf(x, xi_t, omega_t, alpha_t, nu):
    """Return the skew-t cumulative probability at ``x``.

    The value is obtained by numerically integrating ``skew_t_pdf`` from
    minus infinity up to ``x`` with ``scipy.integrate.quad``; only the
    integral estimate is returned, the error estimate is discarded.

    Args:
        x: Upper integration limit.
        xi_t, omega_t, alpha_t, nu: Parameters forwarded to ``skew_t_pdf``.
    """
    density_params = (xi_t, omega_t, alpha_t, nu)
    integral, _abs_error = quad(skew_t_pdf, -np.inf, x, args=density_params)
    return integral

def composite_f1(x, xi_t, omega_t, alpha_t, nu, theta1, theta2):
    """Return the skew-t density at ``x`` rescaled by a tail probability.

    For ``x <= theta1`` the density is divided by the skew-t CDF at
    ``theta1``; for every other ``x`` it is divided by one minus the CDF at
    ``theta2``.

    NOTE: values strictly between ``theta1`` and ``theta2`` also take the
    second branch; callers are presumably expected to pass tail values only.

    Args:
        x: Scalar point at which to evaluate.
        xi_t, omega_t, alpha_t, nu: Skew-t parameters.
        theta1: Lower threshold.
        theta2: Upper threshold.
    """
    if x <= theta1:
        tail_mass = skew_t_cdf(theta1, xi_t, omega_t, alpha_t, nu)
    else:
        tail_mass = 1 - skew_t_cdf(theta2, xi_t, omega_t, alpha_t, nu)
    return 1 / tail_mass * skew_t_pdf(x, xi_t, omega_t, alpha_t, nu)

In [None]:
# Read the Excel file
file_path = r"D:\HS300.xlsx"  # replace with your own file path
df = pd.read_excel(file_path)

# Show an overview of the data
print("数据前 5 行：")
print(df.head())

print("\n数据基本信息：")
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
df.info()

print("\n数据统计信息：")
print(df.describe())
close_price = df["收盘价"]  # replace with your own column name

# Show the type and contents of the closing-price column
print(type(close_price))

#close_price = close_price.astype(float)
print(close_price)
# Log returns in percent; the first row has no predecessor and is dropped
log_return = 100 * ( np.log(close_price / close_price.shift(1)).dropna())
print("计算结果：")
print(log_return)

statistics = log_return.describe()
print(statistics)

# Sample skewness and kurtosis (scipy defaults)
skewness = stats.skew(log_return)
kurtosis = stats.kurtosis(log_return)

print("偏度：", skewness)
print("峰度：", kurtosis)

# Convert to a NumPy array
log_return = log_return.values

import seaborn as sns
# Plot a histogram with a KDE overlay
sns.histplot(log_return, bins=80, kde=True, color='blue')
plt.title("Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()

In [None]:
# Lower and upper thresholds that delimit the two tails
theta1, theta2 = -1.4395, 1.7066

# Keep only the observations lying in either tail (at or beyond a threshold)
f1_data = [ret for ret in log_return if (ret <= theta1) or (ret >= theta2)]


In [None]:
# Negative log-likelihood of the composite distribution
def neg_log_likelihood(params, data, theta1, theta2):
    """Return the negative log-likelihood of ``data`` under ``composite_f1``.

    Args:
        params: Sequence ``(xi_t, omega_t, alpha_t, nu)`` of skew-t parameters.
        data: Iterable of scalar observations to evaluate.
        theta1: Lower threshold forwarded to ``composite_f1``.
        theta2: Upper threshold forwarded to ``composite_f1``.

    Returns:
        ``-sum(log(f(x)))`` over ``data``, with each density value floored at
        ``1e-10`` before the logarithm is taken.
    """
    xi_t, omega_t, alpha_t, nu = params
    # Evaluate the sample that was passed in, not a module-level global, so
    # the objective always matches what the caller hands to the optimizer.
    pdf_values = [
        composite_f1(x, xi_t, omega_t, alpha_t, nu, theta1, theta2) for x in data
    ]
    pdf_values = np.clip(pdf_values, 1e-10, None)  # avoid log(0)
    return -np.sum(np.log(pdf_values))

In [None]:
# Tail observations rounded to two decimals; used for the starting values and
# passed to the objective as its data argument
x = [round(x, 2) for x in f1_data]
#initial_guess = [0.1677, 2.5634, -0.0192 ,3.0587]  # [xi, omega, alpha ,nu ]
initial_guess = [np.mean(x), np.std(x), -1, 4.0]  # [xi, omega, alpha, nu]

# Parameter bounds: omega >= 1e-6 and nu >= 2.1; xi and alpha are unbounded
bounds = [(None, None), (1e-6, None), (None, None), (2.1, None)]

# Maximum-likelihood estimation
result = minimize(neg_log_likelihood, initial_guess, args=(x, theta1, theta2), bounds=bounds)

# minimize() reports failure through the result object rather than raising,
# so surface it instead of silently presenting non-converged values.
if not result.success:
    print(f"Warning: optimizer did not converge: {result.message}")

# Estimated parameters
xi_t_hat, omega_t_hat, alpha_t_hat, nu_hat = result.x

print(f"Estimated xi: {xi_t_hat}")
print(f"Estimated omega: {omega_t_hat}")
print(f"Estimated alpha: {alpha_t_hat}")
print(f"Estimated nu: {nu_hat}")
