<a href="https://colab.research.google.com/github/jingjieyuan573-bite/Composite_Distribution_analysis/blob/main/composite_skew_normal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from scipy.integrate import quad
from scipy.optimize import minimize
from scipy.stats import norm, skewnorm, t

In [None]:
def skew_normal_pdf(x, xi, omega, alpha):
    """Evaluate the skew-normal probability density at ``x``.

    The value returned is ``(2 / omega) * phi(z) * Phi(alpha * z)`` with
    ``z = (x - xi) / omega``, where ``phi`` and ``Phi`` are the standard
    normal PDF and CDF.

    Parameters
    ----------
    x : point(s) at which the density is evaluated.
    xi : location parameter.
    omega : scale parameter; used as a divisor and not validated here.
    alpha : shape (skewness) parameter.

    Returns
    -------
    The density value(s) at ``x``.
    """
    # Standardise once; both normal factors are functions of this value.
    standardized = (x - xi) / omega
    scale_factor = 2 / omega
    density = scale_factor * norm.pdf(standardized)
    # Multiplying by Phi(alpha * z) is what introduces the skew.
    return density * norm.cdf(alpha * standardized)

# CDF of the skew-normal distribution
def skew_normal_cdf(x, xi, omega, alpha):
    """Return the skew-normal cumulative probability ``P(X <= x)``.

    The distribution is the one whose density is
    ``(2 / omega) * phi(z) * Phi(alpha * z)`` with ``z = (x - xi) / omega``,
    which is SciPy's ``skewnorm`` with ``a=alpha``, ``loc=xi``, ``scale=omega``.

    The closed-form SciPy CDF is used instead of numerically integrating the
    density over ``(-inf, x]``: adaptive quadrature on an infinite interval is
    slow when called many times and can miss a narrow peak that lies far from
    ``x``, returning a value near 0 where the true value is near 1.

    Parameters
    ----------
    x : scalar upper limit of the cumulative probability.
    xi : location parameter.
    omega : scale parameter.
    alpha : shape (skewness) parameter.

    Returns
    -------
    float
        The cumulative probability at ``x``.
    """
    return float(skewnorm.cdf(x, alpha, loc=xi, scale=omega))

def composite_f2(x, xi, omega, alpha, theta1, theta2):
    """Skew-normal density rescaled by the probability mass between two thresholds.

    Returns ``skew_normal_pdf(x) / (F(theta2) - F(theta1))`` where ``F`` is
    ``skew_normal_cdf`` with the same ``xi``, ``omega`` and ``alpha``.

    Parameters
    ----------
    x : point at which the density is evaluated.
    xi, omega, alpha : location, scale and shape of the skew-normal.
    theta1, theta2 : lower and upper thresholds defining the normalising mass.

    Notes
    -----
    The function does not check that ``x`` lies inside ``[theta1, theta2]``;
    the rescaled density is returned for any ``x``.
    """
    upper_mass = skew_normal_cdf(theta2, xi, omega, alpha)
    lower_mass = skew_normal_cdf(theta1, xi, omega, alpha)
    # Reciprocal of the mass enclosed by the two thresholds.
    normaliser = 1 / (upper_mass - lower_mass)
    return normaliser * skew_normal_pdf(x, xi, omega, alpha)

In [None]:
# Read the Excel file
file_path = r"D:\HS300.xlsx"  # replace with your own file path
df = pd.read_excel(file_path)

# Show an overview of the loaded data
print("数据前 5 行：")
print(df.head())

print("\n数据基本信息：")
# DataFrame.info() writes its report to stdout and returns None, so it must
# not be wrapped in print() (that would emit an extra "None" line).
df.info()

print("\n数据统计信息：")
print(df.describe())
close_price = df["收盘价"]  # replace with your own column name

# Show the type of the selected column, then the column itself
print(type(close_price))
print(close_price)

# Log returns in percent: 100 * ln(P_t / P_{t-1}); the first row has no
# predecessor and is removed by dropna().
log_return = 100* ( np.log(close_price / close_price.shift(1)).dropna())
print("计算结果：")
print(log_return)

statistics = log_return.describe()
print(statistics)

# Sample skewness and kurtosis of the returns (scipy.stats defaults)
skewness = stats.skew(log_return)
kurtosis = stats.kurtosis(log_return)

print("偏度：", skewness)
print("峰度：", kurtosis)

# From here on log_return is a plain NumPy array rather than a pandas Series
log_return = log_return.values

import seaborn as sns
# Histogram of the returns with a kernel density estimate overlaid
sns.histplot(log_return, bins=200, kde=True, color='blue')
plt.title("Histogram")
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.show()

In [None]:
# Lower and upper thresholds used to select the observations for the f2 part.
# NOTE(review): where these two constants come from is not shown here — confirm.
theta1 = -1.4395
theta2 = 1.7066

# Keep, in their original order, the returns lying in [theta1, theta2]
# (both ends inclusive).
f2_data = list(filter(lambda value: theta1 <= value <= theta2, log_return))


In [None]:
# Negative log-likelihood of the rescaled (composite) skew-normal density
def neg_log_likelihood(params, data, theta1 , theta2):
    """Negative log-likelihood of ``data`` under the rescaled skew-normal density.

    Each observation contributes ``log(skew_normal_pdf(x) / mass)`` where
    ``mass = skew_normal_cdf(theta2) - skew_normal_cdf(theta1)``; densities are
    floored at ``1e-10`` before taking the logarithm.

    Parameters
    ----------
    params : sequence ``(xi, omega, alpha)`` — location, scale and shape.
    data : iterable of observations. The likelihood is evaluated on exactly
        these values (no module-level variable is consulted).
    theta1, theta2 : lower and upper thresholds defining the normalising mass.

    Returns
    -------
    float
        The negative log-likelihood. If the normalising mass is not strictly
        positive (zero, negative or NaN) the density is undefined; the value
        returned is then the one obtained when every observation sits at the
        density floor, instead of raising ``ZeroDivisionError``.
    """
    xi, omega, alpha = params
    observations = np.asarray(data, dtype=float)
    density_floor = 1e-10  # avoids log(0)

    # The normalising mass depends only on the parameters, so it is computed
    # once per call rather than once per observation.
    mass = skew_normal_cdf(theta2, xi, omega, alpha) - skew_normal_cdf(theta1, xi, omega, alpha)
    if not mass > 0:
        return -observations.size * np.log(density_floor)

    pdf_values = (1 / mass) * skew_normal_pdf(observations, xi, omega, alpha)
    pdf_values = np.clip(pdf_values, density_floor, None)
    return -np.sum(np.log(pdf_values))

In [None]:
# Copy of the selected observations rounded to two decimals; this list is what
# is handed to the objective as its ``data`` argument and seeds the start point.
x = [round(value, 2) for value in f2_data]

# Starting point [xi, omega, alpha]: sample mean, sample standard deviation and
# a negative shape. (An alternative start that was tried: [0, 1, -1].)
initial_guess = [np.mean(x), np.std(x), -1]

# Box constraints: only omega is restricted, to keep it strictly positive.
bounds = [(None, None), (1e-6, None), (None, None)]

# Maximum-likelihood estimation by minimising the negative log-likelihood
result = minimize(neg_log_likelihood, initial_guess, args=(x, theta1 , theta2) , bounds=bounds)

# A failed optimisation still fills result.x, so report the failure explicitly
# rather than letting the numbers below pass as valid estimates.
if not result.success:
    print(f"Warning: optimizer did not converge: {result.message}")

# Estimated parameters
xi_hat, omega_hat, alpha_hat  = result.x

print(f"Estimated xi: {xi_hat}")
print(f"Estimated omega: {omega_hat}")
print(f"Estimated alpha: {alpha_hat}")
