In [None]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.stats import zscore
from statsmodels.tsa.stattools import grangercausalitytests
import jieba
from snownlp import SnowNLP
import tushare as ts
import akshare as ak
from statsmodels.tsa.stattools import adfuller, coint, kpss,grangercausalitytests
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import statsmodels.api as sm
from linearmodels.panel import PanelOLS, RandomEffects, compare
from linearmodels.iv import IV2SLS
from scipy import stats
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Stationarity testing
# Unit-root (Augmented Dickey-Fuller) test
def adf_test(series, alpha=0.05):
    """Run an Augmented Dickey-Fuller test on ``series`` and print a report.

    Parameters
    ----------
    series : array-like
        One-dimensional sample. Missing values are dropped before the test,
        since lagged or differenced columns start with NaN and ``adfuller``
        does not accept missing data.
    alpha : float, default 0.05
        Significance level. The series is reported as stationary when the
        p-value is less than or equal to ``alpha``.

    Returns
    -------
    None
        The statistic, p-value, critical values and verdict are printed.
    """
    # Wrap in a Series so plain lists/arrays get the same NaN handling.
    cleaned = pd.Series(series).dropna()
    target = adfuller(cleaned)
    print('ADF Statistic: {}'.format(target[0]))
    print('p-value: {}'.format(target[1]))
    print('Critical Values:')
    # target[4] maps each significance label to its critical value.
    for key, value in target[4].items():
        print('\t{}: {}'.format(key, value))
    if target[1] <= alpha:
        print("序列平稳")
    else:
        print("序列不平稳")

In [None]:
def stock_test(result):
    """Print an ADF unit-root report for every numeric column of ``result``.

    Parameters
    ----------
    result : pandas.DataFrame
        Frame whose columns are tested one by one with ``adf_test``.
        Non-numeric columns (identifiers, date strings, ...) cannot be fed to
        the ADF regression, so they are reported as skipped instead of
        aborting the whole run.

    Returns
    -------
    None
        All results are printed.
    """
    numeric_columns = set(result.select_dtypes(include="number").columns)
    for column in result.columns:
        if column not in numeric_columns:
            print(f"跳过非数值列 - {column}")
            continue
        print(f"单位根检验 - {column}:")
        adf_test(result[column])

In [None]:
def stock_diff_process(data_processed, columns=None):
    """Return a copy of ``data_processed`` with selected columns first-differenced.

    The input frame is left untouched, so the cell calling this function can
    be re-run without differencing the same data twice, and a filtered slice
    can be passed in without writing through to its parent frame.

    Parameters
    ----------
    data_processed : pandas.DataFrame
        Frame containing the columns to difference.
    columns : iterable of str, optional
        Columns to replace by their first difference. Defaults to
        ``['pe_new', 'pb_new', 'turnover_rate_new']``.

    Returns
    -------
    pandas.DataFrame
        New frame; the first row of each differenced column is NaN.
    """
    if columns is None:
        columns = ['pe_new', 'pb_new', 'turnover_rate_new']
    differenced = data_processed.copy()
    for column in columns:
        differenced[column] = data_processed[column].diff()
    return differenced

In [None]:
def test_stationarity(series, alpha=0.05):
    # ADF检验
    adf_result = adfuller(series.dropna())
    print(f"ADF Statistic: {adf_result[0]:.4f}")
    print(f"ADF p-value: {adf_result[1]:.4f}")
    print("ADF结论: 序列平稳" if adf_result[1] < alpha else "ADF结论: 序列非平稳")

    # KPSS检验
    kpss_result = kpss(series.dropna())
    print(f"\nKPSS Statistic: {kpss_result[0]:.4f}")
    print(f"KPSS p-value: {kpss_result[1]:.4f}")
    print("KPSS结论: 序列趋势平稳" if kpss_result[1] > alpha else "KPSS结论: 序列存在单位根")

# 对每个变量执行检验

## 数据载入

In [None]:
# Keep only the columns used by the analysis. 'ts_code' is part of the
# selection because the per-stock stationarity loop later in the notebook
# filters data_processed on that column.
# NOTE(review): assumes `result` (built outside this section) carries a
# 'ts_code' column - confirm against the cell that creates it.
data = result[['ts_code', 'trade_date', 'sentiment_index', 'sentiment_index_lag_1', 'sentiment_index_square',
               'agreement_index', 'returns_new', 'volatility', 'log_volatility', 'pe_new', 'pe', 'pb', 'pb_new',
               'm2_yoy', 'turnover_rate_new']].copy()

## 数据处理

In [None]:
# Preliminary preprocessing: standardization
# 1.1 Z-score standardization (intended for returns and volatility)
cols_to_standardize = [
    'returns_new',
    'log_volatility',
    'sentiment_index',
    'sentiment_index_lag_1',
]
# Each column is centred on its own mean and scaled by its own standard deviation.
data_std = data[cols_to_standardize].apply(lambda col: col.sub(col.mean()).div(col.std()))


In [None]:
# 1.2 Min-max scaling (intended for trading volume)
# No columns are selected at present, so the scaling step itself stays
# disabled; the empty list is still consumed by the merge cell that follows.
cols_to_minmax = []
# data_minmax = data[cols_to_minmax].apply(lambda x: (x - x.min())/(x.max() - x.min()))

In [None]:
# Merge the standardized columns back in. The unscaled versions of those
# columns are dropped; every other column of `data` is carried over as-is.
data_processed = pd.concat(
    [data.drop(columns=[*cols_to_standardize, *cols_to_minmax]), data_std],
    axis="columns",
)

## 单位根平稳性检验

In [None]:
# Per-stock workflow: ADF report on the levels, then first-difference the
# valuation/turnover columns and re-check each of them with ADF + KPSS.
for stock_id in stock_id_list:
    # .copy() detaches the filtered rows from data_processed, so the
    # differencing step below never assigns into a slice of the shared frame.
    test_data = data_processed[data_processed['ts_code'] == stock_id].copy()
    print(f"\n=== {stock_id} 平稳性检验 ===")
    # Only numeric columns are passed on: the stock identifier (and any other
    # non-numeric column) cannot be used in a unit-root regression.
    stock_test(test_data.select_dtypes(include='number'))
    test_data = stock_diff_process(test_data)

    for column in ['pe_new', 'pb_new', 'turnover_rate_new']:
        print(f"\n=== {stock_id}_{column} 平稳性检验 ===")
        test_stationarity(test_data[column])
