In [6]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer
from stockPublicFunction import *
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor, HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score

In [7]:
def analyze_stock(stock_name, stock_code, stock_type, revenue_per_share_yoy, price_data, revenue_per_share,
                  PB, revenue_t3m_avg, revenue_t3m_yoy, majority_shareholders_share_ratio, total_shareholders_count,
                  latest_close_price):
    """分析股票数据"""

    # 创建有效数据列表
    valid_data = [
        (revenue, price, rev_per_share)
        for revenue, price, rev_per_share in zip(revenue_t3m_yoy, price_data, revenue_per_share)
        if None not in (revenue, price, rev_per_share) and not (np.isnan(revenue) or np.isnan(price) or np.isnan(rev_per_share))
    ]

    if not valid_data:
        return None

    # 解包有效数据
    valid_revenue, valid_price, valid_rev_per_share = zip(*valid_data)

    # 对数据进行样条插值
    interpolated_revenue = spline_interpolation(np.array(valid_revenue))
    interpolated_price = spline_interpolation(np.array(valid_price))
    interpolated_rev_per_share = spline_interpolation(np.array(valid_rev_per_share))

    # 准备时间序列数据
    price_series = interpolated_price.reshape(-1, 1)
    revenue_series = interpolated_revenue.reshape(-1, 1)
    rev_per_share_series = interpolated_rev_per_share.reshape(-1, 1)

    # 设置权重：对负的营收可赋予更高的负权重
    revenue_weights = np.where(revenue_series < 0, 2.0, 1.0)

    # 正规化与归一化数据，加入权重参数
    revenue_normalized, _, scaler_X1 = normalize_and_standardize_data_weight(revenue_series, weights=revenue_weights)
    rev_per_share_normalized, _, scaler_X2 = normalize_and_standardize_data(rev_per_share_series)
    price_normalized, min_max_scaler_y, scaler_y = normalize_and_standardize_data(price_series)

    # 准备数据用于回归和分类模型
    X_combined = np.hstack((revenue_normalized.reshape(-1, 1), rev_per_share_normalized.reshape(-1, 1)))

    # 回归模型数据
    y_regression = price_normalized.flatten()  # 目标是预测价格

    # 划分回归模型的训练集和测试集
    X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_combined, y_regression, test_size=0.2, random_state=42)

    # 使用 GridSearchCV 进行超参数优化
    hgb_regressor = HistGradientBoostingRegressor(random_state=42)
    param_grid_reg = {
        'max_iter': [50, 100, 150],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'min_samples_split': [2, 5, 10],
        'max_leaf_nodes': [10, 20, 30]
    }
    grid_search_reg = GridSearchCV(hgb_regressor, param_grid_reg, scoring='neg_mean_squared_error', cv=5, n_jobs=-1)
    grid_search_reg.fit(X_train_reg, y_train_reg)

    # 获取最佳回归模型
    regressor_model = grid_search_reg.best_estimator_

    # 预测和评估
    y_pred_reg = regressor_model.predict(X_test_reg)
    final_mse = mean_squared_error(y_test_reg, y_pred_reg)

    # 使用最新数据进行回归预测
    current_feature = np.array([[revenue_t3m_yoy[-1], revenue_per_share[-1]]])
    current_feature_scaled = np.hstack((
        scaler_X1.transform(current_feature[:, 0].reshape(-1, 1)),
        scaler_X2.transform(current_feature[:, 1].reshape(-1, 1))
    ))
    estimated_price_scaled = regressor_model.predict(current_feature_scaled)
    estimated_price = scaler_y.inverse_transform(estimated_price_scaled.reshape(-1, 1)).ravel()[0]

    # 计算价格差异
    price_difference = estimated_price - latest_close_price
    price_diff_percentage = price_difference / latest_close_price * 100

    if abs(price_diff_percentage) > 60:
        color = 'darkred' if latest_close_price > estimated_price else 'lightseagreen'
        action = '强力卖出' if latest_close_price > estimated_price else '强力买入'
    elif 30 <= abs(price_diff_percentage) <= 60:
        color = 'red' if latest_close_price > estimated_price else 'green'
        action = '卖出' if latest_close_price > estimated_price else '买入'
    else:
        color = 'black'
        action = ''

    result_message = (f'<span style="color: {color};">{stock_name} {stock_code} ({stock_type}) - '
                      f'实际股价: {latest_close_price:.2f}, 推算股价: {estimated_price:.2f} ({price_diff_percentage:.2f}%) {action} '
                      f'MSE: {final_mse:.2f} </span><br>')

SyntaxError: unterminated string literal (detected at line 90) (2553182416.py, line 90)

In [None]:
def main():
    NUM_DATA_POINTS = 40  # 控制要使用的数据点数量
    FETCH_LATEST_CLOSE_PRICE_ONLINE = False  # 設置為 True 以從線上獲取最新股價，False 則使用本地文>件數據
    output_file_name = 'ridge.html'  # 输出文件名
    results = []  # 收集结果以便于同时写入文件和屏幕显示

    # 确保输出目录存在
    if not os.path.exists('docs'):
        os.makedirs('docs')

    with open('stockList.txt', 'r', encoding='utf-8') as file_list:
        lines = file_list.readlines()

    for line in lines:
        parts = line.strip().split(' ')
        if len(parts) != 3:
            continue

        stock_code = parts[0]
        stock_name = parts[1]
        stock_type = parts[2]

        try:
            (revenue_per_share_yoy, price_data, revenue_per_share, PB,
             revenue_t3m_avg, revenue_t3m_yoy, majority_shareholders_share_ratio,
             total_shareholders_count, latest_close_price) = fetch_stock_data(NUM_DATA_POINTS, FETCH_LATEST_CLOSE_PRICE_ONLINE,  stock_code)

            result = analyze_stock(stock_name, stock_code, stock_type, revenue_per_share_yoy, price_data,
                                   revenue_per_share, PB, revenue_t3m_avg, revenue_t3m_yoy,
                                   majority_shareholders_share_ratio, total_shareholders_count,
                                   latest_close_price)

            if result:
                print(result)
                results.append(result)

        except ValueError as e:
            error_message = f"<p>处理股票 {stock_code} 时出错: {e}</p>"
            # 收集错误信息
            results.append(error_message)

    # 写入 HTML 文件
    with open(f'docs/{output_file_name}', 'w', encoding='utf-8') as file:
        file.write('<html><head><title>股票分析结果</title></head><body>\n')
        file.write('<h1>股票分析结果</h1>\n')
        for result in results:
            file.write(result)
        file.write('</body></html>\n')


    # 打印实际使用的数据点数量
    if 'price_data' in locals():
        num_data_points_used = len(price_data)
        print(f"本次使用了 {num_data_points_used} 个数据点分析")

In [None]:
if __name__ == "__main__":
    main()
