In [1]:
"""
参数回归-模型2-自适应权重-第一版

介绍：
在模型2-自适应权重-第一版的基础上，优化权重策略：离均值曲线越远的样本点，权重越高

"""
# todo 优化权重策略：离均值曲线越远的样本点，权重越高。

'\n参数回归-模型2-自适应权重版\n\n介绍：\n在模型2的基础上，在回归过程中添加了自适应权重。\n给局部最高值和最低值予以更高的权重，从而优化回归效果。\n\n'

In [56]:
# 导入包和数据
import math

import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from lmfit import Model
from scipy.signal import find_peaks

# -------------------------------
# 1. 加载数据
# 假设CSV文件 'btc_prices.csv' 中包含 Date 和 Close 两列
df = pd.read_csv('btc_prices.csv', parse_dates=['open_date'], index_col='open_date')
df = df.sort_index()  # 确保按日期排序

# 将日期转换为数值（天数）用于回归分析
df['Time'] = np.arange(len(df))

x_data = df['Time']
y_data = df['open_price']
y_data_log = np.log(df['open_price'])

In [57]:
# 模型和参数

# 自定义参数
# DISTANCE_PERCENTAGE = 0.25 # 相邻极值之间的距离至少为0.25个涨跌周期
# VALLEY_WEIGHT = 0.4 # 极小值的权重是极大值的40%

# 模型
def fn_m(x, l:float, k:float, x0:float, c:float):
    return l*((x+x0)**k) + c

def fn_a(x, a:float):
    return a

def fn_s(x, x2: float, t:float):
    return np.sin(x/t + x2)

def fn_price(x, a:float, c:float, l:float, k:float, x0:float, x2:float, t:float):
    return fn_m(x, l, k, x0, c) * ( 1 + fn_a(x, a) * fn_s(x, x2, t))

def fn_log_price(x, a:float, c:float, l:float, k:float, x0:float, x2:float, t:float):
    prices = fn_price(x, a, c, l, k, x0, x2, t)
    # 将小于0的值都置为一个非常小的正数
    prices[prices < 0] = 1e-9
    return np.log(prices)


# 手动提供的参数
INIT_A = 0.8
INIT_C = 1
INIT_L = 600
INIT_K = 0.58
INIT_X0 = 1
# INIT_X1 = 600
INIT_X2 = math.pi * 0.7
INIT_T = 1400/math.pi/2

# # 回归获得的参数
# INIT_A = 5.68868564
# INIT_C = 291.096217
# INIT_L = 18.5985387
# INIT_K = 0.99999000
# INIT_X0 = 11.8565495
# INIT_X1 = 6.24777991
# INIT_X2 = 1.21494460
# INIT_T = 208.538659

# 创建 lmfit 模型
model = Model(fn_price)
model_log = Model(fn_log_price)

# 根据先验知识给出初始猜测，若无先验则可均设为1
# params = model.make_params(a=INIT_A, c=INIT_C, l=INIT_L, k=INIT_K, x0=INIT_X0, x2=INIT_X2, t=INIT_T)
params = model_log.make_params(a=INIT_A, c=INIT_C, l=INIT_L, k=INIT_K, x0=INIT_X0, x2=INIT_X2, t=INIT_T)

# 如果有参数边界、固定值或者其他约束，可使用：
params['a'].set(min=0+1e-9, max=1-1e-9) # a>0
params['c'].set(min=0+1e-9) # c>0
params['l'].set(min=0+1e-9) # L>0
params['k'].set(min=0+1e-9, max=1-1e-9) # 0<k<1
params['x0'].set(min=0+1e-9) # x0>0
params['x2'].set(min=0, max=2*math.pi) # 0<=x2<=2pi
params['t'].set(min=0) # T>0


# 准备数据
true_params = [INIT_A, INIT_C, INIT_L, INIT_K, INIT_X0, INIT_X2, INIT_T]
y_true = fn_price(x_data, *true_params) # 初始参数曲线
y_true_log = fn_log_price(x_data, *true_params) # 初始参数log曲线
y_true_m = fn_m(x_data, l=INIT_L, k=INIT_K, x0=INIT_X0, c=INIT_C) # 均值曲线

In [61]:
# 回归过程
# 初始拟合
result = model_log.fit(y_data_log, params, x=x_data, method='trust-constr')

new_params = result.best_values

result_exp = np.exp(result.best_fit)
result_exp_m = fn_m(x_data, l=new_params['l'], k=new_params['k'], x0=new_params['x0'], c=new_params['c'])

# 构造权重
weights = np.pow(np.abs(result_exp - result_exp_m) / result_exp_m, 3)

# 二次拟合 加入权重
tmp_params = model_log.make_params(
    a=new_params['a'],
    c=new_params['c'],
    l=new_params['l'],
    k=new_params['k'],
    x0=new_params['x0'],
    x2=new_params['x2'],
    t=new_params['t']
)
result = model_log.fit(y_data_log, params=tmp_params, x=x_data, weights=weights, method='trust-constr')
y_regression_fit = np.exp(result.best_fit)

# 输出拟合报告
print(result.fit_report())

# 最终参数
new_params = result.best_values

#
y_regression_fit_m = fn_m(x_data, l=new_params['l'], k=new_params['k'], x0=new_params['x0'], c=new_params['c'])
y_regression_fit_m_log = np.log(y_regression_fit_m)

[[Model]]
    Model(fn_log_price)
[[Fit Statistics]]
    # fitting method   = equality_constrained_sqp
    # function evals   = 1240
    # data points      = 2743
    # variables        = 7
    chi-square         = 1.24705886
    reduced chi-square = 4.5580e-04
    Akaike info crit   = -21096.1818
    Bayesian info crit = -21054.7642
    R-squared          = 0.76152604
    this fitting method does not natively calculate uncertainties
    and numdifftools is not installed for lmfit to do this. Use
    `pip install numdifftools` for lmfit to estimate uncertainties
    with this fitting method.
[[Variables]]
    a:   0.51310514 (init = 0.5190215)
    c:   58.0701422 (init = 57.04499)
    l:   21.3495347 (init = 16.78569)
    k:   0.99293232 (init = 1)
    x0:  165.971613 (init = 147.2876)
    x2:  1.07106131 (init = 1.25298)
    t:   198.644105 (init = 207.5277)


In [53]:
# 输出权重曲线
fig = make_subplots()

# 添加log价格曲线
fig.add_trace(go.Scatter(x=df.index, y=weights, mode='lines', name='权重'))


# 更新布局
fig.update_layout(
    title='日期-权重曲线',
    xaxis_title='日期',
    yaxis_title='权重',
    legend=dict(x=0, y=1)
)

# 显示图表
fig.show()

In [54]:
# 创建log图表
fig = make_subplots()

# 添加log价格曲线
fig.add_trace(go.Scatter(x=df.index, y=y_data_log, mode='lines', name='比特币log价格'))

# 添加初始参数拟合曲线
fig.add_trace(go.Scatter(x=df.index, y=y_true_log, mode='lines', name='初始参数log拟合曲线'))

# 添加初始参数均值曲线
fig.add_trace(go.Scatter(x=df.index, y=np.log(y_true_m), mode='lines', name='初始参数log均值曲线'))

# 添加拟合价格曲线
fig.add_trace(go.Scatter(x=df.index, y=result.best_fit, mode='lines', name='拟合log价格'))

# 添加拟合log均值曲线
fig.add_trace(go.Scatter(x=df.index, y=y_regression_fit_m_log, mode='lines', name='拟合log均值曲线'))

# 更新布局
fig.update_layout(
    title='比特币拟合log价格',
    xaxis_title='日期',
    yaxis_title='价格',
    legend=dict(x=0, y=1)
)

# 显示图表
fig.show()

In [55]:
# 创建价格图表
fig = make_subplots()

# 添加实际价格曲线
fig.add_trace(go.Scatter(x=df.index, y=y_data, mode='lines', name='比特币价格'))

# 添加初始参数拟合曲线
fig.add_trace(go.Scatter(x=df.index, y=y_true, mode='lines', name='初始参数拟合曲线'))

# 添加初始参数均值曲线
fig.add_trace(go.Scatter(x=df.index, y=y_true_m, mode='lines', name='初始参数均值曲线'))

# 添加拟合价格曲线
fig.add_trace(go.Scatter(x=df.index, y=y_regression_fit, mode='lines', name='拟合价格'))

# 添加拟合log均值曲线
fig.add_trace(go.Scatter(x=df.index, y=y_regression_fit_m, mode='lines', name='拟合均值曲线'))

# 更新布局
fig.update_layout(
    title='比特币拟合价格',
    xaxis_title='日期',
    yaxis_title='价格',
    legend=dict(x=0, y=1)
)

# 显示图表
fig.show()