In [68]:
import pandas as pd
import yfinance as yf
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten, LayerNormalization
import transformers 
import matplotlib.pyplot as plt
import plotly.graph_objects as go


In [69]:
def get_stock_data(stock_symbols, period='1y'):
    stock_data = {}
    for symbol in stock_symbols:
        try:
            stock_data[symbol] = yf.download(symbol, period=period)['Close']
        except Exception as e:
            print(f"Error downloading data for {symbol}: {e}")
    return pd.DataFrame(stock_data)


In [70]:
from io import BytesIO
from zipfile import ZipFile
import requests

def get_fama_french_factors():
    url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip"
    response = requests.get(url)
    with ZipFile(BytesIO(response.content)) as zipfile:
        with zipfile.open('F-F_Research_Data_Factors_daily.CSV') as file:
            ff_data = pd.read_csv(file, skiprows=3)
            # 移除最后一行的版权信息
            ff_data = ff_data[:-1]
            # 重命名未命名的列
            ff_data.rename(columns={'Unnamed: 0': 'Date'}, inplace=True)
            ff_data['Date'] = pd.to_datetime(ff_data['Date'], format='%Y%m%d')
            ff_data = ff_data.set_index('Date')
            ff_data = ff_data.rename(columns={'Mkt-RF': 'Mkt_RF', 'RF': 'Risk_Free'})
    return ff_data[['Mkt_RF', 'SMB', 'HML', 'Risk_Free']]


In [71]:
def align_data(stock_data, factors):
    common_dates = stock_data.index.intersection(factors.index)
    stock_data = stock_data.loc[common_dates]
    factors = factors.loc[common_dates]
    return stock_data, factors

In [72]:
def generate_arbitrage_portfolio(stock_data, factors):
    stock_returns = stock_data.pct_change().dropna()
    print(len(stock_returns))
    factors = factors.loc[stock_returns.index]
    print(len(factors))
    factors = factors[['Mkt_RF', 'SMB', 'HML']]
    
    betas = np.linalg.lstsq(factors, stock_returns, rcond=None)[0]
    residuals = stock_returns - factors @ betas
    return residuals


In [73]:
def build_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(LayerNormalization())
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


In [74]:
def train_model(stock_data, factors):
    residuals = generate_arbitrage_portfolio(stock_data, factors)
    print(residuals)
    factors = factors.loc[residuals.index]
    
    # 检查数据中的NaN和无穷大值
    print("检查输入数据和目标数据")
    print(f"factors中NaN值的数量: {np.isnan(factors).sum()}")
    print(f"factors中无穷大值的数量: {np.isinf(factors).sum()}")
    print(f"residuals中NaN值的数量: {np.isnan(residuals).sum()}")
    print(f"residuals中无穷大值的数量: {np.isinf(residuals).sum()}")
    
    X_train, X_test, y_train, y_test = train_test_split(factors, residuals, test_size=0.2, random_state=42)
    
    model = build_cnn_model((X_train.shape[1], 1))
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))
    
    return model


In [75]:
def generate_trading_signals(model, factors, threshold=1.5):
    signals = model.predict(factors)
    buy_signal = signals < -threshold
    sell_signal = signals > threshold
    close_signal = (signals > -threshold) & (signals < threshold)
    return buy_signal, sell_signal, close_signal


In [76]:
def backtest_strategy(stock_data, buy_signal, sell_signal, close_signal, initial_capital=100000):
    capital = initial_capital
    position = 0
    portfolio_values = []

    for i in range(len(stock_data)):
        if buy_signal[i]:
            position = capital / stock_data[i]
        elif sell_signal[i]:
            capital = position * stock_data[i]
            position = 0
        portfolio_values.append(capital + position * stock_data[i])
        
    return portfolio_values


In [77]:
def visualize_results(stock_data, portfolio_values, buy_signal, sell_signal):
    fig = go.Figure()
    
    fig.add_trace(go.Scatter(x=stock_data.index, y=stock_data, mode='lines', name='Stock Price'))
    fig.add_trace(go.Scatter(x=stock_data.index[buy_signal], y=stock_data[buy_signal], mode='markers', marker=dict(color='green', size=10), name='Buy Signal'))
    fig.add_trace(go.Scatter(x=stock_data.index[sell_signal], y=stock_data[sell_signal], mode='markers', marker=dict(color='red', size=10), name='Sell Signal'))
    
    fig.add_trace(go.Scatter(x=stock_data.index, y=portfolio_values, mode='lines', name='Portfolio Value'))
    
    fig.show()


In [78]:
if __name__ == "__main__":
    stock_symbols = ['AAPL', 'MSFT', 'GOOG']
    stock_data = get_stock_data(stock_symbols)
    
    ff_factors = get_fama_french_factors()
    
    stock_data, ff_factors = align_data(stock_data, ff_factors)
    
    model = train_model(stock_data, ff_factors)
    
    common_dates = stock_data.index.intersection(ff_factors.index)
    factors = ff_factors[['Mkt_RF', 'SMB', 'HML']].loc[common_dates]
    stock_data = stock_data.loc[common_dates]
    buy_signal, sell_signal, close_signal = generate_trading_signals(model, factors)
    
    portfolio_values = backtest_strategy(stock_data['AAPL'], buy_signal, sell_signal, close_signal)
    
    visualize_results(stock_data['AAPL'], portfolio_values, buy_signal, sell_signal)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


210
210
             0   1   2  AAPL  GOOG  MSFT
Date                                    
2023-08-02 NaN NaN NaN   NaN   NaN   NaN
2023-08-03 NaN NaN NaN   NaN   NaN   NaN
2023-08-04 NaN NaN NaN   NaN   NaN   NaN
2023-08-07 NaN NaN NaN   NaN   NaN   NaN
2023-08-08 NaN NaN NaN   NaN   NaN   NaN
...         ..  ..  ..   ...   ...   ...
2024-05-24 NaN NaN NaN   NaN   NaN   NaN
2024-05-28 NaN NaN NaN   NaN   NaN   NaN
2024-05-29 NaN NaN NaN   NaN   NaN   NaN
2024-05-30 NaN NaN NaN   NaN   NaN   NaN
2024-05-31 NaN NaN NaN   NaN   NaN   NaN

[210 rows x 6 columns]
检查输入数据和目标数据
factors中NaN值的数量: Mkt_RF       0
SMB          0
HML          0
Risk_Free    0
dtype: int64
factors中无穷大值的数量: Mkt_RF       0
SMB          0
HML          0
Risk_Free    0
dtype: int64
residuals中NaN值的数量: 0       210
1       210
2       210
AAPL    210
GOOG    210
MSFT    210
dtype: int64
residuals中无穷大值的数量: 0       0
1       0
2       0
AAPL    0
GOOG    0
MSFT    0
dtype: int64
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: nan - val_loss: nan
Epoch 2/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 3/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 4/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 5/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 6/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 7/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 8/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 9/50
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: nan - val_loss: nan
Epoch 10/50
[1m6/6

ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense_4" is incompatible with the layer: expected axis -1 of input shape to have value 128, but received input with shape (32, 64)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 3), dtype=float32)
  • training=False
  • mask=None