In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the message file and the order book file. Neither CSV has a header
# row, so the column names are supplied explicitly.
message_df = pd.read_csv(
    'message_file.csv',
    header=None,
    names=['Time', 'Type', 'OrderID', 'Size', 'Price', 'Direction'],
)
orderbook_df = pd.read_csv(
    'orderbook_file.csv',
    header=None,
    names=['AskPrice1', 'AskSize1', 'BidPrice1', 'BidSize1'],
)

# Re-base the timestamps so that the earliest message sits at time zero.
# NOTE(review): the plot below labels this axis in seconds - confirm the raw unit.
message_df['Time'] = message_df['Time'].sub(message_df['Time'].min())

# Time axis plus best bid / best ask as (n_rows, 1) column vectors, the 2-D
# layout the scalers below are fitted on.
time_stamps = message_df['Time']
best_bid_prices = orderbook_df['BidPrice1'].to_numpy().reshape(-1, 1)
best_ask_prices = orderbook_df['AskPrice1'].to_numpy().reshape(-1, 1)

# Min-max scale each side with its own scaler; the fitted scalers are kept so
# that generated values can be mapped back to price units later.
scaler_bid = MinMaxScaler().fit(best_bid_prices)
scaled_bid_prices = scaler_bid.transform(best_bid_prices)

scaler_ask = MinMaxScaler().fit(best_ask_prices)
scaled_ask_prices = scaler_ask.transform(best_ask_prices)

# Build sliding-window samples for supervised training
def create_sequences(data, sequence_length):
    """Split a series into fixed-length windows and their next-step targets.

    Args:
        data: Sequence supporting ``len()``, integer indexing and slicing
            (for example a NumPy array or a list).
        sequence_length: Number of consecutive items in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays where ``X[i]`` is
        ``data[i:i + sequence_length]`` and ``y[i]`` is
        ``data[i + sequence_length]``. Both arrays are empty when ``data``
        has no more than ``sequence_length`` items.
    """
    window_starts = range(len(data) - sequence_length)
    windows = [data[start:start + sequence_length] for start in window_starts]
    targets = [data[start + sequence_length] for start in window_starts]
    return np.array(windows), np.array(targets)

# Number of consecutive scaled prices that make up one training window.
sequence_length = 100  # can be adjusted as needed
# Windowed inputs and next-step targets for each side of the book. The scaled
# price arrays are (n_rows, 1) column vectors, so each X has shape
# (n_rows - sequence_length, sequence_length, 1) and each y has shape
# (n_rows - sequence_length, 1).
X_bid, y_bid = create_sequences(scaled_bid_prices, sequence_length)
X_ask, y_ask = create_sequences(scaled_ask_prices, sequence_length)

# Build the LSTM model
def build_lstm_model(input_shape):
    """Create and compile a stacked two-layer LSTM model with one output unit.

    Args:
        input_shape: Shape of a single input sample; forwarded unchanged to
            the first LSTM layer as its ``input_shape`` argument.

    Returns:
        A compiled ``Sequential`` model made of an LSTM layer with 50 units
        that returns full sequences, a second LSTM layer with 50 units, and
        a ``Dense(1)`` output layer, compiled with the ``'adam'`` optimizer
        and ``'mse'`` loss.
    """
    layers = [
        # The first layer returns the whole sequence so that it can feed
        # the second recurrent layer.
        LSTM(50, return_sequences=True, input_shape=input_shape),
        LSTM(50),
        Dense(1),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mse')
    return network

# Train one LSTM model per side of the book, using identical settings.
window_shape = (sequence_length, 1)  # one window of single-feature time steps
fit_settings = dict(epochs=10, batch_size=32, verbose=1)

lstm_bid = build_lstm_model(window_shape)
lstm_bid.fit(X_bid, y_bid, **fit_settings)

lstm_ask = build_lstm_model(window_shape)
lstm_ask.fit(X_ask, y_ask, **fit_settings)

# Generate new data
def generate_lstm_data(model, seed_data, n_steps):
    """Roll a one-step-ahead model forward, feeding each prediction back in.

    Args:
        model: Object whose ``predict`` method takes a batch holding one
            window (``seed_data`` with a leading batch axis added) and
            returns a 2-D array whose ``[0, 0]`` entry is the next value.
        seed_data: 2-D array holding the initial window. It is not modified.
        n_steps: Number of values to generate.

    Returns:
        A 1-D NumPy array with the ``n_steps`` generated values, in order.
    """
    window = seed_data
    outputs = []
    for _step in range(n_steps):
        batch = window[np.newaxis, :, :]  # add the leading batch axis
        prediction = model.predict(batch)
        outputs.append(prediction[0, 0])
        # Slide the window: drop the oldest row and append the new prediction.
        shifted = window[1:]
        window = np.append(shifted, prediction, axis=0)
    return np.array(outputs)

# Generate as many values as there are real observations, seeding each model
# with the last training window of its own side.
generated_bid_prices = generate_lstm_data(
    lstm_bid, X_bid[-1], len(best_bid_prices)
)
generated_ask_prices = generate_lstm_data(
    lstm_ask, X_ask[-1], len(best_ask_prices)
)

# Undo the min-max scaling so the generated values are back in price units.
generated_bid_prices = scaler_bid.inverse_transform(
    generated_bid_prices.reshape(-1, 1)
)
generated_ask_prices = scaler_ask.inverse_transform(
    generated_ask_prices.reshape(-1, 1)
)

# Draw the real best bid / best ask together with the generated series.
# Each series is plotted against the first len(series) time stamps, so the
# generated values are overlaid on the real timeline rather than appended.
# NOTE(review): dividing by 10000 presumably converts the stored prices to
# currency units - confirm against the data source.
plot_specs = [
    (best_bid_prices, dict(label='Real Best Bid', color='blue', alpha=0.7)),
    (best_ask_prices, dict(label='Real Best Ask', color='red', alpha=0.7)),
    (generated_bid_prices, dict(label='Generated Best Bid', color='cyan', linestyle='--')),
    (generated_ask_prices, dict(label='Generated Best Ask', color='magenta', linestyle='--')),
]

plt.figure(figsize=(15, 10))
for series, line_options in plot_specs:
    plt.plot(time_stamps.iloc[:len(series)], series / 10000, **line_options)

plt.title('Comparison of Real and Generated Best Bid/Ask Prices')
plt.xlabel('Time (seconds after start)')
plt.ylabel('Price')
plt.legend()

plt.tight_layout()
plt.show()


2024-08-04 20:55:27.197134: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-04 20:55:27.269786: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-04 20:55:27.270824: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10
