记录索引：数据格式：[windows,numbers,labels]
labels索引：[open,high,low,close,volume,turn,pctChg]
              0    1    2   3     4     5      6

In [1]:
import gym
from gym import spaces
import numpy as np

class CustomEnv(gym.Env):
    def __init__(self, stock_data,windows=10,captial=300000):
        super(CustomEnv, self).__init__()

        # 加载股票数据
        self.initial_captial = captial
        self.stock_data = stock_data
        self.current_step = windows
        self.windows = windows
        
        # 定义观察空间和动作空间
        t = len(stock_data)
        n = len(stock_data[0])
        l = len(stock_data[0][0])
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(windows, n, l), dtype=np.float32)
        self.action_space = spaces.Box(low=0, high=1, shape=(n,), dtype=np.float32)

    def step(self, action):
        # 执行动作，返回新的状态、奖励和是否结束
        print('current_step:%s' % self.current_step)
        new_state = self.stock_data[self.current_step+1-self.windows:self.current_step+1]
        price = self.stock_data[self.current_step]
        label = self.stock_data[self.current_step+1]
        reward = self.calculate_reward(price,action)
        self.captial += reward
        print('current_step:%s,earns:%f,captial:%f' % (self.current_step,reward,self.captial))
        done = self.current_step+2>=len(self.stock_data)
        self.current_step += 1
        return new_state, reward, done, label

    def reset(self):
        # 重置环境，返回初始状态
        # 这部分需要根据你的具体需求来实现
        initial_state = self.stock_data[0:self.windows]
        initial_label = self.stock_data[self.current_step]
        self.captial = self.initial_captial
        self.current_step = self.windows
        return initial_state  ,initial_label

    def render(self, mode='human'):
        # 渲染环境（可选）
        pass

    def calculate_reward(self,price,action):
        # 计算回报
        open_price = price[:,0]
        pct_chg = price[:,6]
        for idx1,i in enumerate(action):
            if i<0:
                action[idx1] = 0
        action = action/np.sum(action)
        
        amounts = np.floor(action*self.captial/open_price/100)*100
        earns = 0
        
        valid_amounts = np.nan_to_num(amounts)  # 将NaN替换为0
        valid_open_price = np.nan_to_num(open_price)  # 将NaN替换为0
        consume = np.sum(valid_amounts * valid_open_price)
        print('consume:%f' % consume)
        
        for idx2,amount in enumerate(amounts):
            
            if amount>=100 and amount != np.inf:
                
                earn = amount*pct_chg[idx2]
                print('stock:%s ,earn:%f' %(stock_list[idx2],earn))
                earns+=earn
        
        return earns
        
        
        

In [2]:
def df_tolist(df):
    # 将pandas.dataframe转list
    array_ = np.array(df)
    list_ = array_.tolist()
    return list_[0]

In [3]:
import pandas as pd
import numpy as np
# myself
from config import stock_list
stock_list.remove('sh.000001')

# sz50
# sz50 = pd.read_csv('sz50_stocks.csv')
# stock_list = sz50.code.tolist()


ground = pd.read_csv('./data/sh.000001.csv')

ground_date = pd.to_datetime(ground['date'])
dates = ground_date.tolist()


datas = {}
for stock in stock_list:
    df = pd.read_csv('./data/%s.csv' % stock)
    df['date'] = pd.to_datetime(df['date'])
    data_ = pd.merge(ground_date, df, how='left', on='date')
    
    # 缺失值处理：日期对齐时会使得有些交易日的数据为空，所以需要对缺失数据进行填充
    data_.loc[:, ['volume','turn','pctChg']] = data_.loc[:, ['volume','turn','pctChg']].fillna(0)
    data_.loc[:, ['open', 'high', 'low', 'close']] = data_.loc[:, ['open', 'high', 'low', 'close']].fillna(method='pad')
    data_.loc[:, ['open', 'high', 'low', 'close']] = data_.loc[:, ['open', 'high', 'low', 'close']].fillna(0)
    data_['code'] = stock
    data_.pctChg = data_.pctChg/100
    
    datas[stock] = data_

date_data = []
for date in dates:
    stock_data = []
    for stock in stock_list:
        data_stock = datas[stock]
        data_date = data_stock[data_stock['date'] == date]
        data_list = data_date.iloc[:,2:]
        stock_data.append(df_tolist(data_list))
    date_data.append(stock_data)

data = np.array(date_data)

data.shape

(132, 27, 7)

In [6]:
env = CustomEnv(stock_data=data, windows=10, captial=30000)
state = env.reset()  # 重置环境并获取初始状态
done = False
while not done:
    action = env.action_space.sample()  # 随机选择一个动作
    
    next_state, reward, done, _ = env.step(action)  # 执行动作并获取下一个状态、奖励和是否结束的信息
    # 在这里可以根据需要进行其他操作，例如训练智能体等
    state = next_state  # 更新当前状态

current_step:10
consume:15698.000000
stock:sh.601398 ,earn:12.839508
stock:sz.000617 ,earn:34.782608
stock:sh.600358 ,earn:3.055556
stock:sh.600028 ,earn:0.003656
stock:sh.601390 ,earn:8.602152
stock:sh.600118 ,earn:19.235837
stock:sh.600776 ,earn:39.539748
stock:sh.601727 ,earn:4.289544
stock:sh.601186 ,earn:-0.389106
stock:sz.000063 ,earn:11.375661
stock:sz.002463 ,earn:46.575340
stock:sz.000725 ,earn:65.126050
stock:sz.002432 ,earn:124.022108
stock:sz.000807 ,earn:6.693711
current_step:10,earns:375.752373,captial:30375.752373
current_step:11
consume:14420.000000
stock:sh.601628 ,earn:-16.958444
stock:sz.000617 ,earn:-8.504801
stock:sh.600028 ,earn:-33.482852
stock:sh.600039 ,earn:-47.632580
stock:sh.600072 ,earn:-28.490028
stock:sh.601727 ,earn:-56.555268
stock:sh.601186 ,earn:-36.257310
stock:sz.000063 ,earn:0.950119
stock:sz.002463 ,earn:-59.398497
stock:sz.002432 ,earn:-21.260583
stock:sz.002245 ,earn:-38.377536
stock:sz.000807 ,earn:-26.190476
current_step:11,earns:-372.158256,c

  amounts = np.floor(action*self.captial/open_price/100)*100


In [4]:
max_value = ground.iloc[:,2:].max().values
max_value.shape

(7,)

In [5]:
from scipy.optimize import minimize
def portfolio_performance(weights, returns, cov_matrix):
    returns = np.sum(returns*weights ) *12
    std = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights))) * np.sqrt(12)
    return std, returns


def minimize_volatility(weights, returns, cov_matrix):
    return portfolio_performance(weights, returns, cov_matrix)[0]

def mkws(returns,history_pct):
    # 马克维斯计算投资比例

    
    cov_matrix =np.cov(history_pct)
    
    num_assets = len(returns)
    args = (returns, cov_matrix)
    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
    bounds = tuple((0,1) for asset in range(num_assets))
    initial_guess = num_assets*[1./num_assets,]

    optimal_volatility = minimize(minimize_volatility, initial_guess, args=args, bounds=bounds, constraints=constraints)
    
    
    # 输出结果
    weights = optimal_volatility['x']
    return weights 
    
    


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(None,7)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')


In [6]:
from keras.models import load_model
model = load_model('model_20.h5')

In [8]:
env = CustomEnv(stock_data=data, windows=24, captial=30000)
state,label = env.reset()  # 重置环境并获取初始状态
done = False

while not done:
    x_train = state/max_value
    y_train = label[:,6]/max_value[6]
    x_train = np.transpose(x_train, (1, 0, 2))
    # model.fit(x_train, y_train, batch_size=1, epochs=1)
    
    predict_value = (model.predict(x_train))*max_value[6]
    
    history_pct = np.transpose(state[:,:,6], (1, 0))
    action = mkws(predict_value[:,0], history_pct)
    
    next_state, reward, done, next_label = env.step(action)  # 执行动作并获取下一个状态、奖励和是否结束的信息
    # 在这里可以根据需要进行其他操作，例如训练智能体等
    state = next_state  # 更新当前状态
    label = next_label


current_step:24
consume:0.000000
current_step:24,earns:0.000000,captial:30000.000000
current_step:25
consume:14715.000000
stock:sh.603099 ,earn:3562.334802
current_step:25,earns:3562.334802,captial:33562.334802
current_step:26
consume:0.000000
current_step:26,earns:0.000000,captial:33562.334802


  amounts = np.floor(action*self.captial/open_price/100)*100


current_step:27
consume:0.000000
current_step:27,earns:0.000000,captial:33562.334802
current_step:28
consume:0.000000
current_step:28,earns:0.000000,captial:33562.334802
current_step:29
consume:0.000000
current_step:29,earns:0.000000,captial:33562.334802
current_step:30
consume:0.000000
current_step:30,earns:0.000000,captial:33562.334802
current_step:31
consume:0.000000
current_step:31,earns:0.000000,captial:33562.334802
current_step:32
consume:32508.000000
stock:sh.600958 ,earn:3397.208364
current_step:32,earns:3397.208364,captial:36959.543166
current_step:33
consume:28247.000000
stock:sh.601398 ,earn:452.674880
stock:sz.000895 ,earn:3.495935
stock:sh.600028 ,earn:106.708268
stock:sh.600776 ,earn:47.796864
current_step:33,earns:610.675947,captial:37570.219113
current_step:34
consume:29464.000000
stock:sh.601398 ,earn:-238.817004
stock:sz.000895 ,earn:2.055992
stock:sh.600358 ,earn:30.929175
stock:sh.603099 ,earn:59.112029
stock:sh.600776 ,earn:16.576251
current_step:34,earns:-130.1435

mkws:
10:24 44000
20: 24 44696
30:24 44696

lstm:
30: none


In [1]:
a =[1,23,4,5,6,7,8,9,0]
print(a[-3:])

[8, 9, 0]
