In [2]:
import numpy as np
import import_ipynb
from akshare_demo import *
from sklearn.preprocessing import MinMaxScaler

In [65]:
class DataProcess:
    """Turn scaled close-price and volume series into sliding-window data sets.

    All data lives in class attributes that are computed once, when the class
    body executes: the K-line table is fetched through ``ak_data.stock_k_data``
    and two columns of it are min-max scaled to ``[0, 1]``.

    Each sample is a window of ``window_step`` consecutive rows with two
    features (scaled close, scaled volume); its label is the scaled close
    value of the row that immediately follows the window.

    Class attributes:
        close_sc / volume_sc: the fitted scalers, kept so predictions can be
            mapped back with ``inverse_transform``.
        seed: seed used when shuffling the training samples.
        window_step: number of rows in every input window.
        test_size: number of trailing rows reserved for the test split.
    """

    close_sc = MinMaxScaler(feature_range=(0, 1))
    volume_sc = MinMaxScaler(feature_range=(0, 1))
    # Normalisation: scale each feature column into [0, 1].
    # Despite the `_log` suffix no logarithm is applied here.
    # Presumably column 2 is the close price and column 5 the volume -- verify
    # against the frame returned by `ak_data.stock_k_data`.
    # NOTE(review): both scalers are fitted on the full series, i.e. including
    # the rows that later form the test split -- confirm this is intended.
    stock_data = ak_data.stock_k_data("105.AAPL")
    stock_close_prices_log = close_sc.fit_transform(stock_data.iloc[:, 2:3].values)
    stock_volume_log = volume_sc.fit_transform(stock_data.iloc[:, 5:6].values)
    # Random seed used for shuffling the training samples.
    seed = 7
    # Size of the sliding data window (rows per sample).
    window_step = 7
    # Number of trailing rows held out as the test split.
    test_size = 300

    def _split_index(self):
        """Return the row index at which the test split starts.

        Clamped at 0 so that a series shorter than ``test_size`` yields an
        empty training split and a test split holding every row.
        """
        return max(len(self.stock_close_prices_log) - self.test_size, 0)

    @property
    def train_split(self):
        """Scaled ``(close, volume)`` arrays without the last ``test_size`` rows."""
        split = self._split_index()
        return self.stock_close_prices_log[:split], self.stock_volume_log[:split]

    @property
    def test_split(self):
        """Scaled ``(close, volume)`` arrays holding the last ``test_size`` rows."""
        split = self._split_index()
        return self.stock_close_prices_log[split:], self.stock_volume_log[split:]

    def _make_windows(self, stock_close_prices, stock_volume):
        """Slice two aligned ``(rows, 1)`` arrays into windows and labels.

        Returns:
            tuple: ``(features, labels)`` where ``features`` is an ndarray of
            shape ``(samples, window_step, 2)`` and ``labels`` is a list with
            the close value following each window.
        """
        window = self.window_step
        label_rows = range(window, len(stock_close_prices))
        close_windows = [stock_close_prices[i - window:i, 0] for i in label_rows]
        volume_windows = [stock_volume[i - window:i, 0] for i in label_rows]
        labels = [stock_close_prices[i, 0] for i in label_rows]

        # Reshape with the explicit sample count (not -1) so that an empty
        # split still produces a well-formed (0, window, 1) array.
        sample_count = len(labels)
        close_block = np.array(close_windows).reshape((sample_count, window, 1))
        volume_block = np.array(volume_windows).reshape((sample_count, window, 1))
        features = np.concatenate((close_block, volume_block), axis=2)
        return features, labels

    def train_data_orgnize(self):
        """Build the ordered (unshuffled) training windows.

        Returns:
            tuple: ``(x, y)``; ``x`` is an ndarray of shape
            ``(samples, window_step, 2)`` and ``y`` is a plain list of labels.
        """
        stock_close_prices, stock_volume = self.train_split
        return self._make_windows(stock_close_prices, stock_volume)

    def test_data_orgnize(self):
        """Build the ordered test windows.

        Returns:
            tuple: ``(x, y)``; ``x`` is an ndarray of shape
            ``(samples, window_step, 2)`` and ``y`` is a 1-D ndarray of labels.
        """
        stock_close_prices, stock_volume = self.test_split
        x_test_data_set, y_test_data_set = self._make_windows(stock_close_prices, stock_volume)
        return x_test_data_set, np.array(y_test_data_set)

    def train_data_random(self):
        """Return the training windows and labels in a seeded random order.

        One permutation is drawn and applied to both arrays, so every window
        stays paired with its own label by construction.

        Side effect: re-seeds NumPy's global random state with ``self.seed``.

        Returns:
            tuple: ``(x, y)`` as ndarrays.
        """
        x_train_data_set, y_train_data_set = self.train_data_orgnize()
        y_train_data_set = np.array(y_train_data_set)
        np.random.seed(self.seed)
        order = np.random.permutation(len(y_train_data_set))
        return x_train_data_set[order], y_train_data_set[order]

    def data_save(self):
        """Write the four data-set arrays as ``.npy`` files into the cache directory.

        The directory comes from ``self.aksd.cache_path`` when an ``aksd``
        attribute has been set on the instance or class; otherwise the
        ``ak_data`` object that supplied the stock data is used.
        NOTE(review): assumes ``ak_data`` exposes ``cache_path`` -- confirm
        against ``akshare_demo``.
        """
        arrays = self.data_set()
        names = ('x_train_data_set', 'y_train_data_set', 'x_test_data_set', 'y_test_data_set')
        cache_path = getattr(self, 'aksd', ak_data).cache_path
        for name, array in zip(names, arrays):
            np.save('{}/{}.npy'.format(cache_path, name), array)

    def data_set(self):
        """Return ``(x_train, y_train, x_test, y_test)``.

        The training pair is shuffled with ``seed``; the test pair keeps its
        original row order.
        """
        x_train_data_set, y_train_data_set = self.train_data_random()
        x_test_data_set, y_test_data_set = self.test_data_orgnize()
        return x_train_data_set, y_train_data_set, x_test_data_set, y_test_data_set

stock(105.AAPL) k data exists


In [66]:
# Shared instance used by the demo cells below. The stock data itself is
# loaded and scaled when the DataProcess class body runs, not by this call.
dp = DataProcess()

# Test

In [67]:
if __name__ == '__main__':
    # Build all four arrays once; the following demo cells reuse these names.
    (x_train_data_set, y_train_data_set,
     x_test_data_set, y_test_data_set) = dp.data_set()
    # Show the shape and the first training window.
    print(x_train_data_set.shape, x_train_data_set[:1], sep='\n')

(978, 7, 2)
[[[0.57897683 0.21174015]
  [0.58491149 0.2075823 ]
  [0.5707262  0.22199089]
  [0.56932697 0.19974793]
  [0.56580477 0.18306671]
  [0.58109979 0.1753183 ]
  [0.58809594 0.16397978]]]


In [68]:
if __name__ == '__main__':
    # Show the shape and the first training label.
    print(y_train_data_set.shape, y_train_data_set[:1], sep='\n')

(978,)
[0.56802424]


In [69]:
if __name__ == '__main__':
    # Show the shape and the first test window.
    print(x_test_data_set.shape, x_test_data_set[:1], sep='\n')

(293, 7, 2)
[[[0.66196558 0.09202776]
  [0.66818974 0.08214371]
  [0.65945662 0.0747366 ]
  [0.67870808 0.14908229]
  [0.67340066 0.06737915]
  [0.68280927 0.08781534]
  [0.68975716 0.10678242]]]


In [70]:
if __name__ == '__main__':
    # Show the shape and the first test label.
    print(y_test_data_set.shape, y_test_data_set[:1], sep='\n')

(293,)
[0.67755009]
