In [1]:
import os
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
from torchvision.utils import save_image

import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import coint
from statistics import mean

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
df = pd.read_excel('./data/酒类股票收盘价.xlsx', header=1, index_col=0, skiprows=0)
n = df.shape[1]
keys = df.columns
pairs = []
for i in range(n):
    for j in range(i+1, n):
        pair = df[[keys[i], keys[j]]].dropna()
        S1 = pair[keys[i]]
        S2 = pair[keys[j]]
        result = coint(S1, S2)
        pvalue = result[1]
        if pvalue < 1:
            pairs.append((keys[i], keys[j]))
print(len(pairs))

X_train = []
for pair in pairs:
    data = df[list(pair)]
    ratios = data.iloc[:, 0]/data.iloc[:, 1]
    ma1 = ratios.rolling(window=5, center=False).mean()
    ma2 = ratios.rolling(window=60, center=False).mean()
    std = ratios.rolling(window=60, center=False).std()
    zscore = ((ma1 - ma2)/std).dropna()
    
    size = 90
    X_train.extend([np.array(zscore.iloc[i:i+size].values) for i in range(0, len(zscore)-size, 5)])

train_size = int(len(X_train) * 0.7)
print('train size:%d' % train_size)
X_train = np.stack(X_train, axis=0)
X_test = X_train[train_size:]
X_train = X_train[:train_size]

586
train size:97832


In [3]:
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(90, 32),
            nn.ReLU(True),
            nn.Linear(32, 16),
            nn.ReLU(True),
            nn.Linear(16, 8)
        )
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(True),
            nn.Linear(16, 32),
            nn.ReLU(True),
            nn.Linear(32, 90),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [4]:
num_epochs = 50
batch_size = 128
learning_rate = 1e-3

model = autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    train_loss = []
    for i in range(0, X_train.shape[0], batch_size):
        inputs = torch.tensor(X_train[i:i + batch_size],  dtype=torch.float, device=device)
        inputs = torch.nan_to_num(inputs, posinf=20.0, neginf=-20.0)
        outputs = model(inputs)
        optimizer.zero_grad()
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
    print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, mean(train_loss)))
    
    if (epoch + 1) % 10 == 0:
        model.eval()
        test_loss = []
        with torch.no_grad():
            for i in range(0, X_test.shape[0], batch_size):
                inputs = torch.tensor(X_test[i:i + batch_size],  dtype=torch.float, device=device)
                inputs = torch.nan_to_num(inputs, posinf=20.0, neginf=-20.0)
                outputs = model(inputs)
                loss = criterion(outputs, inputs)
                test_loss.append(loss.item())
            
        print('epoch [{}/{}], test loss:{:.4f}'.format(epoch + 1, num_epochs, mean(test_loss)))

torch.save(model.state_dict(), './sim_autoencoder.pth')

epoch [1/50], loss:0.4509
epoch [2/50], loss:0.1874
epoch [3/50], loss:0.1327
epoch [4/50], loss:0.1195
epoch [5/50], loss:0.1028
epoch [6/50], loss:0.0994
epoch [7/50], loss:0.0986
epoch [8/50], loss:0.0980
epoch [9/50], loss:0.0970
epoch [10/50], loss:0.0968
epoch [10/50], test loss:0.0984
epoch [11/50], loss:0.0962
epoch [12/50], loss:0.0958
epoch [13/50], loss:0.0959
epoch [14/50], loss:0.0953
epoch [15/50], loss:0.0962
epoch [16/50], loss:0.0952
epoch [17/50], loss:0.0954
epoch [18/50], loss:0.0950
epoch [19/50], loss:0.0951
epoch [20/50], loss:0.0956
epoch [20/50], test loss:0.1004
epoch [21/50], loss:0.0949
epoch [22/50], loss:0.0949
epoch [23/50], loss:0.0950
epoch [24/50], loss:0.0952
epoch [25/50], loss:0.0949
epoch [26/50], loss:0.0951
epoch [27/50], loss:0.0949
epoch [28/50], loss:0.0949
epoch [29/50], loss:0.0950
epoch [30/50], loss:0.0949
epoch [30/50], test loss:0.0976
epoch [31/50], loss:0.0953
epoch [32/50], loss:0.0948
epoch [33/50], loss:0.0952
epoch [34/50], loss:0.