In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
data = pd.read_csv("USD_JPY.csv")
colmuns = ["終値", "始値", "高値", "安値"] #使用するデータの選択
processed_data = data[colmuns].iloc[::-1].to_numpy()

In [5]:
data

Unnamed: 0,日付け,終値,始値,高値,安値,出来高,変化率 %
0,2024-02-19,150.17,150.15,150.21,149.88,,0.01%
1,2024-02-18,150.15,150.10,150.24,150.04,,-0.04%
2,2024-02-16,150.21,149.93,150.65,149.82,,0.20%
3,2024-02-15,149.91,150.57,150.60,149.54,,-0.43%
4,2024-02-14,150.55,150.79,150.82,150.35,,-0.16%
...,...,...,...,...,...,...,...
8893,1990-01-08,144.15,144.10,144.80,143.85,,-0.28%
8894,1990-01-05,144.55,144.50,144.65,143.00,,0.82%
8895,1990-01-04,143.37,143.33,145.94,142.80,,-1.38%
8896,1990-01-03,145.37,145.32,146.90,145.05,,-0.84%


In [31]:
def split_data(dataframe, train_rate):
    dataframe = dataframe
    data_length = len(dataframe)
    train_data_length = int(data_length * train_rate)

    train_array = dataframe[:train_data_length]
    test_array = dataframe[train_data_length:]

    return train_array, test_array

train_array, test_array = split_data(processed_data, train_rate=0.8)



In [32]:
def datapreprocess(data_array):
    ss = StandardScaler()
    return ss.fit_transform(data_array)
    

In [33]:
train_array, test_array = split_data(processed_data, train_rate=0.8)

preprocessed_train_data = datapreprocess(train_array)
preprocessed_test_data = datapreprocess(test_array)


In [7]:
def checkBatchMaxIndex(data_length, batch_size):
    batch_max_index = 0
    for index in reversed(range(data_length)):
        if (index + 1)%batch_size == 0:
            batch_max_index = index + 1
            break
        else:
            pass

    return batch_max_index

def bachifyDataset(data_array, seq_len, batch_size, num_inputs):

    data_length = len(data_array)

    batch_encoder_input = np.zeros((1, seq_len, 1, num_inputs))
    batch_decoder_input = np.zeros((1, 1, 1, num_inputs))
    batch_decoder_output = np.zeros((1, 1, 1, num_inputs))

    for index in reversed(range(data_length)):
        encoder_input_start = index - seq_len - 1
        encoder_input_end = index - 1
        decoder_input_index = encoder_input_end
        decoder_output_index = decoder_input_index + 1
        if encoder_input_start < 0:
            break
        else:
            
            encoder_input = data_array[encoder_input_start:encoder_input_end].reshape(1, seq_len, 1, num_inputs)
            decoder_input = data_array[decoder_input_index].reshape(1, 1, 1, num_inputs)
            decoder_output = data_array[decoder_output_index].reshape(1, 1, 1, num_inputs)

            batch_encoder_input = np.concatenate([encoder_input, batch_encoder_input], axis=0)
            batch_decoder_input = np.concatenate([decoder_input, batch_decoder_input], axis=0)
            batch_decoder_output = np.concatenate([decoder_output, batch_decoder_output], axis=0)

    batch_encoder_input = batch_encoder_input[:-1]
    batch_decoder_input = batch_decoder_input[:-1]
    batch_decoder_output = batch_decoder_output[:-1]

    data_length = len(batch_encoder_input)
    batch_max_index = checkBatchMaxIndex(data_length, batch_size)

    return torch.tensor(batch_encoder_input[-batch_max_index:], dtype=torch.float), torch.tensor(batch_decoder_input[-batch_max_index:].reshape(-1, 1, num_inputs), dtype=torch.float), torch.tensor(batch_decoder_output[-batch_max_index:], dtype=torch.float)[:, :, :, 0].reshape(-1)



In [34]:
batch_train_encoder_input, batch_train_decoder_input, batch_train_decoder_output = bachifyDataset(data_array=preprocessed_train_data, seq_len=64, batch_size=60, num_inputs=4)
batch_test_encoder_input, batch_test_decoder_input, batch_test_decoder_output = bachifyDataset(data_array=preprocessed_test_data, seq_len=64, batch_size=60, num_inputs=4)

In [94]:
class CNN(nn.Module):
    def __init__(self, num_inputs, num_filters, kernel_size, pool_size):
        super().__init__()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.num_inputs = num_inputs
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.pool_size = pool_size
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels=num_filters, out_channels=num_filters, kernel_size=kernel_size, bias=True).to(device),
            nn.ReLU().to(device),
            nn.MaxPool2d(kernel_size=pool_size, stride=1).to(device)
        )

    def forward(self, x):
        x = self.cnn(x)
        x = x.reshape(-1, self.num_filters, self.num_inputs)
        
        return x

class Encoder(nn.Module):
    def __init__(self, batch_size, seq_len, input_size, hidden_size):
        super().__init__()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.input_weight = nn.Linear(input_size, hidden_size*4, bias=True).to(device)
        self.h_weight = nn.Linear(hidden_size, hidden_size*4, bias=True).to(device)


    def forward(self, x):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        h_prev = torch.rand(((self.batch_size, self.hidden_size))).to(device)
        c_prev = torch.rand(((self.batch_size, self.hidden_size))).to(device)
        hs = torch.zeros((self.batch_size, 1, self.hidden_size)).to(device)
        cs = torch.zeros((self.batch_size, 1, self.hidden_size)).to(device)
        
        for seq_idx in range(self.seq_len):
            tmp_x = self.input_weight(x[:, seq_idx, :])
            tmp_h = self.h_weight(h_prev)
            
            f = F.sigmoid(tmp_x[:, :self.hidden_size]+tmp_h[:, :self.hidden_size])
            i = F.tanh(F.sigmoid(tmp_x[:, self.hidden_size:self.hidden_size*2]+tmp_h[:, self.hidden_size:self.hidden_size*2])) + 0.2
            c = F.tanh(tmp_x[:, self.hidden_size*2:self.hidden_size*3]+tmp_h[:, self.hidden_size*2:self.hidden_size*3])
            o = F.sigmoid(tmp_x[:, self.hidden_size*3:self.hidden_size*4]+tmp_h[:, self.hidden_size*3:self.hidden_size*4])
            c_next = f*c_prev + i*c
            h_next = o*F.tanh(c_next)

            h_prev = h_next
            c_prev = c_next
            hs = torch.concat([hs, h_next.reshape(self.batch_size, 1, self.hidden_size)], dim=1)
            cs = torch.concat([cs, c_next.reshape(self.batch_size, 1, self.hidden_size)], dim=1)
        
        hs = hs[:, 1:, :]
        cs = cs[:, 1, :]
        h = hs[:, 0, :].reshape(-1, 1, self.hidden_size)
        return h, hs

class AttentionDecoder(nn.Module):
    def __init__(self, batch_size, input_size, hidden_size):
        super().__init__()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.input_weight = nn.Linear(input_size, hidden_size*4, bias=True).to(device)
        self.h_weight = nn.Linear(hidden_size, hidden_size*4, bias=True).to(device)
        self.output_weight = nn.Linear(hidden_size*2, 1, bias=True).to(device)

    def AttentionLayer(self, hs, decoder_output):
        query = decoder_output
        key, value = hs, hs
        score = F.softmax(query@torch.transpose(key, 1, 2))
        attention = score@value

        return attention

    def forward(self, hs, h, x):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        h_prev = h
        c_prev = torch.rand(((self.batch_size, 1, self.hidden_size))).to(device)
        
        tmp_x = self.input_weight(x)
        tmp_h = self.h_weight(h_prev)
        f = F.sigmoid(tmp_x[:, :, :self.hidden_size]+tmp_h[:, :, :self.hidden_size])
        i = F.tanh(F.sigmoid(tmp_x[:, :, self.hidden_size:self.hidden_size*2]+tmp_h[:, :, self.hidden_size:self.hidden_size*2])) + 0.2
        c = F.tanh(tmp_x[:, :, self.hidden_size*2:self.hidden_size*3]+tmp_h[:, :, self.hidden_size*2:self.hidden_size*3])
        o = F.sigmoid(tmp_x[:, :, self.hidden_size*3:self.hidden_size*4]+tmp_h[:, :, self.hidden_size*3:self.hidden_size*4])
        c_next = f*c_prev + i*c
        h_next = o*F.tanh(c_next)

        attention = self.AttentionLayer(hs, h_next)

        output = torch.concat([attention, h_next], dim=-1)
        output = self.output_weight(output).reshape(-1)

        return output

class CNN_STLSTM_AM(nn.Module):
    def __init__(self, batch_size, seq_len, num_inputs, num_filters, kernel_size, pool_size,  input_size, hidden_size):
        super().__init__()

        self.cnn = CNN(num_inputs=num_inputs, num_filters=num_filters, kernel_size=kernel_size, pool_size=pool_size)
        self.encoder = Encoder(batch_size=batch_size, seq_len=seq_len, input_size=input_size, hidden_size=hidden_size)
        self.decoder = AttentionDecoder(batch_size=batch_size, input_size=input_size, hidden_size=hidden_size)

    def forward(self, batch_encoder_input, batch_decoder_input):
        cnn_output = self.cnn(batch_encoder_input)
        h, hs = self.encoder(cnn_output)
        output = self.decoder(hs, h, batch_decoder_input)

        return output
    


In [95]:
batch_size=60
seq_len=64
num_inputs=4
num_filters=64
kernel_size=1
pool_size=1
input_size=4
hidden_size=64


model = CNN_STLSTM_AM(batch_size, seq_len, num_inputs, num_filters, kernel_size, pool_size,  input_size, hidden_size)

In [96]:
def get_mini_batches(dataset, batch_size):
    mini_batches = []
    data_length = len(dataset)
    num_batches = data_length // batch_size

    for i in range(num_batches):
        start_index = i * batch_size
        end_index = start_index + batch_size
        mini_batch = dataset[start_index:end_index]
        mini_batches.append(mini_batch)

    return mini_batches

train_encoder_input = get_mini_batches(batch_train_encoder_input, batch_size=60)
train_decoder_input = get_mini_batches(batch_train_decoder_input, batch_size=60)
train_decoder_output = get_mini_batches(batch_train_decoder_output, batch_size=60)

test_encoder_input = get_mini_batches(batch_test_encoder_input, batch_size=60)
test_decoder_input = get_mini_batches(batch_test_decoder_input, batch_size=60)
test_decoder_output = get_mini_batches(batch_test_decoder_output, batch_size=60)


In [None]:
あなたにはmodel, batch_encoder_input, batch_decoder_input, batch_decoder_output, learning_rate, num_epochsを入力するとモデルが訓練、検証を行う関数を策してしてもらいます。ただし、この関数内でget_mini_batches、split_datasetを使用し訓練、評価用のlist型データセットを用意すること。また活性化関数はadam、学習率は0.001、損失関数はmae、エポックは50とします。また各イテレーション、エポック毎にmaeの結果をprint文で出力してください。

In [97]:
import torch.optim as optim
import torch.nn.functional as F

def train_and_evaluate_model(model, processed_data, learning_rate, num_epochs):
    train_array, test_array = split_data(processed_data, train_rate=0.8)

    preprocessed_train_data = datapreprocess(train_array)
    preprocessed_test_data = datapreprocess(test_array)

    batch_train_encoder_input, batch_train_decoder_input, batch_train_decoder_output = bachifyDataset(data_array=preprocessed_train_data, seq_len=64, batch_size=60, num_inputs=4)
    batch_test_encoder_input, batch_test_decoder_input, batch_test_decoder_output = bachifyDataset(data_array=preprocessed_test_data, seq_len=64, batch_size=60, num_inputs=4)

    train_encoder_input = get_mini_batches(batch_train_encoder_input, batch_size=60)
    train_decoder_input = get_mini_batches(batch_train_decoder_input, batch_size=60)
    train_decoder_output = get_mini_batches(batch_train_decoder_output, batch_size=60)

    test_encoder_input = get_mini_batches(batch_test_encoder_input, batch_size=60)
    test_decoder_input = get_mini_batches(batch_test_decoder_input, batch_size=60)
    test_decoder_output = get_mini_batches(batch_test_decoder_output, batch_size=60)

    device = torch.device("cuda")
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.L1Loss()

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        for i in range(len(train_encoder_input)):
            encoder_input_batch = train_encoder_input[i].to(device)
            decoder_input_batch = train_decoder_input[i].to(device)
            decoder_output_batch = train_decoder_output[i].to(device)
            optimizer.zero_grad()
            output = model(encoder_input_batch, decoder_input_batch)
            loss = criterion(output, decoder_output_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
                  
        train_loss /= len(train_encoder_input)
        print(f"[Train] Epoch:{epoch + 1}/{num_epochs}train_loss {train_loss:.4f}")
        model.eval()
        eval_loss = 0.0

        with torch.no_grad():
            for i in range(len(test_encoder_input)):
                encoder_input_batch = test_encoder_input[i].to(device)
                decoder_input_batch = test_decoder_input[i].to(device)
                decoder_output_batch = test_decoder_output[i].to(device)
                output = model(encoder_input_batch, decoder_input_batch)
                loss = criterion(output, decoder_output_batch)
                eval_loss += loss.item()

            eval_loss /= len(test_encoder_input)
            print(f"[Eval] Epoch:{epoch + 1}/{num_epochs} eval_loss {eval_loss:.4f}")
            
        print(f" Epoch {epoch+1}/{num_epochs}: Train Loss: {train_loss:.4f}, Eval Loss: {eval_loss:.4f}")


In [98]:
train_and_evaluate_model(model, processed_data, learning_rate=0.001, num_epochs=250)

  score = F.softmax(query@torch.transpose(key, 1, 2))


[Train] Epoch:1/250train_loss 0.5039
[Eval] Epoch:1/250 eval_loss 0.2236
 Epoch 1/250: Train Loss: 0.5039, Eval Loss: 0.2236
[Train] Epoch:2/250train_loss 0.2163
[Eval] Epoch:2/250 eval_loss 0.1975
 Epoch 2/250: Train Loss: 0.2163, Eval Loss: 0.1975
[Train] Epoch:3/250train_loss 0.1633
[Eval] Epoch:3/250 eval_loss 0.1895
 Epoch 3/250: Train Loss: 0.1633, Eval Loss: 0.1895
[Train] Epoch:4/250train_loss 0.1387
[Eval] Epoch:4/250 eval_loss 0.1121
 Epoch 4/250: Train Loss: 0.1387, Eval Loss: 0.1121
[Train] Epoch:5/250train_loss 0.1188
[Eval] Epoch:5/250 eval_loss 0.1506
 Epoch 5/250: Train Loss: 0.1188, Eval Loss: 0.1506
[Train] Epoch:6/250train_loss 0.1099
[Eval] Epoch:6/250 eval_loss 0.1202
 Epoch 6/250: Train Loss: 0.1099, Eval Loss: 0.1202
[Train] Epoch:7/250train_loss 0.0961
[Eval] Epoch:7/250 eval_loss 0.1416
 Epoch 7/250: Train Loss: 0.0961, Eval Loss: 0.1416
[Train] Epoch:8/250train_loss 0.0915
[Eval] Epoch:8/250 eval_loss 0.1184
 Epoch 8/250: Train Loss: 0.0915, Eval Loss: 0.1184
