In [1]:
from Tool.Stock_Tool import Stock_Tool
from Tool.Dataset_Loader import Dataset_Loader
import matplotlib.pyplot as plt
import os
import numpy as np
import pandas as pd
import plotly

In [2]:
# Load the dataset
stock_name = "2330"
dl = Dataset_Loader(stock_name,"20100101","20231231")
# input_data_type = dl.dataset[0].keys() # get all the data type
input_data_type = ['open','close','high','low']
data = dl.get(input_data_type)

In [3]:
df = pd.DataFrame(data, columns=input_data_type)
# Display the dataframe
#df.to_csv('hw1_stock2330.csv', index=False)

In [4]:
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.nn import Transformer, TransformerEncoder, TransformerEncoderLayer
import math
from torch.utils.data import Dataset, DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
from torch.profiler import profile, record_function, ProfilerActivity
from transformers import get_linear_schedule_with_warmup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


**Positional Encoding**  
讓 input $x_1$, $x_2$, ... $x_n$ 之間存在位置關係，但對此題目並無太大關係  
因為 `open`, `close` , `high`, `low` 這四者並不存在位置關係  
但舉例而言，對於 input 為句子的模型就存在著很大的關係，因為字詞存在著位置關係

In [5]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

對資料集做處理，並包在一個 torch 規格的 Class 裡   
1. data normalize  
2. data -> input vector, output vector

In [6]:
class StockDataset(Dataset):
    def __init__(self, df, seq_len=30):
        self.seq_len = seq_len
        self.prices = df[['open', 'high', 'low', 'close']].values
        # Normalize
        self.scaler = MinMaxScaler()
        self.prices = self.scaler.fit_transform(self.prices)
        
        # Create input & output pairs
        self.inputs = []
        self.outputs = []
        for i in range(len(self.prices)-seq_len-1):
            self.inputs.append(np.array(self.prices[i : i + seq_len]))
            self.outputs.append(np.array(self.prices[i + seq_len + 1]))
    
    def __getitem__(self, idx):
        return (torch.FloatTensor(self.inputs[idx]), torch.FloatTensor(self.outputs[idx]))
    
    def __len__(self):
        return len(self.inputs)

**Transformer Model**  
* 參數，對應架構圖
    1. `ntoken`: 原本我的 input `dim=4`, 經過 `expand_layer` 後變成 `dim=token`，為了增加模組的擬和。  
        舉例:原本只有 4 個參數去表達一個 $f(x)$ 但 input 產生的線段不夠表達 $f(x)$  
        增加 input 也就是增加線段去表達 $f(x)$
    2. `ninp`: number of input 原先 transformer 要  word embedding 時，需要的參數。
    3. `nhead`: self-attention 時計算 relation factor α 時，α 的數量，也就是 inputs 之間的各種不同的關聯。
    4. `nhid`: transformer 架構的 `FFN` `dim`
    5. `nlayers`: 整個 transformer 重複了 `nhead` + `FFN` 幾次
    6. `mask`: decoder 時會從 left->right 吃 input，也就是從 1->30
    7. `forward`: run all model through all layers  
    <div class="center">
        <img src="https://pic4.zhimg.com/80/v2-0c259fb2d439b98de27d877dcd3d1fcb_720w.webp" style="width: 30%; height: auto;" />
    </div>  

In [7]:
writer = SummaryWriter()
prof = profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, profile_memory=True, use_cuda=True)

  warn("use_cuda is deprecated, use activities argument instead")


In [8]:
class MyTransformerModel(nn.Module):
    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.3):
        super().__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.expand_layer = nn.Linear(4, ntoken)
        self.pos_encoder = PositionalEncoding(d_model=ntoken, dropout=dropout)
        encoder_layers = TransformerEncoderLayer(d_model=ntoken, nhead=nhead, dim_feedforward=nhid, dropout=dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.ninp = ninp
        #self.embedding = nn.Embedding(ntoken, ninp) 
        self.decoder = nn.Linear(ntoken, 4)
        self.init_weights()
        
    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask
    
    def init_weights(self):
        initrange = 0.1
        # self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)
    
    def forward(self, src):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask

        src = self.expand_layer(src)     # (batch_size, seq_len, ntoken)
        #src = self.pos_encoder(src)     # (128, 30, 4) 可用可不用      
        output = self.transformer_encoder(src, self.src_mask) # (128, 30, 4)
        output = self.decoder(output)
        return output

**Trainer**
To fit the model with 
* `criterion` (loss function)
* `optimizer`
* `dataloader`
* `scheduler`(mannaul adjust lr)

In [9]:
class MyTrainer():
    def __init__(self, model, criterion, optimizer, dataloaders, scheduler=None, num_epochs=64, early_stopping_patience=5):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.dataloaders = dataloaders
        self.scheduler = scheduler
        self.num_epochs = num_epochs
        self.early_stopping_patience = early_stopping_patience
        self.best_val_loss = float('inf')
        self.patience_cnt = 0
        
        
    def fit(self):
        for epoch in range(self.num_epochs):
            self.model.train()
            print(f"Epoch {epoch + 1}/{self.num_epochs}")
            running_loss = 0.0
            for i, batch in enumerate(self.dataloaders['train']):
                input_, target = batch      # (128, 30, 4), (128, 4)
                input_ = input_.to(device)
                target = target.to(device)
                self.optimizer.zero_grad() # zero the parameter gradients
                output = self.model(input_) # forward pass
                target = target[:, None, :].repeat(1, 30, 1)
                with record_function("model_inference"):
                    loss = self.criterion(output, target)
                    loss.backward() #   Calculate backpropagation values
                    self.optimizer.step() #     Update weights of the model
                
                if self.scheduler is not None:
                    self.scheduler.step()
        
                running_loss += loss.item()
                
            epoch_loss = running_loss / len(self.dataloaders['train'])
            print(f"Training Loss: {epoch_loss:.6f}")
            writer.add_scalar('training loss', epoch_loss, epoch)
            

            if(epoch==0):
                writer.add_graph(self.model, input_)
            
            # Validation step
            # prevent overfitting to training data
            val_running_loss = 0.0
            self.model.eval()
            with torch.no_grad():
                for j, batch in enumerate(self.dataloaders['val']):
                    input_, target = batch
                    input_ = input_.to(device)
                    target = target.to(device)
                    output = self.model(input_)
                    target = target[:, None, :].repeat(1, 30, 1)
                    loss = self.criterion(output, target)
                    val_running_loss += loss.item()

                    
            val_loss = val_running_loss / len(self.dataloaders['val'])
            print(f"Validation Loss: {val_loss:.6f}\n")
            
            writer.add_scalar('validation loss', val_loss, epoch)
        
            if self.scheduler is not None:
                self.scheduler.step()
            
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                self.best_model_wts = self.model.state_dict()
                self.patience_cnt = 0
            else:
                self.patience_cnt += 1
                if self.patience_cnt == self.early_stopping_patience:
                    print(f"Early stopping at epoch {epoch}")
                    break
    # def evaluate(self):
    #     self.model.eval()
    #     running_loss = 0.0
    #     total_predictions = []
    #     true_labels = []
    #     with torch.no_grad():
    #         for _, batch in enumerate(self.dataloaders['test']):
    #             input_, target = batch
    #             output = self.model(input_)
    #             input_ = input_.to(device)
    #             target = target.to(device)
    #             loss = self.criterion(output, target.view(*output.shape))
    #             running_loss += loss.item()
    #             predictions = output.argmax(dim=-1, keepdim=False)
    #             total_predictions.extend(predictions.cpu().detach().numpy())
    #             true_labels.extend(target.cpu().detach().numpy())

    #     avg_loss = running_loss / len(self.dataloaders['val'])
    #     accuracy = sum([int(tp == tl) for tp, tl in zip(total_predictions, true_labels)]) / len(true_labels)
    #     print(f"val Average Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")
    


In [10]:
if __name__ == "__main__":
    batch_size = 128
    num_epochs = 64
    early_stopping_patience = 8
    model = MyTransformerModel(ntoken=16, ninp=32, nhead=4, nhid=16, nlayers=3, dropout=0.3)
    
    stock_dataset = StockDataset(df)
    model = model.to(device)
    ndata = len(stock_dataset)
    ndata_train = int(stock_dataset.__len__()*0.8)


    train_dataset = Subset(stock_dataset, range(0, ndata_train))
    val_dataset = Subset(stock_dataset, range(ndata_train, ndata))
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    dataloaders = {'train': train_loader, 'val': val_loader}
    

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=len(train_loader)*num_epochs*0.1, num_training_steps=len(train_loader)*num_epochs)
    trainer = MyTrainer(model, 
                        criterion, 
                        optimizer, 
                        dataloaders=dataloaders, 
                        scheduler=scheduler, 
                        num_epochs=num_epochs, 
                        early_stopping_patience=early_stopping_patience)
    trainer.fit()

Epoch 1/64


Training Loss: 0.197978


  if self.src_mask is None or self.src_mask.size(0) != len(src):
  if self.src_mask is None or self.src_mask.size(0) != len(src):


Validation Loss: 0.364779

Epoch 2/64
Training Loss: 0.162993
Validation Loss: 0.261095

Epoch 3/64
Training Loss: 0.116871
Validation Loss: 0.143884

Epoch 4/64
Training Loss: 0.086755
Validation Loss: 0.076585

Epoch 5/64
Training Loss: 0.072681
Validation Loss: 0.057774

Epoch 6/64
Training Loss: 0.052938
Validation Loss: 0.033180

Epoch 7/64
Training Loss: 0.029695
Validation Loss: 0.016210

Epoch 8/64
Training Loss: 0.016843
Validation Loss: 0.018094

Epoch 9/64
Training Loss: 0.011354
Validation Loss: 0.023547

Epoch 10/64
Training Loss: 0.009300
Validation Loss: 0.025409

Epoch 11/64
Training Loss: 0.008536
Validation Loss: 0.022139

Epoch 12/64
Training Loss: 0.007423
Validation Loss: 0.018325

Epoch 13/64
Training Loss: 0.006843
Validation Loss: 0.017654

Epoch 14/64
Training Loss: 0.006411
Validation Loss: 0.014929

Epoch 15/64
Training Loss: 0.005968
Validation Loss: 0.013781

Epoch 16/64
Training Loss: 0.005631
Validation Loss: 0.014030

Epoch 17/64
Training Loss: 0.005323


In [11]:
import torch
import plotly.graph_objects as go

# Get the original data
original_data = df[['open', 'high', 'low', 'close']].values

# Get the predicted data
model.eval()
with torch.no_grad():
    # predicted_data = model(torch.FloatTensor(val_dataset.dataset.inputs).to(device)).cpu().numpy()
    batch_size = 128
    inputs = np.array(val_dataset.dataset.inputs)
    predicted_data = []

    for i in range(0, len(inputs), batch_size):
        batch_inputs = inputs[i:i+batch_size]
        batch_predicted_data = model(torch.FloatTensor(batch_inputs).to(device)).cpu().numpy()
        predicted_data.append(batch_predicted_data)

    predicted_data = np.concatenate(predicted_data)
    predicted_data = predicted_data[:, -1, :]
    predicted_data = stock_dataset.scaler.inverse_transform(predicted_data)
    
#Create the figure
fig = go.Figure()

# Add the original data trace
fig.add_trace(go.Scatter(x=df.index, y=original_data[:, 0], name='Original Open', mode='lines', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=df.index, y=original_data[:, 1], name='Original High', mode='lines', line=dict(color='green')))
fig.add_trace(go.Scatter(x=df.index, y=original_data[:, 2], name='Original Low', mode='lines', line=dict(color='red')))
fig.add_trace(go.Scatter(x=df.index, y=original_data[:, 3], name='Original Close', mode='lines', line=dict(color='orange')))

# Add the predicted data trace
fig.add_trace(go.Scatter(x=df.index[-len(predicted_data):], y=predicted_data[:, 0], name='Predicted Open', mode='lines', line=dict(color='lightblue')))
fig.add_trace(go.Scatter(x=df.index[-len(predicted_data):], y=predicted_data[:, 1], name='Predicted High', mode='lines', line=dict(color='lightgreen')))
fig.add_trace(go.Scatter(x=df.index[-len(predicted_data):], y=predicted_data[:, 2], name='Predicted Low', mode='lines', line=dict(color='pink')))
fig.add_trace(go.Scatter(x=df.index[-len(predicted_data):], y=predicted_data[:, 3], name='Predicted Close', mode='lines', line=dict(color='gold')))

# Set the layout
fig.update_layout(title='Original Data vs Predicted Data',
                  xaxis_title='Date',
                  yaxis_title='Price')

# Show the figure
fig.show()



In [12]:
model.to(device).cpu()
print(next(model.parameters()).device)
predicted_data_tensor = torch.from_numpy(predicted_data).float()
print(predicted_data_tensor.device)


cpu
cpu


In [16]:
# torch.save(model.state_dict(), 'model.pth')
# model.eval()
# inputs_tensor = torch.from_numpy(inputs).float()
# writer.add_graph(model, inputs_tensor)

# writer.add_text("Profiling", str(prof.key_averages().table(sort_by="cuda_time_total")))
writer.close()