<a href="https://colab.research.google.com/github/hongqin/Python-CoLab-bootcamp/blob/master/transformer_pilot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TransformerBlock(nn.Module):
    """One encoder block: self-attention, then a two-layer feed-forward net.

    Each sub-layer is wrapped the same way: its output goes through dropout,
    is added back to the sub-layer input (residual connection) and the sum is
    layer-normalised.

    Args:
        d_model: Size of the feature (embedding) dimension.
        nhead: Number of attention heads.
        dim_feedforward: Width of the hidden layer in the feed-forward net.
        dropout: Dropout probability, used inside the attention module and on
            both sub-layer outputs.
    """

    def __init__(self, d_model, nhead, dim_feedforward, dropout=0.1):
        super().__init__()
        # Submodules are created in a fixed order so that seeded parameter
        # initialisation stays reproducible.
        self.attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, dim_feedforward),
            nn.ReLU(),
            nn.Linear(dim_feedforward, d_model),
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        # Trace message showing when a block gets constructed.
        print("TransformerBlock.__init__()")

    def forward(self, x):
        """Apply the block to ``x`` and return a tensor of the same shape.

        ``x`` is used as query, key and value of the attention module.
        NOTE(review): the attention module keeps its default layout
        (``batch_first=False``), so ``x`` is presumably sequence-first or an
        unbatched 2-D sequence -- confirm against the caller.
        """
        # Trace message showing each forward pass through a block.
        print("TransformerBlock.forward()")

        # Self-attention sub-layer; the attention weights are not needed.
        attended = self.attn(x, x, x)[0]
        hidden = self.norm1(x + self.dropout(attended))

        # Position-wise feed-forward sub-layer.
        transformed = self.feed_forward(hidden)
        return self.norm2(hidden + self.dropout(transformed))

class Transformer(nn.Module):
    """A stack of ``TransformerBlock`` layers topped by a linear read-out.

    Args:
        d_model: Size of the feature (embedding) dimension.
        nhead: Number of attention heads in every block.
        num_layers: How many blocks to stack.
        dim_feedforward: Hidden width of each block's feed-forward net.
        dropout: Dropout probability handed to every block.
    """

    def __init__(self, d_model, nhead, num_layers, dim_feedforward, dropout=0.1):
        # Trace message; emitted before the nn.Module machinery is set up.
        print("Transformer.__init__()")
        super().__init__()

        # Blocks are registered first and the read-out last, so parameter
        # creation order is: block 0, block 1, ..., fc.
        self.encoder_layers = nn.ModuleList()
        for _ in range(num_layers):
            self.encoder_layers.append(
                TransformerBlock(d_model, nhead, dim_feedforward, dropout)
            )
        # Projects the d_model features down to a single output value.
        self.fc = nn.Linear(d_model, 1)

    def forward(self, x):
        """Run ``x`` through every block in order, then through the read-out.

        The result has the shape of ``x`` with the last dimension replaced
        by 1.
        """
        # Trace message showing each forward pass through the model.
        print("Transformer.forward()")
        hidden = x
        for block in self.encoder_layers:
            hidden = block(hidden)
        return self.fc(hidden)


In [5]:
# Define the model
# Hyper-parameters for a deliberately tiny network.
d_model = 8           # feature width; nn.MultiheadAttention needs it divisible by nhead
nhead = 4             # attention heads per block
num_layers = 2        # number of stacked TransformerBlock layers
dim_feedforward = 20  # hidden width of each block's feed-forward net

# dropout is left at the constructor's default.
model = Transformer(
    d_model=d_model,
    nhead=nhead,
    num_layers=num_layers,
    dim_feedforward=dim_feedforward,
)

Transformer.__init__()
TransformerBlock.__init__()
TransformerBlock.__init__()


In [6]:
# Define the input and target data
# Two rows of eight float32 features, and one scalar target per row.
# NOTE(review): the model's attention layers use the default
# batch_first=False layout, so this 2-D (2, 8) tensor is presumably read as
# one unbatched sequence of length 2 rather than two independent samples --
# confirm that this is the intent.
input_data = torch.tensor(
    [
        [1, 2, 3, 4, 5, 6, 7, 8],
        [8, 7, 6, 5, 4, 3, 2, 1],
    ],
    dtype=torch.float32,
)
target_data = torch.tensor([[2], [3]], dtype=torch.float32)

# Define the optimizer and loss function
# Adam updates every model parameter; the objective is mean squared error.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

In [9]:
# Train the model
# Every epoch is a single full-batch update: clear the old gradients, run the
# forward pass, measure the loss, back-propagate and let the optimizer step.
for epoch in range(5):
    optimizer.zero_grad()
    output = model(input_data)
    loss = criterion(output, target_data)
    loss.backward()
    optimizer.step()

    # NOTE(review): with 5 epochs and a reporting interval of 50 this holds
    # for epoch 0 only, so the loss is printed exactly once.
    if epoch % 50 == 0:
        print(f'Epoch: {epoch+1}, Loss: {loss.item()}')


Transformer.forward()
TransformerBlock.forward()
TransformerBlock.forward()
Epoch: 1, Loss: 0.016243206337094307
Transformer.forward()
TransformerBlock.forward()
TransformerBlock.forward()
Transformer.forward()
TransformerBlock.forward()
TransformerBlock.forward()
Transformer.forward()
TransformerBlock.forward()
TransformerBlock.forward()
Transformer.forward()
TransformerBlock.forward()
TransformerBlock.forward()


In [2]:
# Build a (2, 5, 1) float32 tensor: two rows of five steps, one feature each.
input_data = torch.tensor(
    [
        [1, 2, 3, 4, 5],
        [5, 4, 3, 2, 1],
    ],
    dtype=torch.float32,
).unsqueeze(-1)  # append a trailing feature axis of size 1
# Bare expression so the notebook displays the tensor.
input_data

tensor([[[1.],
         [2.],
         [3.],
         [4.],
         [5.]],

        [[5.],
         [4.],
         [3.],
         [2.],
         [1.]]])

In [3]:
# Display the tensor's dimensions as a torch.Size.
input_data.size()


torch.Size([2, 5, 1])