In [14]:
!pip install nbconvert[qtpdf]

Collecting pyqtwebengine>=5.15
  Downloading PyQtWebEngine-5.15.6-cp37-abi3-manylinux1_x86_64.whl (230 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m230.4/230.4 kB[0m [31m404.7 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting PyQtWebEngine-Qt5>=5.15.0
  Downloading PyQtWebEngine_Qt5-5.15.2-py3-none-manylinux2014_x86_64.whl (67.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.5/67.5 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:02[0m
Installing collected packages: PyQtWebEngine-Qt5, pyqtwebengine
Successfully installed PyQtWebEngine-Qt5-5.15.2 pyqtwebengine-5.15.6


# Our First Model

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

# Dataset
sentences = [
    "The quick brown fox jumped over the lazy dog.",
    "Advancements in AI have transformed the way we interact with technology.",
    "Yesterday, the stock market experienced a significant decline due to geopolitical tensions."
]

# Tokenize dataset
tokenized_sentences = [sentence.split() for sentence in sentences]

# Create vocabulary
word2idx = {"[PAD]": 0, "[CLS]": 1, "[SEP]": 2}
for sentence in tokenized_sentences:
    for token in sentence:
        if token not in word2idx:
            word2idx[token] = len(word2idx)

vocab_size = len(word2idx)
max_seq_len= 15


# # Encode dataset
# encoded_sentences = []
# for sentence in tokenized_sentences:
#     encoded_sentence = [word2idx["[CLS]"]] + [word2idx[word] for word in sentence] + [word2idx["[SEP]"]]
#     encoded_sentence += [word2idx["[PAD]"]] * (max_seq_len - len(encoded_sentence))
#     encoded_sentences.append(encoded_sentence)
# Encode dataset
encoded_sentences = []
for sentence in tokenized_sentences:
    encoded_sentence = [word2idx["[CLS]"]] + [word2idx[word] for word in sentence] + [word2idx["[SEP]"]]
    encoded_sentence += [word2idx["[PAD]"]] * (max_seq_len - len(encoded_sentence))
    encoded_sentences.append(encoded_sentence)

# max_seq_len = max(len(sentence) for sentence in encoded_sentences)


# Hyperparameters
d_model = 8
nhead = 2
num_layers = 1
dim_feedforward = 16

# # Positional Encoding
# class PositionalEncoding(nn.Module):
#     def __init__(self, d_model, max_seq_len):
#         super(PositionalEncoding, self).__init__()
#         pe = torch.zeros(max_seq_len, d_model)
#         position = torch.arange(0, max_seq_len, dtype=torch.float).unsqueeze(1)
#         div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
#         pe[:, 0::2] = torch.sin(position * div_term)
#         pe[:, 1::2] = torch.cos(position * div_term)
#         self.register_buffer("pe", pe)

#     # def forward(self, x):
#     #     x = x + self.pe[: x.size(0), :]
#     #     return x
#     def forward(self, x):
#         x = x + self.pe[:, :x.size(1)]
#         return x

class PositionalEncoding(nn.Module):
    def __init__(self, d_model):
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model

    def forward(self, x):
        seq_len = x.size(1)
        pe = torch.zeros(seq_len, self.d_model)
        
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / self.d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        pe = pe.unsqueeze(0).to(x.device)
        x = x + pe
        return x

# Multi-Head Attention
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, nhead):
        super(MultiHeadAttention, self).__init__()
        self.nhead = nhead
        self.head_dim = d_model // nhead
        self.qkv_linear = nn.Linear(d_model, d_model * 3)
        self.fc = nn.Linear(d_model, d_model)
        self.scale = self.head_dim ** -0.5

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        qkv = self.qkv_linear(x).view(batch_size, seq_len, self.nhead, -1).transpose(1, 2)
        q, k, v = qkv.chunk(3, dim=-1)
        attn_output = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        attn_output = torch.softmax(attn_output, dim=-1)
        attn_output = torch.matmul(attn_output, v)
        attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, -1)
        attn_output = self.fc(attn_output)
        return attn_output

# Feedforward Network
class FeedForwardNetwork(nn.Module):
    def __init__(self, d_model, dim_feedforward):
        super(FeedForwardNetwork, self).__init__()
        self.fc1 = nn.Linear(d_model, dim_feedforward)
        self.fc2 = nn.Linear(dim_feedforward, d_model)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

    
# Transformer Block
class TransformerBlock(nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward):
        super(TransformerBlock, self).__init__()
        self.mha = MultiHeadAttention(d_model, nhead)
        self.ffn = FeedForwardNetwork(d_model, dim_feedforward)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(0.1)
    def forward(self, x):
        attn_output = self.mha(x)
        x = self.norm1(x + self.dropout(attn_output))
        ffn_output = self.ffn(x)
        x = self.norm2(x + self.dropout(ffn_output))
        return x

# Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model, nhead, num_layers, dim_feedforward, max_seq_len):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        # self.pos_encoding = PositionalEncoding(d_model, max_seq_len)
        self.pos_encoding = PositionalEncoding(d_model)
        self.transformer_blocks = nn.ModuleList([TransformerBlock(d_model, nhead, dim_feedforward) for _ in range(num_layers)])
        self.fc = nn.Linear(d_model, vocab_size)
    def forward(self, x):
        x = self.embedding(x)
        x = self.pos_encoding(x)
        for block in self.transformer_blocks:
            x = block(x)
        x = self.fc(x)
        return x
# class TransformerModel(nn.Module):
#     def __init__(self, vocab_size, d_model, nhead, num_layers, dim_feedforward):
#         super(TransformerModel, self).__init__()
#         self.embedding = nn.Embedding(vocab_size, d_model)
#         self.pos_encoding = PositionalEncoding(d_model)
#         self.transformer_blocks = nn.ModuleList([TransformerBlock(d_model, nhead, dim_feedforward) for _ in range(num_layers)])
#         self.fc = nn.Linear(d_model, vocab_size)

#     def forward(self, x):
#         x = self.embedding(x)
#         x = self.pos_encoding(x)
#         for block in self.transformer_blocks:
#             x = block(x)
#         x = self.fc(x)
#         return x


model = TransformerModel(vocab_size, d_model, nhead, num_layers, dim_feedforward, max_seq_len)
# model = TransformerModel(vocab_size, d_model, nhead, num_layers, dim_feedforward)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Prepare input and target tensors
input_data = torch.tensor(encoded_sentences[:-1], dtype=torch.long)
target_data = torch.tensor(encoded_sentences[1:], dtype=torch.long)

# Training
num_epochs = 200
for epoch in range(num_epochs):
    optimizer.zero_grad()
    output = model(input_data)
    loss = criterion(output.view(-1, vocab_size), target_data.view(-1))
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [20/200], Loss: 3.4442
Epoch [40/200], Loss: 3.2071
Epoch [60/200], Loss: 2.9845
Epoch [80/200], Loss: 2.7494
Epoch [100/200], Loss: 2.5668
Epoch [120/200], Loss: 2.3766
Epoch [140/200], Loss: 2.1408
Epoch [160/200], Loss: 1.9756
Epoch [180/200], Loss: 1.8454
Epoch [200/200], Loss: 1.6744


# ChachaGPT

In [13]:
import random

idx2word = {idx: word for word, idx in word2idx.items()}


def generate_text(model, start_token, max_length):
    model.eval()
    generated_text = [start_token]

    for _ in range(max_length):
        input_data = torch.tensor([generated_text], dtype=torch.long)
        output = model(input_data)
        next_token_idx = torch.argmax(output[0, -1, :]).item()
        generated_text.append(next_token_idx)

        if idx2word[next_token_idx] == "[SEP]":
            break

    return " ".join(idx2word[idx] for idx in generated_text if idx2word[idx] not in ["[CLS]", "[SEP]", "[PAD]"])

# Randomly sample a starting token from the vocabulary
random_start_token = random.choice(list(word2idx.values()))

# Generate text
generated_text = generate_text(model, random_start_token, max_length=30)
print("Generated text:", generated_text)


Generated text: due have with transformed experienced AI stock AI due experienced AI stock AI due experienced AI decline decline decline decline AI decline decline decline decline decline decline AI
