<a href="https://colab.research.google.com/github/elangbijak4/LLM-Research/blob/main/Simple4_N_Enkoder_M_Dekoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [3]:
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

In [11]:
# Step 1: Train Word2Vec model
sentences = [
    ['hello', 'world'],
    ['hi', 'there'],
    ['how', 'are', 'you'],
    ['i', 'am', 'fine'],
    ['hello', 'how', 'are', 'you'],
    ['i', 'am', 'doing', 'well'],
    ['good', 'morning'],
    ['good', 'night'],
    ['this', 'is', 'a', 'sentence'],
    ['we', 'are', 'testing', 'the', 'model'],
    ['please', 'generate', 'something', 'interesting']
]

word2vec_model = Word2Vec(sentences, vector_size=32, window=5, min_count=1, workers=4)
word_vectors = word2vec_model.wv

# Prepare vocabulary and embedding matrix
vocab = list(word_vectors.key_to_index.keys())
vocab.append('<eos>')
vocab_size = len(vocab)
embed_dim = word_vectors.vector_size

word2idx = {word: idx for idx, word in enumerate(vocab)}
idx2word = {idx: word for idx, word in enumerate(vocab)}
embedding_matrix = torch.zeros((vocab_size, embed_dim))
for word, idx in word2idx.items():
    if word in word_vectors:
        embedding_matrix[idx] = torch.tensor(word_vectors[word])

# Step 2: Define Transformer model
class TransformerModel(nn.Module):
    def __init__(self, embed_dim, nhead, dim_feedforward, num_layers, output_dim):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        self.transformer = nn.Transformer(
            d_model=embed_dim,
            nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=dim_feedforward
        )
        self.fc_out = nn.Linear(embed_dim, output_dim)

    def forward(self, src, tgt):
        src = self.embedding(src)
        tgt = self.embedding(tgt)
        src = src.permute(1, 0, 2)  # (seq_len, batch_size, embed_dim)
        tgt = tgt.permute(1, 0, 2)  # (seq_len, batch_size, embed_dim)
        output = self.transformer(src, tgt)
        output = output.permute(1, 0, 2)  # (batch_size, seq_len, embed_dim)
        output = self.fc_out(output)
        return output

# Hyperparameters
nhead = 2
dim_feedforward = 128
num_layers = 4  # Increased number of layers
output_dim = vocab_size
seq_length = 5
batch_size = 2
num_epochs = 100
learning_rate = 0.001

# Dummy data for training
def encode_sentence(sentence, word2idx, seq_length):
    return [word2idx.get(word, word2idx['<eos>']) for word in sentence] + [word2idx['<eos>']] * (seq_length - len(sentence))

src_sentences = [
    ['hello', 'how', 'are', 'you'],
    ['i', 'am', 'doing', 'well']
]

tgt_sentences = [
    ['how', 'are', 'you', 'doing'],
    ['am', 'doing', 'well', 'today']
]

src = torch.tensor([encode_sentence(sent, word2idx, seq_length) for sent in src_sentences])
tgt = torch.tensor([encode_sentence(sent, word2idx, seq_length) for sent in tgt_sentences])
target_output = tgt.clone()

# Model, loss function, and optimizer
model = TransformerModel(embed_dim, nhead, dim_feedforward, num_layers, output_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    output = model(src, tgt[:, :-1])
    loss = criterion(output.reshape(-1, output_dim), target_output[:, 1:].reshape(-1))
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Inference (autoregressive decoding)
def generate_sentence(model, prompt, max_length, word2idx, idx2word):
    model.eval()
    with torch.no_grad():
        src = torch.tensor([encode_sentence(prompt, word2idx, max_length)])
        tgt = torch.zeros((1, max_length), dtype=torch.long)
        for i in range(max_length):
            output = model(src, tgt[:, :i+1])
            next_word_idx = output.argmax(dim=-1)[:, i].item()
            tgt[0, i] = next_word_idx
            if idx2word[next_word_idx] == '<eos>':
                break
        generated_sentence = [idx2word[idx.item()] for idx in tgt[0] if idx2word[idx.item()] != '<eos>']
    return ' '.join(generated_sentence)

prompt = ['hello']
generated_sentence = generate_sentence(model, prompt, seq_length, word2idx, idx2word)
print("Generated sentence:", generated_sentence)

Epoch [10/100], Loss: 2.0835
Epoch [20/100], Loss: 1.6237
Epoch [30/100], Loss: 1.1716
Epoch [40/100], Loss: 0.7045
Epoch [50/100], Loss: 0.5762
Epoch [60/100], Loss: 0.4195
Epoch [70/100], Loss: 0.2741
Epoch [80/100], Loss: 0.2318
Epoch [90/100], Loss: 0.1654
Epoch [100/100], Loss: 0.1454
Generated sentence: you you you you you
