In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

# Data preprocessing
def preprocess_data(filename):
    """Load the source and target sentence columns from a CSV file.

    Args:
        filename: Location of the CSV file; handed unchanged to
            ``pandas.read_csv``. The file must provide the columns
            ``Sentence`` and ``Transformed sentence``.

    Returns:
        A ``(sentences, transformed_sentences)`` tuple with the values of
        the two columns, in file order.

    Raises:
        KeyError: If either expected column is missing from the file.
    """
    source_column, target_column = 'Sentence', 'Transformed sentence'
    frame = pd.read_csv(filename)
    return frame[source_column].values, frame[target_column].values

# Define the Encoder
class Encoder(nn.Module):
    """GRU encoder that turns one sequence of symbol indices into a hidden state.

    Args:
        input_size: Number of distinct input symbols (rows of the embedding).
        hidden_size: Width of the embedding vectors and of the GRU state.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input):
        """Encode a single sequence.

        Args:
            input: Integer tensor holding the symbol indices of one sequence.
                Any length is accepted; it may live on a different device
                than the module.

        Returns:
            The final GRU hidden state, shaped ``(1, 1, hidden_size)``.
        """
        # Follow the module's own parameters so tensors built on the CPU
        # still work after the model has been moved to another device.
        input = input.to(self.embedding.weight.device)
        embedded = self.embedding(input)
        # Reshape to (seq_len, batch=1, hidden_size). The sequence length is
        # inferred from the input instead of being fixed at 8.
        _, hidden = self.gru(embedded.view(-1, 1, self.hidden_size))
        return hidden

    def predict(self, sentence):
        """Encode a raw sentence without tracking gradients.

        Args:
            sentence: Sequence of characters, converted to indices by the
                module-level ``sentence_to_tensor`` helper.

        Returns:
            The hidden state produced by :meth:`forward` for that sentence.
        """
        with torch.no_grad():
            return self.forward(sentence_to_tensor(sentence))

# Define the Decoder
class Decoder(nn.Module):
    """GRU decoder that produces one output symbol per step.

    Args:
        hidden_size: Width of the embedding vectors and of the GRU state.
        output_size: Number of output symbols (embedding rows and logits).
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run a single decoding step.

        Args:
            input: Tensor holding the index of the previous symbol; it is
                reshaped to a single (1, 1, hidden_size) embedding.
            hidden: GRU hidden state carried over from the previous step.

        Returns:
            A ``(scores, hidden)`` tuple: the linear layer's output for this
            step and the updated GRU hidden state.
        """
        step_embedding = self.embedding(input).view(1, 1, -1)
        gru_output, next_hidden = self.gru(step_embedding, hidden)
        scores = self.out(gru_output[0])
        return scores, next_hidden

    def predict(self, encoder_hidden):
        """Greedily decode a string starting from an encoder hidden state.

        Relies on the module-level ``SOS_token``, ``EOS_token``,
        ``MAX_LENGTH``, ``device`` and ``index_to_char``.

        NOTE(review): decoding stops at the first predicted index equal to
        ``EOS_token``; elsewhere in this file ``EOS_token`` is 1, which is
        also the index of 'b' in ``char_to_index`` -- confirm this overlap
        is intended.

        Args:
            encoder_hidden: Initial hidden state for the GRU.

        Returns:
            The decoded characters joined into a single string (at most
            ``MAX_LENGTH`` characters).
        """
        with torch.no_grad():
            state = encoder_hidden
            step_input = torch.tensor([[SOS_token]], device=device)
            characters = []
            for _ in range(MAX_LENGTH):
                scores, state = self.forward(step_input, state)
                _top_value, best = scores.topk(1)
                best_index = best.item()
                if best_index == EOS_token:
                    break
                characters.append(index_to_char[best_index])
                # Feed the chosen symbol back in as the next step's input.
                step_input = best.squeeze().detach()
            return ''.join(characters)

# Helper functions for data preparation
def sentence_to_tensor(sentence):
    """Convert a sentence into a 1-D long tensor of character indices.

    Args:
        sentence: Sequence of characters; each one is looked up in the
            module-level ``char_to_index`` mapping.

    Returns:
        A ``torch.long`` tensor with one index per character of ``sentence``.

    Raises:
        KeyError: If a character is not present in ``char_to_index``.
    """
    return torch.tensor(
        [char_to_index[symbol] for symbol in sentence],
        dtype=torch.long,
    )

# Constants and hyperparameters
input_size = 26  # Number of lowercase English letters (encoder vocabulary)
output_size = 26  # Decoder vocabulary size
hidden_size = 1028  # Width of the embeddings and GRU hidden state
learning_rate = 0.01
n_epochs = 30
MAX_LENGTH = 8  # Upper bound on decoding steps in Decoder.predict
# NOTE(review): SOS_token and EOS_token use indices 0 and 1, which
# char_to_index also assigns to the letters 'a' and 'b' -- confirm this
# overlap is intended.
SOS_token = 0
EOS_token = 1

# Define the device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Report the device that was actually selected; printing `torch.device`
# would only show the class object.
print(device)

# Read the (sentence, transformed sentence) training pairs from disk
sentences, transformed_sentences = preprocess_data(filename="train_data.csv")

# Helper function to map characters to indices
def sentence_to_tensor(sentence):
    """Map each character of ``sentence`` to its index and pack into a tensor.

    This definition has the same name as an earlier one in this file and,
    being later, is the one in effect once the module has been executed.

    Args:
        sentence: Sequence of characters to look up in the module-level
            ``char_to_index`` dictionary.

    Returns:
        A 1-D ``torch.long`` tensor of length ``len(sentence)``.

    Raises:
        KeyError: If a character has no entry in ``char_to_index``.
    """
    indices = [char_to_index[ch] for ch in sentence]
    return torch.tensor(indices, dtype=torch.long)

# Create dictionaries for character to index and index to character conversions
char_to_index = {char: i for i, char in enumerate('abcdefghijklmnopqrstuvwxyz')}
index_to_char = {i: char for i, char in enumerate('abcdefghijklmnopqrstuvwxyz')}

# Initialize the encoder and decoder and move them to the selected device
encoder = Encoder(input_size, hidden_size).to(device)
decoder = Decoder(hidden_size, output_size).to(device)

# Optimizers and loss
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training loop: one optimizer step per (sentence, target) pair. The decoder
# is fed the ground-truth previous character at every step (teacher forcing).
for epoch in range(1, n_epochs + 1):
    total_loss = 0  # Sum of per-sentence losses for this epoch
    for sentence, target_sentence in zip(sentences, transformed_sentences):
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        # sentence_to_tensor builds tensors on the default device; move them
        # next to the models so training also works when `device` is CUDA.
        input_tensor = sentence_to_tensor(sentence).to(device)
        target_tensor = sentence_to_tensor(target_sentence).to(device)

        decoder_hidden = encoder(input_tensor)
        decoder_input = torch.tensor([[SOS_token]], device=device)

        loss = 0
        for target_index in target_tensor:
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            target_step = target_index.unsqueeze(0)
            loss += criterion(decoder_output, target_step)

            # Prepare the next decoder input
            decoder_input = target_step

        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()

        total_loss += loss.item()

    # Report the mean per-sentence loss after every epoch
    avg_loss = total_loss / len(sentences)
    print(f'Epoch {epoch}/{n_epochs}, Loss: {avg_loss}')

# Save models
torch.save(encoder.state_dict(), 'encoder_model.pth')
torch.save(decoder.state_dict(), 'decoder_model.pth')

# Evaluation using the provided checker code
import checker

# map_location keeps the reload working even if the checkpoint was written
# on a different device than the one currently selected.
encoder.load_state_dict(torch.load('encoder_model.pth', map_location=device))
decoder.load_state_dict(torch.load('decoder_model.pth', map_location=device))

checker.evaluate(encoder, decoder)

<class 'torch.device'>
Epoch 1/30, Loss: 26.811435716084073
Epoch 2/30, Loss: 26.45773829678127
Epoch 3/30, Loss: 26.125793966565812
Epoch 4/30, Loss: 25.440044383730207
Epoch 5/30, Loss: 25.31776589039394
