In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from spacy import load
import en_core_web_sm


In [3]:
# Location of the labelled CSV (an absolute path on the author's machine).
CSV_PATH = "C:/Users/anjan/Downloads/tense_prediction.csv"

# Read the whole file into a DataFrame; later cells use its
# 'sentence' and 'tense' columns.
df = pd.read_csv(CSV_PATH)

In [8]:
# Preview the first five rows (the default for DataFrame.head) as the cell output.
df.head(n=5)

Unnamed: 0,sentence,tense
0,I am eating breakfast,present
1,She will go to the park,future
2,They played soccer yesterday,past
3,I will be going to the concert,future
4,She is eating lunch now,present


In [9]:
# Load the spaCy model
# `en_core_web_sm.load()` builds the pipeline bundled with the imported
# `en_core_web_sm` package. The resulting `nlp` callable is used by the
# cells below purely to split text into tokens (only `token.text` is read).
nlp = en_core_web_sm.load()

In [10]:
# Preprocess the data
class SentenceTenseDataset(Dataset):
    """Map-style dataset of (sentence token ids, tense token ids) pairs.

    The vocabulary covers every token of the ``'sentence'`` column AND of the
    ``'tense'`` column, plus three reserved tokens. Building it from the
    sentences alone made ``__getitem__`` raise ``KeyError`` for any tense
    label that never occurs inside a sentence.

    Args:
        df: DataFrame with string columns ``'sentence'`` and ``'tense'``.
        tokenizer: optional callable ``str -> iterable of str``. When omitted,
            text is tokenized with the module-level spaCy pipeline ``nlp``.

    Attributes:
        sentences, tenses: raw column values as lists.
        vocab: reserved tokens followed by the sorted corpus tokens.
        vocab_size: ``len(vocab)``.
        word2idx / idx2word: token <-> index lookup tables.

    Raises:
        KeyError: if ``df`` lacks the ``'sentence'`` or ``'tense'`` column.
    """

    # Reserved tokens; they occupy the first indices of the vocabulary.
    START_TOKEN = '<start>'
    PAD_TOKEN = '<pad>'
    UNK_TOKEN = '<unk>'

    def __init__(self, df, tokenizer=None):
        self.tokenizer = tokenizer if tokenizer is not None else self._spacy_tokens
        self.sentences = df['sentence'].tolist()
        self.tenses = df['tense'].tolist()

        # Tokenize every row exactly once instead of re-running the pipeline
        # on each __getitem__ call.
        self._sentence_tokens = [list(self.tokenizer(text)) for text in self.sentences]
        self._tense_tokens = [list(self.tokenizer(text)) for text in self.tenses]

        specials = [self.START_TOKEN, self.PAD_TOKEN, self.UNK_TOKEN]
        corpus_tokens = {
            token
            for tokens in self._sentence_tokens + self._tense_tokens
            for token in tokens
        }
        # Sorting keeps the index assignment deterministic across runs.
        self.vocab = specials + sorted(corpus_tokens.difference(specials))
        self.vocab_size = len(self.vocab)
        self.word2idx = {word: i for i, word in enumerate(self.vocab)}
        self.idx2word = {i: word for i, word in enumerate(self.vocab)}

    @staticmethod
    def _spacy_tokens(text):
        """Default tokenizer: token texts produced by the global spaCy pipeline."""
        return [token.text for token in nlp(text)]

    def _encode_tokens(self, tokens):
        """Convert tokens to ids, mapping unseen tokens to the ``<unk>`` id."""
        unk_idx = self.word2idx[self.UNK_TOKEN]
        return [self.word2idx.get(token, unk_idx) for token in tokens]

    def encode(self, text):
        """Tokenize ``text`` and return its list of vocabulary ids."""
        return self._encode_tokens(self.tokenizer(text))

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        """Return ``(input_ids, target_ids)`` for row ``idx`` as two lists of ints."""
        input_ids = self._encode_tokens(self._sentence_tokens[idx])
        target_ids = self._encode_tokens(self._tense_tokens[idx])
        return input_ids, target_ids

In [11]:
# Define the encoder-decoder model
class EncoderDecoder(nn.Module):
    """Bidirectional-LSTM encoder with an attentive, greedily-fed LSTM decoder.

    All tensors are batch-first. The two encoder directions are merged by
    summation so that the encoder states match the decoder's ``hidden_size``
    and the attention / output layers keep their ``hidden_size * 2`` input.

    Args:
        vocab_size: number of rows of the shared embedding table and size of
            the output distribution.
        embedding_size: embedding dimension.
        hidden_size: hidden size of both LSTMs.
        start_idx: vocabulary index fed to the decoder at the first step.
            Defaults to 0; pass the index of the vocabulary's start token
            when it differs.
    """

    def __init__(self, vocab_size, embedding_size, hidden_size, start_idx=0):
        super(EncoderDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.start_idx = start_idx
        self.encoder = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                               bidirectional=True, batch_first=True)
        self.decoder = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                               batch_first=True)
        self.attention = nn.Linear(hidden_size * 2, 1)
        self.output = nn.Linear(hidden_size * 2, vocab_size)
        self.embedding = nn.Embedding(vocab_size, embedding_size)

    def forward(self, input_ids, target_ids=None, max_len=1):
        """Decode a sequence of per-step vocabulary logits.

        Args:
            input_ids: LongTensor ``(batch, src_len)`` (a 1-D tensor is treated
                as a batch of one). ``src_len`` must be at least 1.
            target_ids: optional LongTensor ``(batch, tgt_len)``. Only its last
                dimension is used, to fix the number of decoding steps; the
                decoder is always fed its own previous prediction.
            max_len: number of steps to decode when ``target_ids`` is None.

        Returns:
            FloatTensor ``(batch, steps, vocab_size)`` of unnormalised logits.
            These are differentiable and can be passed to
            ``nn.CrossEntropyLoss``; take ``argmax(dim=-1)`` for token ids.
        """
        if input_ids.dim() == 1:
            input_ids = input_ids.unsqueeze(0)
        batch_size = input_ids.size(0)
        steps = max_len if target_ids is None else target_ids.size(-1)

        # Encoder
        embedded = self.embedding(input_ids)                      # (B, S, E)
        encoder_outputs, (hidden, cell) = self.encoder(embedded)  # (B, S, 2H), (2, B, H)

        # Merge forward/backward directions so shapes match the decoder.
        encoder_outputs = (encoder_outputs[..., :self.hidden_size]
                           + encoder_outputs[..., self.hidden_size:])  # (B, S, H)
        decoder_hidden = (hidden.sum(dim=0, keepdim=True),
                          cell.sum(dim=0, keepdim=True))               # (1, B, H) each

        if steps <= 0:
            return encoder_outputs.new_zeros((batch_size, 0, self.output.out_features))

        # Decoder with Attention
        decoder_input = torch.full((batch_size, 1), self.start_idx,
                                   dtype=torch.long, device=input_ids.device)
        step_logits = []
        for _ in range(steps):
            decoder_output, decoder_hidden = self.decoder(
                self.embedding(decoder_input), decoder_hidden)          # (B, 1, H)
            # Score every source position against the current decoder state.
            query = decoder_output.expand(-1, encoder_outputs.size(1), -1)
            scores = self.attention(torch.cat((query, encoder_outputs), dim=2))  # (B, S, 1)
            attention_weights = torch.softmax(scores, dim=1)
            context = torch.sum(attention_weights * encoder_outputs,
                                dim=1, keepdim=True)                    # (B, 1, H)
            logits = self.output(torch.cat((decoder_output, context), dim=2))  # (B, 1, V)
            step_logits.append(logits)
            # Greedy feedback: the most likely token becomes the next input.
            decoder_input = logits.argmax(dim=2)
        return torch.cat(step_logits, dim=1)


In [15]:
# Train the model: hyperparameters first, then the objects built from them.
EMBEDDING_SIZE = 128
HIDDEN_SIZE = 256
LEARNING_RATE = 0.001

dataset = SentenceTenseDataset(df)
model = EncoderDecoder(
    vocab_size=dataset.vocab_size,
    embedding_size=EMBEDDING_SIZE,
    hidden_size=HIDDEN_SIZE,
)
# Loss and optimiser are independent of each other; Adam updates every
# parameter of the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

KeyError: 'sentence'

In [None]:
def _pad_batch(batch):
    """Collate variable-length (input_ids, target_ids) pairs into padded LongTensors.

    Inputs are padded with the vocabulary's '<pad>' index when it has one
    (index 0 otherwise). Targets are padded with the criterion's
    ``ignore_index`` so padded positions contribute nothing to the loss.
    """
    pad_idx = dataset.word2idx.get('<pad>', 0)
    inputs, targets = zip(*batch)
    input_batch = torch.full((len(batch), max(len(ids) for ids in inputs)),
                             pad_idx, dtype=torch.long)
    target_batch = torch.full((len(batch), max(len(ids) for ids in targets)),
                              criterion.ignore_index, dtype=torch.long)
    for row, (src, tgt) in enumerate(zip(inputs, targets)):
        input_batch[row, :len(src)] = torch.as_tensor(src, dtype=torch.long)
        target_batch[row, :len(tgt)] = torch.as_tensor(tgt, dtype=torch.long)
    return input_batch, target_batch


# Build the loader once; with shuffle=True it still reshuffles on every epoch.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=_pad_batch)
device = next(model.parameters()).device
model.train()

for epoch in range(10):
    total_loss = 0.0
    num_batches = 0
    for input_ids, target_ids in dataloader:
        input_ids = input_ids.to(device)
        target_ids = target_ids.to(device)
        optimizer.zero_grad()
        # The model output is treated as per-step logits over the vocabulary.
        output = model(input_ids, target_ids)
        loss = criterion(output.reshape(-1, dataset.vocab_size), target_ids.reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    # Report the mean loss over the epoch rather than only the last batch;
    # max(..., 1) keeps an empty dataset from raising.
    print(f'Epoch {epoch+1}: Loss = {total_loss / max(num_batches, 1)}')

# Predict and convert tense
def predict_tense(sentence):
    """Run the model on ``sentence`` and return the decoded tokens joined into one string.

    Tokens missing from ``dataset.word2idx`` are mapped to the '<unk>' index
    when the vocabulary has one and are dropped otherwise, instead of
    raising ``KeyError``.

    Args:
        sentence: raw text; tokenized with the global spaCy pipeline ``nlp``.

    Returns:
        The predicted tokens concatenated without a separator.

    Raises:
        ValueError: if no token of ``sentence`` can be mapped to an id.
    """
    unk_idx = dataset.word2idx.get('<unk>')
    input_ids = [dataset.word2idx.get(token.text, unk_idx) for token in nlp(sentence)]
    input_ids = [idx for idx in input_ids if idx is not None]
    if not input_ids:
        raise ValueError(f"No known tokens in sentence: {sentence!r}")

    input_ids = torch.tensor([input_ids], device=next(model.parameters()).device)
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        output = model(input_ids, None)
    # Accept either token ids or per-step logits from the model.
    if output.is_floating_point():
        output = output.argmax(dim=-1)
    predicted_tense = [dataset.idx2word[idx.item()] for idx in output[0]]
    return ''.join(predicted_tense)

def convert_tense(sentence, target_tense):
    """Run the model on ``sentence`` with ``target_tense`` as the target sequence.

    Tokens missing from ``dataset.word2idx`` are mapped to the '<unk>' index
    when the vocabulary has one and are dropped otherwise, instead of
    raising ``KeyError``.

    NOTE(review): the model's forward pass uses the target ids only to decide
    how many steps to decode, so the requested tense does not appear to
    condition the output. Confirm whether that is the intended design.

    Args:
        sentence: raw text; tokenized with the global spaCy pipeline ``nlp``.
        target_tense: tense label text, tokenized the same way.

    Returns:
        The decoded tokens joined with single spaces.

    Raises:
        ValueError: if ``sentence`` or ``target_tense`` has no mappable token.
    """
    unk_idx = dataset.word2idx.get('<unk>')
    device = next(model.parameters()).device

    input_ids = [dataset.word2idx.get(token.text, unk_idx) for token in nlp(sentence)]
    input_ids = [idx for idx in input_ids if idx is not None]
    if not input_ids:
        raise ValueError(f"No known tokens in sentence: {sentence!r}")

    target_tense_ids = [dataset.word2idx.get(token.text, unk_idx) for token in nlp(target_tense)]
    target_tense_ids = [idx for idx in target_tense_ids if idx is not None]
    if not target_tense_ids:
        raise ValueError(f"No known tokens in target tense: {target_tense!r}")

    input_ids = torch.tensor([input_ids], device=device)
    target_tense_ids = torch.tensor([target_tense_ids], device=device)
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        output = model(input_ids, target_tense_ids)
    # Accept either token ids or per-step logits from the model.
    if output.is_floating_point():
        output = output.argmax(dim=-1)
    converted_words = [dataset.idx2word[idx.item()] for idx in output[0]]
    return ' '.join(converted_words)

# Example usage: push one sample sentence through both helpers.
input_sentence, target_tense = "The dog chases the cat.", "future"

# First helper: decode the model's prediction for the sample sentence.
predicted_tense = predict_tense(input_sentence)
print(f"Predicted tense: {predicted_tense}")

# Second helper: decode again, this time supplying the requested tense.
converted_sentence = convert_tense(input_sentence, target_tense)
print(f"Converted sentence: {converted_sentence}")