**trishit code BART**

In [3]:
import pandas as pd
import torch
from transformers import BartTokenizer, BartForConditionalGeneration
from torch.utils.data import DataLoader, Dataset
import numpy as np


# Load data
train_data = pd.read_excel('ArithOps_Train.xlsx')
valid_data = pd.read_excel('ArithOps_Validation.xlsx')

ids = {}
inv_ids = {}
for i in range(9):
    ids["number"+str(i)] = '<extra_id_'+str(i+1)+'>'
    inv_ids['<extra_id_'+str(i+1)+'>'] = "number"+str(i)
i+=1
for sym in ['+', '-', '*', '/', '%']:
    ids[sym] = '<extra_id_'+str(i+1)+'>'
    inv_ids['<extra_id_'+str(i+1)+'>'] = sym


def prepare_input_output(df, ids):
    def replaceT5ids(s, joiner=' '):
        s_list = s.split(' ')
        for i, w in enumerate(s_list):
            if w in ids:
                s_list[i] = ids[w]
        return joiner.join(s_list)
    inputs = []
    outputs = []
    for idx, row in df.iterrows():
        desc = replaceT5ids(str(row['Description']))
        ques = replaceT5ids(str(row['Question']))

        eq = replaceT5ids(str(row['Equation']), joiner='')

        input_text = desc+' <extra_id_0> '+ques
        output_text = eq
        inputs.append(input_text)
        outputs.append(output_text)
    return inputs, outputs

train_inputs, train_outputs = prepare_input_output(train_data, ids)
valid_inputs, valid_outputs = prepare_input_output(valid_data, ids)
valid_expected_outputs = valid_data['Output'].astype(str).tolist()



In [5]:
train_outputs[0]

'<extra_id_10><extra_id_1><extra_id_2>'

In [6]:

from transformers import T5Tokenizer, T5ForConditionalGeneration

# 3. Create Dataset
class ArithmeticDataset(Dataset):
    tokenizer = T5Tokenizer.from_pretrained('t5-small')
    def __init__(self, inputs, outputs, max_in_length=512, max_out_length=512, test=False):
        self.inputs = inputs
        self.outputs = outputs
        self.test = test
        # self.
        self.max_in_length = max_in_length
        self.max_out_length = max_out_length

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_text = self.inputs[idx]
        output_text = self.outputs[idx]
        input_encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_in_length,
            return_tensors='pt',
        )
        output_encoding = self.tokenizer(
            output_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_out_length,
            return_tensors='pt',
        )
        labels = output_encoding['input_ids']
        labels[labels == self.tokenizer.pad_token_id] = - 100  # Ignore padding tokens in the loss
        return {
            'input_ids': input_encoding['input_ids'].flatten(),
            'inp_attention_mask': input_encoding['attention_mask'].flatten(),
            'labels': labels.flatten(),
            'out_attention_mask': torch.zeros_like(output_encoding['attention_mask']).flatten(),
        }
    
    @classmethod
    def decode(cls, x):
        return cls.tokenizer.decode(
            x,
            truncation=True,
            return_tensors='pt',
        )

    @classmethod
    def encode(cls, x):
        return cls.tokenizer.encode(
            x,
            truncation=True,
            return_tensors='pt',
        )


# Create DataLoader
train_dataset = ArithmeticDataset(train_inputs, train_outputs, max_in_length=128, max_out_length=16, test=True)
valid_dataset = ArithmeticDataset(valid_inputs, valid_outputs, max_in_length=128, max_out_length=16, test=True)


You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [79]:
a = ArithmeticDataset(train_inputs, train_outputs)

# Input sentence
sentence = "The quick brown fox <extra_id_0> jumps over the lazy dog."
sentence2 = ""

# Encode the sentence
encoded = ArithmeticDataset.encode(sentence)

# Decode it back to a sentence

encoded2 = a.tokenizer.encode(
            sentence2,
            truncation=True,
            return_tensors='pt',)
decoded = ArithmeticDataset.decode(encoded2[0])

print(f"Original Sentence: {sentence}")
print(f"Encoded Tokens: {encoded}")
print(f"Encoded2 Tokens: {encoded2}")
print(f"Decoded Sentence: {decoded}")

Original Sentence: The quick brown fox <extra_id_0> jumps over the lazy dog.
Encoded Tokens: tensor([[   37,  1704,  4216,     3, 20400, 32099,  4418,     7,   147,     8,
         19743,  1782,     5,     1]])
Encoded2 Tokens: tensor([[1]])
Decoded Sentence: </s>


In [75]:
def decode_model_logit(y):
    out = []
    for i in range(y.shape[0]):
        stp = i
        if y[i].item()==1:
            break
    sen = ArithmeticDataset.decode(y[:stp])
    return (sen)
        
decode_model_logit(a.tokenizer.encode(
    sentence,
    padding='max_length',
    truncation=True,
    max_length=512,
    return_tensors='pt',
)[0])


'The quick brown fox <extra_id_0> jumps over the lazy dog.'

In [87]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32)

# Initialize tokenizer and model
model_name = 't5-small'
model = T5ForConditionalGeneration.from_pretrained(model_name)
model.load_state_dict(torch.load('model.pth'))

# Training settings
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, weight_decay=0.02)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
epochs = 700

# Function to calculate loss for validation set
def evaluate_model(model, data_loader, device):
    model.eval()
    total_loss = 0
    output_logits = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['inp_attention_mask'].to(device)
            print(attention_mask.shape)
            labels = batch['labels'].to(device)
            print(labels.shape)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=torch.zeros_like(labels).to(device))
            loss = outputs.loss
            output_logits.append((torch.argmax(outputs.logits, dim = -1).detach().cpu().numpy(), labels.detach().cpu().numpy()))
            total_loss += loss.item()

    return total_loss / len(data_loader), output_logits


avg_loss, output_logits = evaluate_model(model, valid_loader, device)

  model.load_state_dict(torch.load('model.pth'))


torch.Size([32, 128])
torch.Size([32, 16])
torch.Size([32, 128])
torch.Size([32, 16])
torch.Size([32, 128])
torch.Size([32, 16])
torch.Size([32, 128])
torch.Size([32, 16])
torch.Size([32, 128])
torch.Size([32, 16])
torch.Size([32, 128])
torch.Size([32, 16])
torch.Size([7, 128])
torch.Size([7, 16])


In [88]:
pred, lbl = output_logits[0]
for i in range(pred.shape[0]):
    print('label: ', decode_model_logit(lbl[i]))
    print('pred: ', decode_model_logit(pred[i]))
    print()

label:  + <extra_id_1> <extra_id_2>
pred:  

label:  + <extra_id_1> <extra_id_2>
pred:  + + + + + + + + + + + + + + +

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  + <extra_id_1> <extra_id_2>
pred:  + + + + + + + + + + + + + + +

label:  + <extra_id_1> <extra_id_2>
pred:  + + + + + + + + + + + + + + +

label:  - <extra_id_1> <extra_id_2>
pred:  

label:  + <extra_id_1> <extra_id_2>
pred:  + + + + + + + + + + + + + + +

label:  + <extra_id_1> <extra_id_2>
pred:  

label:  - <extra_id_1> <extra

In [None]:
# Training loop
min_loss = 1e100
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    model.train()
    train_loss = 0

    for batch in train_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    
    avg_train_loss = train_loss / len(train_loader)
    avg_val_loss = evaluate_model(model, valid_loader, device)
    
    if avg_val_loss<min_loss:
        min_loss = avg_val_loss
        torch.save(model.state_dict(), 'model.pth')

    print(f"Train loss: {avg_train_loss}")
    print(f"Validation loss: {avg_val_loss}")


In [119]:
labels

tensor([[ 1768, 32098, 32097,     1,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100],
        [    3,    18, 32098, 32097,     1,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100],
        [ 1636, 32098, 32097, 32096,     1,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100],
        [    3,    18, 32098, 32097,     1,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100],
        [ 1768, 32097, 32098,     1,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100],
        [    3,    87, 32098, 32097,     1,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100],
        [ 1768, 32098, 32097,     1,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100],
        [ 1429, 32098, 32097,     1,  -100,  -100,  -100,  -100,  -100,  -100,
    

In [124]:
outp = torch.argmax(outputs.logits, dim = -1)

for i in range(outp.size(0)):
    print("pred: ", a.tokenizer.decode(
            outp[i],
            truncation=True,
            return_tensors='pt',
        ))
# for i in range(labels.size(0)):

    print("lebl: ", a.tokenizer.decode(
            labels[i][:4],
            truncation=True,
            return_tensors='pt',
        ))
    print()

pred:  + <extra_id_1> <extra_id_2> </s></s>+  + + + + + + + + +
lebl:  + <extra_id_1> <extra_id_2> </s>

pred:  - <extra_id_1> <extra_id_2> </s></s></s>+ +    *  +
lebl:  - <extra_id_1> <extra_id_2>

pred:  <extra_id_1> <extra_id_2> <extra_id_3> </s></s>+      +
lebl:  -- <extra_id_1> <extra_id_2> <extra_id_3>

pred:  - <extra_id_1> <extra_id_2> </s></s>--
lebl:  - <extra_id_1> <extra_id_2>

pred:  <extra_id_1> <extra_id_1> </s></s>+  *   +
lebl:  + <extra_id_2> <extra_id_1> </s>

pred:  / <extra_id_1> <extra_id_2> </s></s></s> </s>
lebl:  / <extra_id_1> <extra_id_2>

pred:  <extra_id_1> <extra_id_2> </s></s>+    +     +
lebl:  + <extra_id_1> <extra_id_2> </s>

pred:  * <extra_id_1> <extra_id_2> </s></s>* * *  * * * *  * *
lebl:  * <extra_id_1> <extra_id_2> </s>

pred:  + <extra_id_1> <extra_id_2> </s></s>+ +  +  + + +
lebl:  + <extra_id_1> <extra_id_2> </s>

pred:  - <extra_id_1> <extra_id_3> </s></s>*  *      * *
lebl:  / <extra_id_2> <extra_id_1>

pred:  * <extra_id_1> <extra_id_2> 

In [None]:


def evaluate_prefix(expressions):
    # Split the expression into tokens
    evals = []
    for expression in expressions:
        tokens = expression.split()

        # Reverse the tokens to process them from right to left
        tokens = tokens[::-1]

        # Create an empty stack
        stack = []

        # Iterate through each token
        for token in tokens:
            if token.isdigit():  # If it's a number, push it onto the stack
                stack.append(int(token))
            else:
                # The token is an operator, pop two operands from the stack
                operand1 = stack.pop()
                operand2 = stack.pop()

                # Perform the operation based on the token
                if token == '+':
                    result = operand1 + operand2
                elif token == '-':
                    result = operand1 - operand2
                elif token == '*':
                    result = operand1 * operand2
                elif token == '/':
                    result = operand1 / operand2

                # Push the result back onto the stack
                stack.append(result)

            # The final result will be the only element left in the stack
        evals.append(stack.pop())
    
    return evals


# Save predictions on validation set
def predict_on_validation(model, data_loader, tokenizer, device):
    model.eval()
    predictions = []
    pred_evals = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=128)
            pred_texts = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

            predictions.extend(pred_texts)
            pred_evals.extend(evaluate_prefix(pred_texts))

    return predictions, pred_evals

# # Get predictions on validation set
# val_predictions, val_evals = predict_on_validation(model, valid_loader, tokenizer, device)

# # Add predictions to the validation dataframe and save to Excel
# valid_data['Predicted Equation'] = val_predictions
# valid_data['Output'] = val_evals
# valid_data.to_excel('Validation_Predictions.xlsx', index=False)