This Entire Pipeline will 
1. Preprocess the .json file given by the user & Save them
2. Use our already generated fasttext_embeddings_25.pkl from the vocab.txt (this two are computed priorly )
3. Load our best baseline model `RNN_Local_Context.pth` and comupte the final Accuracy .

##### Run in `base` environement 

In [3]:
import json
import torch
import pickle
import string
import torch.nn.functional as F
from nltk.tokenize import word_tokenize
from torch.utils.data import DataLoader, Dataset
from Requirements.RNN_Local_Context_Class import (
    RNNClassifierWithAttention,
)  # Import trained model



Preprocessing Code (Step 1)

In [4]:
# Step 1: Preprocess JSON File
def preprocess_json(input_path, output_path):
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    processed_data = []
    punctuation_set = set(string.punctuation)

    for row in data:
        sentence = row["sentence"]
        tokens = word_tokenize(sentence)
        tokens = [token for token in tokens if token not in punctuation_set]
        aspect_terms = row["aspect_terms"]
        unique_aspect_terms = set()

        for each_aspect in aspect_terms:
            term = each_aspect["term"]
            polarity = each_aspect["polarity"]
            index = int(each_aspect["from"])  # Ensure index is captured correctly
            if term in unique_aspect_terms:
                continue
            unique_aspect_terms.add(term)
            term_tokens = word_tokenize(term)

            processed_data.append(
                {
                    "tokens": tokens,
                    "polarity": polarity,
                    "aspect_terms": [term],
                    "index": index,
                }
            )

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(processed_data, f, indent=4)

Load Embeddings (Step - 2)

In [5]:
# Step 2: Load Pre-trained Embeddings
with open("Requirements/fasttext_embeddings_25.pkl", "rb") as f:
    embeddings = pickle.load(f)


def get_embedding(word):
    return torch.tensor(embeddings.get(word, torch.zeros(25)), dtype=torch.float32)

Define Dataset Class & Load Model

In [6]:
# Step 3: Define Dataset Class with Padding Fix
class ABSADataset(Dataset):
    def __init__(self, file_path):
        with open(file_path, "r", encoding="utf-8") as f:
            self.data = json.load(f)

        self.polarity_map = {"positive": 0, "negative": 1, "neutral": 2, "conflict": 3}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        tokens = self.data[idx]["tokens"]
        polarity = self.data[idx]["polarity"]
        polarity_label = self.polarity_map.get(polarity, -1)
        if polarity_label == -1:
            raise ValueError(f"Unexpected polarity value: {polarity}")
        token_embeddings = [get_embedding(token) for token in tokens]
        return torch.stack(token_embeddings), torch.tensor(
            polarity_label, dtype=torch.long
        )


# Step 4: Define Padding Function for Dataloader
def collate_fn(batch):
    sequences, labels = zip(*batch)
    max_len = max(seq.shape[0] for seq in sequences)
    padded_sequences = [
        F.pad(seq, (0, 0, 0, max_len - seq.shape[0])) for seq in sequences
    ]
    return torch.stack(padded_sequences), torch.tensor(labels, dtype=torch.long)

Load our Pretrained Baseline Model

In [7]:
# Step 5: Process Input File & Load Data
input_json = "val.json"  # Replace with the actual file provided
processed_json = "processed_data.json"
preprocess_json(input_json, processed_json)

dataset = ABSADataset(processed_json)
dataloader = DataLoader(dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)

# Step 6: Load Trained Model
model = RNNClassifierWithAttention()
model.load_state_dict(torch.load("RNN_Local_Context.pth"))
model.eval()

RNNClassifierWithAttention(
  (rnn): RNN(25, 64, batch_first=True)
  (attention): Attention(
    (attention): Linear(in_features=64, out_features=1, bias=True)
  )
  (fc1): Linear(in_features=64, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=4, bias=True)
)

Validation & Accuracy Calculation

In [8]:
# Step 7: Run Inference & Compute Accuracy
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in dataloader:
        outputs = model(inputs)
        predictions = torch.argmax(outputs, dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)

accuracy = correct / total * 100
print(f"Final Accuracy: {accuracy:.2f}%")

Final Accuracy: 56.65%


  return torch.tensor(embeddings.get(word, torch.zeros(25)), dtype=torch.float32)
