In [1]:
from google.colab import files
uploaded = files.upload()

Saving train.parquet to train.parquet
Saving test_without_label.parquet to test_without_label.parquet


In [2]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-3.4.0-py3-none-any.whl.metadata (19 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from evaluate)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.4.0-py3-none-any.whl (487 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import evaluate
import os
from torch.cuda.amp import autocast, GradScaler
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [21]:
train_df = pd.read_parquet('train.parquet')
test_df = pd.read_parquet('test_without_label.parquet')
train_df = train_df.sample(frac=0.5, random_state=42).reset_index(drop=True)

In [22]:
train_df.head(5)

Unnamed: 0,context,question,answer
0,Hans Bielenstein writes that as far back as th...,Who believed that they were the true Han Weste...,foreign officials
1,"In 1838, there was a flurry of entrepreneurial...",For what reason was asphalt used in the floori...,damp proofing
2,The first sulfonamide and first commercially a...,What company developed Prontosil?,IG Farben
3,The 1910 election saw 42 Labour MPs elected to...,How many MP were elected in the 1910 election?,42
4,"Ye Zhiping, the principal of Sangzao Middle Sc...",How many students attended the school?,2323


In [23]:
test_df.head(5)

Unnamed: 0,context,question,answer
63695,Perhaps the most famous raid by Oeselian pirat...,What important figure was killed in the raid?,?
80051,"Following a peak in growth in 1979, the Liberi...",In 2011 Liberia's economy was considered what?,?
32271,A plethora of anti-aircraft gun systems of sma...,The combat batteries of an Army AAA battalion ...,?
52439,Avicenna's legacy in classical psychology is p...,What subject is seen throughout Avicenna's Boo...,?
33889,"The desire to explore, record and systematize ...",In what year was Charles Burney's A General Hi...,?


In [25]:
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)
train_data = train_data.reset_index(drop=True)
val_data = val_data.reset_index(drop=True)

In [26]:
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

In [34]:
class QADataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        input_text = f"question: {row['question']} context: {row['context']}"
        encoding = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        target_encoding = self.tokenizer(
            row['answer'],
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'labels': target_encoding['input_ids'].squeeze()
        }

In [35]:
train_dataset = QADataset(train_data, tokenizer)
val_dataset = QADataset(val_data, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

In [36]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
scaler = GradScaler()

  scaler = GradScaler()


In [37]:
model.train()
for epoch in range(3):
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        with autocast():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

    print(f"Epoch {epoch + 1} Loss: {loss.item():.4f}")

  with autocast():


Epoch 1 Loss: 0.0026
Epoch 2 Loss: 0.0004
Epoch 3 Loss: 0.0002


In [38]:
def generate_answer(context, question):
    input_text = f"question: {question} context: {context}"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [39]:
y_true = []
y_pred = []

for idx in range(len(val_data)):
    predicted_answer = generate_answer(val_data.iloc[idx]['context'], val_data.iloc[idx]['question'])
    true_answer = val_data.iloc[idx]['answer']
    y_pred.append(predicted_answer)
    y_true.append(true_answer)

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted', zero_division=1)
recall = recall_score(y_true, y_pred, average='weighted', zero_division=1)
f1 = f1_score(y_true, y_pred, average='weighted', zero_division=1)

In [40]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.6455
Precision: 0.9910
Recall: 0.6455
F1 Score: 0.6439


In [41]:
test_df['answer'] = test_df.apply(lambda x: generate_answer(x['context'], x['question']), axis=1)

In [45]:
test_df.to_parquet("DS8008_assign2_AhnafShahriyar_501314107.parquet", index=False)

In [46]:
soln_df = pd.read_parquet('DS8008_assign2_AhnafShahriyar_501314107.parquet')
soln_df.head(10)

Unnamed: 0,context,question,answer
0,Perhaps the most famous raid by Oeselian pirat...,What important figure was killed in the raid?,the Swedish archbishop Johannes
1,"Following a peak in growth in 1979, the Liberi...",In 2011 Liberia's economy was considered what?,one of the 20 fastest growing in the world
2,A plethora of anti-aircraft gun systems of sma...,The combat batteries of an Army AAA battalion ...,kilometers
3,Avicenna's legacy in classical psychology is p...,What subject is seen throughout Avicenna's Boo...,classical psychology
4,"The desire to explore, record and systematize ...",In what year was Charles Burney's A General Hi...,1776
5,Gaddafi briefly studied History at the Univers...,How did Gaddafi view the influence of the mili...,he refused to learn English and was rude to th...
6,"Following the 1967 war and the ""three nos"" res...",What did the ELO commit itself on?,"""armed struggle as the only way to liberate th..."
7,"In the field of music, Germany claims some of ...",Who marked the switch from classical to romant...,"Bach, Mozart and Beethoven"
8,The University of Bath and Bath Spa University...,When did the university of Bath gain royal cha...,The University of Bath gained its Royal Charte...
9,Economic reform efforts continued with the sup...,When did military conflict resume in the Congo?,December 1998
