<a href="https://colab.research.google.com/github/jmnj2003/dl-project/blob/main/v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install transformers datasets sentence-transformers tqdm matplotlib PyPDF2 python-pptx

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_pptx-1.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.8/472.8 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: XlsxWriter, PyPDF2, python-pptx
Successfully installed PyPDF2-3.0.1 XlsxWriter-3.2.9

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import (BartTokenizer, BartForConditionalGeneration,
                         T5Tokenizer, T5ForConditionalGeneration,
                         get_linear_schedule_with_warmup)
from sentence_transformers import SentenceTransformer, util
from datasets import load_dataset
import PyPDF2
from pptx import Presentation
import re, random, json, warnings, os
import numpy as np
from tqdm import tqdm
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')
os.makedirs('./models', exist_ok=True)
os.makedirs('./results', exist_ok=True)

print("=" * 80)
print("COMBINED BART & T5 QUESTION GENERATION SYSTEM")
print("=" * 80)
print("\nV Setup complete")

COMBINED BART & T5 QUESTION GENERATION SYSTEM

V Setup complete


In [3]:
"""
Dataset preparation for both models
"""

class QDataset(Dataset):
    def __init__(self, ctx, q, ans, tokenizer, max_len=512):
        self.ctx, self.q, self.ans, self.tok = ctx, q, ans, tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.ctx)

    def __getitem__(self, idx):
        inp = f"generate question: context: {self.ctx[idx]} answer: {self.ans[idx]}"
        tgt = self.q[idx]

        inp_enc = self.tok(inp, max_length=self.max_len, padding='max_length',
                          truncation=True, return_tensors='pt')
        tgt_enc = self.tok(tgt, max_length=128, padding='max_length',
                          truncation=True, return_tensors='pt')

        labels = tgt_enc['input_ids'].squeeze()
        labels[labels == self.tok.pad_token_id] = -100

        return {
            'input_ids': inp_enc['input_ids'].squeeze(),
            'attention_mask': inp_enc['attention_mask'].squeeze(),
            'labels': labels
        }


def load_squad(n=20000, split='train'):
    print(f"\n[SQuAD] Loading {split} ({n} samples)...")
    ds = load_dataset('squad', split=split)
    ctx, q, ans = [], [], []

    for i, ex in enumerate(tqdm(ds, desc="SQuAD")):
        if i >= n: break
        if ex['answers']['text']:
            ctx.append(ex['context'])
            q.append(ex['question'])
            ans.append(ex['answers']['text'][0])

    print(f"V Loaded {len(ctx)} examples")
    return ctx, q, ans


def load_triviaqa(n=20000, split='train'):
    print(f"\n[TriviaQA] Loading {split} ({n} samples)...")
    ds = load_dataset('trivia_qa', 'rc.nocontext', split=split)
    ctx, q, ans = [], [], []

    for i, ex in enumerate(tqdm(ds, desc="TriviaQA")):
        if i >= n: break

        question = ex['question']
        answer = ex['answer']['value']
        context = "This question is about trivia knowledge."

        if 'search_results' in ex and ex['search_results']:
            sr = ex['search_results']
            if 'search_context' in sr and sr['search_context']:
                sc = sr['search_context']
                context = str(sc[0] if isinstance(sc, list) else sc)[:500]

        if context and answer and question:
            ctx.append(context)
            q.append(question)
            ans.append(answer)

    print(f"V Loaded {len(ctx)} examples")
    return ctx, q, ans

print("\nV Dataset classes defined")


V Dataset classes defined


In [4]:
"""
Enhanced BART with architectural modifications
"""

class EnhancedBART(nn.Module):
    def __init__(self, base='facebook/bart-base'):
        super().__init__()
        self.bart = BartForConditionalGeneration.from_pretrained(base)
        d = self.bart.config.d_model

        self.ctx_attn = nn.MultiheadAttention(d, 8, 0.1, batch_first=True)
        self.q_cls = nn.Sequential(nn.Linear(d, 512), nn.ReLU(), nn.Dropout(0.3),
                                    nn.Linear(512, 256), nn.ReLU(), nn.Dropout(0.2),
                                    nn.Linear(256, 4))
        self.refiner = nn.Sequential(nn.Linear(d, d), nn.LayerNorm(d),
                                      nn.ReLU(), nn.Dropout(0.1))

    def forward(self, input_ids, attention_mask, labels=None):
        enc = self.bart.model.encoder(input_ids, attention_mask)
        h = enc.last_hidden_state

        attn, _ = self.ctx_attn(h, h, h, ~attention_mask.bool())
        refined = self.refiner(attn + h)

        if labels is not None:
            out = self.bart(attention_mask=attention_mask,
                           encoder_outputs=(refined,), labels=labels)
            return out.loss
        return self.bart(attention_mask=attention_mask, encoder_outputs=(refined,))

print("\nV Enhanced BART model defined")


V Enhanced BART model defined


In [5]:
"""
T5 Model for question generation
"""

class T5QG:
    def __init__(self, model_name='t5-base'):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name).to(self.device)
        print(f"\nV T5 loaded on {self.device}")

print("\nV T5 wrapper defined")


V T5 wrapper defined


In [6]:
"""
Training utilities for both models
"""

def train_model(model, tokenizer, device, ctx, q, ans, name, epochs=3, bs=8):
    print(f"\n{'='*80}")
    print(f"TRAINING {name.upper()}")
    print(f"{'='*80}")

    dataset = QDataset(ctx, q, ans, tokenizer)
    loader = DataLoader(dataset, batch_size=bs, shuffle=True)

    opt = AdamW(model.parameters(), lr=5e-5)
    sched = get_linear_schedule_with_warmup(opt, 500, len(loader)*epochs)

    model.train()
    losses = []

    for epoch in range(epochs):
        epoch_loss = 0
        pbar = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}")

        for batch in pbar:
            inp = batch['input_ids'].to(device)
            mask = batch['attention_mask'].to(device)
            labs = batch['labels'].to(device)

            if hasattr(model, 'bart'):
                loss = model(inp, mask, labs)
            else:
                out = model(input_ids=inp, attention_mask=mask, labels=labs)
                loss = out.loss

            opt.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            sched.step()

            epoch_loss += loss.item()
            pbar.set_postfix({'loss': f'{loss.item():.4f}'})

        avg = epoch_loss / len(loader)
        losses.append(avg)
        print(f"V Epoch {epoch+1} Loss: {avg:.4f}")

    # Save
    os.makedirs(f'./models/{name}', exist_ok=True)
    if hasattr(model, 'bart'):
        torch.save(model.state_dict(), f'./models/{name}/model.pt')
    else:
        model.save_pretrained(f'./models/{name}')
        tokenizer.save_pretrained(f'./models/{name}')

    return losses


def evaluate_model(model, tokenizer, device, ctx, q, ans, n=100):
    model.eval()
    sbert = SentenceTransformer('all-MiniLM-L6-v2')
    generated = []

    for i in tqdm(range(min(n, len(ctx))), desc="Evaluating"):
        inp = f"generate question: context: {ctx[i]} answer: {ans[i]}"
        inputs = tokenizer(inp, max_length=512, padding='max_length',
                          truncation=True, return_tensors='pt').to(device)

        with torch.no_grad():
            if hasattr(model, 'bart'):
                out = model.bart.generate(inputs['input_ids'], max_length=128,
                                         num_beams=5, early_stopping=True)
            else:
                out = model.generate(inputs['input_ids'], max_length=128,
                                    num_beams=5, early_stopping=True)

        generated.append(tokenizer.decode(out[0], skip_special_tokens=True))

    # Metrics
    lengths = [len(g.split()) for g in generated]
    unique = len(set(generated)) / len(generated)

    embs = sbert.encode(generated)
    sims = []
    for i in range(len(embs)):
        for j in range(i+1, min(i+20, len(embs))):
            sims.append(util.cos_sim(embs[i], embs[j]).item())
    diversity = 1 - np.mean(sims) if sims else 1.0

    return {
        'avg_length': np.mean(lengths),
        'unique_ratio': unique,
        'diversity': diversity,
        'generated': generated
    }

print("\nV Training functions defined")


V Training functions defined


In [7]:
"""
Load all training and test data
"""

print("\n" + "="*80)
print("LOADING DATASETS (20K SAMPLES EACH)")
print("="*80)

# SQuAD
sq_train_ctx, sq_train_q, sq_train_ans = load_squad(20000, 'train')
sq_test_ctx, sq_test_q, sq_test_ans = load_squad(1000, 'validation')

# TriviaQA
tq_train_ctx, tq_train_q, tq_train_ans = load_triviaqa(20000, 'train')
tq_test_ctx, tq_test_q, tq_test_ans = load_triviaqa(1000, 'validation')

print("\n" + "="*80)
print("DATASET SUMMARY")
print("="*80)
print(f"SQuAD Train: {len(sq_train_ctx)} | Test: {len(sq_test_ctx)}")
print(f"TriviaQA Train: {len(tq_train_ctx)} | Test: {len(tq_test_ctx)}")


LOADING DATASETS (20K SAMPLES EACH)

[SQuAD] Loading train (20000 samples)...


README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

plain_text/validation-00000-of-00001.par(…):   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

SQuAD:  23%|██▎       | 20000/87599 [00:01<00:05, 13382.57it/s]


V Loaded 20000 examples

[SQuAD] Loading validation (1000 samples)...


SQuAD:   9%|▉         | 1000/10570 [00:00<00:00, 12374.48it/s]


V Loaded 1000 examples

[TriviaQA] Loading train (20000 samples)...


README.md: 0.00B [00:00, ?B/s]

Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

rc.nocontext/train-00000-of-00001.parque(…):   0%|          | 0.00/55.4M [00:00<?, ?B/s]

rc.nocontext/validation-00000-of-00001.p(…):   0%|          | 0.00/7.34M [00:00<?, ?B/s]

rc.nocontext/test-00000-of-00001.parquet:   0%|          | 0.00/1.20M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/138384 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/17944 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/17210 [00:00<?, ? examples/s]

TriviaQA:  14%|█▍        | 20000/138384 [00:03<00:20, 5738.76it/s]


V Loaded 20000 examples

[TriviaQA] Loading validation (1000 samples)...


Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

TriviaQA:   6%|▌         | 1000/17944 [00:00<00:02, 5759.90it/s]

V Loaded 1000 examples

DATASET SUMMARY
SQuAD Train: 20000 | Test: 1000
TriviaQA Train: 20000 | Test: 1000





In [8]:
"""
Train BART on both datasets
"""

print("\n" + "="*80)
print("PART 1: BART TRAINING")
print("="*80)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

# Train on SQuAD
print("\n[1/2] BART on SQuAD...")
bart_sq = EnhancedBART().to(device)
bart_sq_losses = train_model(bart_sq, bart_tokenizer, device,
                             sq_train_ctx, sq_train_q, sq_train_ans,
                             'bart_squad', epochs=3, bs=8)

# Evaluate
bart_sq_metrics = evaluate_model(bart_sq, bart_tokenizer, device,
                                 sq_test_ctx, sq_test_q, sq_test_ans, 100)
print(f"\nV BART-SQuAD Metrics:")
print(f"  Avg Length: {bart_sq_metrics['avg_length']:.2f}")
print(f"  Unique: {bart_sq_metrics['unique_ratio']:.2%}")
print(f"  Diversity: {bart_sq_metrics['diversity']:.4f}")

# Train on TriviaQA
print("\n[2/2] BART on TriviaQA...")
bart_tq = EnhancedBART().to(device)
bart_tq_losses = train_model(bart_tq, bart_tokenizer, device,
                             tq_train_ctx, tq_train_q, tq_train_ans,
                             'bart_triviaqa', epochs=3, bs=8)

# Evaluate
bart_tq_metrics = evaluate_model(bart_tq, bart_tokenizer, device,
                                 tq_test_ctx, tq_test_q, tq_test_ans, 100)
print(f"\nV BART-TriviaQA Metrics:")
print(f"  Avg Length: {bart_tq_metrics['avg_length']:.2f}")
print(f"  Unique: {bart_tq_metrics['unique_ratio']:.2%}")
print(f"  Diversity: {bart_tq_metrics['diversity']:.4f}")


PART 1: BART TRAINING


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]


[1/2] BART on SQuAD...


model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]


TRAINING BART_SQUAD


Epoch 1/3: 100%|██████████| 2500/2500 [06:58<00:00,  5.97it/s, loss=2.5759]


V Epoch 1 Loss: 4.8026


Epoch 2/3: 100%|██████████| 2500/2500 [06:56<00:00,  6.01it/s, loss=2.3058]


V Epoch 2 Loss: 2.4783


Epoch 3/3: 100%|██████████| 2500/2500 [06:57<00:00,  5.99it/s, loss=1.9035]


V Epoch 3 Loss: 1.9306


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Evaluating: 100%|██████████| 100/100 [00:17<00:00,  5.65it/s]



V BART-SQuAD Metrics:
  Avg Length: 13.42
  Unique: 36.00%
  Diversity: 0.4036

[2/2] BART on TriviaQA...

TRAINING BART_TRIVIAQA


Epoch 1/3: 100%|██████████| 2500/2500 [06:44<00:00,  6.19it/s, loss=3.4016]


V Epoch 1 Loss: 5.0749


Epoch 2/3: 100%|██████████| 2500/2500 [06:42<00:00,  6.21it/s, loss=3.2359]


V Epoch 2 Loss: 3.0709


Epoch 3/3: 100%|██████████| 2500/2500 [06:42<00:00,  6.21it/s, loss=2.6135]


V Epoch 3 Loss: 2.7021


Evaluating: 100%|██████████| 100/100 [00:17<00:00,  5.60it/s]


V BART-TriviaQA Metrics:
  Avg Length: 11.13
  Unique: 29.00%
  Diversity: 0.7302





In [None]:
"""
Train T5 on both datasets
"""

print("\n" + "="*80)
print("PART 2: T5 TRAINING")
print("="*80)

t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')

# Train on SQuAD
print("\n[1/2] T5 on SQuAD...")
t5_sq = T5ForConditionalGeneration.from_pretrained('t5-base').to(device)
t5_sq_losses = train_model(t5_sq, t5_tokenizer, device,
                           sq_train_ctx, sq_train_q, sq_train_ans,
                           't5_squad', epochs=3, bs=8)

# Evaluate
t5_sq_metrics = evaluate_model(t5_sq, t5_tokenizer, device,
                               sq_test_ctx, sq_test_q, sq_test_ans, 100)
print(f"\nV T5-SQuAD Metrics:")
print(f"  Avg Length: {t5_sq_metrics['avg_length']:.2f}")
print(f"  Unique: {t5_sq_metrics['unique_ratio']:.2%}")
print(f"  Diversity: {t5_sq_metrics['diversity']:.4f}")

# Train on TriviaQA
print("\n[2/2] T5 on TriviaQA...")
t5_tq = T5ForConditionalGeneration.from_pretrained('t5-base').to(device)
t5_tq_losses = train_model(t5_tq, t5_tokenizer, device,
                           tq_train_ctx, tq_train_q, tq_train_ans,
                           't5_triviaqa', epochs=3, bs=8)

# Evaluate
t5_tq_metrics = evaluate_model(t5_tq, t5_tokenizer, device,
                               tq_test_ctx, tq_test_q, tq_test_ans, 100)
print(f"\nV T5-TriviaQA Metrics:")
print(f"  Avg Length: {t5_tq_metrics['avg_length']:.2f}")
print(f"  Unique: {t5_tq_metrics['unique_ratio']:.2%}")
print(f"  Diversity: {t5_tq_metrics['diversity']:.4f}")


PART 2: T5 TRAINING


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565



[1/2] T5 on SQuAD...


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]


TRAINING T5_SQUAD


Epoch 1/3: 100%|██████████| 2500/2500 [12:44<00:00,  3.27it/s, loss=1.3398]


V Epoch 1 Loss: 1.6819


Epoch 2/3: 100%|██████████| 2500/2500 [12:44<00:00,  3.27it/s, loss=1.7899]


V Epoch 2 Loss: 1.3461


Epoch 3/3: 100%|██████████| 2500/2500 [12:44<00:00,  3.27it/s, loss=1.5337]


V Epoch 3 Loss: 1.2358


Evaluating: 100%|██████████| 100/100 [00:31<00:00,  3.21it/s]



V T5-SQuAD Metrics:
  Avg Length: 8.82
  Unique: 38.00%
  Diversity: 0.4684

[2/2] T5 on TriviaQA...

TRAINING T5_TRIVIAQA


Epoch 1/3: 100%|██████████| 2500/2500 [12:33<00:00,  3.32it/s, loss=3.1508]


V Epoch 1 Loss: 3.3502


Epoch 2/3: 100%|██████████| 2500/2500 [12:33<00:00,  3.32it/s, loss=3.1237]


V Epoch 2 Loss: 3.0081


Epoch 3/3:  32%|███▏      | 795/2500 [03:59<08:33,  3.32it/s, loss=2.8849]

In [None]:
"""
Process PDF/PPT and extract facts
"""

class DocProcessor:
    @staticmethod
    def extract_pdf(path, skip_first=True):
        text = []
        with open(path, 'rb') as f:
            pdf = PyPDF2.PdfReader(f)
            start = 1 if skip_first else 0
            for i in range(start, len(pdf.pages)):
                page_text = pdf.pages[i].extract_text()
                if page_text and len(page_text) > 100:
                    text.append(page_text)
        return ' '.join(text)

    @staticmethod
    def clean_text(text):
        patterns = [r'Session\s+\d+', r'Slide\s+\d+', r'Topic\s+\d+',
                   r'LEARNING OUTCOMES', r'www\.\S+']
        for p in patterns:
            text = re.sub(p, '', text, flags=re.IGNORECASE)
        return re.sub(r'\s+', ' ', text).strip()

    @staticmethod
    def extract_facts(text):
        sentences = [s.strip() for s in re.split(r'[.!?]+', text)
                    if len(s.strip()) > 50]
        facts = []
        for sent in sentences:
            if any(w in sent.lower() for w in ['is', 'are', 'means', 'defined']):
                if 50 < len(sent) < 300:
                    words = sent.split()[:10]
                    topic = ' '.join([w for w in words if len(w) > 4][:3])
                    facts.append({'text': sent, 'topic': topic, 'explanation': sent})
        return facts

print("\nV Document processor defined")

In [None]:
"""
Generate questions using trained models
"""

def generate_mcq_from_facts(facts, model, tokenizer, device, n=5):
    questions = []
    model.eval()

    for i, fact in enumerate(facts[:n*2]):
        if len(questions) >= n: break

        inp = f"generate question: context: {fact['text']} answer: {fact['topic']}"
        inputs = tokenizer(inp, max_length=512, padding='max_length',
                          truncation=True, return_tensors='pt').to(device)

        with torch.no_grad():
            if hasattr(model, 'bart'):
                out = model.bart.generate(inputs['input_ids'], max_length=128, num_beams=5)
            else:
                out = model.generate(inputs['input_ids'], max_length=128, num_beams=5)

        q_text = tokenizer.decode(out[0], skip_special_tokens=True)

        correct = fact['explanation'][:150]
        others = [f for f in facts if f != fact]
        distractors = random.sample(others, min(3, len(others)))
        dist_texts = [d['explanation'][:150] for d in distractors]

        opts = [correct] + dist_texts[:3]
        random.shuffle(opts)
        correct_letter = chr(65 + opts.index(correct))

        questions.append({
            'type': 'MCQ',
            'question': q_text,
            'options': {'A': opts[0], 'B': opts[1], 'C': opts[2], 'D': opts[3]},
            'correct': correct_letter
        })

    return questions


def generate_tf_from_facts(facts, n=3):
    questions = []
    for i, fact in enumerate(facts[:n*2]):
        if len(questions) >= n: break
        is_true = (i % 2 == 0)
        stmt = fact['text']
        if not is_true:
            if ' is ' in stmt:
                stmt = stmt.replace(' is ', ' is not ', 1)
        questions.append({
            'type': 'T/F',
            'statement': stmt,
            'correct': 'TRUE' if is_true else 'FALSE'
        })
    return questions


def export_quiz(questions, filename):
    with open(filename, 'w', encoding='utf-8') as f:
        f.write("="*80 + "\n")
        f.write("GENERATED QUIZ\n")
        f.write("="*80 + "\n")
        f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"Total Questions: {len(questions)}\n")
        f.write("="*80 + "\n\n")

        for i, q in enumerate(questions, 1):
            f.write(f"\n{'='*80}\nQuestion {i} [{q['type']}]\n{'='*80}\n\n")

            if q['type'] == 'MCQ':
                f.write(f"Q: {q['question']}\n\n")
                for k, v in q['options'].items():
                    f.write(f"  {k}) {v}\n")
                f.write(f"\nCorrect: {q['correct']}\n")
            elif q['type'] == 'T/F':
                f.write(f"Statement: {q['statement']}\n\nCorrect: {q['correct']}\n")

            f.write("\n" + "-"*80 + "\n")

    print(f"\nV Quiz exported: {filename}")

print("\nV Question generation functions defined")

In [None]:
"""
Generate quizzes using best models
"""

print("\n" + "="*80)
print("PART 3: QUIZ GENERATION")
print("="*80)

# UPDATE THIS PATH
pdf_path = r"C:\Users\User\Downloads\y2s1\DEEP LEARNING\slides\WEEK 2 - INTRODUCTION TO DEEP LEARNING.pdf"

try:
    # Process document
    raw = DocProcessor.extract_pdf(pdf_path)
    clean = DocProcessor.clean_text(raw)
    facts = DocProcessor.extract_facts(clean)
    print(f"\nV Extracted {len(facts)} facts from document")

    # Generate BART quiz
    print("\n[1/2] Generating BART quiz...")
    bart_questions = []
    bart_questions.extend(generate_mcq_from_facts(facts, bart_sq, bart_tokenizer, device, 5))
    bart_questions.extend(generate_tf_from_facts(facts, 3))
    export_quiz(bart_questions, 'quiz_bart.txt')

    # Generate T5 quiz
    print("\n[2/2] Generating T5 quiz...")
    t5_questions = []
    t5_questions.extend(generate_mcq_from_facts(facts, t5_sq, t5_tokenizer, device, 5))
    t5_questions.extend(generate_tf_from_facts(facts, 3))
    export_quiz(t5_questions, 'quiz_t5.txt')

    print("\nV Both quizzes generated successfully")

except FileNotFoundError:
    print(f"\nX File not found: {pdf_path}")
    print("Update the pdf_path variable")

In [None]:
"""
Create comparison plots
"""

print("\n" + "="*80)
print("PART 4: COMPREHENSIVE COMPARISON")
print("="*80)

# Prepare data
all_results = {
    'BART-SQuAD': {'losses': bart_sq_losses, 'metrics': bart_sq_metrics},
    'BART-TriviaQA': {'losses': bart_tq_losses, 'metrics': bart_tq_metrics},
    'T5-SQuAD': {'losses': t5_sq_losses, 'metrics': t5_sq_metrics},
    'T5-TriviaQA': {'losses': t5_tq_losses, 'metrics': t5_tq_metrics}
}

# Create comprehensive plot
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle('BART vs T5: Comprehensive Performance Comparison', fontsize=16, fontweight='bold')

# Plot 1: Training Loss Curves
for name, res in all_results.items():
    axes[0, 0].plot(range(1, len(res['losses'])+1), res['losses'],
                   marker='o', label=name, linewidth=2)
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('Loss')
axes[0, 0].set_title('Training Loss Comparison')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# Plot 2: Metric Comparison (Avg Length)
names = list(all_results.keys())
lengths = [all_results[n]['metrics']['avg_length'] for n in names]
axes[0, 1].bar(range(len(names)), lengths, alpha=0.7)
axes[0, 1].set_xticks(range(len(names)))
axes[0, 1].set_xticklabels(names, rotation=45, ha='right')
axes[0, 1].set_ylabel('Words')
axes[0, 1].set_title('Average Question Length')
axes[0, 1].grid(True, alpha=0.3, axis='y')

# Plot 3: Unique Ratio
unique_ratios = [all_results[n]['metrics']['unique_ratio'] for n in names]
axes[0, 2].bar(range(len(names)), unique_ratios, alpha=0.7, color='green')
axes[0, 2].set_xticks(range(len(names)))
axes[0, 2].set_xticklabels(names, rotation=45, ha='right')
axes[0, 2].set_ylabel('Ratio')
axes[0, 2].set_title('Question Uniqueness')
axes[0, 2].grid(True, alpha=0.3, axis='y')

# Plot 4: Diversity Score
diversity_scores = [all_results[n]['metrics']['diversity'] for n in names]
axes[1, 0].bar(range(len(names)), diversity_scores, alpha=0.7, color='orange')
axes[1, 0].set_xticks(range(len(names)))
axes[1, 0].set_xticklabels(names, rotation=45, ha='right')
axes[1, 0].set_ylabel('Score')
axes[1, 0].set_title('Question Diversity')
axes[1, 0].grid(True, alpha=0.3, axis='y')

# Plot 5: Dataset Comparison (BART)
bart_metrics = ['avg_length', 'unique_ratio', 'diversity']
bart_sq_vals = [bart_sq_metrics[m] for m in bart_metrics]
bart_tq_vals = [bart_tq_metrics[m] for m in bart_metrics]
x = np.arange(len(bart_metrics))
axes[1, 1].bar(x-0.2, bart_sq_vals, 0.4, label='SQuAD', alpha=0.7)
axes[1, 1].bar(x+0.2, bart_tq_vals, 0.4, label='TriviaQA', alpha=0.7)
axes[1, 1].set_xticks(x)
axes[1, 1].set_xticklabels(['Length', 'Unique', 'Diversity'])
axes[1, 1].set_title('BART: Dataset Comparison')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3, axis='y')

# Plot 6: Dataset Comparison (T5)
t5_sq_vals = [t5_sq_metrics[m] for m in bart_metrics]
t5_tq_vals = [t5_tq_metrics[m] for m in bart_metrics]
axes[1, 2].bar(x-0.2, t5_sq_vals, 0.4, label='SQuAD', alpha=0.7)
axes[1, 2].bar(x+0.2, t5_tq_vals, 0.4, label='TriviaQA', alpha=0.7)
axes[1, 2].set_xticks(x)
axes[1, 2].set_xticklabels(['Length', 'Unique', 'Diversity'])
axes[1, 2].set_title('T5: Dataset Comparison')
axes[1, 2].legend()
axes[1, 2].grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('./results/comprehensive_comparison.png', dpi=300, bbox_inches='tight')
print("\nV Comparison plot saved: ./results/comprehensive_comparison.png")
plt.show()

In [None]:
"""
Print comprehensive summary
"""

print("\n" + "="*80)
print("FINAL SUMMARY REPORT")
print("="*80)


print("\n[1] DATASET COMPARISON")
print(f"{'Model':<15} {'Dataset':<15} {'Avg Len':<10} {'Unique':<10} {'Diversity'}")

for name in names:
    m = all_results[name]['metrics']
    model, dataset = name.split('-')
    print(f"{model:<15} {dataset:<15} {m['avg_length']:<10.2f} "
          f"{m['unique_ratio']:<10.2%} {m['diversity']:.4f}")

print("\n[2] MODEL COMPARISON (Overall Diversity)")

bart_avg = (bart_sq_metrics['diversity'] + bart_tq_metrics['diversity']) / 2
t5_avg = (t5_sq_metrics['diversity'] + t5_tq_metrics['diversity']) / 2

print(f"BART Average Diversity: {bart_avg:.4f}")
print(f"T5 Average Diversity : {t5_avg:.4f}")

better_model = "BART" if bart_avg > t5_avg else "T5"
print(f"\nBetter Overall Model: {better_model}")

print("\n[3] DATASET ROBUSTNESS")

bart_robust = abs(bart_sq_metrics['diversity'] - bart_tq_metrics['diversity'])
t5_robust = abs(t5_sq_metrics['diversity'] - t5_tq_metrics['diversity'])

print(f"BART Diversity Variance: {bart_robust:.4f}")
print(f"T5 Diversity Variance : {t5_robust:.4f}")

more_robust = "BART" if bart_robust < t5_robust else "T5"
print(f"\nMore Robust Model: {more_robust}")

print("\n[4] FINAL CONCLUSION")

if better_model == more_robust:
    print(f"{better_model} demonstrates both higher question diversity "
          f"and stronger robustness across datasets.")
else:
    print(f"{better_model} produces more diverse questions, while "
          f"{more_robust} is more stable across datasets.")

