In [None]:
!pip install evaluate datasets nltk transformers

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [None]:
import nltk
# Fetch the NLTK 'punkt' tokenizer data; download() returns True on success,
# which is the value echoed as this cell's output.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import os
import torch
import numpy as np
import pandas as pd
import nltk
import evaluate
from datasets import load_dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
)

# Base checkpoint from which both the tokenizer and the seq2seq model are loaded.
model_checkpoint = "t5-small"

# Training / validation CSV locations (paths under /content/drive).
short_qna_train_path = "/content/drive/MyDrive/Testing/New/short_train.csv"
short_qna_val_path   = "/content/drive/MyDrive/Testing/New/short_val.csv"

# Root folder under which every fine-tuned model gets its own sub-directory.
base_drive_output_path = "/content/drive/MyDrive/Testing/New/model_outputs"

# Checkpoints, logs and the saved short Q&A model are written below this path.
short_qna_output_dir = os.path.join(base_drive_output_path, "short_qna_finetuned")

# Preprocessing Parameters
max_input_length = 750    # token cap applied to "<input_prefix><context>"
max_target_length = 128   # token cap applied to the formatted target; also used as generation_max_length
input_prefix = "generate question and answer: context: "   # prepended to every context
output_structure = "question: {} answer: {}"               # filled with (question, answer)

# Training Parameters
batch_size = 12           # per-device train batch size; evaluation uses twice this value
learning_rate = 5e-5
num_train_epochs = 6
weight_decay = 0.01
logging_steps = 100       # also the fallback value for the save_steps estimate
save_steps_ratio = 0.2    # fraction of an epoch used when estimating save_steps

# nltk.data.find() reports a missing resource by raising LookupError, so that
# is the exception that must trigger the download fallback.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    print("Downloading NLTK punkt tokenizer...")
    nltk.download('punkt', quiet=True)

# Create the base output directory on first use. exist_ok=True keeps the call
# safe if the directory appears between the check and the creation; the
# message is still only printed when the directory was missing.
if not os.path.exists(base_drive_output_path):
    os.makedirs(base_drive_output_path, exist_ok=True)
    print(f"Created base output directory: {base_drive_output_path}")

# Echo the effective short Q&A configuration, one setting per line.
print(
    f"Configuration for SHORT Q&A:",
    f"  Model Checkpoint: {model_checkpoint}",
    f"  Training CSV: {short_qna_train_path}",
    f"  Validation CSV: {short_qna_val_path}",
    f"  Output Directory (for checkpoints & logs): {short_qna_output_dir}",
    f"  Max Input Length: {max_input_length}",
    f"  Max Target Length: {max_target_length}",
    f"  Batch Size: {batch_size}",
    f"  Epochs: {num_train_epochs}",
    sep="\n",
)

Configuration for SHORT Q&A:
  Model Checkpoint: t5-small
  Training CSV: /content/drive/MyDrive/Testing/New/short_train.csv
  Validation CSV: /content/drive/MyDrive/Testing/New/short_val.csv
  Output Directory (for checkpoints & logs): /content/drive/MyDrive/Testing/New/model_outputs/short_qna_finetuned
  Max Input Length: 750
  Max Target Length: 128
  Batch Size: 12
  Epochs: 6


In [None]:
import pandas as pd
from datasets import Dataset, DatasetDict

# Check if files exist before loading
if not os.path.exists(short_qna_train_path):
    raise FileNotFoundError(f"Training file not found: {short_qna_train_path}")
if not os.path.exists(short_qna_val_path):
    raise FileNotFoundError(f"Validation file not found: {short_qna_val_path}")

print("Loading custom short Q&A datasets from CSV using pandas...")

try:
    # Load CSVs using pandas
    train_df_short = pd.read_csv(short_qna_train_path)
    val_df_short = pd.read_csv(short_qna_val_path)

    # Strip surrounding whitespace from header names. Preprocessing looks the
    # columns up by their exact names ('context', 'question', 'answer'), so a
    # header such as " context" must be normalised here; merely tolerating it
    # in the column check would leave preprocessing with nothing to read.
    train_df_short.columns = [
        name.strip() if isinstance(name, str) else name for name in train_df_short.columns
    ]
    val_df_short.columns = [
        name.strip() if isinstance(name, str) else name for name in val_df_short.columns
    ]

    # Convert pandas DataFrames to datasets.Dataset objects
    train_dataset_short = Dataset.from_pandas(train_df_short)
    val_dataset_short = Dataset.from_pandas(val_df_short)

    # Create a DatasetDict
    raw_datasets = DatasetDict({
        "train": train_dataset_short,
        "validation": val_dataset_short
    })
    print("Successfully loaded datasets using pandas.")

except Exception as e:
    print(f"Error loading CSVs with pandas: {e}")
    print("Please ensure your CSV files are correctly formatted and paths are correct.")
    raise

print("\nDataset structure:")
print(raw_datasets)
print("\nSample training example:")
if len(raw_datasets["train"]) > 0:
    print(raw_datasets["train"][0])
else:
    print("Warning: Short Q&A training dataset is empty.")


# Every non-empty split must expose the columns preprocessing reads. Header
# names were already stripped above, so an exact-name comparison is enough.
required_columns = ['context', 'question', 'answer']
for split in raw_datasets.keys():
    if len(raw_datasets[split]) > 0:
        for col in required_columns:
            if col not in raw_datasets[split].column_names:
                raise ValueError(
                    f"Missing required column '{col}' in '{split}' split. "
                    f"Available columns: {raw_datasets[split].column_names}"
                )
    else:
        print(f"Warning: Short Q&A '{split}' split is empty. Skipping column check.")

Loading custom short Q&A datasets from CSV using pandas...
Successfully loaded datasets using pandas.

Dataset structure:
DatasetDict({
    train: Dataset({
        features: ['context_id', 'question_id', 'context', 'question', 'answer'],
        num_rows: 2394
    })
    validation: Dataset({
        features: ['context_id', 'question_id', 'context', 'question', 'answer'],
        num_rows: 587
    })
})

Sample training example:
{'context_id': 'C001', 'question_id': 'C001_Q1', 'context': 'Dynamic typing checks types at runtime. Functional programming emphasizes pure functions and immutability. Object-oriented programming organizes code into classes and objects. Static typing enforces type rules at compile time. Interpreted languages are executed line by line by an interpreter. Programming languages provide syntax and semantics to write software programs. Memory management can be manual or automatic. High-level languages are easier for humans to read and write.', 'question': 'Describe

In [None]:
print(f"\nLoading tokenizer for {model_checkpoint}...")
# Tokenizer for the base checkpoint. It is kept as a notebook-level global:
# preprocess_short_qna and compute_metrics_short_qna both read it by name.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

def preprocess_short_qna(examples):
    """Prepares custom Q&A data for T5 fine-tuning.

    For every record the model input is ``input_prefix`` followed by the
    stripped context, and the target is ``output_structure`` filled with the
    stripped question and answer. Both are tokenized with the notebook-level
    ``tokenizer`` (padded/truncated to ``max_input_length`` and
    ``max_target_length``), and padding positions in the labels are replaced
    by -100.

    Args:
        examples: Batch mapping with 'context', 'question' and 'answer' keys,
            each holding a list of values.

    Returns:
        The tokenized inputs with an added "labels" entry. Records whose
        context, question or answer is not a string are skipped with a
        warning, so the result can contain fewer rows than the batch.
    """
    contexts = examples.get('context', [])
    questions = examples.get('question', [])
    answers = examples.get('answer', [])

    if not (len(contexts) == len(questions) == len(answers)):
        print(f"Warning: Mismatch in lengths: contexts ({len(contexts)}), questions ({len(questions)}), answers ({len(answers)})")
        min_len = min(len(contexts), len(questions), len(answers))
        contexts, questions, answers = contexts[:min_len], questions[:min_len], answers[:min_len]

    inputs = []
    targets = []
    for context, question, answer in zip(contexts, questions, answers):
        if not all(isinstance(item, str) for item in [context, question, answer]):
            print(f"Warning: Skipping record due to non-string data: Context type {type(context)}, Q type {type(question)}, A type {type(answer)}")
            continue # Skip this record

        inputs.append(f"{input_prefix}{context.strip()}")
        targets.append(output_structure.format(question.strip(), answer.strip()))

    if not inputs:
        # Nothing usable in this batch: hand back empty columns instead of
        # passing an empty list to the tokenizer.
        return {"input_ids": [], "attention_mask": [], "labels": []}

    # Tokenize Inputs
    model_inputs = tokenizer(inputs,
                             max_length=max_input_length,
                             padding="max_length",
                             truncation=True)

    # Tokenize targets through `text_target`, the replacement for the
    # deprecated `as_target_tokenizer()` context manager.
    labels = tokenizer(text_target=targets,
                       max_length=max_target_length,
                       padding="max_length",
                       truncation=True)

    # Padding positions become -100 in the labels.
    label_pad_token_id = -100
    pad_token_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [(token_id if token_id != pad_token_id else label_pad_token_id) for token_id in label_ids]
        for label_ids in labels["input_ids"]
    ]

    return model_inputs

print("\nApplying preprocessing to the datasets...")
# Drop every raw column so only the tokenized fields remain in the result.
tokenized_datasets = raw_datasets.map(
    preprocess_short_qna,
    remove_columns=raw_datasets["train"].column_names,
    batched=True,
)
print("Preprocessing finished.")


# Sanity check: decode the first training example back to text.
print("\nSample Processed Input (decoded):")
print(tokenizer.decode(tokenized_datasets['train'][0]['input_ids'], skip_special_tokens=False))
print("\nSample Processed Label (decoded):")
# -100 marks masked label positions and cannot be decoded, so filter it out first.
label_ids_short = list(
    filter(lambda token_id: token_id != -100, tokenized_datasets['train'][0]['labels'])
)
print(tokenizer.decode(label_ids_short, skip_special_tokens=False))


Loading tokenizer for t5-small...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]


Applying preprocessing to the datasets...


Map:   0%|          | 0/2394 [00:00<?, ? examples/s]



Map:   0%|          | 0/587 [00:00<?, ? examples/s]

Preprocessing finished.

Sample Processed Input (decoded):
generate question and answer: context: Dynamic typing checks types at runtime. Functional programming emphasizes pure functions and immutability. Object-oriented programming organizes code into classes and objects. Static typing enforces type rules at compile time. Interpreted languages are executed line by line by an interpreter. Programming languages provide syntax and semantics to write software programs. Memory management can be manual or automatic. High-level languages are easier for humans to read and write.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pa

In [None]:
print(f"\nLoading base model '{model_checkpoint}' for Short Q&A training...")
# Pretrained seq2seq model for the base checkpoint; this is the object later
# handed to the data collator and the trainer.
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
print("Base model loaded.")


Loading base model 't5-small' for Short Q&A training...


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Base model loaded.


In [None]:
print(f"\nSetting up Training Arguments. Output dir: {short_qna_output_dir}")

train_dataset_size = len(tokenized_datasets["train"])
# Number of devices sharing each step; a CPU-only runtime counts as one.
num_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 1
num_gpus = max(num_gpus, 1)

# Ceiling division: a partial final batch counts as a step, but a dataset that
# divides evenly is no longer over-counted by one.
steps_per_epoch = -(-train_dataset_size // (batch_size * num_gpus))
# NOTE: save_steps is only an estimate kept for reference. It is not passed to
# the arguments below, which checkpoint once per epoch (save_strategy="epoch").
save_steps = int(steps_per_epoch * save_steps_ratio)
if save_steps < 10: save_steps = logging_steps

args = Seq2SeqTrainingArguments(
    output_dir=short_qna_output_dir,
    eval_strategy="epoch",
    learning_rate=learning_rate,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size * 2,
    weight_decay=weight_decay,
    save_total_limit=3,
    num_train_epochs=num_train_epochs,
    predict_with_generate=True,          # evaluation generates text so ROUGE can be computed
    fp16=torch.cuda.is_available(),
    logging_dir=os.path.join(short_qna_output_dir, "logs"),
    logging_strategy="steps",
    logging_steps=logging_steps,
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="tensorboard",
    generation_max_length=max_target_length
)

print("Setting up Data Collator...")
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
print("Setup complete.")


Setting up Training Arguments. Output dir: /content/drive/MyDrive/Testing/New/model_outputs/short_qna_finetuned
Setting up Data Collator...
Setup complete.


In [None]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=9ff73934ac4c056de757b1047df6abf23d36cf3133833b1ef5086aae99debbc9
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [None]:
print("\nSetting up evaluation metrics (ROUGE)...")
# ROUGE metric object from the `evaluate` library; compute_metrics_short_qna
# reads it as a notebook-level global.
rouge_metric = evaluate.load("rouge")

def compute_metrics_short_qna(eval_pred):
    """Score generated sequences against reference labels with ROUGE.

    Args:
        eval_pred: A ``(predictions, labels)`` pair of token-id arrays in
            which -100 marks positions to ignore.

    Returns:
        A dict with every ROUGE score scaled to 0-100 plus ``gen_len`` (mean
        count of non-pad prediction tokens), all rounded to four decimals.
    """
    pad_id = tokenizer.pad_token_id
    raw_predictions, raw_labels = eval_pred

    def _decode_to_sentence_lines(token_ids):
        # Decode ids to text, then put each sentence on its own line.
        texts = tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return ["\n".join(nltk.sent_tokenize(text.strip())) for text in texts]

    # -100 is not a real token id, so swap it for the pad id before decoding.
    predictions = np.where(raw_predictions == -100, pad_id, raw_predictions)
    labels = np.where(raw_labels == -100, pad_id, raw_labels)

    scores = rouge_metric.compute(
        predictions=_decode_to_sentence_lines(predictions),
        references=_decode_to_sentence_lines(labels),
        use_stemmer=True,
    )

    metrics = {name: score * 100 for name, score in scores.items()}
    metrics["gen_len"] = np.mean(
        [np.count_nonzero(row != pad_id) for row in predictions]
    )

    return {name: round(value, 4) for name, value in metrics.items()}

print("Metrics setup complete.")


Setting up evaluation metrics (ROUGE)...


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Metrics setup complete.


In [None]:
import nltk
nltk.download('punkt_tab') # Ensure this is available for the metrics function

print("\nSetting up Trainer for Short Q&A task...")
trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    # `processing_class` is the replacement for the trainer's deprecated
    # `tokenizer` argument; the tokenizer is still saved next to the model.
    processing_class=tokenizer,
    compute_metrics=compute_metrics_short_qna
)

print("\nStarting training for Short Q&A Generation...")
try:
    train_result = trainer.train()
except Exception as e:
    print(f"An error occurred during training: {e}")
    if torch.cuda.is_available():
        print("Attempting to clear CUDA cache...")
        torch.cuda.empty_cache()
    # Bare raise re-raises the active exception with its traceback untouched.
    raise

print("\nTraining finished.")

# === PATH CORRECTION FOR SAVING BEST MODEL ===
# Save the final model (which is the best if load_best_model_at_end=True)
# to the main output directory for this model.
trainer.save_model() # Saves to args.output_dir (short_qna_output_dir)
print(f"Final Short Q&A model (potentially best) saved to {short_qna_output_dir}")

metrics = train_result.metrics
trainer.log_metrics("train_short", metrics) # Differentiate metrics log
trainer.save_metrics("train_short", metrics) # Differentiate metrics file
trainer.save_state()

# Also explicitly save the best model to a dedicated 'best_model' subfolder
# This makes loading for inference cleaner.
short_qna_best_model_path = os.path.join(short_qna_output_dir, "best_model")
os.makedirs(short_qna_best_model_path, exist_ok=True)
trainer.save_model(short_qna_best_model_path)
print(f"Best Short Q&A Model explicitly saved to {short_qna_best_model_path}")
# === PATH CORRECTION END ===

# Release GPU memory held by the training objects. After this, `model` and
# `trainer` no longer exist in the kernel when CUDA is available.
if torch.cuda.is_available():
    del model
    if 'trainer' in globals(): del trainer
    torch.cuda.empty_cache()
print("Training process complete for Short Q&A.")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
  trainer = Seq2SeqTrainer(



Setting up Trainer for Short Q&A task...

Starting training for Short Q&A Generation...


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


KeyboardInterrupt: 

In [None]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import re
import os # Ensure os is imported

# --- Configuration for Short Q&A Inference ---
# === PATH CORRECTION START ===
# short_qna_output_dir should be defined from Cell 3.
# If running this cell independently, redefine short_qna_output_dir or base_drive_output_path:
# base_drive_output_path = "/content/drive/MyDrive/Testing/New/model_outputs"
# short_qna_output_dir = os.path.join(base_drive_output_path, "short_qna_finetuned")

# Directory the training cell writes its explicit "best_model" copy to.
short_qna_model_load_path = os.path.join(short_qna_output_dir, "best_model")
# === PATH CORRECTION END ===

# NOTE(review): model_checkpoint_inf is not read by the inference code in this
# cell; the model and tokenizer are loaded from short_qna_model_load_path.
model_checkpoint_inf = "t5-small" # Should match the trained model base
# The next two values duplicate the training-time max_input_length and
# input_prefix; keep them in sync by hand.
max_input_length_inf = 750
input_prefix_inf = "generate question and answer: context: "
# Generation settings passed to model.generate() in generate_short_qna.
inf_max_output_length = 128
inf_num_beams = 4
inf_early_stopping = True
inf_no_repeat_ngram_size = 2
# ---

# Fail early with a descriptive message when the saved model is missing.
if not os.path.exists(short_qna_model_load_path):
    raise FileNotFoundError(f"Fine-tuned Short Q&A model directory not found: {short_qna_model_load_path}. Please ensure training completed and saved the model to this specific path.")

print(f"Loading fine-tuned Short Q&A model and tokenizer from: {short_qna_model_load_path}")
tokenizer_inf_short = AutoTokenizer.from_pretrained(short_qna_model_load_path)
model_inf_short = AutoModelForSeq2SeqLM.from_pretrained(short_qna_model_load_path)

# Use the GPU when one is available, otherwise the CPU.
device_inf_short = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Use a distinct device var
model_inf_short.to(device_inf_short)
model_inf_short.eval()  # switch the model to evaluation mode for inference
print(f"Short Q&A Inference model moved to device: {device_inf_short}")

def _parse_generated_qna(generated_text):
    """Split decoded model output into a question/answer pair.

    The expected form is "question: ... answer: ...". If the text does not
    start with that form, a fallback splits on the first "answer:" (ignoring
    case) and removes any "question:" marker, keeping the original casing of
    the generated text.

    Returns:
        ``{"question": ..., "answer": ...}``, or ``None`` when no non-empty
        pair can be extracted.
    """
    match = re.match(r"question:\s*(.*?)\s*answer:\s*(.*)", generated_text, re.IGNORECASE | re.DOTALL)
    if match:
        print("Parsing successful.")
        return {"question": match.group(1).strip(), "answer": match.group(2).strip()}

    print("Error: Could not parse the generated output.")
    # Case-insensitive split instead of lower-casing the whole text, so the
    # recovered question and answer are not returned in lower case.
    parts = re.split(r"answer:", generated_text, maxsplit=1, flags=re.IGNORECASE)
    if len(parts) == 2:
        q_part = re.sub(r"question:", "", parts[0], flags=re.IGNORECASE).strip()
        a_part = parts[1].strip()
        if q_part and a_part:
            print("Fallback parsing attempted.")
            return {"question": q_part, "answer": a_part}
    return None


def generate_short_qna(context):
    """Generate one question/answer pair for a context passage.

    The context is prefixed with ``input_prefix_inf``, tokenized with
    ``tokenizer_inf_short`` and passed to ``model_inf_short.generate`` using
    the ``inf_*`` generation settings; the decoded text is then parsed.

    Args:
        context: Passage to generate a question and answer from.

    Returns:
        ``{"question": ..., "answer": ...}`` on success, or ``None`` when the
        context is empty / not a string or the output cannot be parsed.
    """
    if not context or not isinstance(context, str):
        print("Error: Invalid context provided.")
        return None
    input_text = f"{input_prefix_inf}{context.strip()}"
    inputs = tokenizer_inf_short(input_text,
                                 max_length=max_input_length_inf,
                                 padding=True,
                                 truncation=True,
                                 return_tensors="pt")
    input_ids = inputs.input_ids.to(device_inf_short)
    attention_mask = inputs.attention_mask.to(device_inf_short)

    print(f"\nGenerating short question and answer...")
    with torch.no_grad():
        outputs = model_inf_short.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=inf_max_output_length,
            num_beams=inf_num_beams,
            early_stopping=inf_early_stopping,
            no_repeat_ngram_size=inf_no_repeat_ngram_size,
            num_return_sequences=1
        )
    generated_text = tokenizer_inf_short.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    print(f"Raw generated output: {generated_text}")

    return _parse_generated_qna(generated_text)

# Demo passage used for a quick end-to-end check of generate_short_qna.
passage_example = """
The first computer-like device was created in 1822
by Charles Babbage, an English mathematician
and inventor. Babbage's machine, known as the
"Difference Engine," was designed to calculate
mathematical tables automatically. Although the
device was never completed, it laid the foundation for the development of future
computing machines.
"""
print("\n--- Example Short Q&A Inference ---")
# Only the first 200 characters of the passage are echoed.
print(f"Input Passage:\n{passage_example[:200]}...")
qna_pair = generate_short_qna(passage_example)
if not qna_pair:
    print("\nFailed to generate a valid short Q&A pair.")
else:
    print("\nGenerated Short Q&A Pair:")
    print(f"  Q: {qna_pair['question']}")
    print(f"  A: {qna_pair['answer']}")
print("\n--- Short Q&A Inference Cell Complete ---")

# Optional: Clean up
# if torch.cuda.is_available():
#     if 'model_inf_short' in globals(): del model_inf_short
#     if 'tokenizer_inf_short' in globals(): del tokenizer_inf_short
#     torch.cuda.empty_cache()

Loading fine-tuned Short Q&A model and tokenizer from: /content/drive/MyDrive/Testing/New/model_outputs/short_qna_finetuned/best_model
Short Q&A Inference model moved to device: cpu

--- Example Short Q&A Inference ---
Input Passage:

The first computer-like device was created in 1822
by Charles Babbage, an English mathematician
and inventor. Babbage's machine, known as the
"Difference Engine," was designed to calculate
mathematic...

Generating short question and answer...
Raw generated output: question: What is the purpose of the "Difference Engine"? answer: The first computer-like device was created by Charles Babbage, an English mathematician and inventor.
Parsing successful.

Generated Short Q&A Pair:
  Q: What is the purpose of the "Difference Engine"?
  A: The first computer-like device was created by Charles Babbage, an English mathematician and inventor.

--- Short Q&A Inference Cell Complete ---


In [None]:
import nltk
# Fetch the NLTK 'punkt_tab' data; the cell output reports it as already up to date.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
import os # Ensure os is imported if this cell is run standalone
import torch
import numpy as np
import pandas as pd
import nltk
import evaluate
from datasets import load_dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
)

# --- Configuration for LONG Q&A ---
model_checkpoint_long = "t5-small" # Can be same as short, or different if needed

# Dataset Paths
long_qna_train_path = "/content/drive/MyDrive/Testing/New/long_train.csv"
long_qna_val_path   = "/content/drive/MyDrive/Testing/New/long_val.csv"

# === PATH CORRECTION START ===
# Base output directory on Drive (should be the same as defined in Cell 3 for consistency)
# If Cell 3 was not run in this session, define it here:
if 'base_drive_output_path' not in globals():
    base_drive_output_path = "/content/drive/MyDrive/Testing/New/model_outputs"
    # Only reached when the variable had to be defined here; the directory is
    # created on demand in that case.
    if not os.path.exists(base_drive_output_path):
        os.makedirs(base_drive_output_path)
        print(f"Created base output directory: {base_drive_output_path}")


# Output Directory for the fine-tuned LONG Q&A model
long_qna_output_dir = os.path.join(base_drive_output_path, "long_qna_finetuned")
# === PATH CORRECTION END ===

# Preprocessing Parameters
max_input_length_long = 750    # token cap applied to "<input_prefix_long><context>"
max_target_length_long = 256 # Increased for long answers
input_prefix_long = "generate question and answer: context: "   # prepended to every context
output_structure_long = "question: {} answer: {}"               # filled with (question, answer)

# Training Parameters
batch_size_long = 8 # Smaller than the short Q&A batch size of 12
learning_rate_long = 5e-5
num_train_epochs_long = 6
weight_decay_long = 0.01
logging_steps_long = 100
save_steps_ratio_long = 0.2
# --- End Configuration ---

# NLTK data (punkt likely already downloaded).
# nltk.data.find() reports a missing resource by raising LookupError, so that
# is the exception that must trigger each download fallback.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt', quiet=True)
try:
    nltk.data.find('tokenizers/punkt_tab') # For metrics function later
except LookupError:
    nltk.download('punkt_tab', quiet=True)


# Echo the effective long Q&A configuration, one setting per line.
print(
    f"Configuration for LONG Q&A:",
    f"  Model Checkpoint: {model_checkpoint_long}",
    f"  Training CSV: {long_qna_train_path}",
    f"  Validation CSV: {long_qna_val_path}",
    f"  Output Directory (for checkpoints & logs): {long_qna_output_dir}",
    f"  Max Input Length: {max_input_length_long}",
    f"  Max Target Length: {max_target_length_long}",
    f"  Batch Size: {batch_size_long}",
    f"  Epochs: {num_train_epochs_long}",
    sep="\n",
)

Configuration for LONG Q&A:
  Model Checkpoint: t5-small
  Training CSV: /content/drive/MyDrive/Testing/New/long_train.csv
  Validation CSV: /content/drive/MyDrive/Testing/New/long_val.csv
  Output Directory (for checkpoints & logs): /content/drive/MyDrive/Testing/New/model_outputs/long_qna_finetuned
  Max Input Length: 750
  Max Target Length: 256
  Batch Size: 8
  Epochs: 6


In [None]:
import pandas as pd # Make sure pandas is imported
from datasets import Dataset, DatasetDict # Make sure Dataset and DatasetDict are imported

# Check if files exist before loading
if not os.path.exists(long_qna_train_path): # Uses long_qna_train_path from Cell 13
    raise FileNotFoundError(f"Training file not found: {long_qna_train_path}")
if not os.path.exists(long_qna_val_path):   # Uses long_qna_val_path from Cell 13
    raise FileNotFoundError(f"Validation file not found: {long_qna_val_path}")

print("Loading custom LONG Q&A datasets from CSV using pandas...")

try:
    # Load CSVs using pandas
    train_df_long = pd.read_csv(long_qna_train_path)
    val_df_long = pd.read_csv(long_qna_val_path)

    # Strip surrounding whitespace from header names. Preprocessing looks the
    # columns up by their exact names ('context', 'question', 'answer'), so a
    # header such as " context" must be normalised here; merely tolerating it
    # in the column check would leave preprocessing with nothing to read.
    train_df_long.columns = [
        name.strip() if isinstance(name, str) else name for name in train_df_long.columns
    ]
    val_df_long.columns = [
        name.strip() if isinstance(name, str) else name for name in val_df_long.columns
    ]

    # Convert pandas DataFrames to datasets.Dataset objects
    train_dataset_long = Dataset.from_pandas(train_df_long)
    val_dataset_long = Dataset.from_pandas(val_df_long)

    # Create a DatasetDict, assign to raw_datasets_long
    raw_datasets_long = DatasetDict({
        "train": train_dataset_long,
        "validation": val_dataset_long
    })
    print("Successfully loaded LONG Q&A datasets using pandas.")

except Exception as e:
    print(f"Error loading LONG Q&A CSVs with pandas: {e}")
    print("Please ensure your CSV files are correctly formatted and paths are correct.")
    raise


# Optional: Inspect the loaded data
print("\nLong Q&A Dataset structure:")
print(raw_datasets_long)
print("\nSample LONG Q&A training example:")
if len(raw_datasets_long["train"]) > 0:
    print(raw_datasets_long["train"][0])
else:
    print("Warning: Long Q&A training dataset is empty.")

# Every non-empty split must expose the columns preprocessing reads. Header
# names were already stripped above, so an exact-name comparison is enough.
required_columns_long = ['context', 'question', 'answer']
for split in raw_datasets_long.keys():
    if len(raw_datasets_long[split]) > 0:
        for col in required_columns_long:
            if col not in raw_datasets_long[split].column_names:
                raise ValueError(
                    f"Missing required column '{col}' in LONG Q&A '{split}' split. "
                    f"Available columns: {raw_datasets_long[split].column_names}"
                )
    else:
        print(f"Warning: Long Q&A '{split}' split is empty. Skipping column check.")

Loading custom LONG Q&A datasets from CSV using pandas...
Successfully loaded LONG Q&A datasets using pandas.

Long Q&A Dataset structure:
DatasetDict({
    train: Dataset({
        features: ['context_id', 'question_id', 'context', 'question', 'answer'],
        num_rows: 2413
    })
    validation: Dataset({
        features: ['context_id', 'question_id', 'context', 'question', 'answer'],
        num_rows: 578
    })
})

Sample LONG Q&A training example:
{'context_id': 'C001', 'question_id': 'C001_Q1', 'context': 'A central challenge in Machine Learning is balancing computational efficiency with accuracy.\nKey applications of Machine Learning include real-world problem solving and data analysis.\nCore theoretical concepts in Machine Learning are essential for designing efficient systems.\nMachine Learning often relies on mathematical models and statistical methods for analysis.\nRecent research in Machine Learning has led to significant improvements in performance and scalability.\nG

In [None]:
print(f"\nReusing tokenizer from {model_checkpoint_long} for LONG Q&A (or loading if not present).")
# Reuse the tokenizer already in the kernel only when it was loaded from the
# long Q&A checkpoint. If it is missing, or came from a different checkpoint
# (the long checkpoint may differ from the short one), load the right one.
if 'tokenizer' not in globals() or getattr(tokenizer, "name_or_path", None) != model_checkpoint_long:
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_long)


def preprocess_long_qna(examples):
    """Tokenize one batch of LONG Q&A records for seq2seq fine-tuning.

    Args:
        examples: Batched mapping passed by ``Dataset.map(batched=True)``. The
            'context', 'question' and 'answer' columns are read from it; a missing
            column is treated as empty.

    Returns:
        The tokenizer output for the prefixed contexts with an added ``labels``
        entry. Label positions holding the pad token id are replaced by -100.
        Records with any non-string field are skipped, so the result may contain
        fewer rows than the incoming batch.
    """
    contexts = examples.get('context', [])
    questions = examples.get('question', [])
    answers = examples.get('answer', [])

    if not (len(contexts) == len(questions) == len(answers)):
        # zip() below stops at the shortest column, which truncates all three to a
        # common length.
        print(f"Warning (Long Q&A): Mismatch in lengths.")

    inputs = []
    targets = []
    for context, question, answer in zip(contexts, questions, answers):
        if not all(isinstance(item, str) for item in (context, question, answer)):
            print(f"Warning (Long Q&A): Skipping record due to non-string data.")
            continue
        # Source text: task prefix + passage. Target text: "question: ... answer: ..."
        inputs.append(f"{input_prefix_long}{context.strip()}")
        targets.append(output_structure_long.format(question.strip(), answer.strip()))

    # Every record in the batch was skipped: return an empty batch instead of
    # asking the tokenizer to pad nothing.
    if not inputs:
        return {"input_ids": [], "attention_mask": [], "labels": []}

    model_inputs = tokenizer(inputs,
                             max_length=max_input_length_long,
                             padding="max_length",
                             truncation=True)
    # `text_target=` replaces the deprecated `tokenizer.as_target_tokenizer()`
    # context manager for encoding decoder targets.
    labels = tokenizer(text_target=targets,
                       max_length=max_target_length_long,
                       padding="max_length",
                       truncation=True)

    # Replace pad ids in the labels with -100 (the value this notebook also filters
    # out when decoding labels for inspection).
    label_pad_token_id = -100
    model_inputs["labels"] = [
        [(token_id if token_id != tokenizer.pad_token_id else label_pad_token_id) for token_id in label_ids]
        for label_ids in labels["input_ids"]
    ]
    return model_inputs

# Tokenize both splits in batches; the raw text columns are dropped so that only the
# features returned by preprocess_long_qna remain.
print("\nApplying preprocessing to the LONG Q&A datasets...")
tokenized_datasets_long = raw_datasets_long.map(
    preprocess_long_qna,
    remove_columns=raw_datasets_long["train"].column_names,
    batched=True,
)
print("Preprocessing finished for LONG Q&A.")

# Decode the first training example back to text as a quick sanity check.
if len(tokenized_datasets_long['train']) == 0:
    print("Tokenized long Q&A training dataset is empty, skipping sample inspection.")
else:
    print("\nSample Processed LONG Q&A Input (decoded):")
    print(tokenizer.decode(tokenized_datasets_long['train'][0]['input_ids'], skip_special_tokens=False))
    print("\nSample Processed LONG Q&A Label (decoded):")
    # -100 entries are label placeholders, not real token ids, so drop them before decoding.
    label_ids_long_inspect = [token_id for token_id in tokenized_datasets_long['train'][0]['labels'] if token_id != -100]
    print(tokenizer.decode(label_ids_long_inspect, skip_special_tokens=False))


Reusing tokenizer from t5-small for LONG Q&A (or loading if not present).

Applying preprocessing to the LONG Q&A datasets...


Map:   0%|          | 0/2413 [00:00<?, ? examples/s]



Map:   0%|          | 0/578 [00:00<?, ? examples/s]

Preprocessing finished for LONG Q&A.

Sample Processed LONG Q&A Input (decoded):
generate question and answer: context: A central challenge in Machine Learning is balancing computational efficiency with accuracy. Key applications of Machine Learning include real-world problem solving and data analysis. Core theoretical concepts in Machine Learning are essential for designing efficient systems. Machine Learning often relies on mathematical models and statistical methods for analysis. Recent research in Machine Learning has led to significant improvements in performance and scalability. Graduate-level research in Machine Learning explores novel techniques and deep learning. The concept of Machine Learning refers to the fundamental principles and techniques used in this area. Machine Learning is interconnected with other fields, such as data structures and algorithms. Understanding the history and evolution of Machine Learning provides insight into current methodologies.</s><pad><pad><pad

In [None]:
# Load a fresh copy of the base checkpoint to fine-tune on the LONG Q&A data.
print(f"\nLoading base model '{model_checkpoint_long}' for LONG Q&A training...")
model_long = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint_long)
print("Base model for LONG Q&A loaded.")

# Drop the notebook-level names of models left by earlier cells, then release cached
# CUDA memory. The identity checks guard against deleting the model just loaded.
# NOTE(review): `del` only removes the global name; memory is reclaimed only if nothing
# else (e.g. an earlier trainer object) still references the model — confirm.
if 'model' in globals() and model is not model_long:
    print("Deleting previous 'model' (short Q&A base) instance...")
    del model
if 'model_inf_short' in globals() and model_inf_short is not model_long: # Expected to be gone already; the recorded output shows it was still present
    print("Deleting previous 'model_inf_short' instance...")
    del model_inf_short
if torch.cuda.is_available():
    torch.cuda.empty_cache()


Loading base model 't5-small' for LONG Q&A training...
Base model for LONG Q&A loaded.
Deleting previous 'model' (short Q&A base) instance...
Deleting previous 'model_inf_short' instance...


In [None]:
# Build the Seq2SeqTrainingArguments and the data collator for the LONG Q&A run.
print(f"\nSetting up Training Arguments for LONG Q&A. Output dir: {long_qna_output_dir}")

# Estimate optimizer steps per epoch from the dataset size and per-device batch size.
train_dataset_size_long = len(tokenized_datasets_long["train"])
num_gpus_long = torch.cuda.device_count() if torch.cuda.is_available() else 1
if num_gpus_long == 0: num_gpus_long = 1

# The "+ 1" rounds up, but also over-counts by one step when the dataset size is an
# exact multiple of the effective batch size.
steps_per_epoch_long = (train_dataset_size_long // (batch_size_long * num_gpus_long)) + 1
# NOTE: save_steps_long is computed here but never passed to the arguments below
# (the save_steps line is commented out because checkpoints are saved per epoch).
save_steps_long = int(steps_per_epoch_long * save_steps_ratio_long) # derived from the long-Q&A ratio
if save_steps_long < 10: save_steps_long = logging_steps_long # very small values fall back to the logging interval

args_long = Seq2SeqTrainingArguments(
    output_dir=long_qna_output_dir, # checkpoints and final model go here
    eval_strategy="epoch",
    learning_rate=learning_rate_long,
    per_device_train_batch_size=batch_size_long,
    per_device_eval_batch_size=batch_size_long * 2,
    weight_decay=weight_decay_long,
    save_total_limit=3,
    num_train_epochs=num_train_epochs_long,
    predict_with_generate=True,
    fp16=torch.cuda.is_available(),
    logging_dir=os.path.join(long_qna_output_dir, "logs_long"), # separate log directory for the long run
    logging_strategy="steps",
    logging_steps=logging_steps_long,
    save_strategy="epoch",
    # save_steps=save_steps_long, # save_strategy="epoch" saves per epoch
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    report_to="tensorboard",
    generation_max_length=max_target_length_long # generation length cap used during evaluation
)

print("Setting up Data Collator for LONG Q&A...")
# The collator receives both the tokenizer and model_long.
data_collator_long = DataCollatorForSeq2Seq(tokenizer, model=model_long)
print("Setup complete for LONG Q&A.")


Setting up Training Arguments for LONG Q&A. Output dir: /content/drive/MyDrive/Testing/New/model_outputs/long_qna_finetuned
Setting up Data Collator for LONG Q&A...
Setup complete for LONG Q&A.


In [None]:
# compute_metrics_short_qna is not defined in this cell; it is expected to exist from
# the short Q&A section and is reused here unchanged.
# NOTE(review): the original note says NLTK punkt_tab was downloaded in the long Q&A
# config cell (Cell 13) — confirm that cell ran before this one.

print("\nSetting up Trainer for LONG Q&A task...")
trainer_long = Seq2SeqTrainer(
    model=model_long,
    args=args_long,
    train_dataset=tokenized_datasets_long["train"],
    eval_dataset=tokenized_datasets_long["validation"],
    data_collator=data_collator_long,
    # NOTE(review): the recorded output contains a warning pointing at this constructor
    # call; presumably the `tokenizer=` argument deprecation in newer transformers — confirm.
    tokenizer=tokenizer,
    compute_metrics=compute_metrics_short_qna # same metrics function as the short Q&A run
)
print("Trainer for LONG Q&A setup complete.")


Setting up Trainer for LONG Q&A task...
Trainer for LONG Q&A setup complete.


  trainer_long = Seq2SeqTrainer(


In [None]:
# Run the LONG Q&A fine-tuning, then persist the model, metrics and trainer state.
print("\nStarting training for LONG Q&A Generation...")
try:
    train_result_long = trainer_long.train()
except Exception as e:
    print(f"An error occurred during LONG Q&A training: {e}")
    if torch.cuda.is_available():
        print("Attempting to clear CUDA cache for LONG Q&A training...")
        torch.cuda.empty_cache()
    # Bare `raise` re-raises the active exception with its original traceback.
    raise
print("LONG Q&A training finished.")

print("\nTraining for LONG Q&A finished. Saving final model and metrics...")

trainer_long.save_model() # Saves to args_long.output_dir (long_qna_output_dir)
print(f"Final Long Q&A model (potentially best) saved to {long_qna_output_dir}")

metrics_long = train_result_long.metrics
trainer_long.log_metrics("train_long", metrics_long)
trainer_long.save_metrics("train_long", metrics_long)
trainer_long.save_state()

# Save a second copy under "<output_dir>/best_model", the path the inference and
# Flask cells load from.
long_qna_best_model_path = os.path.join(long_qna_output_dir, "best_model")
# exist_ok=True replaces the separate exists() check and is safe on re-runs.
os.makedirs(long_qna_best_model_path, exist_ok=True)
trainer_long.save_model(long_qna_best_model_path)
print(f"Best LONG Q&A Model explicitly saved to {long_qna_best_model_path}")

# Drop the training objects and release cached CUDA memory (GPU runs only).
if torch.cuda.is_available():
    print("Cleaning up LONG Q&A model and trainer from GPU memory...")
    if 'model_long' in globals(): del model_long
    if 'trainer_long' in globals(): del trainer_long
    torch.cuda.empty_cache()
print("Training process complete for LONG Q&A.")


Starting training for LONG Q&A Generation...


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum,Gen Len
1,0.3333,2.034157,55.6049,37.6073,50.2291,54.111,70.0571
2,0.1522,2.139813,55.9949,37.6137,51.0451,54.5744,68.1021
3,0.1241,2.197016,55.4696,37.5778,50.8853,54.2709,68.7336
4,0.1115,2.230448,55.3546,37.7035,51.1549,54.1305,69.6626
5,0.1045,2.25802,55.1701,37.6653,50.9686,54.0603,70.2111
6,0.1018,2.263139,55.0027,37.6119,50.8993,53.8375,69.5242


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


LONG Q&A training finished.

Training for LONG Q&A finished. Saving final model and metrics...
Final Long Q&A model (potentially best) saved to /content/drive/MyDrive/Testing/New/model_outputs/long_qna_finetuned
***** train_long metrics *****
  epoch                    =        6.0
  total_flos               =  2673203GF
  train_loss               =     0.2696
  train_runtime            = 0:17:09.03
  train_samples_per_second =     14.069
  train_steps_per_second   =      1.761
Best LONG Q&A Model explicitly saved to /content/drive/MyDrive/Testing/New/model_outputs/long_qna_finetuned/best_model
Cleaning up LONG Q&A model and trainer from GPU memory...
Training process complete for LONG Q&A.


In [None]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import re
import os

# Directory holding the fine-tuned LONG Q&A model written by the training cell.
long_qna_model_load_path_inf = os.path.join(long_qna_output_dir, "best_model")

# Inference settings read by generate_long_qna().
# NOTE(review): model_checkpoint_long_inf is not referenced anywhere in this cell — confirm it is still needed.
model_checkpoint_long_inf = "t5-small"
max_input_length_long_inf = 750  # tokenizer truncation limit for the input passage
input_prefix_long_inf = "generate question and answer: context: "  # task prefix prepended to the passage
inf_max_output_length_long = 256  # max_length passed to generate()
inf_num_beams_long = 4
inf_early_stopping_long = True
inf_no_repeat_ngram_size_long = 2


# Fail early with a clear message when the fine-tuned model has not been saved yet.
if not os.path.exists(long_qna_model_load_path_inf):
    raise FileNotFoundError(f"Fine-tuned LONG Q&A model directory not found: {long_qna_model_load_path_inf}.")

print(f"Loading fine-tuned LONG Q&A model and tokenizer from: {long_qna_model_load_path_inf}")
tokenizer_inf_long = AutoTokenizer.from_pretrained(long_qna_model_load_path_inf)
model_inf_long = AutoModelForSeq2SeqLM.from_pretrained(long_qna_model_load_path_inf)

# Use the GPU when available, and switch the model to evaluation mode.
device_inf_long = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_inf_long.to(device_inf_long)
model_inf_long.eval()
print(f"LONG Q&A Inference model moved to device: {device_inf_long}")

def generate_long_qna(context):
    """Generate one question/answer pair for ``context`` with the LONG Q&A model.

    Args:
        context: Passage text. Must be a non-empty string.

    Returns:
        ``{"question": str, "answer": str}`` parsed from the generated text, or
        ``None`` when the context is invalid or the output cannot be parsed.
        The original casing of the generated text is preserved on every path.
    """
    if not context or not isinstance(context, str):
        print("Error: Invalid context provided.")
        return None
    input_text = f"{input_prefix_long_inf}{context.strip()}"
    inputs = tokenizer_inf_long(input_text,
                                max_length=max_input_length_long_inf,
                                padding=True,
                                truncation=True,
                                return_tensors="pt")
    input_ids = inputs.input_ids.to(device_inf_long)
    attention_mask = inputs.attention_mask.to(device_inf_long)

    print(f"\nGenerating LONG question and answer...")
    with torch.no_grad():
        outputs = model_inf_long.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=inf_max_output_length_long,
            num_beams=inf_num_beams_long,
            early_stopping=inf_early_stopping_long,
            no_repeat_ngram_size=inf_no_repeat_ngram_size_long,
            num_return_sequences=1
        )
    generated_text = tokenizer_inf_long.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    print(f"Raw generated LONG output: {generated_text}")

    # Primary parse: the output starts with "question: ... answer: ...".
    match = re.match(r"question:\s*(.*?)\s*answer:\s*(.*)", generated_text, re.IGNORECASE | re.DOTALL)
    if match:
        question = match.group(1).strip()
        answer = match.group(2).strip()
        print("Parsing successful for LONG Q&A.")
        return {"question": question, "answer": answer}

    print("Error: Could not parse the generated LONG output.")
    # Fallback: split on the first "answer:" marker. The markers are matched
    # case-insensitively so the text itself is NOT lower-cased (the previous
    # implementation lower-cased the whole output before splitting).
    parts = re.split(r"answer:", generated_text, maxsplit=1, flags=re.IGNORECASE)
    if len(parts) == 2:
        q_part = re.sub(r"question:", "", parts[0], flags=re.IGNORECASE).strip()
        a_part = parts[1].strip()
        if q_part and a_part:
            print("Fallback parsing attempted for LONG Q&A.")
            return {"question": q_part, "answer": a_part}
    return None

passage_example_long = """
Intrusion detection plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Packet filtering plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Vpn plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Network segmentation plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Network segmentation plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Vpn plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Network segmentation plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Firewall configuration plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Network segmentation plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Vpn plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Intrusion detection plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Network segmentation plays a crucial role in network security, as it helps ensure proper handling and protection of resources.
"""
# Run one end-to-end generation on the example passage and report the outcome.
print("\n--- Example LONG Q&A Inference ---")
print(f"Input Passage for LONG Q&A:\n{passage_example_long[:250]}...")
long_qna_pair = generate_long_qna(passage_example_long)
if not long_qna_pair:
    # generate_long_qna returned None (invalid context or unparseable output).
    print("\nFailed to generate a valid LONG Q&A pair.")
else:
    print("\nGenerated LONG Q&A Pair:")
    print(f"  Q: {long_qna_pair['question']}")
    print(f"  A: {long_qna_pair['answer']}")
print("\n--- LONG Q&A Inference Cell Complete ---")


Loading fine-tuned LONG Q&A model and tokenizer from: /content/drive/MyDrive/Testing/New/model_outputs/long_qna_finetuned/best_model
LONG Q&A Inference model moved to device: cpu

--- Example LONG Q&A Inference ---
Input Passage for LONG Q&A:

Intrusion detection plays a crucial role in network security, as it helps ensure proper handling and protection of resources. Packet filtering plays a crucial role in network security, as it helps ensure proper handling and protection of resources. ...

Generating LONG question and answer...
Raw generated LONG output: question: What is packet filtering in network security? answer: Firewall configuration refers to the process used to packet filtrating in system security. It involves multiple steps to ensure accuracy and reliability. In network safety, proper execution of packet filters is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Parsing successful for LONG Q&A.

Generate

In [None]:
!pip install Flask PyPDF2
!pip install flask-ngrok

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.9-py3-none-any.whl.metadata (9.3 kB)
Downloading pyngrok-7.2.9-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.9


In [None]:
# SECURITY: never commit the ngrok authtoken to the notebook. The token that was
# previously hard-coded in this cell must be treated as leaked and revoked in the
# ngrok dashboard. The token is now read from the NGROK_AUTHTOKEN environment
# variable, with an interactive (non-echoing) prompt as the fallback.
import os
from getpass import getpass

from pyngrok import ngrok

ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
import os
import re
import torch
from flask import Flask, render_template, request
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import PyPDF2
from pyngrok import ngrok

# Flask application object; routes are registered on it further down in this cell.
app = Flask(__name__)

# Prefer the output directories defined by the training cells. When this cell runs in
# a kernel where those variables do not exist, fall back to the same Drive locations
# spelled out explicitly.
try:
    _short_qna_finetuned_dir_flask = short_qna_output_dir
    _long_qna_finetuned_dir_flask = long_qna_output_dir
except NameError:
    print("Flask: Notebook output_dir variables not found, using fallback paths.")
    _base_drive_output_path_flask = "/content/drive/MyDrive/Testing/New/model_outputs"
    _short_qna_finetuned_dir_flask = os.path.join(_base_drive_output_path_flask, "short_qna_finetuned")
    _long_qna_finetuned_dir_flask = os.path.join(_base_drive_output_path_flask, "long_qna_finetuned")

# Both models are loaded from the "best_model" sub-directory of their output directory.
SHORT_QNA_MODEL_LOAD_PATH = os.path.join(_short_qna_finetuned_dir_flask, "best_model")
LONG_QNA_MODEL_LOAD_PATH = os.path.join(_long_qna_finetuned_dir_flask, "best_model")

# NOTE(review): MODEL_CHECKPOINT_INF is not referenced by the functions visible in this cell — confirm it is used.
MODEL_CHECKPOINT_INF = "t5-small"

# === Sampling-oriented generation settings (chosen for output diversity) ===
# Short Q&A inference parameters
MAX_INPUT_LENGTH_SHORT_INF = 750        # tokenizer truncation limit for the input passage
INPUT_PREFIX_SHORT_INF = "generate question and answer: context: "
INF_MAX_OUTPUT_LENGTH_SHORT = 128       # max_length passed to generate()
INF_NUM_BEAMS_SHORT = 1                 # 1 = no beam search; generation always samples (do_sample=True)
INF_EARLY_STOPPING_SHORT = False        # only forwarded to generate() when the beam count is > 1
                                        # (so it has no effect with the current value of 1)
INF_NO_REPEAT_NGRAM_SIZE_SHORT = 3
NUM_RETURN_SEQUENCES_SHORT = 3          # number of candidate Q&A pairs requested per call
TEMPERATURE_SHORT = 0.9                 # sampling temperature
TOP_P_SHORT = 0.9                       # nucleus-sampling threshold
TOP_K_SHORT = 0                         # per the original note, 0 disables top-k so sampling relies on top_p and temperature

# Long Q&A inference parameters
MAX_INPUT_LENGTH_LONG_INF = 750
INPUT_PREFIX_LONG_INF = "generate question and answer: context: "
INF_MAX_OUTPUT_LENGTH_LONG = 256
INF_NUM_BEAMS_LONG = 1                  # 1 = no beam search; generation always samples (do_sample=True)
INF_EARLY_STOPPING_LONG = False         # only forwarded to generate() when the beam count is > 1
INF_NO_REPEAT_NGRAM_SIZE_LONG = 3
NUM_RETURN_SEQUENCES_LONG = 3           # number of candidate Q&A pairs requested per call
TEMPERATURE_LONG = 0.9                  # sampling temperature
TOP_P_LONG = 0.9                        # nucleus-sampling threshold
TOP_K_LONG = 0                          # per the original note, 0 disables top-k
# === End of generation settings ===

# Module-level handles populated by load_models_for_flask(); None means "not loaded".
tokenizer_short_flask = None
model_short_flask = None
tokenizer_long_flask = None
model_long_flask = None
device_flask = None

def _load_flask_qna_pair(label, model_path, device):
    """Load one fine-tuned tokenizer/model pair for the Flask app.

    Args:
        label: "Short" or "Long"; only used in the log messages.
        model_path: Directory to load the tokenizer and model from.
        device: torch device the model is moved to.

    Returns:
        ``(tokenizer, model)`` with the model in eval mode on ``device``, or
        ``(None, None)`` when the path does not exist or loading fails. Both
        values are reset together so a tokenizer is never left without a model.
    """
    if not os.path.exists(model_path):
        print(f"Flask App Warning: {label} Q&A model path not found: {model_path}")
        return None, None
    print(f"Flask App: Loading {label} Q&A model from: {model_path}")
    try:
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
        loaded_model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
        loaded_model.to(device)
        loaded_model.eval()
    except Exception as e:
        # Best effort: the app still starts and reports the model as not loaded.
        print(f"Flask App: Error loading {label} Q&A model: {e}")
        return None, None
    print(f"Flask App: {label} Q&A model loaded.")
    return loaded_tokenizer, loaded_model


def load_models_for_flask():
    """Populate the module-level tokenizer/model/device handles used by the Flask app.

    Selects CUDA when available, then attempts to load the short and long Q&A
    models from SHORT_QNA_MODEL_LOAD_PATH and LONG_QNA_MODEL_LOAD_PATH. A model
    that cannot be loaded leaves its tokenizer and model handles set to None.
    """
    global tokenizer_short_flask, model_short_flask, tokenizer_long_flask, model_long_flask, device_flask
    print("Flask App: Loading models...")
    device_flask = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Flask App: Using device: {device_flask}")
    tokenizer_short_flask, model_short_flask = _load_flask_qna_pair(
        "Short", SHORT_QNA_MODEL_LOAD_PATH, device_flask
    )
    tokenizer_long_flask, model_long_flask = _load_flask_qna_pair(
        "Long", LONG_QNA_MODEL_LOAD_PATH, device_flask
    )
    print("Flask App: Model loading attempt complete.")

def parse_generated_qna_text(generated_text):
    """Split model output of the form "question: ... answer: ..." into its parts.

    Args:
        generated_text: Decoded text produced by the model.

    Returns:
        ``{"question": str, "answer": str}`` with both values non-empty and in
        their original casing, or ``None`` when no non-empty pair can be extracted.
    """
    if not generated_text or not isinstance(generated_text, str):
        return None

    # Primary parse: the text starts with "question:" followed later by "answer:".
    match = re.match(r"question:\s*(.*?)\s*answer:\s*(.*)", generated_text, re.IGNORECASE | re.DOTALL)
    if match:
        question, answer = match.group(1).strip(), match.group(2).strip()
        if question and answer:
            return {"question": question, "answer": answer}

    # Fallback: split on the first "answer:" marker wherever it appears. Markers are
    # matched case-insensitively so the text itself keeps its casing (the previous
    # implementation lower-cased the whole output before splitting).
    parts = re.split(r"answer:", generated_text, maxsplit=1, flags=re.IGNORECASE)
    if len(parts) == 2:
        q_part = re.sub(r"question:", "", parts[0], flags=re.IGNORECASE).strip()
        a_part = parts[1].strip()
        if q_part and a_part:
            print("Fallback parsing used.")
            return {"question": q_part, "answer": a_part}
    return None

def generate_multiple_qna_from_model(context, answer_type="short"):
    """Sample several question/answer pairs for ``context`` from a fine-tuned model.

    Args:
        context: Passage text to generate from. Must be a non-empty string.
        answer_type: "short" or "long"; selects the model, tokenizer and the
            matching module-level generation constants.

    Returns:
        ``(qna_list, error_message)``. ``qna_list`` holds the parsed
        ``{"question", "answer"}`` dicts, de-duplicated by case-insensitive
        question text in generation order. ``error_message`` is ``None`` on
        success, otherwise a string describing why nothing was produced.
    """
    if answer_type == "short":
        if not model_short_flask or not tokenizer_short_flask:
            return [], "Short Q&A model not loaded."
        tokenizer_inf, model_inf = tokenizer_short_flask, model_short_flask
        params = {"prefix": INPUT_PREFIX_SHORT_INF, "max_in": MAX_INPUT_LENGTH_SHORT_INF, "max_out": INF_MAX_OUTPUT_LENGTH_SHORT,
                  "beams": INF_NUM_BEAMS_SHORT, "early_stop": INF_EARLY_STOPPING_SHORT, "no_repeat": INF_NO_REPEAT_NGRAM_SIZE_SHORT,
                  "num_seq": NUM_RETURN_SEQUENCES_SHORT, "temp": TEMPERATURE_SHORT, "top_p": TOP_P_SHORT, "top_k": TOP_K_SHORT}
    elif answer_type == "long":
        if not model_long_flask or not tokenizer_long_flask:
            return [], "Long Q&A model not loaded."
        tokenizer_inf, model_inf = tokenizer_long_flask, model_long_flask
        params = {"prefix": INPUT_PREFIX_LONG_INF, "max_in": MAX_INPUT_LENGTH_LONG_INF, "max_out": INF_MAX_OUTPUT_LENGTH_LONG,
                  "beams": INF_NUM_BEAMS_LONG, "early_stop": INF_EARLY_STOPPING_LONG, "no_repeat": INF_NO_REPEAT_NGRAM_SIZE_LONG,
                  "num_seq": NUM_RETURN_SEQUENCES_LONG, "temp": TEMPERATURE_LONG, "top_p": TOP_P_LONG, "top_k": TOP_K_LONG}
    else:
        return [], "Invalid answer type specified."

    if not context or not isinstance(context, str):
        return [], "Invalid context provided."

    input_text = f"{params['prefix']}{context.strip()}"
    try:
        # A single passage needs no padding: padding=True pads only to the longest
        # sequence in the batch (here, the passage itself) instead of always padding
        # to max_in tokens. This matches generate_long_qna and avoids encoding
        # hundreds of pad tokens on every request.
        inputs = tokenizer_inf(input_text, max_length=params['max_in'], padding=True, truncation=True, return_tensors="pt")
    except Exception as e:
        return [], f"Error during tokenization: {str(e)}"

    input_ids = inputs.input_ids.to(device_flask)
    attention_mask = inputs.attention_mask.to(device_flask)
    print(f"Flask App: Generating {params['num_seq']} {answer_type} Q&A pairs with temp={params['temp']}, top_p={params['top_p']}, top_k={params['top_k']}, beams={params['beams']}...")
    try:
        with torch.no_grad():
            generation_args = {
                "input_ids": input_ids, "attention_mask": attention_mask, "max_length": params['max_out'],
                "num_return_sequences": params['num_seq'], "no_repeat_ngram_size": params['no_repeat'],
                "do_sample": True, "temperature": params['temp'], "top_p": params['top_p'], "top_k": params['top_k']
            }
            # Beam-related options are only forwarded when beam search is actually requested.
            if params['beams'] > 1:
                generation_args["num_beams"] = params['beams']
                generation_args["early_stopping"] = params['early_stop']

            outputs = model_inf.generate(**generation_args)
    except Exception as e:
        return [], f"Error during model generation: {str(e)}"

    # Decode every returned sequence and keep the ones that parse into a Q&A pair.
    qna_list = []
    for i, output_sequence in enumerate(outputs):
        generated_text = tokenizer_inf.decode(output_sequence, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        print(f"Flask App: Raw generated output #{i+1} ({answer_type}): {generated_text}")
        parsed_qna = parse_generated_qna_text(generated_text)
        if parsed_qna:
            qna_list.append(parsed_qna)

    error_message = None
    if not qna_list and params['num_seq'] > 0:
        error_message = f"Could not parse any valid Q&A pairs from the generated {answer_type} output."

    # Keep only the first pair for each distinct (case-insensitive) question.
    if qna_list:
        unique_qna_list = []
        seen_questions = set()
        for qna_pair in qna_list:
            question_key = qna_pair["question"].lower().strip()
            if question_key in seen_questions:
                continue
            seen_questions.add(question_key)
            unique_qna_list.append(qna_pair)
        if len(unique_qna_list) < len(qna_list):
            print(f"Flask App: De-duplicated Q&A pairs. Original: {len(qna_list)}, Unique: {len(unique_qna_list)}")
        qna_list = unique_qna_list

    return qna_list, error_message

def extract_text_from_txt(file_stream):
    """Read an uploaded text stream and return its contents as a string.

    Args:
        file_stream: Object whose ``read()`` returns the file's bytes.

    Returns:
        The decoded text, or ``None`` when reading or decoding fails.
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 and additionally drops a leading byte-order
        # mark, so a BOM written by some editors does not end up in the passage.
        return file_stream.read().decode('utf-8-sig')
    except Exception as e:
        print(f"Error reading txt: {e}")
        return None

def extract_text_from_pdf(file_stream):
    """Extract the text of every page of an uploaded PDF.

    Args:
        file_stream: Binary stream handed to ``PyPDF2.PdfReader``.

    Returns:
        The stripped text of all pages that yielded text, joined with newlines
        ("" when no page yields text), or ``None`` when the PDF cannot be read.
    """
    try:
        reader = PyPDF2.PdfReader(file_stream)
        page_texts = []
        for page in reader.pages:
            # Extract once per page; the previous version called extract_text() twice.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
        # Join with a newline so the last word of one page and the first word of the
        # next are not glued together (which also skewed the word count).
        return "\n".join(page_texts).strip()
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return None

# Maximum number of whitespace-separated words index() accepts in a passage;
# longer passages are rejected with an error message.
WORD_LIMIT = 300

@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the Q&A generator page and handle passage submissions.

    On GET the empty form is rendered. On POST the passage is taken from an
    uploaded ``.txt``/``.pdf`` file (which takes precedence over the text area
    when both are supplied) or from the ``passage_text`` field, checked
    against ``WORD_LIMIT``, and passed to the model selected by
    ``answer_type``.

    Returns:
        The rendered ``index.html`` template, populated with either the
        generated Q&A pairs or an error message.
    """
    error, passage_input, answer_type_input, qna_results = None, "", "short", []
    if request.method == 'POST':
        passage_text_form = request.form.get('passage_text', '').strip()
        uploaded_file = request.files.get('file')
        answer_type_input = request.form.get('answer_type', 'short')
        context_to_process = ""

        if uploaded_file and uploaded_file.filename != '':
            filename = uploaded_file.filename
            # Compare the extension case-insensitively so names such as
            # "NOTES.TXT" or "Doc.PDF" are not rejected as an invalid type.
            lowered_name = filename.lower()
            if lowered_name.endswith('.txt'):
                context_to_process = extract_text_from_txt(uploaded_file.stream)
            elif lowered_name.endswith('.pdf'):
                context_to_process = extract_text_from_pdf(uploaded_file.stream)
            else:
                error = "Invalid file type. Upload .txt or .pdf."

            if not error:
                # The extractors return None on failure and "" when nothing was extracted.
                if context_to_process is None:
                    error = "Could not extract text from file."
                elif context_to_process == "":
                    error = "Extracted text from file is empty."

            # Echo at most the first 1000 characters back into the text area.
            if context_to_process and len(context_to_process) > 1000:
                passage_input = context_to_process[:1000] + "..."
            else:
                passage_input = context_to_process or f"File: {filename} (failed/empty)"
        elif passage_text_form:
            context_to_process = passage_input = passage_text_form
        else:
            error = "Please enter a passage or upload a file."

        # Every path above that leaves the context empty has already set
        # `error`, so no further "missing input" handling is needed here.
        if context_to_process and not error:
            word_count = len(context_to_process.split())
            if word_count == 0:
                # Reached when a file contained only whitespace.
                error = "The provided text is empty after processing."
            elif word_count > WORD_LIMIT:
                error = f"Passage exceeds word limit of {WORD_LIMIT} (found {word_count})."
            else:
                model_ready = (answer_type_input == "short" and model_short_flask) or \
                              (answer_type_input == "long" and model_long_flask)
                if not model_ready:
                    error = f"The model for '{answer_type_input}' answers is not loaded."
                else:
                    generated_qna_list, gen_error = generate_multiple_qna_from_model(context_to_process, answer_type_input)
                    if gen_error:
                        error = gen_error
                    elif generated_qna_list:
                        qna_results = generated_qna_list
                    else:
                        error = f"No valid Q&A pairs generated for '{answer_type_input}' type."

    return render_template('index.html', error=error, passage_input=passage_input,
                           answer_type_input=answer_type_input, qna_results=qna_results, WORD_LIMIT=WORD_LIMIT)

if __name__ == '__main__':
    # Write the single-page Jinja template to disk before the app starts so
    # that index() can render 'index.html'.
    os.makedirs("templates", exist_ok=True)
    html_content = """
<!doctype html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1"><title>Q&A Generator</title><style>body{font-family:sans-serif;margin:20px;background-color:#f4f4f4;color:#333}.container{background-color:#fff;padding:20px;border-radius:8px;box-shadow:0 0 10px rgba(0,0,0,0.1);max-width:800px;margin:auto}h1,h2{color:#333;text-align:center}label{display:block;margin-top:15px;margin-bottom:5px;font-weight:bold}textarea{width:100%;padding:10px;border:1px solid #ddd;border-radius:4px;box-sizing:border-box;min-height:150px}select,input[type="file"]{width:100%;padding:10px;margin-top:5px;border:1px solid #ddd;border-radius:4px;box-sizing:border-box}input[type="submit"]{background-color:#5cb85c;color:white;cursor:pointer;font-size:16px;padding:12px 20px;border:none;margin-top:20px;width:100%}input[type="submit"]:hover{background-color:#4cae4c}.error{color:#d9534f;background-color:#f2dede;border:1px solid #ebccd1;padding:10px;border-radius:4px;margin-top:15px}.qna-results-container{margin-top:30px}.qna-pair{border:1px solid #eee;padding:15px;margin-top:15px;border-radius:4px;background-color:#f9f9f9}.qna-pair p{margin:8px 0;line-height:1.6}.qna-pair strong{color:#0056b3}</style></head><body><div class="container"><h1>Q&A Generator</h1>{% if error %}<p class="error">{{ error }}</p>{% endif %}<form method="POST" enctype="multipart/form-data"><div><label for="passage_text">Enter Passage (max {{ WORD_LIMIT }} words):</label><textarea name="passage_text" id="passage_text" rows="10">{{ passage_input if passage_input is not none else '' }}</textarea></div><div><label for="file">Or Upload File (.txt, .pdf):</label><input type="file" name="file" id="file" accept=".txt,.pdf"></div><div><label for="answer_type">Select Answer Type:</label><select name="answer_type" id="answer_type"><option value="short" {% if answer_type_input == 'short' %}selected{% endif %}>Short Answer</option><option value="long" {% if answer_type_input 
== 'long' %}selected{% endif %}>Long Answer</option></select></div><input type="submit" value="Generate Q&A"></form>{% if qna_results %}<div class="qna-results-container"><h2>Generated Q&A Pairs:</h2>{% for item in qna_results %}<div class="qna-pair"><p><strong>Question {{ loop.index }}:</strong> {{ item.question }}</p><p><strong>Answer {{ loop.index }}:</strong> {{ item.answer }}</p></div>{% endfor %}</div>{% elif request.method == 'POST' and not error %}<p style="text-align:center; margin-top:20px;">No Q&A pairs were generated. The model might not have found suitable content or failed to parse its output.</p>{% endif %}</div></body></html>
"""
    # The template declares charset utf-8, so write it with that encoding
    # instead of the platform default.
    with open("templates/index.html", "w", encoding="utf-8") as f:
        f.write(html_content)
    print("Created/Updated templates/index.html")
    load_models_for_flask()
    # ngrok.connect() hands back a tunnel object whose string form already
    # contains a '"url" -> "target"' mapping; print only its public URL so the
    # message is not nested. Fall back to the value itself if it has no
    # `public_url` attribute (e.g. a plain string).
    tunnel = ngrok.connect(5000)
    public_url = getattr(tunnel, "public_url", tunnel)
    print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:5000\"")
    app.run(port=5000, use_reloader=False)

Created/Updated templates/index.html
Flask App: Loading models...
Flask App: Using device: cpu
Flask App: Loading Short Q&A model from: /content/drive/MyDrive/Testing/New/model_outputs/short_qna_finetuned/best_model
Flask App: Short Q&A model loaded.
Flask App: Loading Long Q&A model from: /content/drive/MyDrive/Testing/New/model_outputs/long_qna_finetuned/best_model
Flask App: Long Q&A model loaded.
Flask App: Model loading attempt complete.
 * ngrok tunnel "NgrokTunnel: "https://7ae5-34-125-30-224.ngrok-free.app" -> "http://localhost:5000"" -> "http://127.0.0.1:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:11:25] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:11:26] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


Flask App: Generating 3 short Q&A pairs with temp=0.9, top_p=0.9, top_k=0, beams=1...


INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:11:55] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (short): question: What is object-oriented programming? answer: Object-oriented programs organize code into classes that encapsulate data and behavior as objects.
Flask App: Raw generated output #2 (short): question: Why is static typing important? answer: Static typing enforces type rules during compilation, whereas static typing checks types during execution.
Flask App: Raw generated output #3 (short): question: What is dynamic typing? answer: Static typing enforces type rules during compilation, whereas dynamic typing checks types during execution.
Flask App: Generating 3 short Q&A pairs with temp=0.9, top_p=0.9, top_k=0, beams=1...


INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:13:36] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (short): question: Why is dynamic typing important? answer: Static typing enforces type rules during compilation and execution, while dynamic typing checks types during execution.
Flask App: Raw generated output #2 (short): question: Why is static typing important? answer: Static typing enforces type rules during compilation, whereas static typing enforced type rules in machine code during execution.
Flask App: Raw generated output #3 (short): question: Why is dynamic typing important? answer: Static typing enforces type rules during compilation, whereas dynamic typing checks types during execution.
Flask App: De-duplicated Q&A pairs. Original: 3, Unique: 2
Flask App: Generating 3 short Q&A pairs with temp=0.9, top_p=0.9, top_k=0, beams=1...


INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:18:05] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (short): question: What is security monitoring in cloud security? answer: Security monitoring refers to the process used to security monitoring and monitoring in Cloud security.
Flask App: Raw generated output #2 (short): question: What is data encryption in cloud security? answer: Data encryption refers to the process used to data encryption during auditing in cloud cybersecurity.
Flask App: Raw generated output #3 (short): question: What is critical monitoring in cloud security? answer: Critical monitoring refers to the process used to critical monitoring during security auditing in cloud privacy.
Flask App: Generating 3 short Q&A pairs with temp=0.9, top_p=0.9, top_k=0, beams=1...


INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:20:00] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (short): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of garde in digital documentation.
Flask App: Raw generated output #2 (short): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of garde in digital contradictions.
Flask App: Raw generated output #3 (short): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of garde in digital thematics.
Flask App: De-duplicated Q&A pairs. Original: 3, Unique: 1
Flask App: Generating 3 short Q&A pairs with temp=0.9, top_p=0.9, top_k=0, beams=1...


INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:20:45] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (short): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of garde in digital digital judicial system.
Flask App: Raw generated output #2 (short): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of court in digital neutrals.
Flask App: Raw generated output #3 (short): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of possession in digital contradictions.
Flask App: De-duplicated Q&A pairs. Original: 3, Unique: 1
Flask App: Generating 3 long Q&A pairs with temp=0.9, top_p=0.9, top_k=0, beams=1...


INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:21:02] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (long): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of garde in digital digital criterion. It involves multiple steps to ensure accuracy and reliability. In digital cryptics, proper execution of chain of court is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #2 (long): question: What is chain of custody in digital forensics? answer: Chain of custody refers to the process used to chain of garde in digital digital. forensic culture: It involves multiple steps to ensure accuracy and reliability. In digital, proper execution of chain of hold is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #3 (long): question: What is chain of custody in digital forensics? answer: Chain of cus

INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:23:27] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (long): question: What is compliance in cloud security? answer: Compliance refers to the process used to compliance in Cloud Security. It involves multiple steps to ensure accuracy and reliability. In cloud security, proper execution of compliance is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #2 (long): question: What is compliance in cloud security? answer: Compliance refers to the process used to compliance in Cloud security. It involves multiple steps to ensure accuracy and reliability. In cloud security, proper execution of compliance is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #3 (long): question: What is security policy in cloud security? answer: Security policy refers to the process used to security policy for cloud security. It i

INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:24:29] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (long): question: What is compliance in cloud security? answer: Compliance refers to the process used to compliance in Cloud security. It involves multiple steps to ensure accuracy and reliability. In cloud security, proper execution of compliance is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #2 (long): question: What is security policy in cloud security? answer: Security policy refers to the process used to security policy în cloud security. It involves multiple steps to ensure accuracy and reliability. In cloud security, proper execution of security policy is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #3 (long): question: What is security policy in cloud security? answer: Security policy refers to the process used to security policy for 

INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:24:45] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (long): question: What is security policy in cloud security? answer: Security policy refers to the process used to security policy în cloud security. It involves multiple steps to ensure accuracy and reliability. In cloud security, proper execution of security policy is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #2 (long): question: What is security policy in cloud security? answer: Security policy refers to the process used to security policy for security management. It involves multiple steps to ensure accuracy and reliability. In cloud security, proper execution of security policy is essential for maintaining system integrity. Failures in this process can lead to significant security vulnerabilities.
Flask App: Raw generated output #3 (long): question: What is compliance in cloud security? answer: Compliance refers to the process used to comp

INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:24:56] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (short): question: What is identity and access management in cloud security? answer: Identity and access control refers to the process used to identity and credit management in cyber security.
Flask App: Raw generated output #2 (short): question: What is key management in cloud security? answer: Key management refers to the process used to key management for cloud security.
Flask App: Raw generated output #3 (short): question: What is identity and access management in cloud security? answer: Identity and access control refers to the process used to identity and Access management in Cloud security.
Flask App: De-duplicated Q&A pairs. Original: 3, Unique: 2


INFO:werkzeug:127.0.0.1 - - [29/May/2025 08:26:00] "POST / HTTP/1.1" 200 -


Flask App: Generating 3 short Q&A pairs with temp=0.9, top_p=0.9, top_k=0, beams=1...


INFO:werkzeug:127.0.0.1 - - [29/May/2025 09:36:24] "POST / HTTP/1.1" 200 -


Flask App: Raw generated output #1 (short): question: What is data encryption in cloud security? answer: Data encryption refers to the process used to data encryption for data encryption.
Flask App: Raw generated output #2 (short): question: What is identity and access management in cloud security? answer: Identity and access control refers to the process used to identity and identity management in Cloud security.
Flask App: Raw generated output #3 (short): question: What is compliance in cloud security? answer: Compliance refers to the process used to compliance in clouds security.
