In [None]:
# import torch
# from transformers import T5TokenizerFast, T5ForConditionalGeneration, Trainer, TrainingArguments
# from datasets import load_dataset

# # Load dataset
# train_dir = "/kaggle/input/qa-dataset/dataset/train/train.json"
# test_dir = "/kaggle/input/qa-dataset/dataset/test/test.json"
# dataset = load_dataset("json", data_files={"train": train_dir, "validation": test_dir})

# # Load T5 tokenizer and model
# tokenizer = T5TokenizerFast.from_pretrained("t5-base")
# model = T5ForConditionalGeneration.from_pretrained("t5-base")

# # Define preprocessing function
# def preprocess_data(examples):
#     inputs = [f"question: {q} context: {c}" for q, c in zip(examples["question"], examples["context"])]
#     targets = examples["answers"]

#     model_inputs = tokenizer(inputs, max_length=512, padding="max_length", truncation=True)
#     labels = tokenizer(targets, max_length=128, padding="max_length", truncation=True)

#     model_inputs["labels"] = labels["input_ids"]
#     return model_inputs

# # Apply preprocessing
# tokenized_datasets = dataset.map(
#     preprocess_data,
#     batched=True,
#     remove_columns=["context", "question", "answers"]
# )

# # Move model to GPU if available
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# # Training arguments
# training_args = TrainingArguments(
#     output_dir="./t5_qa",
#     eval_strategy="steps",
#     eval_steps=500,
#     save_strategy="steps",
#     save_steps=500,
#     logging_strategy="steps",
#     logging_steps=100,
#     per_device_train_batch_size=2,
#     per_device_eval_batch_size=2,
#     gradient_accumulation_steps=2,
#     num_train_epochs=3,
#     learning_rate=2e-5,
#     weight_decay=0.01,
#     warmup_steps=500,
#     max_grad_norm=1.0,
#     report_to="none",
#     save_total_limit=2,
#     disable_tqdm=True,
#     fp16=True,
#     dataloader_pin_memory=False,
#     optim="adamw_torch",
#     bf16=False
# )

# # Trainer setup
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=tokenized_datasets["train"],
#     eval_dataset=tokenized_datasets["validation"]
# )

# # Train the model
# trainer.train()


In [None]:
import torch
from transformers import BartTokenizerFast, BartForConditionalGeneration, Trainer, TrainingArguments
from datasets import load_dataset

In [None]:
# Load dataset
train_dir = "/kaggle/input/qa-dataset/dataset/train/train.json"
test_dir = "/kaggle/input/qa-dataset/dataset/test/test.json"
dataset = load_dataset("json", data_files={"train": train_dir, "validation": test_dir})

# Load BART tokenizer and model
tokenizer = BartTokenizerFast.from_pretrained("facebook/bart-large")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")

In [None]:
# Define preprocessing function
def preprocess_data(examples):
    inputs = tokenizer(
        examples["question"],
        examples["context"],
        truncation=True,
        padding="max_length",
        max_length=512
    )

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples["answers"],
            truncation=True,
            padding="max_length",
            max_length=128
        )

    inputs["labels"] = labels["input_ids"]
    return inputs

# Apply preprocessing
tokenized_datasets = dataset.map(
    preprocess_data,
    batched=True,
    remove_columns=["context", "question", "answers"]
)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training arguments
training_args = TrainingArguments(
    output_dir="./bart_qa",
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    logging_strategy="steps",
    logging_steps=100,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=500,
    max_grad_norm=1.0,
    report_to="none",
    save_total_limit=2,
    disable_tqdm=True,
    fp16=True,
    dataloader_pin_memory=False,
    optim="adamw_torch",
    bf16=False
)

In [None]:
# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"]
)

# Train the model
trainer.train()

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.02G [00:00<?, ?B/s]

Map:   0%|          | 0/11018 [00:00<?, ? examples/s]



Map:   0%|          | 0/2755 [00:00<?, ? examples/s]



{'loss': 13.0337, 'grad_norm': 4356703.0, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.07259528130671507}
{'loss': 7.0226, 'grad_norm': 5013011.0, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.14519056261343014}
{'loss': 4.6303, 'grad_norm': 5462073.0, 'learning_rate': 1.2e-05, 'epoch': 0.2177858439201452}
{'loss': 1.7432, 'grad_norm': 1898636.375, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.29038112522686027}
{'loss': 0.1498, 'grad_norm': 236059.53125, 'learning_rate': 2e-05, 'epoch': 0.3629764065335753}
{'eval_loss': 0.06640709936618805, 'eval_runtime': 244.4202, 'eval_samples_per_second': 11.272, 'eval_steps_per_second': 2.819, 'epoch': 0.3629764065335753}




{'loss': 0.0672, 'grad_norm': 1908899.625, 'learning_rate': 1.9449187551638668e-05, 'epoch': 0.4355716878402904}
{'loss': 0.0558, 'grad_norm': 696970.75, 'learning_rate': 1.8898375103277335e-05, 'epoch': 0.5081669691470054}
{'loss': 0.049, 'grad_norm': 1245967.125, 'learning_rate': 1.8347562654916e-05, 'epoch': 0.5807622504537205}
{'loss': 0.0464, 'grad_norm': 1197801.875, 'learning_rate': 1.7796750206554668e-05, 'epoch': 0.6533575317604355}
{'loss': 0.1665, 'grad_norm': 190735.03125, 'learning_rate': 1.7245937758193335e-05, 'epoch': 0.7259528130671506}
{'eval_loss': 0.0580323152244091, 'eval_runtime': 244.2298, 'eval_samples_per_second': 11.28, 'eval_steps_per_second': 2.821, 'epoch': 0.7259528130671506}




{'loss': 0.0452, 'grad_norm': 138733.84375, 'learning_rate': 1.6695125309832005e-05, 'epoch': 0.7985480943738656}
{'loss': 0.0483, 'grad_norm': 522915.8125, 'learning_rate': 1.614431286147067e-05, 'epoch': 0.8711433756805808}
{'loss': 0.039, 'grad_norm': 190414.203125, 'learning_rate': 1.5593500413109338e-05, 'epoch': 0.9437386569872959}
{'loss': 0.0362, 'grad_norm': 145277.9375, 'learning_rate': 1.5042687964748005e-05, 'epoch': 1.0159709618874773}
{'loss': 0.0336, 'grad_norm': 166815.390625, 'learning_rate': 1.4491875516386671e-05, 'epoch': 1.0885662431941925}
{'eval_loss': 0.04493004456162453, 'eval_runtime': 244.0015, 'eval_samples_per_second': 11.291, 'eval_steps_per_second': 2.824, 'epoch': 1.0885662431941925}




{'loss': 0.0326, 'grad_norm': 136244.875, 'learning_rate': 1.3941063068025338e-05, 'epoch': 1.1611615245009075}
{'loss': 0.0331, 'grad_norm': 335799.875, 'learning_rate': 1.3390250619664004e-05, 'epoch': 1.2337568058076225}
{'loss': 0.0307, 'grad_norm': 1400752.125, 'learning_rate': 1.2839438171302671e-05, 'epoch': 1.3063520871143375}
{'loss': 0.0307, 'grad_norm': 265265.78125, 'learning_rate': 1.228862572294134e-05, 'epoch': 1.3789473684210527}
{'loss': 0.0309, 'grad_norm': 195410.328125, 'learning_rate': 1.1737813274580006e-05, 'epoch': 1.4515426497277677}
{'eval_loss': 0.0415491946041584, 'eval_runtime': 244.3209, 'eval_samples_per_second': 11.276, 'eval_steps_per_second': 2.82, 'epoch': 1.4515426497277677}




{'loss': 0.0286, 'grad_norm': 128632.7890625, 'learning_rate': 1.1187000826218673e-05, 'epoch': 1.524137931034483}


In [None]:
save_path = "/kaggle/working/bart_qa_finetuned"
trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)

In [None]:
import shutil

shutil.make_archive("/kaggle/working/bart_qa_finetuned", 'zip', "bart_qa_finetuned")

In [1]:
import torch
from transformers import LEDTokenizer, LEDForConditionalGeneration, Trainer, TrainingArguments
from datasets import load_dataset

train_dir = "/kaggle/input/qa-dataset/dataset/train/train.json"
test_dir = "/kaggle/input/qa-dataset/dataset/test/test.json"
dataset = load_dataset("json", data_files={"train": train_dir, "validation": test_dir})

# Use the correct model identifier
tokenizer = LEDTokenizer.from_pretrained("allenai/led-base-16384")
model = LEDForConditionalGeneration.from_pretrained("allenai/led-base-16384")

def preprocess_data(examples):
    inputs = [f"question: {q} context: {c}" for q, c in zip(examples["question"], examples["context"])]
    targets = examples["answers"]
    model_inputs = tokenizer(inputs, max_length=1024, padding="max_length", truncation=True)
    labels = tokenizer(targets, max_length=128, padding="max_length", truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = dataset.map(
    preprocess_data,
    batched=True,
    remove_columns=["context", "question", "answers"]
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

training_args = TrainingArguments(
    output_dir="./led_qa",
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    logging_strategy="steps",
    logging_steps=100,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=500,
    max_grad_norm=1.0,
    report_to="none",
    save_total_limit=2,
    disable_tqdm=True,
    fp16=True,
    dataloader_pin_memory=False,
    optim="adamw_torch",
    bf16=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"]
)

trainer.train()


Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.09k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/648M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

Map:   0%|          | 0/11018 [00:00<?, ? examples/s]

Map:   0%|          | 0/2755 [00:00<?, ? examples/s]



{'loss': 12.3406, 'grad_norm': 7017768.0, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.07259528130671507}
{'loss': 5.0122, 'grad_norm': 6689450.0, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.14519056261343014}
{'loss': 2.2308, 'grad_norm': 4485698.5, 'learning_rate': 1.2e-05, 'epoch': 0.2177858439201452}
{'loss': 0.4132, 'grad_norm': 330081.375, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.29038112522686027}
{'loss': 0.065, 'grad_norm': 79761.6640625, 'learning_rate': 2e-05, 'epoch': 0.3629764065335753}
{'eval_loss': 0.04624195769429207, 'eval_runtime': 236.5215, 'eval_samples_per_second': 11.648, 'eval_steps_per_second': 2.913, 'epoch': 0.3629764065335753}




{'loss': 0.0453, 'grad_norm': 127795.6171875, 'learning_rate': 1.9449187551638668e-05, 'epoch': 0.4355716878402904}
{'loss': 0.044, 'grad_norm': 133011.171875, 'learning_rate': 1.8898375103277335e-05, 'epoch': 0.5081669691470054}
{'loss': 0.0396, 'grad_norm': 96232.8359375, 'learning_rate': 1.8347562654916e-05, 'epoch': 0.5807622504537205}
{'loss': 0.0383, 'grad_norm': 146381.859375, 'learning_rate': 1.7796750206554668e-05, 'epoch': 0.6533575317604355}
{'loss': 0.038, 'grad_norm': 94047.09375, 'learning_rate': 1.7245937758193335e-05, 'epoch': 0.7259528130671506}
{'eval_loss': 0.03343921899795532, 'eval_runtime': 236.998, 'eval_samples_per_second': 11.625, 'eval_steps_per_second': 2.907, 'epoch': 0.7259528130671506}




{'loss': 0.0394, 'grad_norm': 72417.1875, 'learning_rate': 1.6695125309832005e-05, 'epoch': 0.7985480943738656}
{'loss': 0.0341, 'grad_norm': 77503.515625, 'learning_rate': 1.614431286147067e-05, 'epoch': 0.8711433756805808}
{'loss': 0.032, 'grad_norm': 55145.01171875, 'learning_rate': 1.5593500413109338e-05, 'epoch': 0.9437386569872959}
{'loss': 0.0311, 'grad_norm': 108002.703125, 'learning_rate': 1.5042687964748005e-05, 'epoch': 1.0159709618874773}
{'loss': 0.024, 'grad_norm': 54437.828125, 'learning_rate': 1.4491875516386671e-05, 'epoch': 1.0885662431941925}
{'eval_loss': 0.031276024878025055, 'eval_runtime': 236.1363, 'eval_samples_per_second': 11.667, 'eval_steps_per_second': 2.918, 'epoch': 1.0885662431941925}




{'loss': 0.0238, 'grad_norm': 102351.875, 'learning_rate': 1.3941063068025338e-05, 'epoch': 1.1611615245009075}
{'loss': 0.023, 'grad_norm': 66981.3203125, 'learning_rate': 1.3390250619664004e-05, 'epoch': 1.2337568058076225}
{'loss': 0.0222, 'grad_norm': 71675.125, 'learning_rate': 1.2839438171302671e-05, 'epoch': 1.3063520871143375}
{'loss': 0.0249, 'grad_norm': 259258.84375, 'learning_rate': 1.228862572294134e-05, 'epoch': 1.3789473684210527}


KeyboardInterrupt: 

In [None]:
print("hi")

In [None]:
print("hi")