In [1]:
# Install the notebook's dependencies into the kernel's own environment.
# `%pip` targets the running kernel, whereas `!pip` uses whichever pip is
# first on the shell PATH.
# TODO(review): pin versions once the known-good set is recorded.
%pip install -q transformers datasets accelerate

In [9]:
%%writefile text_generation.py

"""Generate short Arabic continuations for a few fixed prompts with AraGPT2."""

from transformers import pipeline, set_seed

# Sampling is stochastic; fix the seed so repeated runs print the same text.
set_seed(42)

generator = pipeline("text-generation", model="aubmindlab/aragpt2-base")

prompts = [
    "في إحدى ليالي الشتاء الباردة",
    "تحب دانية الذهاب إلى عملها بسبب",
    "كان الأطفال يلعبون في الملعب ثم",
]

for i, prompt in enumerate(prompts, start=1):
    print("=" * 40)
    print(f"Prompt {i}: {prompt}")

    # top_k / top_p only take effect when sampling is enabled. The original
    # passed do_sample=False, so both were ignored and decoding was greedy.
    # max_length counts the prompt tokens as well as the generated ones.
    output = generator(
        prompt,
        max_length=40,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )[0]["generated_text"]

    print("Generated Text:")
    print(output)
    print()


Overwriting text_generation.py


In [10]:
# Run the script written by the previous cell in a separate Python process.
!python text_generation.py

2025-12-09 20:11:08.605599: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765311068.648602   11647 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765311068.660688   11647 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765311068.691691   11647 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765311068.691777   11647 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765311068.691782   11647 computation_placer.cc:177] computation placer alr

In [20]:
%%writefile summarization_qa.py

"""Summarize article.txt with BART and answer one question about it."""

from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

with open("article.txt", "r", encoding="utf-8") as f:
    article = f.read()

# Fail early with a readable message instead of an error from deep inside
# the pipelines when the input file has no content.
if not article.strip():
    raise SystemExit("article.txt is empty; nothing to summarize.")

# truncation=True clips the article to the model's maximum input length.
# Without it, an article longer than the encoder's window makes the model
# fail instead of summarizing the leading part.
summary = summarizer(
    article,
    max_length=130,
    min_length=30,
    do_sample=False,
    truncation=True,
)[0]["summary_text"]

print("=== Summary ===")
print(summary)
print("\n")

# The QA model is extractive: the answer is a span copied from the context.
question = "What is the main idea of the article?"
result = qa(question=question, context=article)

print("=== Question Answering ===")
print("Question:", question)
print("Answer:", result["answer"])


Overwriting summarization_qa.py


In [21]:
# Run the script written by the previous cell; it reads article.txt from the
# current working directory.
!python summarization_qa.py

2025-12-09 21:00:05.522742: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765314005.585484   23414 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765314005.604945   23414 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765314005.661968   23414 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765314005.662027   23414 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765314005.662032   23414 computation_placer.cc:177] computation placer alr

In [39]:
%%writefile fine_tuning.py

"""Fine-tune distilgpt2 on a handful of sentences and compare generations.

The script samples a continuation of a fixed prompt before training,
fine-tunes the model with the Hugging Face Trainer, then samples again with
the same settings so the two outputs can be compared.
"""

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
    pipeline,
    set_seed,
)

# Both generations sample; seed the RNGs so runs are repeatable.
set_seed(42)

PROMPT = "In the University of Jordan,"

# One training example per sentence. The trailing commas matter: without
# them Python concatenates adjacent string literals into a single string,
# which collapses the dataset to one (truncated) example.
texts = [
    "Dania walked through the quiet streets of Amman, realizing the city held more stories than she ever imagined.",
    "Every night, she returned to her small desk, where the glow of her laptop became the only light in the room.",
    "The moment Dania opened her old notebook, memories of her first project came rushing back like a forgotten dream.",
    "She had always believed that every dataset carried a secret, waiting for someone patient enough to uncover it.",
    "One cold evening, a sudden idea struck her—an idea that felt strangely alive, like it had been waiting for her.",
    "As the rain tapped softly on her window, Dania typed the first line of code that would change everything.",
    "She paused, realizing that she wasn’t just building a model; she was building a version of herself she had never met.",
    "Some nights, the silence felt heavy, but Dania learned to find comfort in the rhythm of her thoughts.",
    "The old library at the University of Jordan became her second home, where stories of past students whispered through the shelves.",
    "Just when she felt lost, a single line in her code finally worked, lighting up her face with quiet triumph.",
    "With every experiment, Dania felt the world around her shrink, until only the story she was writing truly existed.",
    "She once feared failure, but now she saw it as a character in her story—a character that pushed her forward.",
    "In the stillness of the early morning, Dania realized that dreams grow louder when the world is quiet.",
    "She knew that the journey would be long, but every chapter begins with a single brave sentence.",
    "Dania closed her eyes for a moment, imagining the future she was slowly constructing with each key she pressed.",
]


def generate_sample(model, tokenizer, prompt=PROMPT):
    """Return one sampled continuation of ``prompt`` from ``model``.

    The same decoding settings are used before and after training so that
    the two outputs differ only because of the model weights (and the RNG
    state at the time of the call).
    """
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    return generator(
        prompt,
        max_new_tokens=50,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )[0]["generated_text"]


model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Padding to a fixed length is requested below, so a pad token is needed;
# the end-of-sequence token is reused for that.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

print("=== BEFORE FINE-TUNING ===")
before_output = generate_sample(model, tokenizer)
print(before_output)
print("\n")

# Tokenize each sentence to a fixed 64-token window (truncate or pad).
tokenized = [
    tokenizer(
        t,
        truncation=True,
        padding="max_length",
        max_length=64,
    )
    for t in texts
]

dataset = Dataset.from_dict({
    "input_ids": [item["input_ids"] for item in tokenized],
    "attention_mask": [item["attention_mask"] for item in tokenized],
})

# mlm=False selects causal language modeling: the collator builds the labels
# from the input ids rather than masking random tokens.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

training_args = TrainingArguments(
    output_dir="./distilgpt2-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    logging_steps=5,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=data_collator,
)

print("=== TRAINING... ===")
trainer.train()
print("=== TRAINING DONE ===\n")

# Training runs the model in train mode; switch back to eval mode so dropout
# is disabled while generating the comparison sample.
model.eval()

print("=== AFTER FINE-TUNING ===")
after_output = generate_sample(model, tokenizer)
print(after_output)
print()


Overwriting fine_tuning.py


In [40]:
# Run the fine-tuning script written by the previous cell in a separate
# Python process.
!python fine_tuning.py

2025-12-09 22:42:29.678969: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765320149.728624   47950 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765320149.740478   47950 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765320149.768934   47950 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765320149.768998   47950 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765320149.769007   47950 computation_placer.cc:177] computation placer alr