# STEP 1 - Install Dependencies

Installs the necessary Python libraries for running the code.

In [None]:
!pip install transformers datasets torch accelerate



# STEP 2 - Import Libraries

Imports the required modules and classes for model handling, data processing, and training.

In [None]:
import torch
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
from transformers import DataCollatorForLanguageModeling

# STEP 3 - Load or Reuse Model and Tokenizer

Loads the GPT-2 medium model and tokenizer, either from a local directory (if previously trained) or from Hugging Face.

In [None]:
model_name = "gpt2-medium"
local_model_path = "./fine_tuned_story_model"

# Prefer a previously fine-tuned checkpoint on disk; otherwise fall back to the
# pre-trained weights from the Hugging Face Hub.
if os.path.exists(local_model_path):
    print("Loading fine-tuned model from local directory...")
    model_source = local_model_path
else:
    print("Loading pre-trained model from Hugging Face...")
    model_source = model_name

tokenizer = AutoTokenizer.from_pretrained(model_source)
model = AutoModelForCausalLM.from_pretrained(model_source)

# GPT-2 ships without a padding token. Reuse EOS for padding whenever one is
# missing, regardless of where the checkpoint came from, so a local checkpoint
# saved without a pad token still works with padding='max_length' later on.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token
if getattr(model.config, "pad_token_id", None) is None:
    model.config.pad_token_id = model.config.eos_token_id

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Loading fine-tuned model from local directory...


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50257, bias=False)
)

# STEP 4 - Load and Prepare Dataset (Only Needed for Training)

Loads and preprocesses the "TinyStories" dataset for training the model.

In [None]:
def load_and_process_data(num_samples=20000, max_length=300, train_fraction=0.9):
    """Load a slice of TinyStories, tokenize it, and split it into train/eval sets.

    Args:
        num_samples: Number of stories taken from the start of the 'train'
            split. Clamped to the size of the split.
        max_length: Token length every example is truncated/padded to.
        train_fraction: Fraction of the tokenized examples used for training;
            the remainder becomes the evaluation set. Must be strictly
            between 0 and 1.

    Returns:
        A ``(train_dataset, eval_dataset)`` tuple of tokenized datasets.

    Raises:
        ValueError: If ``train_fraction`` is not strictly between 0 and 1.
    """
    if not 0 < train_fraction < 1:
        raise ValueError("train_fraction must be strictly between 0 and 1")

    dataset = load_dataset("roneneldan/TinyStories")
    full_train = dataset['train']

    # Clamp so that asking for more stories than exist does not index past the end.
    sample_count = min(num_samples, len(full_train))
    train_data = full_train.select(range(sample_count))

    def preprocess_function(examples):
        """Prefix each story with the prompt marker and tokenize the batch."""
        # The same "Story Start: " marker is prepended at generation time.
        formatted_texts = [f"Story Start: {text}" for text in examples['text']]
        return tokenizer(formatted_texts,
                         truncation=True,
                         padding='max_length',
                         max_length=max_length)

    # Drop the raw text column so only the tokenizer outputs remain.
    tokenized_dataset = train_data.map(preprocess_function,
                                       batched=True,
                                       remove_columns=['text'])

    # Sequential (unshuffled) split: leading part for training, tail for evaluation.
    train_size = int(train_fraction * len(tokenized_dataset))
    train_dataset = tokenized_dataset.select(range(train_size))
    eval_dataset = tokenized_dataset.select(range(train_size, len(tokenized_dataset)))
    return train_dataset, eval_dataset


# STEP 5 - Training Setup (Run Only if Training is Needed)

Configures and runs the training process for fine-tuning the model on the story dataset.

In [None]:
def train_model(train_dataset, eval_dataset):
    """Fine-tune the global ``model`` on the tokenized story data and save it.

    Args:
        train_dataset: Tokenized dataset used for training.
        eval_dataset: Tokenized dataset evaluated every ``eval_steps`` steps.

    Side effects:
        Trains ``model`` in place, writes Trainer checkpoints to
        ``./story_generator`` and saves the final model and tokenizer to
        ``local_model_path``.
    """
    import inspect  # local import: only needed for the version-compatibility check below

    # mlm=False selects causal (next-token) language modeling labels.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Newer transformers releases renamed `evaluation_strategy` to
    # `eval_strategy`; pick whichever name the installed version accepts.
    accepted_args = inspect.signature(TrainingArguments.__init__).parameters
    if "eval_strategy" in accepted_args:
        eval_strategy_key = "eval_strategy"
    else:
        eval_strategy_key = "evaluation_strategy"

    training_args = TrainingArguments(
        output_dir="./story_generator",
        overwrite_output_dir=True,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        eval_steps=1000,
        save_steps=1000,
        warmup_steps=200,
        learning_rate=3e-5,
        weight_decay=0.01,
        logging_steps=200,
        # Mixed precision needs a CUDA device; fall back to full precision on
        # CPU so the notebook's CPU fallback path does not fail here.
        fp16=torch.cuda.is_available(),
        gradient_accumulation_steps=2,
        report_to="none",  # Disable W&B logging
        **{eval_strategy_key: "steps"},
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=data_collator,
    )

    trainer.train()
    # Save after training so later runs can load the checkpoint instead of retraining.
    model.save_pretrained(local_model_path)
    tokenizer.save_pretrained(local_model_path)
    print("Model saved to", local_model_path)

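# STEP 6 - Story Generation Function

Defines a function that generates a story from a starting prompt using the loaded model and tokenizer.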

In [None]:
def generate_story(prompt, max_length=300, temperature=0.8, top_p=0.9, beams=4):
    """Generate a story that continues ``prompt`` using the global model.

    Args:
        prompt: Opening text of the story.
        max_length: Maximum total length in tokens (prompt included).
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.
        beams: Number of beams passed as ``num_beams``.

    Returns:
        The decoded text with every "Story Start: " marker removed.
    """
    # Calling the tokenizer (instead of .encode) also yields an attention
    # mask, which is passed on to generate() alongside the input ids.
    inputs = tokenizer(f"Story Start: {prompt}", return_tensors="pt").to(device)

    # The model may still be in training mode if it was fine-tuned earlier in
    # this session; switch to eval mode so dropout is disabled for generation.
    model.eval()

    output = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=beams,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        no_repeat_ngram_size=3,
        early_stopping=True,
        pad_token_id=tokenizer.eos_token_id
    )

    # Only the first returned sequence is decoded.
    story = tokenizer.decode(output[0], skip_special_tokens=True)
    # Strip the training-time marker so only the story itself is returned.
    story = story.replace("Story Start: ", "")
    return story




# STEP 7 - Main Execution


Checks whether a fine-tuned model already exists locally, trains and saves one if it does not, and then generates a story from a starting sentence entered by the user.

In [None]:
# A saved checkpoint on disk means fine-tuning already happened; otherwise
# prepare the data and train now (train_model saves to local_model_path).
if os.path.exists(local_model_path):
    print("Using pre-trained local model, skipping training...")
else:
    print("Training model for the first time...")
    train_dataset, eval_dataset = load_and_process_data()
    train_model(train_dataset, eval_dataset)

# Ask the user for the opening sentence of the story.
print("\nPlease enter the starting sentence of your story:")
user_prompt = input("> ")

# Produce the story and show it under a heading.
story = generate_story(user_prompt)
print("\nGenerated Story:", story, sep="\n")

Using pre-trained local model, skipping training...

Please enter the starting sentence of your story:
> Once upon a time, there was a little dragon who couldn't fly in the sky

Generated Story:
Once upon a time, there was a little dragon who couldn't fly in the sky. He was sad because he couldn't soar like the other dragons. One day, he met a wise old owl who told him that he could soar if he practiced. The dragon listened to the owl and practiced every day.

One day, the dragon met a little bird who was also sad. The bird asked the dragon, "Why can't you soar like me?" The dragon thought for a moment and then said, "Because I don't have wings like you."

The bird felt sorry for the dragon and wanted to help him. So, the bird flew down to the ground and landed on the dragon's back. Together, they practiced flying together and soon the dragon was soaring like the little bird. From that day on, they became the best of friends. And they lived happily ever after. The end. The moral of the