In [None]:

import os
import math
import logging
from transformers import (
    GPT2LMHeadModel,
    GPT2Tokenizer,
    Trainer,
    TrainingArguments,
    set_seed,
    DataCollatorForLanguageModeling,
    LineByLineTextDataset
)

def train_model(
    train_file=r"C:\Users\Dell\Desktop\Movie Plot Generator\6_genre_clean_training_data.txt",
    eval_file=r"C:\Users\Dell\Desktop\Movie Plot Generator\6_genre_eval_data.txt",
    output_dir=r"C:\Users\Dell\Desktop\Movie Plot Generator\story_gen_output2",
    seed=42,
    block_size=128,
):
    """Fine-tune the pretrained ``gpt2`` checkpoint as a genre-conditioned story generator.

    The tokenizer is extended with BOS/EOS/PAD markers and one tag per genre,
    the model's embedding matrix is resized to match, and the model is trained
    as a causal language model (``mlm=False``) on a line-per-example text file.
    After training, the model and tokenizer are saved to ``output_dir`` and the
    held-out file is evaluated once, logging its loss and perplexity.

    Args:
        train_file: Path to the training text file (one example per line).
        eval_file: Path to the evaluation text file (one example per line).
        output_dir: Directory that receives checkpoints, the final model and
            the tokenizer files.
        seed: Random seed applied before any weights are created and passed
            on to the ``Trainer``.
        block_size: Maximum number of tokens kept per line.

    Returns:
        None. Results are written to ``output_dir`` and to the log.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Seed before touching the model: resize_token_embeddings() below draws
    # random initial vectors for the newly added tokens, so seeding afterwards
    # (as the previous version did) leaves those rows non-reproducible.
    set_seed(seed)

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    special_tokens = {
        "bos_token": "<BOS>",
        "eos_token": "<EOS>",
        "pad_token": "<PAD>",
        "additional_special_tokens": ["<superhero>", "<action>", "<drama>", "<thriller>", "<horror>", "<sci_fi>"]
    }
    tokenizer.add_special_tokens(special_tokens)

    model = GPT2LMHeadModel.from_pretrained("gpt2")
    # The vocabulary grew by the special tokens above, so the embedding matrix
    # must grow with it before any of the new ids are looked up.
    model.resize_token_embeddings(len(tokenizer))

    train_dataset = LineByLineTextDataset(
        tokenizer=tokenizer,
        file_path=train_file,
        block_size=block_size,
    )

    eval_dataset = LineByLineTextDataset(
        tokenizer=tokenizer,
        file_path=eval_file,
        block_size=block_size,
    )

    # mlm=False selects the causal (next-token) language-modelling objective.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=True,
        do_train=True,
        do_eval=True,
        num_train_epochs=3,
        per_device_train_batch_size=2,
        per_device_eval_batch_size=2,
        save_steps=500,
        save_total_limit=1,
        logging_steps=100,
        seed=seed,
    )

    # NOTE(review): recent transformers releases appear to deprecate the
    # `tokenizer=` argument in favour of `processing_class=`; it is kept here
    # so the cell still runs on older versions — confirm against the pinned version.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    trainer.train()
    trainer.save_model()
    tokenizer.save_pretrained(training_args.output_dir)

    # do_eval=True alone does not schedule evaluation during train(), so the
    # eval dataset would otherwise never be used. Run it once explicitly.
    metrics = trainer.evaluate()
    eval_loss = metrics.get("eval_loss")
    if eval_loss is not None:
        try:
            perplexity = math.exp(eval_loss)
        except OverflowError:
            perplexity = float("inf")
        logger.info("Eval loss: %.4f | perplexity: %.2f", eval_loss, perplexity)

# Run the fine-tuning job when this cell executes (downloads gpt2, trains, saves).
train_model()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mviswasudheer01082005[0m ([33mviswasudheer01082005-visvesvaraya-national-institute-of-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
100,3.8692
200,3.7915
300,3.6879
400,3.7662
500,3.6934
600,3.6468
700,3.663
800,3.6428
900,3.5952
1000,3.6407


In [1]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch


def load_model_and_tokenizer(path=r"C:\Users\Dell\Desktop\Movie Plot Generator\story_gen_output2"):
    """Load the fine-tuned GPT-2 model and its tokenizer from ``path``.

    Args:
        path: Directory holding the saved model (safetensors format) and
            tokenizer files.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is already in eval mode.
    """
    language_model = GPT2LMHeadModel.from_pretrained(path, use_safetensors=True)
    # Inference only from here on, so switch off training-time behaviour.
    language_model.eval()
    story_tokenizer = GPT2Tokenizer.from_pretrained(path)
    return language_model, story_tokenizer


def generate_story(prompt, model, tokenizer, max_length=300):
    """Sample one story continuation for ``prompt`` and return it as text.

    Args:
        prompt: Text to condition on (e.g. a BOS marker followed by a genre tag).
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode``, ``decode``, ``eos_token_id``
            and ``pad_token_id``.
        max_length: Upper bound passed to ``generate`` as ``max_length``.

    Returns:
        The first generated sequence decoded with special tokens stripped.
    """
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
    stop_id = tokenizer.eos_token_id
    padding_id = tokenizer.pad_token_id

    # Sampling setup: top-k / nucleus sampling plus repetition controls.
    sampling_options = dict(
        max_length=max_length,
        do_sample=True,
        top_k=40,
        top_p=0.92,
        temperature=1.0,
        pad_token_id=padding_id,
        eos_token_id=stop_id,
        repetition_penalty=1.2,
        no_repeat_ngram_size=2,
    )
    sequences = model.generate(prompt_ids, **sampling_options)

    # Only a single sequence is requested, so take the first row.
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Load the saved model/tokenizer from the default path and bind them as notebook globals.
model, tokenizer = load_model_and_tokenizer()
# Prompt with the BOS marker followed by the <action> genre tag to sample one action story.
story = generate_story("<BOS> <action>", model, tokenizer)
print(story)

  from .autonotebook import tqdm as notebook_tqdm


  In the year AD 6, a large white dog is brought to King Philip by an animal trader. The two set out on their journey but are interrupted by mysterious flying creatures which come across them as they were meant for hunting or catching prey. To stop them and save mankind from extinction one of Drayson's most dangerous creatures returns with his henchmen. 


In [None]:
pip install gradio



In [2]:
import html

import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the fine-tuned model and its tokenizer from the saved output directory.
# NOTE(review): this is a "/content/drive/..." path, which looks like a Google
# Drive mount in Colab — presumably the drive must be mounted before this cell
# runs; confirm.
model_path = "/content/drive/MyDrive/ML PROJ/story_gen_output2"
model = GPT2LMHeadModel.from_pretrained(model_path)
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
# This cell only generates text, so put the model into evaluation mode.
model.eval()


# Background image per genre, interpolated into a CSS `url(...)` by generate_story.
# NOTE(review): the "horror" entry is a local filesystem path (with a space in
# it) while every other entry is an http(s) URL — a browser presumably cannot
# load that path directly; confirm and host the image or serve it via the app.
genre_backgrounds = {
    "horror": "/content/drive/MyDrive/ML PROJ/pics/horr.jpg",
    "action": "https://i.ibb.co/vs5Q2vp/action.jpg",
    "sci_fi": "https://i.ibb.co/n1Y8BQX/scifi.jpg",
    "drama": "https://i.ibb.co/X3XmTc4/drama.jpg",
    "superhero": "https://i.ibb.co/98R3jLt/superhero.jpg",
    "thriller": "https://i.ibb.co/t4pZMPf/thriller.jpg"
}


def generate_story(genre, max_tokens=100, temperature=0.9):
    """Generate a story for ``genre`` and return it as a styled HTML card.

    Uses the notebook-level ``model``, ``tokenizer`` and ``genre_backgrounds``.

    Args:
        genre: Genre name (case-insensitive); turned into a ``<genre>`` tag
            that is used as the generation prompt. ``None`` or an empty string
            yields a short "please select" message instead of a story.
        max_tokens: Maximum number of new tokens to sample. Coerced to ``int``
            because UI sliders may deliver floats.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        An HTML string: either the styled story card or the prompt to pick a
        genre.
    """
    # The dropdown has no default selection, so the callback can be invoked
    # with no genre; bail out early instead of failing on `.lower()`.
    if not genre:
        return (
            "<div style='padding: 20px; font-family: \"Georgia\", serif;'>"
            "<p>Please select a genre first.</p>"
            "</div>"
        )

    genre_key = genre.lower()
    genre_token = f"<{genre_key}>"
    input_ids = tokenizer.encode(genre_token, return_tensors="pt")

    # eos_token_id is deliberately not overridden here (the earlier version had
    # it commented out), so generation falls back to the model's own settings.
    output = model.generate(
        input_ids,
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id
    )

    story = tokenizer.decode(output[0], skip_special_tokens=True)

    # Generated text is arbitrary: escape it (and the title) so stray "<" or
    # "&" characters cannot break or inject markup into the card.
    safe_story = html.escape(story)
    safe_title = html.escape(genre.title())

    # Look up the background with the same normalised key used for the prompt.
    background_url = genre_backgrounds.get(genre_key, "")

    overlay = "linear-gradient(to right, rgba(0,0,0,0.7), rgba(0,0,0,0.9))"
    if background_url:
        # Quote the URL: an unquoted CSS url() breaks on paths containing spaces.
        safe_url = html.escape(background_url, quote=True)
        background_css = f'{overlay},\n                            url("{safe_url}")'
    else:
        background_css = overlay

    styled_output = f"""
    <div style='padding: 20px;
                color: white;
                font-family: "Georgia", serif;
                background: {background_css};
                background-size: cover;
                background-repeat: no-repeat;
                border-radius: 15px;
                box-shadow: 0 0 15px rgba(0,0,0,0.6);'>
        <h2>{safe_title} Story</h2>
        <p style='font-size: 18px; line-height: 1.6;'>{safe_story}</p>
    </div>
    """
    return styled_output

# Genres offered in the dropdown; each name is turned into a "<genre>" prompt tag
# by generate_story.
genre_list = ["horror", "action", "sci_fi", "drama", "superhero", "thriller"]

# Gradio interface: a header, one row of controls, an HTML output area and a button.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="rose")) as demo:
    gr.Markdown("""
        <h1 style='text-align: center; font-family: Arial; color: #880E4F;'>🎬 Movie Story Generator</h1>
        <p style='text-align: center;'>Choose a genre and let the Model write a story for you!</p>
    """)

    with gr.Row():
        # No `value` is given for the dropdown, so nothing is pre-selected.
        # NOTE(review): presumably the callback receives None until the user
        # picks a genre — confirm and handle that case in generate_story.
        genre = gr.Dropdown(choices=genre_list, label="Select Genre")
        max_tokens = gr.Slider(minimum=50, maximum=500, step=10, value=100, label="Max Tokens")
        temperature = gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=0.9, label="Creativity (Temperature)")

    # The story card returned by generate_story is rendered here as raw HTML.
    output = gr.HTML(label="Generated Story")

    generate_btn = gr.Button("Generate Story ✨")

    # Wire the button: the three controls are passed positionally to
    # generate_story(genre, max_tokens, temperature).
    generate_btn.click(fn=generate_story,
                       inputs=[genre, max_tokens, temperature],
                       outputs=output)

# Start the app.
demo.launch()


ModuleNotFoundError: No module named 'gradio'