In [1]:
!pip install accelerate
!pip install tokenizers
!pip install transformers
!pip install bitsandbytes
!pip install einops
!pip install xformers
!pip install langchain
!pip install sentence_transformers
!pip install gradio

Collecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.w

In [None]:
!pip install accelerate



In [2]:
from torch import cuda, bfloat16
import transformers
from transformers import AutoModelForCausalLM

model_id = 'mistralai/Mistral-7B-Instruct-v0.2'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Model loaded on cuda:0


In [3]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)



tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [4]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 28705, 13, 28769, 6366, 28747], [1, 28705, 13, 13940, 28832, 13]]

In [5]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 28705,    13, 28769,  6366, 28747], device='cuda:0'),
 tensor([    1, 28705,    13, 13940, 28832,    13], device='cuda:0')]

In [6]:
from transformers import StoppingCriteria, StoppingCriteriaList,pipeline

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [7]:
generate_text = pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    task='text-generation',
    stopping_criteria=stopping_criteria,
    temperature=0.7,  #  for more creative generation
    max_new_tokens=1500,
    # min_new_tokens=1000,
    repetition_penalty=1.2  # reduce repetition
)

In [8]:
!pip install langchain_community


Collecting langchain_community
  Downloading langchain_community-0.2.5-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.2/49.2 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensi

In [9]:
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline

# Define the LLMs
llm_first = HuggingFacePipeline(pipeline=generate_text)
llm_second = HuggingFacePipeline(pipeline=generate_text)
llm_third = HuggingFacePipeline(pipeline=generate_text)
llm_fourth = HuggingFacePipeline(pipeline=generate_text)

# Define the prompt templates for each LLM
template_first = """
The Lester Dent Pulp Paper Master Fiction Plot Formula is a writing guide created by Lester Dent, a prolific pulp fiction writer best known for his work on the "Doc Savage" series. Dent's formula provides a structured approach to crafting engaging and action-packed stories. Here is a brief overview of the formula:

Introduction:
Introduce the hero and the central problem or conflict.
Set up a situation that hooks the reader's interest immediately.
First Quarter:
The hero tries to solve the problem.
Introduce complications and obstacles that thwart the hero's initial attempts.
Introduce other key characters (both allies and antagonists).
End with a twist or a surprising development.

Characters:
- Hero: {hero}
- Villain: {villain}
- Sidekick: {sidekick}
- Victim: {victim}
- Witness: {witness}

You are a story generator that follows the Lester Dent Pulp Paper formula.
Based on this formula, your task is to generate exactly the first 1500 words of the story in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

template_second = """
The Lester Dent Pulp Paper Master Fiction Plot Formula continues as follows:

Second Quarter:
Escalate the conflict and introduce additional problems for the hero.
The hero faces greater challenges and setbacks.
Include physical conflicts or action scenes.
Introduce a major plot twist or revelation that complicates the hero's mission.

This is the first part of the story:
{first_part}

Based on this formula, your task is to generate the next 1500 words of the story in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

template_third = """
The Lester Dent Pulp Paper Master Fiction Plot Formula continues as follows:

Third Quarter:
The hero makes some progress towards solving the problem but faces significant adversity.
Introduce new conflicts and obstacles.
The hero encounters the main villain or a critical turning point in the story.
End with a twist that puts the hero in an even worse situation.

This is the first part of the story:
{first_part}

This is the second part of the story:
{second_part}

Based on this formula, your task is to generate the next 1500 words of the story in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

template_fourth = """
The Lester Dent Pulp Paper Master Fiction Plot Formula continues as follows:

Final Quarter:
The hero faces the greatest challenges and is pushed to their limits.
All mysteries and plot threads are resolved.
The hero uses their skills, intelligence, and bravery to overcome the final obstacles.
End with a final twist or surprise that concludes the story in a satisfying way.

This is the first part of the story:
{first_part}

This is the second part of the story:
{second_part}

This is the third part of the story:
{third_part}

Based on this formula, your task is to generate the final 1500 words of the story which is the last quarter of the story so you should end the story in this chapter. The generated text should be in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

# Function to combine all parts into one text
def combine_story_parts(*parts):
    return "\n\n".join(parts)

# Function to generate the first part
def generate_first_part(hero, villain, sidekick, victim, witness):
    prompt_first = PromptTemplate.from_template(template_first)
    first_result = llm_first(prompt_first.format(hero=hero, villain=villain, sidekick=sidekick, victim=victim, witness=witness))
    return first_result

# Function to generate the second part
def generate_second_part(first_part):
    prompt_second = PromptTemplate.from_template(template_second)
    second_result = llm_second(prompt_second.format(first_part=first_part))
    return second_result

# Function to generate the third part
def generate_third_part(first_part, second_part):
    prompt_third = PromptTemplate.from_template(template_third)
    third_result = llm_third(prompt_third.format(first_part=first_part, second_part=second_part))
    return third_result

# Function to generate the final part
def generate_final_part(first_part, second_part, third_part):
    prompt_fourth = PromptTemplate.from_template(template_fourth)
    fourth_result = llm_fourth(prompt_fourth.format(first_part=first_part, second_part=second_part, third_part=third_part))
    return fourth_result

# Function to combine all parts and generate the full story
def generate_full_story(hero, villain, sidekick, victim, witness, first_part, second_part, third_part):
    # Generate the final part
    final_part = generate_final_part(first_part, second_part, third_part)

    # Combine all parts
    full_story = combine_story_parts(first_part, second_part, third_part, final_part)
    return full_story

# Define the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("# Story Generator")
    gr.Markdown("Generate a complete story based on the Lester Dent Pulp Paper formula.")

    with gr.Row():
        hero_input = gr.Textbox(label="Hero")
        villain_input = gr.Textbox(label="Villain")
        sidekick_input = gr.Textbox(label="Sidekick")
        victim_input = gr.Textbox(label="Victim")
        witness_input = gr.Textbox(label="Witness")
        generate_first = gr.Button("Generate First Part")

    first_part_output = gr.Textbox(label="First Part", lines=10)

    with gr.Row():
        generate_second = gr.Button("Generate Second Part")
        first_part_modified = gr.Textbox(label="Modified First Part", lines=10)

    second_part_output = gr.Textbox(label="Second Part", lines=10)

    with gr.Row():
        generate_third = gr.Button("Generate Third Part")
        second_part_modified = gr.Textbox(label="Modified Second Part", lines=10)

    third_part_output = gr.Textbox(label="Third Part", lines=10)

    with gr.Row():
        generate_final = gr.Button("Generate Final Part")
        third_part_modified = gr.Textbox(label="Modified Third Part", lines=10)

    final_part_output = gr.Textbox(label="Final Part", lines=10)

    generate_full = gr.Button("Generate Full Story")
    full_story_output = gr.Textbox(label="Full Story", lines=20)

    # Set up the interactions
    generate_first.click(
        fn=generate_first_part,
        inputs=[hero_input, villain_input, sidekick_input, victim_input, witness_input],
        outputs=[first_part_output]
    )

    generate_second.click(
        fn=generate_second_part,
        inputs=[first_part_output],
        outputs=[second_part_output]
    )

    generate_third.click(
        fn=generate_third_part,
        inputs=[first_part_modified, second_part_output],
        outputs=[third_part_output]
    )

    generate_final.click(
        fn=generate_final_part,
        inputs=[first_part_modified, second_part_modified, third_part_modified],
        outputs=[final_part_output]
    )

    generate_full.click(
        fn=generate_full_story,
        inputs=[hero_input, villain_input, sidekick_input, victim_input, witness_input, first_part_modified, second_part_modified, third_part_modified],
        outputs=[full_story_output]
    )

# Launch the Gradio interface
interface.launch()


  warn_deprecated(


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://edab8f4c4451c070ef.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline

# Define the LLMs
llm_first = HuggingFacePipeline(pipeline=generate_text)
llm_second = HuggingFacePipeline(pipeline=generate_text)
llm_third = HuggingFacePipeline(pipeline=generate_text)
llm_fourth = HuggingFacePipeline(pipeline=generate_text)

# Define the prompt templates for each LLM
template_first = """
The Lester Dent Pulp Paper Master Fiction Plot Formula is a writing guide created by Lester Dent, a prolific pulp fiction writer best known for his work on the "Doc Savage" series. Dent's formula provides a structured approach to crafting engaging and action-packed stories. Here is a brief overview of the formula:

Introduction:
Introduce the hero and the central problem or conflict.
Set up a situation that hooks the reader's interest immediately.
First Quarter:
The hero tries to solve the problem.
Introduce complications and obstacles that thwart the hero's initial attempts.
Introduce other key characters (both allies and antagonists).
End with a twist or a surprising development.

Characters:
- Hero: {hero}
- Villain: {villain}
- Sidekick: {sidekick}
- Victim: {victim}
- Witness: {witness}

You are a story generator that follows the Lester Dent Pulp Paper formula.
Based on this formula, your task is to generate exactly the first 1500 words of the story in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

template_second = """
The Lester Dent Pulp Paper Master Fiction Plot Formula continues as follows:

Second Quarter:
Escalate the conflict and introduce additional problems for the hero.
The hero faces greater challenges and setbacks.
Include physical conflicts or action scenes.
Introduce a major plot twist or revelation that complicates the hero's mission.

This is the first part of the story:
{first_part}

Based on this formula, your task is to generate the next 1500 words of the story in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

template_third = """
The Lester Dent Pulp Paper Master Fiction Plot Formula continues as follows:

Third Quarter:
The hero makes some progress towards solving the problem but faces significant adversity.
Introduce new conflicts and obstacles.
The hero encounters the main villain or a critical turning point in the story.
End with a twist that puts the hero in an even worse situation.

This is the first part of the story:
{first_part}

This is the second part of the story:
{second_part}

Based on this formula, your task is to generate the next 1500 words of the story in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

template_fourth = """
The Lester Dent Pulp Paper Master Fiction Plot Formula continues as follows:

Final Quarter:
The hero faces the greatest challenges and is pushed to their limits.
All mysteries and plot threads are resolved.
The hero uses their skills, intelligence, and bravery to overcome the final obstacles.
End with a final twist or surprise that concludes the story in a satisfying way.

This is the first part of the story:
{first_part}

This is the second part of the story:
{second_part}

This is the third part of the story:
{third_part}

Based on this formula, your task is to generate the final 1500 words of the story which is the last quarter of the story so you should end the story in this chapter. The generated text should be in one continuous block of text. The text should be coherent, engaging, and flow like a real story without any interruptions or section headers.
"""

# Function to combine all parts into one text
def combine_story_parts(*parts):
    return "\n\n".join(parts)

# Function to generate the first part
def generate_first_part(hero, villain, sidekick, victim, witness):
    prompt_first = PromptTemplate.from_template(template_first)
    first_result = llm_first(prompt_first.format(hero=hero, villain=villain, sidekick=sidekick, victim=victim, witness=witness))
    return first_result

# Function to generate the second part
def generate_second_part(first_part):
    prompt_second = PromptTemplate.from_template(template_second)
    second_result = llm_second(prompt_second.format(first_part=first_part))
    return second_result

# Function to generate the third part
def generate_third_part(first_part, second_part):
    prompt_third = PromptTemplate.from_template(template_third)
    third_result = llm_third(prompt_third.format(first_part=first_part, second_part=second_part))
    return third_result

# Function to generate the final part
def generate_final_part(first_part, second_part, third_part):
    prompt_fourth = PromptTemplate.from_template(template_fourth)
    fourth_result = llm_fourth(prompt_fourth.format(first_part=first_part, second_part=second_part, third_part=third_part))
    return fourth_result

# Function to combine all parts and generate the full story
def generate_full_story(hero, villain, sidekick, victim, witness, first_part, second_part, third_part):
    # Generate the final part
    final_part = generate_final_part(first_part, second_part, third_part)

    # Combine all parts
    full_story = combine_story_parts(first_part, second_part, third_part, final_part)
    return full_story

# Define the Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("# Story Generator")
    gr.Markdown("Generate a complete story based on the Lester Dent Pulp Paper formula.")

    with gr.Row():
        hero_input = gr.Textbox(label="Hero")
        villain_input = gr.Textbox(label="Villain")
        sidekick_input = gr.Textbox(label="Sidekick")
        victim_input = gr.Textbox(label="Victim")
        witness_input = gr.Textbox(label="Witness")
        generate_first = gr.Button("Generate First Part")

    first_part_output = gr.Textbox(label="First Part", lines=10)

    with gr.Row():
        generate_second = gr.Button("Generate Second Part")
        first_part_modified = gr.Textbox(label="Modified First Part", lines=10)

    second_part_output = gr.Textbox(label="Second Part", lines=10)

    with gr.Row():
        generate_third = gr.Button("Generate Third Part")
        second_part_modified = gr.Textbox(label="Modified Second Part", lines=10)

    third_part_output = gr.Textbox(label="Third Part", lines=10)

    with gr.Row():
        generate_final = gr.Button("Generate Final Part")
        third_part_modified = gr.Textbox(label="Modified Third Part", lines=10)

    final_part_output = gr.Textbox(label="Final Part", lines=10)

    generate_full = gr.Button("Generate Full Story")
    full_story_output = gr.Textbox(label="Full Story", lines=20)

    # Set up the interactions
    generate_first.click(
        fn=generate_first_part,
        inputs=[hero_input, villain_input, sidekick_input, victim_input, witness_input],
        outputs=[first_part_output]
    )

    generate_second.click(
        fn=generate_second_part,
        inputs=[first_part_output],
        outputs=[second_part_output]
    )

    generate_third.click(
        fn=generate_third_part,
        inputs=[first_part_modified, second_part_output],
        outputs=[third_part_output]
    )

    generate_final.click(
        fn=generate_final_part,
        inputs=[first_part_modified, second_part_modified, third_part_output],
        outputs=[final_part_output]
    )

    generate_full.click(
        fn=generate_full_story,
        inputs=[hero_input, villain_input, sidekick_input, victim_input, witness_input, first_part_modified, second_part_modified, third_part_modified],
        outputs=[full_story_output]
    )

# Launch the Gradio interface
interface.launch()


  warn_deprecated(


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://0390f8dfa451088613.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


