# Assignment 4
Volodymyr Kalinin

# Installing Dependencies
Google Colab may require you to reconnect to the GPU after running this cell. If it does, please run the cell again to ensure that everything is installed.

In [None]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
!pip install transformers==4.33.2
!pip install diffusers==0.22.1
!pip install accelerate==0.22.0
!pip install safetensors==0.3.3
!pip install sentencepiece==0.1.99
!pip install gradio==3.41.2
!pip install huggingface_hub==0.17.3


Looking in indexes: https://download.pytorch.org/whl/cu118


# Hugging Face Access Token
Please log in to Hugging Face with a read access token for the Stable Diffusion model to work. To get your own access token, go to your Hugging Face account (or create one) -> Settings -> Access Tokens, create a token with read access, and paste it into the login prompt below.

In [None]:
from huggingface_hub import notebook_login

# Show the interactive Hugging Face login widget; paste a read access token into it.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Importing libraries

In [None]:
import torch
from transformers import pipeline
from diffusers import StableDiffusionPipeline
from accelerate import Accelerator
import safetensors
import sentencepiece
import gradio as gr
from huggingface_hub import hf_hub_download

# GPU check
Should output this:

*table with your GPU stats*

True

*your gpu here*

In [None]:
!nvidia-smi
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

Sun Jan 19 20:14:01 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   77C    P0              34W /  70W |   7027MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# Creating Pipelines

In [None]:
# transformers pipelines take a device index: 0 is the first GPU, -1 is the CPU.
device = 0 if torch.cuda.is_available() else -1

# Text generator using GPT-2
text_generator = pipeline(
    "text-generation",
    model="gpt2-medium",
    max_length=100,
    num_return_sequences=3,
    # temperature / top_k / top_p only matter when sampling, and several distinct
    # sequences are requested, so ask for sampling explicitly rather than
    # relying on whatever defaults ship with the model.
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    repetition_penalty=1.2,
    device=device
)
# Summarization using BART
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=device
)

# Image creation with stable diffusion
model_id = "runwayml/stable-diffusion-v1-5"
# Follow the same GPU/CPU decision as the pipelines above instead of assuming
# CUDA: half precision on the GPU, full precision when falling back to the CPU.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device >= 0 else torch.float32,
)
pipe = pipe.to("cuda" if device >= 0 else "cpu")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["bos_token_id"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["eos_token_id"]` will be overriden.


Run this to see if the model uses the GPU (correct response: cuda:0)

In [None]:
# Print the device that holds the text-generation model (cuda:0 when it is on the first GPU).
print(text_generator.model.device)

cuda:0


# Helper functions

In [None]:
def generate_story_ideas(prompt: str, num_return_sequences: int = 3) -> list:
    """Expand a short prompt into several candidate story continuations.

    Args:
        prompt: Seed text handed to the module-level ``text_generator`` pipeline.
        num_return_sequences: How many alternative expansions to request.
            Defaults to 3, the number of options the UI advertises.

    Returns:
        A list of strings, one per generated sequence, taken from the
        ``"generated_text"`` field of each pipeline result.
    """
    outputs = text_generator(prompt, num_return_sequences=num_return_sequences)
    return [out["generated_text"] for out in outputs]

def summarize_for_image(text: str, max_summary_tokens: int = 60) -> str:
    """Condense a story into a short, visually oriented image prompt.

    The story is prefixed with an instruction asking for visual elements and
    passed to the module-level ``summarizer`` pipeline.

    Args:
        text: The story text to summarize.
        max_summary_tokens: Upper bound passed to the summarizer as
            ``max_length``.

    Returns:
        The ``"summary_text"`` of the first summarizer result.
    """
    summary_prompt = (
        "Summarize the following story focusing on its main VISUAL elements, "
        f"scenery, and unique descriptors:\n\n{text}"
    )
    summary_output = summarizer(
        summary_prompt,
        max_length=max_summary_tokens,
        # Keep the lower bound from exceeding the upper bound when a caller
        # asks for a summary shorter than the usual 10-token minimum.
        min_length=min(10, max_summary_tokens),
        do_sample=False,
        # Clip over-long input to what the model accepts instead of failing on it.
        truncation=True,
    )
    return summary_output[0]["summary_text"]

def generate_image(prompt):
    """Run the module-level Stable Diffusion ``pipe`` on ``prompt`` and return its first image."""
    generated = pipe(prompt)
    return generated.images[0]

# Gradio Interface
The UI may take a minute to load. Even though it seems like the code cell is done, the UI is still loading!

In [None]:
print("Loading UI...")
with gr.Blocks() as demo:
    gr.Markdown("## Visual Storyboarding Assistant")

    gr.Markdown(
        "1. Enter a short story to expand upon.\n"
        "2. Click **Generate Expansions** to see 3 expansions.\n"
        "3. Select your favorite expansion.\n"
        "4. **Summarize** it to focus on visual elements.\n"
        "5. **Generate Image** using Stable Diffusion."
    )

    # Per-session store for the latest expansions. It is updated by listing it
    # as an event output below; assigning to `.value` inside a handler would
    # instead overwrite the component's initial value.
    expansions_state = gr.State([])

    with gr.Row():
        prompt_input = gr.Textbox(
            lines=2,
            label="Starting Prompt",
            placeholder="E.g. 'A fantasy world where ancient trees speak...'"
        )
        story_button = gr.Button("Generate Expansions")

    expansions_output = gr.Radio(
        label="Select an expansion",
        choices=[],
        value=None
    )

    summarize_button = gr.Button("Summarize for Image Generation")
    summary_output = gr.Textbox(
        label="Summarized Prompt",
        lines=2
    )

    generate_image_button = gr.Button("Generate Image")
    image_output = gr.Image()


    def on_story_button_click(user_prompt):
        """Generate expansions, show them as radio choices and remember them.

        Returns the radio update (first expansion pre-selected) followed by the
        list of expansions to store in the session state.
        """
        expansions = generate_story_ideas(user_prompt)
        return gr.update(choices=expansions, value=expansions[0]), expansions

    story_button.click(
        fn=on_story_button_click,
        inputs=prompt_input,
        outputs=[expansions_output, expansions_state]
    )

    def on_summarize_button_click(selected_snippet):
        """Summarize the selected expansion into an image prompt."""
        if not selected_snippet:
            # Surface the problem as an error rather than writing the message
            # into the prompt box, where it could be sent on to image generation.
            raise gr.Error("No expansion selected. Please select one.")
        return summarize_for_image(selected_snippet)

    summarize_button.click(
        fn=on_summarize_button_click,
        inputs=expansions_output,
        outputs=summary_output
    )

    def on_image_button_click(final_prompt):
        """Render an image for the summarized prompt; do nothing when it is blank."""
        # Also covers a missing value, which has no .strip() to call.
        if not final_prompt or not final_prompt.strip():
            return None
        return generate_image(final_prompt)

    generate_image_button.click(
        fn=on_image_button_click,
        inputs=summary_output,
        outputs=image_output
    )

demo.launch()

Loading UI...
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>

