<a href="https://colab.research.google.com/github/johnwesley755/ai-shorts/blob/main/ai-video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision transformers diffusers opencv-python ffmpeg-python

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0


In [None]:
import os

# Working directories: raw inputs, processed data, and generated results.
for directory in ("datasets/raw", "datasets/processed", "results"):
    os.makedirs(directory, exist_ok=True)


In [None]:
import os

# Create necessary directories
os.makedirs("datasets/raw", exist_ok=True)

# Use FFmpeg to create a placeholder video
!ffmpeg -f lavfi -i testsrc=duration=10:size=1280x720:rate=30 datasets/raw/sample_video.mp4


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [None]:
import cv2

def extract_frames(video_path, output_dir, fps=10):
    """
    Extract frames from a video at a specified FPS and save them as images.

    Every ``int(source_fps / fps)``-th frame is kept and written to
    ``output_dir`` as ``frame_0000.jpg``, ``frame_0001.jpg``, ... in order.
    When the source frame rate is lower than ``fps`` every frame is kept.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the JPEG frames (created if missing).
        fps: Target number of frames to keep per second of video; must be positive.

    Raises:
        ValueError: If ``fps`` is not positive.
        IOError: If the video cannot be opened.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        # Fail loudly instead of reporting "Extracted 0 frames" for a bad path.
        raise IOError(f"Could not open video: {video_path}")

    count = 0
    frame_count = 0
    try:
        # Sampling step in source frames. Clamped to 1 because the integer
        # division yields 0 when the source rate is below the requested fps,
        # which would make the modulo below divide by zero.
        frame_rate = max(1, int(cap.get(cv2.CAP_PROP_FPS) / fps))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if count % frame_rate == 0:
                frame_path = os.path.join(output_dir, f"frame_{frame_count:04d}.jpg")
                cv2.imwrite(frame_path, frame)
                frame_count += 1
            count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
    print(f"Extracted {frame_count} frames to {output_dir}")

# Make sure the frame directory exists before extraction starts
os.makedirs("datasets/processed/frames", exist_ok=True)

# Sample the placeholder video at 10 frames per second
extract_frames(
    video_path="datasets/raw/sample_video.mp4",
    output_dir="datasets/processed/frames",
    fps=10,
)


Extracted 100 frames to datasets/processed/frames


In [None]:
from diffusers import DiffusionPipeline
from PIL import Image
import torch  # Ensure PyTorch is imported

# Load Stable Diffusion pipeline
pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16  # Use torch.float16 instead of "float16"
).to("cuda")

# Seed the sampler so that re-running this cell reproduces the same image
SEED = 0
generator = torch.Generator(device="cuda").manual_seed(SEED)

# Generate a single image from text
prompt = "A futuristic cityscape at sunset"
image = pipeline(prompt, generator=generator).images[0]

# Save the image, then end the cell with the image itself so the notebook
# renders it inline through its rich display, rather than depending on
# whichever image viewer Image.show() manages to find.
image.save("results/frame_0000.png")
image


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
def generate_frames(prompt, num_frames=10):
    """Generate `num_frames` images for `prompt` and save them under ``results/``.

    Each frame comes from its own call to the module-level ``pipeline`` and is
    written to ``results/frame_0000.png``, ``results/frame_0001.png``, ...

    Args:
        prompt: Text prompt passed to the pipeline for every frame.
        num_frames: Number of images to generate.

    Returns:
        List of the saved frame paths, in generation order.
    """
    # Create the output directory here so the function does not depend on
    # another cell having created it first.
    os.makedirs("results", exist_ok=True)

    frames = []
    for i in range(num_frames):
        image = pipeline(prompt).images[0]
        frame_path = f"results/frame_{i:04d}.png"
        image.save(frame_path)
        frames.append(frame_path)
    print(f"Generated {num_frames} frames.")
    return frames

# Example usage: render ten frames for a single prompt
frames = generate_frames(
    prompt="A futuristic cityscape at sunset",
    num_frames=10,
)


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Generated 10 frames.


In [None]:
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch

# Load the model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Define frames as a list of image file paths
frame_paths = ["datasets/processed/frames/frame_0000.jpg", "datasets/processed/frames/frame_0001.jpg"]  # Update paths accordingly

# Load images using PIL. Each file is opened in a `with` block and copied into
# memory so the underlying file handle is closed right away.
frames = []
for frame_path in frame_paths:
    with Image.open(frame_path) as img:
        frames.append(img.copy())

# Encode text and images
inputs = processor(
    text=["A city skyline at sunset"],  # Text prompt
    images=frames,                     # List of PIL images
    return_tensors="pt",               # PyTorch tensors
    padding=True                       # Add padding for batch processing
)

# Get model outputs. This is inference only, so autograd is switched off to
# avoid building a gradient graph for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

# Access logits for text-to-image matching (one row per text, one column per image)
print(outputs.logits_per_text)


tensor([[17.9839, 17.8241]], grad_fn=<MulBackward0>)


In [None]:
import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
import os
import cv2
from PIL import Image

# Load the Stable Diffusion v1.5 weights in half precision, then move the
# pipeline onto the GPU
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")

# Function to generate frames from a prompt
def generate_frames(prompt, num_frames=30):
    """Render `num_frames` images for `prompt` into ``results/frames``.

    Every frame is produced by a separate call to the module-level ``pipe``
    and saved as ``frame_0000.png``, ``frame_0001.png``, ... The directory is
    created when it does not exist yet.

    Returns:
        The saved frame paths as a list, in generation order.
    """
    os.makedirs("results/frames", exist_ok=True)

    def _render(i):
        # One pipeline call per frame; the image goes to disk under a
        # zero-padded index.
        image = pipe(prompt).images[0]
        frame_path = f"results/frames/frame_{i:04d}.png"
        image.save(frame_path)
        return frame_path

    return [_render(i) for i in range(num_frames)]

# Function to combine frames into a video
def combine_frames_to_video(frame_dir, output_path, fps=10):
    """Encode the ``.png`` files in `frame_dir` into a video at `output_path`.

    Frames are written in sorted filename order using the ``mp4v`` codec. The
    video size is taken from the first frame; any frame with a different size
    is resized to match so that it is not lost from the output.

    Args:
        frame_dir: Directory containing the ``.png`` frames.
        output_path: Path of the video file to write.
        fps: Frame rate of the output video.

    Raises:
        ValueError: If `frame_dir` contains no ``.png`` files.
        IOError: If a frame cannot be read or the video writer cannot be opened.
    """
    frames = sorted(
        os.path.join(frame_dir, f) for f in os.listdir(frame_dir) if f.endswith(".png")
    )
    if not frames:
        raise ValueError("No frames found to combine into a video.")

    # Read the first frame to get the video size. cv2.imread signals failure
    # by returning None rather than raising, so check it explicitly.
    first_frame = cv2.imread(frames[0])
    if first_frame is None:
        raise IOError(f"Could not read frame: {frames[0]}")
    height, width = first_frame.shape[:2]

    # Create a video writer
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # You can change the codec if needed
    video_writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        if not video_writer.isOpened():
            raise IOError(f"Could not open video writer for: {output_path}")

        # Write each frame into the video
        for frame_path in frames:
            frame = cv2.imread(frame_path)
            if frame is None:
                raise IOError(f"Could not read frame: {frame_path}")
            if frame.shape[:2] != (height, width):
                # The writer was opened for one fixed size; bring odd-sized
                # frames to that size before writing them.
                frame = cv2.resize(frame, (width, height))
            video_writer.write(frame)
    finally:
        # Always finalize the file, including when a frame fails part-way.
        video_writer.release()

# Main Gradio function to handle video generation
def generate_video(prompt):
    """Generate a short video for `prompt` and return the path of the video file.

    Thirty frames are generated with ``generate_frames`` and encoded with
    ``combine_frames_to_video`` at its default frame rate (10 fps, i.e. a
    3-second video).

    Args:
        prompt: Text prompt describing the frames to generate.

    Returns:
        Path of the generated video, ``"results/generated_video.mp4"``.
    """
    frame_dir = "results/frames"
    video_path = "results/generated_video.mp4"

    # Generate frames based on the text prompt
    frames = generate_frames(prompt, num_frames=30)  # Generate 30 frames (3-second video at 10 fps)

    # combine_frames_to_video encodes every .png it finds in the directory, so
    # frame files left behind by an earlier, longer run would be appended to
    # this video. Remove frame_*.png files that this call did not produce.
    fresh = {os.path.abspath(path) for path in frames}
    for name in os.listdir(frame_dir):
        path = os.path.join(frame_dir, name)
        is_frame_file = name.startswith("frame_") and name.endswith(".png")
        if is_frame_file and os.path.abspath(path) not in fresh:
            os.remove(path)

    # Combine frames into a video
    combine_frames_to_video(frame_dir, video_path)

    return video_path

# Gradio interface: a text prompt goes in, the generated video comes out
demo = gr.Interface(fn=generate_video, inputs="text", outputs="video")
demo.launch()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://86c6119a5fe5f17f8e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Installs packages that earlier cells import (transformers, PIL via pillow,
# gradio, cv2 via opencv-python). On a fresh runtime, run this before the
# Gradio cell if gradio is not already installed.
!pip install transformers pillow gradio opencv-python


