In [None]:
import torch
from diffusers import StableDiffusionPipeline, DiffusionPipeline
from PIL import Image
import numpy as np
import cv2
import os
from tqdm import tqdm

class ImageToVideoGenerator:
    """Build a short video by repeatedly re-diffusing an image under a sequence of prompts.

    Each generated frame is fed back in as the input image for the next
    diffusion call, so the frames drift from the source image towards
    whatever the prompts describe.
    """

    def __init__(self, model_id="stabilityai/stable-diffusion-2"):
        """Load the image-to-image pipeline and move it to the GPU when one is available.

        Args:
            model_id: Model identifier or local path handed to ``from_pretrained``.
        """
        # Imported locally so this class does not depend on edits elsewhere in
        # the notebook. The text-to-image ``StableDiffusionPipeline`` defines no
        # ``image``/``strength`` call arguments, so conditioning on a source
        # image requires the img2img pipeline.
        from diffusers import StableDiffusionImg2ImgPipeline

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id).to(self.device)

    def load_image(self, image_path):
        """Open an image as RGB and scale it so both sides are multiples of 8.

        Args:
            image_path: Path of the image file to read.

        Returns:
            A ``PIL.Image`` in RGB mode whose width and height are each rounded
            down to the nearest multiple of 8.

        Raises:
            ValueError: If either side is smaller than 8 pixels, which would
                round down to an empty image.
        """
        # ``convert`` forces the pixel data to load, so the file handle can be
        # closed right away. RGB is enforced because ``save_video`` converts
        # frames with COLOR_RGB2BGR, which needs exactly three channels.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        width, height = image.size
        new_width = (width // 8) * 8
        new_height = (height // 8) * 8
        if new_width == 0 or new_height == 0:
            raise ValueError(
                f"Image {image_path!r} is {width}x{height}; both sides must be at least 8 pixels."
            )
        return image.resize((new_width, new_height))

    def generate_frames(self, image, prompts, num_frames=3, strength=0.75):
        """Generate ``num_frames`` frames that step through consecutive prompt pairs.

        The frames are spread over the ``len(prompts) - 1`` transitions; when
        they do not divide evenly, the earliest transitions get one extra
        frame. With a single prompt every frame uses that prompt unchanged.

        Args:
            image: Starting image passed to the pipeline for the first frame.
            prompts: Ordered prompts to move through.
            num_frames: Total number of frames to produce. A value below 1
                produces an empty list.
            strength: Forwarded to the pipeline's ``strength`` argument.

        Returns:
            A list of RGB frames as NumPy arrays, in generation order.

        Raises:
            ValueError: If ``prompts`` is empty.
        """
        prompts = list(prompts)
        if not prompts:
            raise ValueError("prompts must contain at least one prompt")

        if len(prompts) == 1:
            # No pair to blend: hold the single prompt for every frame.
            transitions = [(prompts[0], None)]
        else:
            transitions = list(zip(prompts, prompts[1:]))

        # Divide by the number of transitions (not prompts) so the requested
        # total is actually produced.
        base_count, remainder = divmod(num_frames, len(transitions))

        frames = []
        for i, (start_prompt, end_prompt) in enumerate(transitions):
            frame_count = base_count + (1 if i < remainder else 0)
            if frame_count <= 0:
                continue
            print(f"Generating transition {i+1}/{len(transitions)}")
            for j in tqdm(range(frame_count)):
                # alpha runs over [0, 1): the end prompt reaches full weight on
                # the first frame of the following transition.
                alpha = j / frame_count
                if end_prompt is None:
                    current_prompt = start_prompt
                else:
                    # The weights are appended to the prompt as plain text.
                    # NOTE(review): confirm the pipeline gives these numbers
                    # any weighting effect.
                    current_prompt = f"{start_prompt} {(1-alpha):.2f}, {end_prompt} {alpha:.2f}"
                with torch.no_grad():
                    result = self.pipe(
                        prompt=current_prompt,
                        image=image,
                        strength=strength,
                        guidance_scale=7.5
                    ).images[0]

                frames.append(np.array(result))
                # Feed the new frame back in so consecutive frames stay related.
                image = result
        return frames

    def save_video(self, frames, output_path, fps=3):
        """Write RGB frames to an ``mp4v``-encoded video file.

        Args:
            frames: Sequence of RGB frames as NumPy arrays; the first frame
                determines the video size.
            output_path: Destination video path.
            fps: Frame rate of the written video.

        Raises:
            ValueError: If ``frames`` is empty.
            OSError: If the video writer cannot be opened for ``output_path``.
        """
        if len(frames) == 0:
            raise ValueError("frames is empty; there is nothing to write")

        height, width = frames[0].shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            out.release()
            raise OSError(f"Could not open video writer for {output_path}")

        try:
            for frame in frames:
                # OpenCV expects BGR channel order; the frames are RGB.
                out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        finally:
            # Release even if a frame fails so the file handle is not leaked.
            out.release()
        print(f"Video saved to {output_path}")

def main():
    """Run the demo end to end: load the source image, generate frames, write the video."""
    # Prompts are visited in order; each neighbouring pair forms one transition.
    scene_prompts = [
        "Shoes steping on ground with water splashes around it in beach",
        "Shoes getting dirty in mud while playing football sports.",
        "A sport person playing basketball wearing this shoes",
    ]

    video_maker = ImageToVideoGenerator()
    source_image = video_maker.load_image("Watch.jpg")
    rendered_frames = video_maker.generate_frames(source_image, scene_prompts, num_frames=3)
    video_maker.save_video(rendered_frames, "output_video.mp4", fps=3)


if __name__ == "__main__":
    main()

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

Generating transition 1/2


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:09<00:00,  9.31s/it]


Generating transition 2/2


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:09<00:00,  9.28s/it]

Video saved to output_video.mp4





In [None]:
from moviepy.editor import VideoFileClip, vfx
def slow_down_video(input_path, output_path, slow_factor):
    """Write a copy of a video slowed down by ``slow_factor``.

    Args:
        input_path: Path of the video to read.
        output_path: Path the slowed video is written to (encoded with libx264).
        slow_factor: How many times slower the result should play; the clip's
            speed is multiplied by ``1 / slow_factor``.

    Raises:
        ValueError: If ``slow_factor`` is not positive.
    """
    # Validate first: zero would divide by zero and a negative value would ask
    # for a negative playback speed.
    if slow_factor <= 0:
        raise ValueError(f"slow_factor must be positive, got {slow_factor!r}")

    video = VideoFileClip(input_path)
    try:
        slowed_video = video.fx(vfx.speedx, factor=1 / slow_factor)
        slowed_video.write_videofile(output_path, codec="libx264")
    finally:
        # Close the reader so the underlying file/subprocess is not leaked,
        # even when encoding fails.
        video.close()

# The generation cell saves its result to the relative path "output_video.mp4",
# so read it back through the same relative path instead of assuming the
# working directory is /content.
input_path = "output_video.mp4"
output_path = "output_slowed_video.mp4"
slow_factor = 10  # play the clip ten times slower
slow_down_video(input_path, output_path, slow_factor)

Moviepy - Building video output_slowed_video.mp4.
Moviepy - Writing video output_slowed_video.mp4





Moviepy - Done !
Moviepy - video ready output_slowed_video.mp4


In [None]:
import cv2
# Overlay timed captions on the slowed video and write the result to a new file.
# The slow-down cell writes to the relative path "output_slowed_video.mp4", so
# the same relative path is read here rather than a hard-coded /content prefix.
input_video_path = 'output_slowed_video.mp4'
output_video_path = 'output_video_with_text.mp4'

cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    raise OSError(f"Could not open input video: {input_video_path}")

# Keep the frame rate as a float: truncating e.g. 29.97 to 29 would change the
# playback speed of the written file.
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if fps <= 0:
    cap.release()
    raise ValueError(f"Input video reports an invalid frame rate: {fps}")

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

# (start_seconds, end_seconds, caption)
text_timings = [
    (2, 4, "Walk through Water"),
    (4, 6, "Tear the Ground")
]
# Convert the second-based windows into frame-index windows.
frame_texts = [(int(start * fps), int(end * fps), text) for start, end, text in text_timings]

# Caption styling is the same for every frame, so define it once.
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 2
color = (255, 255, 255)
thickness = 3
position = (200, 200)

current_frame = 0
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        for (start_frame, end_frame, text) in frame_texts:
            # Half-open window: the frame where one caption ends and the next
            # begins shows only the new caption instead of both overlapping.
            if start_frame <= current_frame < end_frame:
                cv2.putText(frame, text, position, font, font_scale, color, thickness, cv2.LINE_AA)
        out.write(frame)
        current_frame += 1
finally:
    # Release both handles even if reading or writing fails part-way.
    cap.release()
    out.release()