<a href="https://colab.research.google.com/github/grayfruit/sdxs/blob/main/artv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from audiocraft.models import MusicGen
import numpy as np
import cv2
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips, ImageClip
import os
import random
from scipy.io import wavfile
import subprocess



# Ustawienia urządzenia
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
dtype = torch.float16 if device == "cuda" else torch.float32

# Ładowanie modeli
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=dtype)
pipe.to(device)

pipe_xl = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_XL", torch_dtype=dtype)
pipe_xl.to(device)

def generate_prompt():
    artistic_concepts = [
        "abstract expressionism", "surrealism", "impressionism", "pop art", "minimalism",
        "cubism", "art nouveau", "futurism", "dadaism", "op art", "street art", "realism",
        "hyper-realism", "photorealism"
    ]

    visual_elements = [
        "vibrant colors", "geometric shapes", "dot", "contrasting textures",
        "dynamic movement", "dramatic lighting", "soup", "organic forms",
        "bold silhouettes", "ethereal atmosphere", "rich symbolism", "vivid details",
        "stark shadows", "soft gradients"
    ]

    subjects = [
        "portrait", "city", "people",
        "human face", "friends", "family",
        "street", "athlete", "workers",
        "children", "elderly", "fashion model",
        "musician", "artist", "animal",
        "space", "galaxy", "universe"
    ]

    human_actions = [
        "smiling", "laughing", "running", "walking", "talking",
        "gesturing", "working", "playing", "dancing", "singing",
        "thinking", "expressing emotion", "embracing", "celebrating",
        "contemplating", "creating"
    ]

    concept = random.choice(artistic_concepts)
    element = random.choice(visual_elements)
    subject = random.choice(subjects)
    action = random.choice(human_actions)
    additional_hint = "photorealistic, high detail, 4k resolution, without watermarks or stock photo elements"
    prompt = f"A {concept} inspired scene of {subject} {action} with {element}. {additional_hint}"

    return prompt, "realistic"



def generate_video(pipe, prompt, num_frames=24, height=320, width=576, fps=8):
    seed = random.randint(0, 2**32 - 1)
    generator = torch.Generator(device).manual_seed(seed)

    print(f"Używany seed: {seed}")

    frames = pipe(
        prompt,
        num_inference_steps=40,
        num_frames=num_frames,
        height=height,
        width=width,
        generator=generator,
    ).frames[0]

    return frames, seed

def upscale_video(pipe_xl, prompt, video):
    upscaled_video = pipe_xl(prompt, video=video, strength=0.6, height=576, width=1024).frames
    return upscaled_video

def generate_and_save_clip_with_audio(pipe, pipe_xl, prompt, index, height=320, width=576, fps=24):
    print(f"Generowanie klipu {index + 1}: {prompt}")

    duration = 3  # Stała długość 3 sekundy
    num_frames = 24  # Stała liczba klatek

    try:
        video, seed = generate_video(pipe, prompt, num_frames=num_frames, height=height, width=width, fps=fps)
        print(f"Klip {index + 1} wygenerowany z seedem: {seed}")

        upscaled_video = upscale_video(pipe_xl, prompt, video)
        print(f"Klip {index + 1} upskalowany")
    except Exception as e:
        print(f"Błąd podczas generowania lub upskalowania wideo: {e}")
        return None

    video_filename = f"clip_{index}.mp4"
    save_video(upscaled_video, video_filename, fps)

    audio = generate_music(prompt, duration=duration)
    audio_filename = f"audio_{index}.wav"
    wavfile.write(audio_filename, 32000, audio)

    video_clip = VideoFileClip(video_filename)
    audio_clip = AudioFileClip(audio_filename)

    final_clip = video_clip.set_audio(audio_clip)

    output_filename = f"clip_with_audio_{index}.mp4"
    final_clip.write_videofile(output_filename, codec="libx264", audio_codec="aac", fps=fps)

    print(f"Zapisano klip z dźwiękiem jako {output_filename}")
    return output_filename

def add_watermark(video_path, logo_path, output_path):
    video = VideoFileClip(video_path)
    logo = (ImageClip(logo_path)
            .set_duration(video.duration)
            .margin(right=10, top=10, opacity=0)
            .set_pos(("right", "top")))
    final = CompositeVideoClip([video, logo])
    final.write_videofile(output_path, codec="libx264", audio_codec="aac")
    print("Logo dodany do wideo.")

def stream_to_youtube(input_file, youtube_url):
    command = [
        'ffmpeg',
        '-re',
        '-i', input_file,
        '-c:v', 'libx264',
        '-preset', 'veryfast',
        '-maxrate', '3000k',
        '-bufsize', '6000k',
        '-pix_fmt', 'yuv420p',
        '-g', '50',
        '-c:a', 'aac',
        '-b:a', '128k',
        '-ar', '44100',
        '-f', 'flv',
        youtube_url
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        print(f"Błąd FFmpeg: {stderr.decode()}")
    else:
        print("Strumieniowanie zakończone sukcesem.")

# Generowanie klipów
num_clips = 3
video_files = []
for i in range(num_clips):
    prompt, video_type = generate_prompt()
    print(f"{i+1}. {prompt} (Type: {video_type})")
    video_file = generate_and_save_clip_with_audio(pipe, pipe_xl, prompt, i)
    if video_file:
        video_files.append(video_file)
    torch.cuda.empty_cache()

# Łączenie klipów w jedno wideo
if video_files:
    clips = [VideoFileClip(file).crossfadein(0.05) for file in video_files]
    final_video = concatenate_videoclips(clips, method="compose")
    final_video.write_videofile("final_artv.mp4", codec="libx264", audio_codec="aac")

# Dodawanie logo
add_watermark("final_artv.mp4", "/content/logoARTV.png", "final_artv_with_logo.mp4")

# Strumieniowanie do YouTube
youtube_url = "rtmp://a.rtmp.youtube.com/live2/r987-0ze7-t3j9-arbg-4z75"
stream_to_youtube("final_artv_with_logo.mp4", youtube_url)

print("Proces zakończony. Sprawdź czy strumieniowanie do YouTube działa poprawnie.")

Using device: cuda


unet/diffusion_pytorch_model.safetensors not found


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
unet/diffusion_pytorch_model.safetensors not found


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


1. A street art inspired scene of portrait embracing with organic forms. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements (Type: realistic)
Generowanie klipu 1: A street art inspired scene of portrait embracing with organic forms. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements
Używany seed: 171799161


  0%|          | 0/40 [00:00<?, ?it/s]

Klip 1 wygenerowany z seedem: 171799161
Błąd podczas generowania lub upskalowania wideo: VideoToVideoSDPipeline.__call__() got an unexpected keyword argument 'height'
2. A photorealism inspired scene of children singing with stark shadows. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements (Type: realistic)
Generowanie klipu 2: A photorealism inspired scene of children singing with stark shadows. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements
Używany seed: 1834024427


  0%|          | 0/40 [00:00<?, ?it/s]

Klip 2 wygenerowany z seedem: 1834024427
Błąd podczas generowania lub upskalowania wideo: VideoToVideoSDPipeline.__call__() got an unexpected keyword argument 'height'
3. A futurism inspired scene of fashion model playing with geometric shapes. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements (Type: realistic)
Generowanie klipu 3: A futurism inspired scene of fashion model playing with geometric shapes. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements
Używany seed: 2196337982


  0%|          | 0/40 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from audiocraft.models import MusicGen
import numpy as np
import cv2
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips, ImageClip
import os
import random
from scipy.io import wavfile
import subprocess



# Ustawienia urządzenia
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipe.to(device)
pipe_xl = pipe_xl.to(device)
print(f"Using device: {device}")


#dtype = torch.float32
# Ładowanie modeli
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float32, use_safetensors=False)
pipe_xl = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_XL", torch_dtype=torch.float32, use_safetensors=False)

def generate_prompt():
    artistic_concepts = [
        "abstract expressionism", "surrealism", "impressionism", "pop art", "minimalism",
        "cubism", "art nouveau", "futurism", "dadaism", "op art", "street art", "realism",
        "hyper-realism", "photorealism"
    ]

    visual_elements = [
        "vibrant colors", "geometric shapes", "dot", "contrasting textures",
        "dynamic movement", "dramatic lighting", "soup", "organic forms",
        "bold silhouettes", "ethereal atmosphere", "rich symbolism", "vivid details",
        "stark shadows", "soft gradients"
    ]

    subjects = [
        "portrait", "city", "people",
        "human face", "friends", "family",
        "street", "athlete", "workers",
        "children", "elderly", "fashion model",
        "musician", "artist", "animal",
        "space", "galaxy", "universe"
    ]

    human_actions = [
        "smiling", "laughing", "running", "walking", "talking",
        "gesturing", "working", "playing", "dancing", "singing",
        "thinking", "expressing emotion", "embracing", "celebrating",
        "contemplating", "creating"
    ]

    concept = random.choice(artistic_concepts)
    element = random.choice(visual_elements)
    subject = random.choice(subjects)
    action = random.choice(human_actions)
    additional_hint = "photorealistic, high detail, 4k resolution, without watermarks or stock photo elements"
    prompt = f"A {concept} inspired scene of {subject} {action} with {element}. {additional_hint}"

    return prompt, "realistic"



from torch.cuda.amp import autocast

def generate_video(pipe, prompt, num_frames=24, height=160, width=288, fps=8):
    with autocast():
        seed = random.randint(0, 2**32 - 1)
        generator = torch.Generator(device=pipe.device).manual_seed(seed)
        print(f"Używany seed: {seed}")
        frames = pipe(
            prompt,
            num_inference_steps=40,
            num_frames=num_frames,
            height=height,
            width=width,
            generator=generator,
        ).frames[0]
    return frames, seed

def upscale_video(pipe_xl, prompt, video):
    upscaled_video = pipe_xl(prompt, video=video, strength=0.6).frames
    return upscaled_video

    import cv2
import numpy as np

def save_video(video, filename, fps=24):
    """
    Zapisuje tablicę klatek jako plik wideo.

    :param video: Tablica klatek wideo (numpy array) o kształcie (num_frames, height, width, channels)
    :param filename: Nazwa pliku wyjściowego
    :param fps: Liczba klatek na sekundę
    """
    if len(video.shape) == 5:
        video = video[0]

    # Upewnij się, że wartości pikseli są w zakresie 0-255 i mają typ uint8
    video = (video * 255).astype(np.uint8)

    height, width = video.shape[1:3]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(filename, fourcc, fps, (width, height))

    for frame in video:
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        out.write(frame_bgr)

    out.release()
    print(f"Wideo zapisane jako {filename}")

def generate_and_save_clip_with_audio(pipe, pipe_xl, prompt, index, height=320, width=576, fps=24):
    print(f"Generowanie klipu {index + 1}: {prompt}")

    duration = 3  # Stała długość 3 sekundy
    num_frames = 24  # Stała liczba klatek

    try:
        video, seed = generate_video(pipe, prompt, num_frames=num_frames, height=height, width=width, fps=fps)
        print(f"Klip {index + 1} wygenerowany z seedem: {seed}")

        upscaled_video = upscale_video(pipe_xl, prompt, video)
        print(f"Klip {index + 1} upskalowany")
    except Exception as e:
        print(f"Błąd podczas generowania lub upskalowania wideo: {e}")
        return None

    video_filename = f"clip_{index}.mp4"
    save_video(upscaled_video, video_filename, fps)

def add_watermark(video_path, logo_path, output_path):
    video = VideoFileClip(video_path)
    logo = (ImageClip(logo_path)
            .set_duration(video.duration)
            .margin(right=10, top=10, opacity=0)
            .set_pos(("right", "top")))
    final = CompositeVideoClip([video, logo])
    final.write_videofile(output_path, codec="libx264", audio_codec="aac")
    print("Logo dodany do wideo.")

def stream_to_youtube(input_file, youtube_url):
    command = [
        'ffmpeg',
        '-re',
        '-i', input_file,
        '-c:v', 'libx264',
        '-preset', 'veryfast',
        '-maxrate', '3000k',
        '-bufsize', '6000k',
        '-pix_fmt', 'yuv420p',
        '-g', '50',
        '-c:a', 'aac',
        '-b:a', '128k',
        '-ar', '44100',
        '-f', 'flv',
        youtube_url
    ]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        print(f"Błąd FFmpeg: {stderr.decode()}")
    else:
        print("Strumieniowanie zakończone sukcesem.")

# Generowanie klipów
num_clips = 3
video_files = []
for i in range(num_clips):
    prompt, video_type = generate_prompt()
    print(f"{i+1}. {prompt} (Type: {video_type})")
    video_file = generate_and_save_clip_with_audio(pipe, pipe_xl, prompt, i)
    if video_file:
        video_files.append(video_file)
    torch.cuda.empty_cache()

# Łączenie klipów w jedno wideo
if video_files:
    clips = [VideoFileClip(file).crossfadein(0.05) for file in video_files]
    final_video = concatenate_videoclips(clips, method="compose")
    final_video.write_videofile("final_artv.mp4", codec="libx264", audio_codec="aac")

# Dodawanie logo
add_watermark("final_artv.mp4", "/content/logoARTV.png", "final_artv_with_logo.mp4")

# Strumieniowanie do YouTube
youtube_url = "rtmp://a.rtmp.youtube.com/live2/r987-0ze7-t3j9-arbg-4z75"
stream_to_youtube("final_artv_with_logo.mp4", youtube_url)

print("Proces zakończony. Sprawdź czy strumieniowanie do YouTube działa poprawnie.")

Using device: cuda


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_XL/snapshots/6934ce594c2a8b39eec8bfece9d630a7ec93f642/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


1. A op art inspired scene of athlete thinking with bold silhouettes. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements (Type: realistic)
Generowanie klipu 1: A op art inspired scene of athlete thinking with bold silhouettes. photorealistic, high detail, 4k resolution, without watermarks or stock photo elements
Używany seed: 1202636158


  0%|          | 0/40 [00:00<?, ?it/s]

KeyboardInterrupt: 

# New section

In [None]:
!pip install torch diffusers audiocraft numpy opencv-python moviepy transformers scipy ipython ffmpeg-python accelerate




In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
print(torch.cuda.is_available())
print(torch.version.cuda)

True
12.1


In [None]:
import torch
from diffusers import DiffusionPipeline
import cv2
import numpy as np
import time
import random
from tqdm import tqdm
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
from audiocraft.models import MusicGen
import soundfile as sf
import shutil
import os
import random


print(f"Using device: {device}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA not available")

# Ustawienia
device = "cuda" if torch.cuda.is_available() else "cpu"
height, width = 320, 576  # Oryginalna rozdzielczość Zeroscope 576 320
num_frames = 32  # Standardowa liczba klatek na sekundę
num_inference_steps = 20  # Zwiększamy liczbę kroków dla lepszej jakości
fps = 8
clip_duration = 3


# Wyczyść cache
#cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
#if os.path.exists(cache_dir):
#    shutil.rmtree(cache_dir)
#print("Hugging Face cache cleared.")

#  załaduj model
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float32)
pipe = pipe.to(device)

music_model = MusicGen.get_pretrained('facebook/musicgen-small')
music_model.set_generation_params(duration=clip_duration)

#GENERUJEMY prompt
def generate_random_prompt():
    subjects = ["objects", "landscapes", "shapes", "figures", "people", "woman", "man", "animal", "creature"]
    environments = ["sky", "underwater", "cosmic", "forest", "city"]
    styles = ["surrealist", "photo", "impressionist", "cartoon", "dadaism"]
    actions = ["floating", "morphing", "dancing", "laughing", "contemplating"]
    elements = ["vibrant colors", "geometric shapes", "flowing lines", "ethereal atmosphere"]

    prompt = f"A {random.choice(styles)} inspired scene of {random.choice(subjects)} {random.choice(actions)} with {random.choice(elements)} in a {random.choice(environments)}. photorealistic, high detail, 4k resolution"

    return prompt


def generate_video(pipe, prompt, num_frames=32, height=320, width=576, num_inference_steps=20):
    start_time = time.time()
    seed = torch.randint(0, 2**32 - 1, (1,)).item()
    generator = torch.Generator(device=device).manual_seed(seed)
    print(f"Generating video with seed: {seed}")

    frames = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        num_frames=num_frames,
        height=height,
        width=width,
        generator=generator,
    ).frames[0]

    end_time = time.time()
    print(f"Video generation took {end_time - start_time:.2f} seconds")
    return frames, seed

def save_video(video, filename, fps=8):
    video = (video * 255).astype(np.uint8)
    height, width = video.shape[1:3]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(filename, fourcc, fps, (width, height))

    for frame in video:
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        out.write(frame_bgr)

    out.release()
    print(f"Video saved as {filename}")



def generate_music(prompt):
    music = music_model.generate([prompt], progress=True)
    return music[0].cpu().numpy()

import librosa

def save_audio(audio, filename, sample_rate=32000):
    # Normalizacja audio do zakresu [-1, 1]
    audio = audio / np.max(np.abs(audio))
    librosa.output.write_wav(filename, audio, sample_rate)
    print(f"Audio saved as {filename}")

def generate_music(prompt):
    music = music_model.generate([prompt], progress=True)
    audio = music[0].cpu().numpy()
    print(f"Audio shape: {audio.shape}, dtype: {audio.dtype}, min: {audio.min()}, max: {audio.max()}")
    return audio



import os

def generate_multiple_clips(num_clips):
    generated_files = []
    for i in tqdm(range(num_clips), desc="Generating clips"):
        prompt = generate_random_prompt()
        print(f"\nGenerating clip {i+1}/{num_clips}")
        print(f"Prompt: {prompt}")

        # Generowanie wideo
        video, seed = generate_video(pipe, prompt, num_frames=num_frames, height=height, width=width, num_inference_steps=num_inference_steps)
        video_filename = f"generated_video_{i+1}.mp4"
        save_video(video, video_filename, fps=fps)

        generated_files.append(video_filename)

    return generated_files

def combine_videos(video_files, output_filename="combined_video.mp4"):
    clips = []
    for file in video_files:
        if os.path.exists(file):
            clips.append(VideoFileClip(file))
        else:
            print(f"Warning: File {file} not found. Skipping.")

    if clips:
        final_clip = concatenate_videoclips(clips)
        final_clip.write_videofile(output_filename)
        print(f"Combined video saved as {output_filename}")
    else:
        print("No valid video files found to combine.")

# Użycie:
generated_files = generate_multiple_clips(3)
combine_videos(generated_files)

# Po wygenerowaniu combined_video.mp4
add_watermark("combined_video.mp4", "/content/logoARTV.png", "combined_video_with_logo.mp4")
print("Process completed.")




def add_watermark(video_path, logo_path, output_path):
    # Wczytaj wideo
    video = VideoFileClip(video_path)

    # Wczytaj logo
    logo = (ImageClip(logo_path)
            .set_duration(video.duration)  # ustaw czas trwania logo na długość wideo
           # .resize(height=50)  # zmień rozmiar logo (dostosuj według potrzeb)
            .margin(right=10, top=10, opacity=0)  # dodaj margines
            .set_pos(("right", "top")))  # ustaw pozycję w prawym górnym rogu

    # Nałóż logo na wideo
    final = CompositeVideoClip([video, logo])

    # Zapisz wideo z logo
    final.write_videofile(output_path, codec="libx264", audio_codec="aac")

    print("Logo dodany do wideo.")

# Po wygenerowaniu final_artv.mp4
add_watermark("combined_video.mp4", "/content/logoARTV.png", "combined_video_with_logo.mp4")
if True: # Replace True with the actual condition
    print("Proces zakończony. Wideo zostało wygenerowane.")
else:
    print("Nie udało się wygenerować żadnego klipu wideo.")
print("Process completed.")

Using device: cuda
PyTorch version: 2.1.0+cu121
CUDA available: True
CUDA version: 12.1
GPU: NVIDIA A100-SXM4-40GB


vae/diffusion_pytorch_model.safetensors not found


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/vae.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.
An error occurred while trying to fetch /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /root/.cache/huggingface/hub/models--cerspense--zeroscope_v2_576w/snapshots/6963642a64dbefa93663d1ecebb4ceda2d9ecb28/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.

Generating clips:   0%|          | 0/3 [00:00<?, ?it/s]


Generating clip 1/3
Prompt: A impressionist inspired scene of creature dancing with geometric shapes in a forest. photorealistic, high detail, 4k resolution
Generating video with seed: 395809770


  0%|          | 0/20 [00:00<?, ?it/s]

Generating clips:  33%|███▎      | 1/3 [00:46<01:33, 46.91s/it]

Video generation took 46.75 seconds
Video saved as generated_video_1.mp4

Generating clip 2/3
Prompt: A impressionist inspired scene of creature laughing with flowing lines in a sky. photorealistic, high detail, 4k resolution
Generating video with seed: 737548839


  0%|          | 0/20 [00:00<?, ?it/s]

Generating clips:  67%|██████▋   | 2/3 [01:33<00:46, 46.96s/it]

Video generation took 46.83 seconds
Video saved as generated_video_2.mp4

Generating clip 3/3
Prompt: A cartoon inspired scene of landscapes morphing with flowing lines in a city. photorealistic, high detail, 4k resolution
Generating video with seed: 982188572


  0%|          | 0/20 [00:00<?, ?it/s]

Generating clips: 100%|██████████| 3/3 [02:20<00:00, 46.97s/it]

Video generation took 46.83 seconds
Video saved as generated_video_3.mp4





Moviepy - Building video combined_video.mp4.
Moviepy - Writing video combined_video.mp4





Moviepy - Done !
Moviepy - video ready combined_video.mp4
Combined video saved as combined_video.mp4
Moviepy - Building video combined_video_with_logo.mp4.
Moviepy - Writing video combined_video_with_logo.mp4





Moviepy - Done !
Moviepy - video ready combined_video_with_logo.mp4
Logo dodany do wideo.
Process completed.
Moviepy - Building video combined_video_with_logo.mp4.
Moviepy - Writing video combined_video_with_logo.mp4





Moviepy - Done !
Moviepy - video ready combined_video_with_logo.mp4
Logo dodany do wideo.
Proces zakończony. Wideo zostało wygenerowane.
Process completed.


# New section

In [1]:
import torch
from diffusers import DiffusionPipeline
import cv2
import numpy as np
import time
import random
from tqdm import tqdm
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips, CompositeVideoClip
from audiocraft.models import MusicGen
import librosa
import soundfile as sf
import os
import subprocess


# Ustawienia
device = "cuda" if torch.cuda.is_available() else "cpu"
height, width = 320, 576
num_frames = 32
num_inference_steps = 20
fps = 8
clip_duration = 4  # Zwiększamy do 4 sekund, aby lepiej pasowało do wideo

# Ładowanie modeli
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float32)
pipe = pipe.to(device)

music_model = MusicGen.get_pretrained('facebook/musicgen-small')
music_model.set_generation_params(duration=clip_duration)

def generate_random_prompt():
    subjects = ["object", "landscape", "human", "robot", "shape", "figure", "people", "woman", "man", "animal", "creature"]
    environments = ["sky", "room", "street", "university","underwater", "air", "cosmos", "nature", "city"]
    styles = ["surrealist", "photo", "colorful", "cartoon", "drawing", "painting", "sculpting"]
    actions = ["floating", "morphing", "dancing", "laughing", "contemplating", "living", "working", "making art"]
    elements = ["vibrant colors", "black and white", "minimalism"]

    prompt = f"A {random.choice(styles)} scene of {random.choice(subjects)} {random.choice(actions)} with {random.choice(elements)} in a {random.choice(environments)}. photorealistic, high detail, 4k resolution"

    return prompt

def generate_video(pipe, prompt, num_frames=32, height=320, width=576, num_inference_steps=20):
    start_time = time.time()
    seed = torch.randint(0, 2**32 - 1, (1,)).item()
    generator = torch.Generator(device=device).manual_seed(seed)
    print(f"Generating video with seed: {seed}")

    frames = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        num_frames=num_frames,
        height=height,
        width=width,
        generator=generator,
    ).frames[0]

    end_time = time.time()
    print(f"Video generation took {end_time - start_time:.2f} seconds")
    return frames, seed

def save_video(video, filename, fps=8):
    video = (video * 255).astype(np.uint8)
    height, width = video.shape[1:3]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(filename, fourcc, fps, (width, height))

    for frame in video:
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        out.write(frame_bgr)

    out.release()
    print(f"Video saved as {filename}")

def generate_music(prompt):
    music = music_model.generate([prompt], progress=True)
    return music[0].cpu().numpy()

def save_audio(audio, filename, sample_rate=32000):
    sf.write(filename, audio, sample_rate)
    print(f"Audio saved as {filename}")

from pydub import AudioSegment

def save_audio(audio, filename, sample_rate=32000):
    audio = (audio * 32767).astype(np.int16)  # Konwertuj do int16
    audio_segment = AudioSegment(
        audio.tobytes(),
        frame_rate=sample_rate,
        sample_width=2,
        channels=1
    )
    audio_segment.export(filename, format="mp3")
    print(f"Audio saved as {filename}")

def generate_multiple_clips(num_clips):
    generated_files = []
    for i in tqdm(range(num_clips), desc="Generating clips"):
        prompt = generate_random_prompt()
        print(f"\nGenerating clip {i+1}/{num_clips}")
        print(f"Prompt: {prompt}")

        # Generowanie wideo
        video, seed = generate_video(pipe, prompt, num_frames=num_frames, height=height, width=width, num_inference_steps=num_inference_steps)
        video_filename = f"generated_video_{i+1}.mp4"
        save_video(video, video_filename, fps=fps)

        try:
            # Generowanie muzyki
            audio = generate_music(prompt)
            audio_filename = f"generated_audio_{i+1}.mp3"
            save_audio(audio, audio_filename)

            # Łączenie wideo i audio
            final_filename = f"final_clip_{i+1}.mp4"
            video_clip = VideoFileClip(video_filename)
            audio_clip = AudioFileClip(audio_filename)
            final_clip = video_clip.set_audio(audio_clip)
            final_clip.write_videofile(final_filename, codec="libx264", audio_codec="aac")
        except Exception as e:
            print(f"Error generating audio for clip {i+1}: {e}")
            final_filename = video_filename  # Use video without audio if there's an error

        generated_files.append(final_filename)

    return generated_files

def combine_videos(video_files, output_filename="combined_video.mp4"):
    clips = []
    for file in video_files:
        if os.path.exists(file):
            clips.append(VideoFileClip(file))
        else:
            print(f"Warning: File {file} not found. Skipping.")

    if clips:
        final_clip = concatenate_videoclips(clips)
        final_clip.write_videofile(output_filename, codec="libx264", audio_codec="aac")
        print(f"Combined video saved as {output_filename}")
    else:
        print("No valid video files found to combine.")

def add_watermark(video_path, logo_path, output_path):
    video = VideoFileClip(video_path)
    logo = (ImageClip(logo_path)
            .set_duration(video.duration)
            .margin(right=10, top=10, opacity=0)
            .set_pos(("right", "top")))
    final = CompositeVideoClip([video, logo])
    final.write_videofile(output_path, codec="libx264", audio_codec="aac")
    print("Logo added to video.")

#------------------------------ Główny proces
generated_files = generate_multiple_clips(20)
combine_videos(generated_files)
add_watermark("combined_video.mp4", "/content/logoARTV.png", "combined_video_with_logo.mp4")
print("Process completed.")


# ---------------------- strumien do youtube -------------------------


def stream_to_youtube(input_file, stream_key):
    youtube_url = f"rtmp://a.rtmp.youtube.com/live2/{stream_key}"

    command = [
        'ffmpeg',
        '-re',
        '-i', input_file,
        '-c:v', 'libx264',
        '-preset', 'veryfast',
        '-maxrate', '3000k',
        '-bufsize', '6000k',
        '-pix_fmt', 'yuv420p',
        '-g', '50',
        '-c:a', 'aac',
        '-b:a', '128k',
        '-ar', '44100',
        '-f', 'flv',
        youtube_url
    ]

    try:
        print("Rozpoczynam strumieniowanie...")
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()

        if process.returncode != 0:
            print(f"Błąd FFmpeg: {stderr.decode()}")
        else:
            print("Strumieniowanie zakończone sukcesem.")
    except Exception as e:
        print(f"Wystąpił nieoczekiwany błąd: {str(e)}")

# Strumieniowanie
stream_key = "r987-0ze7-t3j9-arbg-4z75"  # Twój klucz strumieniowania
input_file = "combined_video_with_logo.mp4"

if os.path.exists(input_file):
    print("Plik istnieje i jest gotowy do strumieniowania.")
    start_time = time.time()
    stream_to_youtube(input_file, stream_key)
    end_time = time.time()
    print(f"Czas strumieniowania: {end_time - start_time} sekund")
else:
    print(f"Plik {input_file} nie istnieje. Sprawdź nazwę i ścieżkę.")

print("Proces zakończony.")





ModuleNotFoundError: No module named 'diffusers'