<a href="https://colab.research.google.com/github/arvindk1/workflow-tiktok-flux-model/blob/main/workflow_tiktok.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Install required Python libraries
%pip install openai google-cloud-texttospeech moviepy huggingface_hub

# --- Set Up API Keys in Colab Secrets ---
# Ensure you have a secret named HUGGINGFACE_API_KEY

import os
from google.colab import userdata

# Set environment variables from Colab Secrets
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
os.environ['HUGGINGFACE_API_KEY'] = userdata.get('HUGGINGFACE_API_KEY')
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = userdata.get('GOOGLE_APPLICATION_CREDENTIALS')



In [11]:
import openai
import json

client = openai.OpenAI()

# The system message must mention JSON for response_format={"type": "json_object"}
# to be accepted. It also spells out the exact schema, because the media
# generation cell reads scene['description'] and scene['dialogue'][n]['line'];
# without a pinned schema the model is free to choose different key names.
SCRIPT_SYSTEM_PROMPT = (
    "You are a viral video scriptwriter. Your response must be in JSON format. "
    'Return a single object with a "title" string and a "scenes" array. '
    'Each scene must contain "scene_number", "title", "description" '
    '(a visual description of the scene), "dialogue" (a non-empty array of '
    'objects with "character" and "line"), and "visuals".'
)


def _validate_script(script):
    """Raise ValueError unless `script` has the structure later cells rely on.

    Checks that `script['scenes']` is a non-empty list and that every scene
    has a non-empty 'description' and at least one dialogue entry with a
    non-empty 'line'.
    """
    scenes = script.get("scenes") if isinstance(script, dict) else None
    if not isinstance(scenes, list) or not scenes:
        raise ValueError("Script JSON must contain a non-empty 'scenes' list.")
    for position, scene in enumerate(scenes, start=1):
        if not isinstance(scene, dict) or not scene.get("description"):
            raise ValueError(f"Scene {position} is missing a 'description'.")
        dialogue = scene.get("dialogue")
        if not isinstance(dialogue, list) or not any(
            isinstance(entry, dict) and entry.get("line") for entry in dialogue
        ):
            raise ValueError(f"Scene {position} has no dialogue 'line' to narrate.")


response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": SCRIPT_SYSTEM_PROMPT},
        {"role": "user", "content": "Create a script with 5 scenes about the myth that you need 8 hours of sleep."}
    ],
    response_format={"type": "json_object"}
)

script_json_string = response.choices[0].message.content
print("✅ Script Generated")

# Show the raw JSON once so the generated script can be reviewed by eye
print(script_json_string)

# Parse and validate up front so a malformed script fails here, not halfway
# through the (paid) image and audio generation
script_data = json.loads(script_json_string)
_validate_script(script_data)

✅ Script Generated
{
  "title": "The 8-Hour Sleep Myth Uncovered",
  "scenes": [
    {
      "scene_number": 1,
      "title": "The Setup",
      "description": "The scene opens with a sleep expert in a cozy bedroom. Soft lighting and calming music play in the background.",
      "dialogue": [
        {
          "character": "Sleep Expert",
          "line": "Welcome! Today, we're diving into the myth that you need 8 hours of sleep every night. Let's unravel the truth!"
        }
      ],
      "visuals": "Camera pans to a clock showing 8:00 PM, then zooms in on a bed with a sleeping person."
    },
    {
      "scene_number": 2,
      "title": "The Science",
      "description": "Cut to an animated infographic that highlights various sleep needs based on age and lifestyle.",
      "dialogue": [
        {
          "character": "Voiceover",
          "line": "Studies show that sleep requirements vary. Adults average between 6 to 10 hours. It's all about what suits your body!"
        

In [14]:
import os
import json
from pathlib import Path
from huggingface_hub import InferenceClient
from google.cloud import texttospeech

# Make sure both media output folders exist before anything is written to them
os.makedirs("output/images", exist_ok=True)
os.makedirs("output/audio", exist_ok=True)

# --- Hugging Face InferenceClient ---
# Requests are routed through the "nebius" provider and authenticated with the
# token held in the HUGGINGFACE_API_KEY environment variable.
hf_client = InferenceClient(provider="nebius", api_key=os.environ["HUGGINGFACE_API_KEY"])

def generate_image(prompt, filename, model="black-forest-labs/FLUX.1-dev"):
    """Generate an image from a text prompt with the HF client and save it.

    Args:
        prompt: Text description of the image. Must not be empty or blank.
        filename: Destination path (str or path-like). Missing parent
            directories are created.
        model: Model id forwarded to the inference client. Defaults to the
            FLUX.1-dev id that was previously hard-coded.

    Raises:
        ValueError: If `prompt` is empty or contains only whitespace.
    """
    # Reject blank prompts before issuing a billable inference request
    if not prompt or not str(prompt).strip():
        raise ValueError("generate_image: prompt must be a non-empty string")

    target = Path(filename)
    target.parent.mkdir(parents=True, exist_ok=True)

    image = hf_client.text_to_image(prompt, model=model)

    # JPEG cannot store an alpha channel or palette, so normalise to RGB when
    # the destination is a .jpg/.jpeg file and the returned image is not RGB
    if target.suffix.lower() in (".jpg", ".jpeg") and image.mode != "RGB":
        image = image.convert("RGB")

    image.save(target)
    print(f"  ✅ Image saved: {filename}")

# --- Google Text-to-Speech ---
# Module-level client, created once and reused by synthesize_speech() for
# every request.
# NOTE(review): presumably authenticates via the GOOGLE_APPLICATION_CREDENTIALS
# environment variable populated in the setup cell; confirm.
tts_client = texttospeech.TextToSpeechClient()

def synthesize_speech(text, filename, language_code="en-US", voice_name="en-US-Standard-C"):
    """Synthesize `text` with Google Text-to-Speech and save it as an MP3.

    Args:
        text: The text to speak. Must not be empty or blank.
        filename: Destination path (str or path-like) of the MP3 file.
            Missing parent directories are created.
        language_code: Language code passed to the voice selection. Defaults
            to the previously hard-coded "en-US".
        voice_name: Voice name passed to the voice selection. Defaults to the
            previously hard-coded "en-US-Standard-C".

    Raises:
        ValueError: If `text` is empty or contains only whitespace.
    """
    # Reject blank text before issuing a billable synthesis request
    if not text or not str(text).strip():
        raise ValueError("synthesize_speech: text must be a non-empty string")

    target = Path(filename)
    target.parent.mkdir(parents=True, exist_ok=True)

    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(language_code=language_code, name=voice_name)
    audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
    response = tts_client.synthesize_speech(input=synthesis_input, voice=voice, audio_config=audio_config)

    # audio_content holds the encoded MP3 bytes, so write in binary mode
    with open(target, "wb") as out:
        out.write(response.audio_content)
    print(f"  ✅ Audio saved: {filename}")


# --- Process each scene from the script ---
# Assumes 'script_data' is a loaded JSON object from the previous cell, with a
# 'scenes' list whose items carry a 'description' string and a 'dialogue' list
# of {'character': ..., 'line': ...} entries.
for i, scene in enumerate(script_data['scenes']):
    scene_number = i + 1
    print(f"▶️ Processing Scene {scene_number}...")
    image_path = f"output/images/scene_{scene_number}.jpg"
    audio_path = f"output/audio/scene_{scene_number}.mp3"

    # Resolve both inputs BEFORE calling any API, so a malformed scene fails
    # with a clear message instead of after an image has already been paid for.
    image_prompt = scene.get('description')
    if not image_prompt:
        raise ValueError(f"Scene {scene_number} has no 'description' to use as an image prompt.")

    # Narrate every dialogue line of the scene (joined in order), not only the
    # first one, so multi-line scenes are not silently truncated.
    spoken_lines = [
        entry['line']
        for entry in (scene.get('dialogue') or [])
        if entry.get('line')
    ]
    if not spoken_lines:
        raise ValueError(f"Scene {scene_number} has no dialogue 'line' to narrate.")
    dialogue_line = " ".join(spoken_lines)

    generate_image(image_prompt, image_path)
    synthesize_speech(dialogue_line, audio_path)

print("\n✅ All images and voice clips generated.")

▶️ Processing Scene 1...


HfHubHTTPError: 402 Client Error: Payment Required for url: https://router.huggingface.co/nebius/v1/images/generations (Request ID: Root=1-68859765-33b7836b368385f82bb57374;1e2117e9-68f3-4ca7-9be3-4f97c1667c75)

You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.

In [15]:
# Install the required libraries for running diffusion models
%pip install diffusers transformers accelerate torch

import torch
from diffusers import DUXLSuperResPipeline, DUXLTransformer2dModel
from diffusers.models.attention_processor import AttnProcessor2_0

# --- Load the FLUX model pipeline ---
# This part downloads the model to your Colab environment
# Using torch.float16 saves a lot of memory
pipe = DUXLSuperResPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True
)

# Move the pipeline to the GPU for fast processing
pipe.to("cuda")

print("✅ Model loaded successfully onto the GPU.")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

ImportError: cannot import name 'DUXLSuperResPipeline' from 'diffusers' (/usr/local/lib/python3.11/dist-packages/diffusers/__init__.py)