# Instructions:

* Clone https://github.com/jammm/Phantom: `git clone https://github.com/jammm/Phantom`
* Place this demo notebook within the cloned `Phantom` folder.
* Create a venv: `python3 -m venv .venv`
* Activate the venv: `source .venv/bin/activate`
* Install Jupyter with `pip install jupyter` and start a notebook server (or use the VS Code Jupyter extension). Launch the server from the `Phantom` folder so that `$PWD` is the `Phantom` folder
* Run all the cells from top to bottom
* In the last cell, adjust the parameters, prompt, and input images as necessary, then click "Generate Video"

In [1]:
import os
# Fail fast when no virtual environment is active: VIRTUAL_ENV must be set to a
# non-empty value before the rest of the notebook runs.
venv_path = os.environ.get("VIRTUAL_ENV")
if not venv_path:  # covers both "unset" (None) and "set but empty"
    raise Exception("ERROR: no venv found. Make sure to create one with 'python3 -m venv .venv' and activate it with 'source .venv/bin/activate'")

print("Using venv: ", venv_path)

Using venv:  /home/jam/jam/log-linear-attention/venv


In [2]:


print("Installing dependencies...")
!pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.4
!pip install -r requirements.txt

# Download models
!pip install "huggingface_hub[cli]" ipywidgets

!huggingface-cli download Wan-AI/Wan2.1-T2V-1.3B --local-dir ./Wan2.1-T2V-1.3B
!huggingface-cli download bytedance-research/Phantom --local-dir ./Phantom-Wan-Models

Installing dependencies...
[33mDEPRECATION: Loading egg at /home/jam/jam/log-linear-attention/venv/lib/python3.12/site-packages/causal_conv1d-1.5.0.post8-py3.12-linux-x86_64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0mLooking in indexes: https://download.pytorch.org/whl/nightly/rocm6.4
[33mDEPRECATION: Loading egg at /home/jam/jam/log-linear-attention/venv/lib/python3.12/site-packages/causal_conv1d-1.5.0.post8-py3.12-linux-x86_64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation.. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[33mDEPRECATION: Loading egg at /home/jam/jam/log-linear-attention/venv/lib/python3.12/site-packages/causal_conv1d-1.5.0.post8-py3.12-linux-x86_64.egg is deprecated. pip 24.3 will enforce this 

Looking in indexes: https://download.pytorch.org/whl/nightly/rocm6.4
Fetching 22 files: 100%|█████████████████████| 22/22 [00:00<00:00, 13861.30it/s]
/home/jam/jam/Phantom/Wan2.1-T2V-1.3B
Fetching 39 files: 100%|██████████████████████| 39/39 [00:00<00:00, 2797.40it/s]
/home/jam/jam/Phantom/Phantom-Wan-Models


In [6]:
import os
os.environ["OMP_NUM_THREADS"] = "16"

import ipywidgets as widgets
from IPython.display import display, clear_output
import subprocess
import glob
from IPython.display import Video

# Banner printed above the interactive video-generation form.
print("🎬 Phantom Video Generation Configuration")
print("=" * 50)


def _label_style(width_px):
    """Return a fresh style dict that pins the description label to `width_px` pixels."""
    return {'description_width': f'{width_px}px'}


def _int_field(label, default, label_px=80):
    """Create an integer input box with the given label and initial value."""
    return widgets.IntText(value=default, description=label, style=_label_style(label_px))


def _advanced_text_field(label, default):
    """Create a text input for the advanced panel (wider 120px label)."""
    return widgets.Text(value=default, description=label, style=_label_style(120))


# Multi-line prompt editor, pre-filled with an example prompt.
prompt_widget = widgets.Textarea(
    description="Prompt:",
    placeholder="Enter your video generation prompt here...",
    value="A cartoon old grandfather wearing a yellow hat, a yellow top and brown suspenders is holding a blue steaming coffee cup in a fresh cartoon-style cafe decorated with pink and blue tables and chairs, colorful chandeliers and colorful balls. The picture style is cartoony and fresh.",
    style=_label_style(80),
    layout=widgets.Layout(width='100%', height='120px'),
)

# Output resolution in pixels.
width_widget = _int_field("Width:", 832)
height_widget = _int_field("Height:", 480)

# Reference images, entered as a single comma-separated string of paths.
ref_images_widget = widgets.Text(
    description="Ref Images:",
    placeholder="Comma-separated image paths",
    value="examples/ref14.png,examples/ref15.png,examples/ref16.png",
    style=_label_style(80),
    layout=widgets.Layout(width='100%'),
)

# Clip length and playback rate.
frames_widget = _int_field("Frames:", 121)
fps_widget = _int_field("FPS:", 24)

# Advanced settings. The click handler reads these fields BY POSITION, so the
# order below (checkpoint dir, phantom ckpt, ulysses, ring, gpus) must not change.
_advanced_fields = widgets.VBox([
    _advanced_text_field("Checkpoint Dir:", "./Wan2.1-T2V-1.3B"),
    _advanced_text_field("Phantom Ckpt:", "./Phantom-Wan-Models"),
    _int_field("Ulysses Size:", 8, label_px=120),
    _int_field("Ring Size:", 1, label_px=120),
    _int_field("Num GPUs:", 8, label_px=120),
])
advanced_accordion = widgets.Accordion(children=[_advanced_fields])
advanced_accordion.set_title(0, 'Advanced Settings')
advanced_accordion.selected_index = None  # no section selected => starts collapsed

# Button that triggers generation; its click handler is attached further down.
generate_button = widgets.Button(
    layout=widgets.Layout(width='200px', height='40px'),
    button_style='success',
    description="🎬 Generate Video",
)

# File name patterns treated as "a generated video".
_VIDEO_PATTERNS = ("*.mp4", "*.avi", "*.mov", "*.mkv", "*.webm")
# Directories (relative to the working directory) searched for the result.
_VIDEO_SEARCH_DIRS = (".", "./outputs", "./results", "./generated")
# Tolerance for filesystems that store modification times with coarse granularity.
_MTIME_SLACK_SECONDS = 2.0


def _parse_ref_images(raw):
    """Split a comma-separated path string into stripped, non-empty entries."""
    return [entry.strip() for entry in raw.split(",") if entry.strip()]


def _validate_generation_inputs(prompt, ref_image_paths, width, height, frames, fps,
                                ulysses_size, ring_size, num_gpus):
    """Return the user-facing message for the first invalid setting, or None if all are valid."""
    if not prompt.strip():
        return "❌ Please enter a prompt!"
    if not ref_image_paths:
        return "❌ Please enter reference image paths!"
    if width <= 0 or height <= 0:
        return "❌ Width and height must be positive!"
    if frames <= 0 or fps <= 0:
        return "❌ Frames and FPS must be positive!"
    if num_gpus <= 0 or ulysses_size <= 0 or ring_size <= 0:
        return "❌ Num GPUs, Ulysses size and ring size must be positive!"
    return None


def _find_videos_written_since(started_at):
    """Return video files modified at/after `started_at` (epoch seconds), newest first."""
    candidates = set()
    for directory in _VIDEO_SEARCH_DIRS:
        if not os.path.isdir(directory):
            continue
        for pattern in _VIDEO_PATTERNS:
            for path in glob.glob(os.path.join(directory, pattern)):
                # normpath makes "./x.mp4" and "x.mp4" collapse to one entry
                candidates.add(os.path.normpath(path))

    threshold = started_at - _MTIME_SLACK_SECONDS
    dated = [(os.path.getmtime(path), path) for path in candidates]
    fresh = sorted((item for item in dated if item[0] >= threshold), reverse=True)
    return [path for _, path in fresh]


def on_generate_click(b):
    """Button callback: validate the form, run `torchrun generate.py`, show the new video.

    Reads its settings from the module-level widgets (`prompt_widget`,
    `width_widget`, `height_widget`, `ref_images_widget`, `frames_widget`,
    `fps_widget`, and the positional fields inside `advanced_accordion`).

    Args:
        b: the object passed by the button's click event; not used.

    Returns:
        None. Progress, validation errors and failures are reported with
        `print`; on success the newest video written during this run is
        displayed inline.
    """
    import time  # local import: only this handler needs it

    # Get values from widgets
    prompt = prompt_widget.value
    width = width_widget.value
    height = height_widget.value
    frames = frames_widget.value
    fps = fps_widget.value

    # Normalise "a.png, b.png," -> "a.png,b.png" so stray spaces or trailing
    # commas typed into the text box never reach the command line.
    ref_image_paths = _parse_ref_images(ref_images_widget.value)
    ref_images = ",".join(ref_image_paths)

    # Advanced settings are addressed by their position inside the accordion's VBox.
    advanced_widgets = advanced_accordion.children[0].children
    ckpt_dir = advanced_widgets[0].value
    phantom_ckpt = advanced_widgets[1].value
    ulysses_size = advanced_widgets[2].value
    ring_size = advanced_widgets[3].value
    num_gpus = advanced_widgets[4].value

    # Validate inputs
    problem = _validate_generation_inputs(
        prompt, ref_image_paths, width, height, frames, fps,
        ulysses_size, ring_size, num_gpus,
    )
    if problem is not None:
        print(problem)
        return

    # Build command (argument list, so no shell quoting of the prompt is needed).
    # NOTE(review): --dit_fsdp/--t5_fsdp are always passed; confirm generate.py
    # accepts them when Num GPUs is 1.
    command = [
        "torchrun", f"--nproc_per_node={num_gpus}", "generate.py",
        "--task", "s2v-14B",
        "--size", f"{width}*{height}",
        "--frame_num", str(frames),
        "--sample_fps", str(fps),
        "--ckpt_dir", ckpt_dir,
        "--phantom_ckpt", phantom_ckpt,
        "--ref_image", ref_images,
        "--dit_fsdp",
        "--t5_fsdp",
        "--ulysses_size", str(ulysses_size),
        "--ring_size", str(ring_size),
        "--prompt", prompt
    ]

    print("🚀 Starting Phantom video generation...")
    print(f"📐 Resolution: {width}x{height}")
    print(f"🎞️ Frames: {frames} @ {fps} FPS")
    print(f"🖼️ Reference images: {ref_images}")
    print(f"💬 Prompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
    print("\n" + "="*50)

    # Broad catch on purpose: this is a UI callback, so any failure (e.g. torchrun
    # missing, unreadable video) is reported to the user instead of being lost.
    try:
        print("⏳ Running video generation (this may take several minutes)...")
        print("📺 Check your terminal/stdout for detailed progress output...")

        # Remember when the run started so only videos written by it are considered.
        started_at = time.time()

        # The child's output is not captured; it goes to the kernel's stdout/terminal.
        result = subprocess.run(command, cwd=".")

        if result.returncode != 0:
            print(f"❌ Video generation failed with return code: {result.returncode}")
            print("Check the terminal output above for error details.")
            return

        clear_output(wait=True)
        print("✅ Video generation completed successfully!")

        new_videos = _find_videos_written_since(started_at)
        if not new_videos:
            print("⚠️ No new video files found. Please check the output directory manually.")
            return

        latest_video = new_videos[0]
        print(f"📹 Generated video: {latest_video}")

        # Display the video
        display(Video(latest_video, width=width, height=height, embed=True))

    except Exception as e:
        print(f"❌ Error running command: {e}")

# Output region shown under the button. It was referenced in the layout below
# without ever being created, which raised NameError on a fresh kernel.
output_area = widgets.Output()


def _run_generation_in_output_area(button):
    """Run the click handler with its prints/displays captured by `output_area`.

    Inside the `with` block, `print`, `display` and `clear_output` calls made by
    the handler target the output widget, so clearing no longer wipes the form.
    """
    with output_area:
        on_generate_click(button)


generate_button.on_click(_run_generation_in_output_area)

# Layout the widgets
resolution_box = widgets.HBox([width_widget, height_widget])
media_box = widgets.HBox([frames_widget, fps_widget])

display(widgets.VBox([
    prompt_widget,
    widgets.HTML("<b>Resolution:</b>"),
    resolution_box,
    ref_images_widget,
    widgets.HTML("<b>Media Settings:</b>"),
    media_box,
    advanced_accordion,
    widgets.HTML("<br>"),
    generate_button,
    output_area
]))


✅ Video generation completed successfully!
📹 Generated video: ./s2v-14B_832*480_8_1_A_cartoon_old_grandfather_wearing_a_yellow_hat,_a__20250609_112922.mp4
