In [1]:
# Copyright 2024-2025 The Alibaba Wan Team Authors. All rights reserved.
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import sys
import warnings
import torch
import random
from PIL import Image

from datetime import datetime
from ipywidgets import interact, interactive, fixed, widgets
from IPython.display import Video

import wan
from wan.configs import WAN_CONFIGS, SIZE_CONFIGS, MAX_AREA_CONFIGS, SUPPORTED_SIZES
from wan.utils.prompt_extend import DashScopePromptExpander, QwenPromptExpander
from wan.utils.utils import cache_video, cache_image
from inference_lora import lora_name_to_path, add_lora_tag, LoraModel
from accelerate import PartialState # Can also be Accelerator of AcceleratorState

warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2

ModuleNotFoundError: No module named 'wan'

In [None]:
weights = torch.load()

In [3]:
task = "i2v-14B"
use_prompt_extend=False
prompt_extend_method=False
prompt_extend_target_lang=False

In [4]:
# Load model
ckpt_dir = '/home/jovyan/dmitrienko/workspace/checkpoints/pretrained/Wan2.1-I2V-14B-480P' 
pipe_cls = wan.WanI2V
cfg = WAN_CONFIGS[task]

pipe = pipe_cls(
    config=cfg,
    checkpoint_dir=ckpt_dir,
    device_id='0',
    rank=0,
)
pipe.model = LoraModel(pipe.model)

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [5]:
lora_name_to_path = {
    "opensource_civitai": "/home/jovyan/dmitrienko/workspace/checkpoints/pretrained/Wan2.1-I2V-14B-480P_lora-360-rotation/", # https://civitai.com/models/1379629/360-camera-orbit-wan-21-i2v-lora
    "opensource_remade_microwave": "/home/jovyan/dmitrienko/workspace/checkpoints/pretrained/Wan2.1-I2V-14B-480P_lora-360-rotation_remade/", # https://huggingface.co/Remade-AI/Rotate
    
    # trained on 180 degree rotation
    "180rotation_spell+arc_right_v2": "/home/jovyan/dmitrienko/workspace/checkpoints/wan14b_i2v/higgsfield+0.5arc_shot+spell_480_lora128_lr1e-4_bs4_2gpus/20250404_18-52-31",

    # trained on 360 degree rotation
    "360rotation_spell+arc_right_v1": "/home/jovyan/dmitrienko/workspace/checkpoints/wan14b_i2v/spell+arc_right_v3_512_lora128_lr1e-4_bs4_2gpus/20250401_18-14-24/",
    "360rotation_spell+arc_right_v2": "/home/jovyan/dmitrienko/workspace/checkpoints/wan14b_i2v/spell+arc_right_v3-x5_512_lora128_lr1e-4_bs4_2gpus/20250403_10-00-00/",
    "360rotation_spell+arc_right_v3": "/home/jovyan/dmitrienko/workspace/checkpoints/wan14b_i2v/spell+arc_right_v3-x5_shift5_512_lora128_lr1e-4_bs4_2gpus/20250408_18-10-51/",

    # microwave rotation (w/o background)
    # "microwave_rotation": "/home/jovyan/dmitrienko/workspace/checkpoints/wan14b_i2v/microwave-x5_shift5_512_lora128_lr1e-4_bs4_2gpus/20250408_19-21-39/",

}

# Optimal checkpoint
default_epoch = {
    "opensource_civitai": '0',
    "opensource_remade_rotation": '0',
    "360rotation_spell+arc_right_v1": 'epoch390',
    "360rotation_spell+arc_right_v2": 'epoch80',
    "180rotation_spell+arc_right_v2": 'epoch20', # also 'epoch40', 'epoch50' are good
     "360rotation_spell+arc_right_v3": 'epoch20', # also 'epoch15', 'epoch20' are good
    "microwave_rotation": 'epoch60',
}

lora_name_to_tag = {
    "opensource_remade_rotation": "r0t4tion 360 degrees rotation",
    "opensource_civitai": "r0t4tION orb1t",
    "rotation": "r0t4tION orb1t",
}

In [6]:
lora_name_widget = widgets.Dropdown(
    options=list(lora_name_to_path.keys()) + ['None'],
    value=list(lora_name_to_path.keys())[-1],
    description="LoRA Name:",
)
display(lora_name_widget,)

Dropdown(description='LoRA Name:', index=5, options=('opensource_civitai', 'opensource_remade_microwave', '180…

In [7]:
lora_name=lora_name_widget.value

if lora_name not in ['None', 'opensource_civitai', 'opensource_remade_rotation']:
    epochs = [p for p in os.listdir(lora_name_to_path[lora_name]) if p.startswith('epoch')]
else:
    epochs = ['0']
epoch_widget = widgets.Dropdown(
    options=epochs,
    value=default_epoch[lora_name] if lora_name in default_epoch else epochs[-1],
    description="LoRA epoch:",
)

# Display widgets
display(epoch_widget)

Dropdown(description='LoRA epoch:', index=5, options=('epoch10', 'epoch15', 'epoch16', 'epoch17', 'epoch18', '…

In [8]:
lora_scale = 1.
if lora_name != 'None':
    if lora_name not in ['opensource_arc']:
        lora_path = os.path.join(lora_name_to_path[lora_name], epoch_widget.value)
    else:
        lora_path = lora_name_to_path[lora_name]
    lora_path = os.path.join(lora_path, 'adapter_model.safetensors')
    pipe.model.apply_lora(
        lora_path,
        lora_scale=lora_scale
    )
else:
    lora_path = 'None'
    if isinstance(pipe.model, LoraModel):
        pipe.model.remove_lora()
lora_path

Loading model from: /home/jovyan/dmitrienko/workspace/checkpoints/wan14b_i2v/spell+arc_right_v3-x5_shift5_512_lora128_lr1e-4_bs4_2gpus/20250408_18-10-51/epoch20/adapter_model.safetensors
Reading file into memory...
Read 1.34GB into memory
Loading tensors...
Successfully loaded 960 tensors


'/home/jovyan/dmitrienko/workspace/checkpoints/wan14b_i2v/spell+arc_right_v3-x5_shift5_512_lora128_lr1e-4_bs4_2gpus/20250408_18-10-51/epoch20/adapter_model.safetensors'

In [9]:
steps_widget = widgets.IntSlider(
    value=40, min=15, max=50, step=1,
    description="Diffusion steps:",
     style= {'description_width': 'initial'}
)
frame_widget = widgets.Dropdown(
    value=65,
    description="Video frames:",
    options=[33, 65, 97],
)
size_widget = widgets.Dropdown(
    options=list(SIZE_CONFIGS.keys()),
    value='512*512',
    description="Video size:",
)
base_seed_widget = widgets.IntSlider(
    value=44, min=-1, max=100000, step=1,
    description="Base Seed:",
)
sample_shift_widget = widgets.IntSlider(
    value=5., min=0, max=10, step=1,
    description="Scale shift:",
)
guide_scale_widget = widgets.IntSlider(
    value=5., min=0, max=10, step=1,
    description="Guidance scale:",
    style= {'description_width': 'initial'}
)
square_image_widget = widgets.Checkbox(
    value=False,
    description="Crop image to square",
)
display(
    steps_widget, frame_widget,
    square_image_widget,
    size_widget, base_seed_widget, 
    sample_shift_widget, guide_scale_widget, 
    
)

IntSlider(value=40, description='Diffusion steps:', max=50, min=15, style=SliderStyle(description_width='initi…

Dropdown(description='Video frames:', index=1, options=(33, 65, 97), value=65)

Checkbox(value=False, description='Crop image to square')

Dropdown(description='Video size:', index=7, options=('720*1280', '1280*720', '480*832', '832*480', '256*144',…

IntSlider(value=44, description='Base Seed:', max=100000, min=-1)

IntSlider(value=5, description='Scale shift:', max=10)

IntSlider(value=5, description='Guidance scale:', max=10, style=SliderStyle(description_width='initial'))

Оптимальное количество шагов диффузии:  20-40, при 20 возможны небольшие артефакты...

Оптимальный Scale shift: 5-7

In [10]:

image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/photo_2025-02-19_18-03-37.jpg'
prompt = 'A plush toy shaped like an avocado. It has a Bright green exterior representing the avocado flesh. The toy is made of soft, fuzzy fabric, making it look cuddly.'

image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/cosmos.png'
prompt = 'an astronaut standing on a rocky, barren lunar or planetary surface, gazing at a large, bright moon or planet rising the horizon. The landscape is dusty and uneven, with craters and small boulders scattered around. The sky is dark, possibly space, filled with stars. The astronaut is wearing a white space suit with a helmet and a backpack-like life support system. The scene gives a feeling of solitude and exploration in an otherworldly environment.'

image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/gagarin.png'
prompt = 'a smiling astronaut wearing a vintage space suit. The suit is orange with a helmet that has a clear visor and communication equipment visible. The person appears to be preparing for or returning from a space mission. The overall appearance, especially the design of the suit and helmet, resembles those used during the early Soviet space program.'

image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/rocket2.jpg'
prompt = "the rocket ascending through the sky. Bright flames and thick plumes of exhaust trail behind the rocket, emphasizing its powerful thrust as it pierces through a cloudy sky. The rocket body has white and metallic sections, with orange and black bands near the top. Camera is tracking the rocket, gradually moves up."

# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/photo_2025-02-19_18-03-37 (2).jpg'
# prompt = "This plush toy is shaped like a popsicle and has a fun, relaxed 'cool guy' pose. It has a Bright pink exterior. The toy is made of soft, fuzzy fabric, making it look cuddly. It’s leaning back casually, propped up on a few colorful books, like it's chilling."

# prompt = 'This is an adorable plush toy of a penguin wearing a yellow and orange inflatable swim ring. There is a clear white background. The penguin has a chubby body, a round orange beak, small flippers at its sides, and a sweet, calm expression.'
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/samokat/photo_2025-04-09_17-25-02.jpg'

# prompt = 'This is an adorable plush toy of a white bear wearing a yellow and orange inflatable swim ring. There is a clear white background. The penguin has a chubby body, a round orange beak, small flippers at its sides, and a sweet, calm expression.'
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/samokat/photo_2025-04-09_17-25-00.jpg'

# prompt = 'This is an adorable flat plush toy of a capybara. There is a clear white background.'
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/samokat/photo_2025-04-09_17-25-00 (2).jpg'



# prompt = 'A wooden comb for hair. The background is clear white.'
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/samokat/photo_2025-04-09_17-24-59.jpg'


# prompt = "Summer beach vacation style, a white cat wearing sunglasses sits on a surfboard. \
# The fluffy-furred feline gazes directly at the camera with a relaxed expression. \
# Blurred beach scenery forms the background featuring crystal-clear waters, distant green hills, and a blue sky dotted with white clouds.\
# The cat assumes a naturally relaxed posture, as if savoring the sea breeze and warm sunlight.\
# A close-up shot highlights the feline's intricate details and the refreshing atmosphere of the seaside."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/submodules/Wan2_1/examples/i2v_input.JPG"

# prompt = "A majestic horse rearing on its hind legs in a lush green field under a partly cloudy sky.\
# The horse has a shiny, muscular coat with a dark mane and tail, wearing a light-colored halter."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/horse.jpg"

# prompt = "The medium shot of a woman. She has long, straight, dark brown hair parted down the middle and a calm. She is wearing a sleek, fitted, gray long-sleeve hooded top made of a slightly shiny, smooth material."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/girl_medium.jpeg"

# prompt = "A red Porsche 911 on the road in the mountains."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/car.jpg"

# prompt = "The overweight boy rides the bicycle down the dirt road as he descends the hill, with a shocked expression."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/videoframe_0.png"

# prompt = "The cartoonish boy stands ready with his backpack"
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/boy.png"

# prompt = "The video shows a man seated on a chair."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/man.png"

# prompt = "The video features a wooden chair with a blue cushion."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/chair.png"


# prompt = "A hyper-realistic 3D close-up of a joyful grey cat with soft, well-defined fur. The cat has striking green eyes reflecting ambient light. It wears a green hoodie, and its pink nose and whiskers are visible. The background is dark with soft green and blue lights creating a cinematic depth. The lighting is warm and white, enhancing the lifelike details of its fur, eyes, and facial expression"
# prompt = "A hyper-realistic 3D  grey cat with soft, well-defined fur. It wears a green hoodie."
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/sbercat/347400964_17982922346473644_951074851559425243_n.jpg'

# prompt = "A hyper-realistic 3D  model joyful grey cat with soft, well-defined fur. The cat has striking green eyes reflecting ambient light. It wears a green hoodie, and its pink nose and whiskers are visible. He stands with his arms outstretched at his sides. The background is dark with soft green and blue lights creating a cinematic depth. The lighting is warm and white, enhancing the lifelike details of its fur, eyes, and facial expression"
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/submodules/Wan2_1/examples/sbercat/347418747_17982922316473644_1515624070986201305_n.jpg'

# prompt = "A high-resolution cinematic video of a seagull standing on a stone ledge near a canal in a European city. The seagull moves its head, looks around, and occasionally flaps its wings. The scene is illuminated by natural daylight, creating a lively urban atmosphere."
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/submodules/Wan2_1/examples/images/photo_2025-04-01_16-22-03.jpg'

# prompt = "A cinematic video of a towering, realistic Gundam mecha standing in an open square under a clear blue sky. The scene has a futuristic, high-tech atmosphere."
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/submodules/Wan2_1/examples/images/photo_2025-04-01_16-22-13.jpg'

# prompt = 'The towering, realistic Gundam mecha is standing in an open square under a clear blue sky. '
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/submodules/Wan2_1/examples/images/photo_2025-04-01_16-22-10.jpg'

# prompt = "Chinese ancient tower in the courtyard of a traditional Chinese temple."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/photo_2025-04-01_16-22-10.jpg"

# prompt = 'A cinematic video of an old red telephone booth with a WiFi sign, standing alone on a quiet city street. Occasional pedestrians and cars pass by, adding a subtle urban atmosphere.'
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/submodules/Wan2_1/examples/images/photo_2025-04-01_16-22-07.jpg'


# prompt = "An animated scene featuring a small, furry creature with oversized round ears, large expressive eyes, and a friendly face. The character sits inside a wooden crate, surrounded by warm, dim lighting."
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/_normal.jpg'

# prompt = "An animated scene featuring a small, furry creature with oversized round ears, large expressive eyes, and a friendly face. The character stand in the room, surrounded by warm, dim lighting."
# image = '/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/4281c0dd8e075c977b34f7e7479388ba.jpg'

# prompt = "There is a large, juicy cheeseburger with a sesame seed bun. The burger is stacked high with two slices of cheddar cheese, crisp green lettuce, thick tomato slices, and a well-cooked beef patty.\
# A container of golden crispy French fries is near the burger. \
# Also there’s a red soft drink cup with a white lid and a straw, completing the classic fast food trio."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/fastfood.jpg"

# prompt = "a pair of low-top sneakers with a worn and distressed look. The shoes are black with off-white laces and a beige rubber sole"
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/keds.jpg"

# prompt = "a woman with a serene, mysterious expression, often described as a subtle, enigmatic smile. \
# Her dark hair is long and flows over her shoulders, and she wears a dark, modest dress with a fine embroidered neckline and sheer veil."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/800px-Mona_Lisa,_by_Leonardo_da.png"

# prompt = "The video shows a young woman with blonde hair styled in a messy bun, wearing a distressed green jacket and a white top. Her outfit shows signs of wear, adding a rugged texture that's complemented by the rustic."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/blonde.png"

# prompt = "The video shows an animated character in traditional East Asian attire, his gray hair tied in a bun and a confident smile on his face. He is situated in an ancient-style street, adorned with red lanterns, imbuing the scene with a festive or cultural ambiance."
# image = "/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/images/pixar_character.png"

In [11]:
size = size_widget.value
sample_steps = 10 # int(steps_widget.value) 
frame_num = 33 # int(frame_widget.value)
sample_shift = sample_shift_widget.value # if "i2v" not in task or size not in ["832*480", "480*832"] else 3.0
guide_scale = guide_scale_widget.value
base_seed=base_seed_widget.value
square_image = square_image_widget.value

if lora_name != 'None':
    trigger_words = [v for k, v in lora_name_to_tag.items() if k in lora_name][0]
    prompt = f"{trigger_words} {prompt}"
    
else:
    prompt += ' Сamera performs 360 degree rotatation around the seagull.'

print(prompt)

im = Image.open(image).convert("RGB")
if square_image:
    new_height = int(size.split('*')[0])
    new_width = int(size.split('*')[1])

    if im.size[1] > im.size[0]:
        im = im.resize(( new_height, int(im.size[1]/im.size[0] * new_height)))
    else:
        im = im.resize(( int(im.size[0]/im.size[1] * new_width), new_width))

    width, height = im.size   # Get dimensions

    left = (width - new_width)/2
    top = (height - new_height)/2
    right = (width + new_width)/2
    bottom = (height + new_height)/2

    # Crop the center of the image
    im = im.crop((left, top, right, bottom))

output = pipe.generate(
    prompt,
    im,
    max_area=MAX_AREA_CONFIGS[size],
    frame_num=frame_num,
    shift=sample_shift,
    sample_solver="unipc",
    sampling_steps=sample_steps,
    guide_scale=guide_scale,
    seed=base_seed,
)

r0t4tION orb1t the rocket ascending through the sky. Bright flames and thick plumes of exhaust trail behind the rocket, emphasizing its powerful thrust as it pierces through a cloudy sky. The rocket body has white and metallic sections, with orange and black bands near the top. Camera is tracking the rocket, gradually moves up.


100%|██████████| 10/10 [00:27<00:00,  2.80s/it]


In [12]:
if lora_path != 'None':
    save_dir = f"/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/outputs/\
{task}_{lora_name}_{size}/{lora_path.split('/')[-2] if lora_path.split('/')[-2] != '' else lora_path.split('/')[-3]}"
else:
    save_dir = f"/home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/outputs/\
{task}_{lora_name}_{size}/"

os.makedirs(save_dir, exist_ok=True)

suffix = ".png" if "t2i" in task else ".mp4"
formatted_time = datetime.now().strftime("%Y%m%d_%H%M%S")
formatted_prompt = prompt.replace(" ", "_").replace("/", "_")[:100]
save_file = f"lorascale{lora_scale}_{sample_steps}steps_{formatted_prompt}_{formatted_time}{suffix}"
save_file = os.path.join(save_dir, save_file)


In [13]:
# Save output
if "t2i" in task:
    print(f"Saving generated image to {save_file}")
    cache_image(output.squeeze(1)[None], save_file, nrow=1, normalize=True, value_range=(-1, 1))
else:
    print(f"Saving generated video to {save_file}")
    cache_video(output[None], save_file, fps=cfg.sample_fps, nrow=1, normalize=True, value_range=(-1, 1))

Saving generated video to /home/jovyan/dmitrienko/workspace/diffusion-pipe_dmitrienko/Wan2_1/examples/outputs/i2v-14B_360rotation_spell+arc_right_v3_512*512/epoch20/lorascale1.0_40steps_r0t4tION_orb1t_the_rocket_ascending_through_the_sky._Bright_flames_and_thick_plumes_of_exhaust_trail_20250411_172351.mp4


In [13]:
output.min(), output.max()

(tensor(-0.9720, device='cuda:0'), tensor(1., device='cuda:0'))

In [14]:
Video(save_file)