In [1]:
import os
import re
from pathlib import Path

from PIL import Image

In [90]:
image_path = './video/kid/'
output_dir = os.path.dirname(image_path)
print(output_dir)

# os.listdir() also returns sub-directories and stray files (e.g. .DS_Store);
# keep only image files so the Image.open() calls further down cannot choke
# on them.
IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')


def _natural_key(name):
    """Sort key that compares digit runs as integers.

    re.split() with a capturing group alternates text chunks (even positions)
    and digit runs (odd positions), so two keys never compare an int with a
    str at the same position.
    """
    return [int(chunk) if chunk.isdecimal() else chunk
            for chunk in re.split(r'(\d+)', name)]


# A plain lexicographic sort would put 'kid2_100.jpg' before 'kid2_20.jpg';
# sort on the numeric value of the frame index instead, matching the
# natsorted() ordering used by the video cells.
all_images = sorted(
    (f for f in os.listdir(image_path) if f.lower().endswith(IMAGE_SUFFIXES)),
    key=_natural_key,
)
print(all_images)

./video/kid
['kid2_0.jpg', 'kid2_20.jpg', 'kid2_40.jpg', 'kid2_60.jpg', 'kid2_80.jpg']


In [91]:
def split_2x2_to_row(img):
    """Cut a 2x2 grid image into its four tiles and lay them out as one row of four.

    Args:
        img: A PIL image holding a 2x2 grid of equally sized tiles.

    Returns:
        A new RGB image of size (2 * width, height // 2) with the tiles in
        reading order: top-left, top-right, bottom-left, bottom-right.
    """
    w, h = img.size
    mid_x = w // 2
    mid_y = h // 2

    # Crop boxes (left, upper, right, lower) in reading order.
    boxes = (
        (0, 0, mid_x, mid_y),
        (mid_x, 0, w, mid_y),
        (0, mid_y, mid_x, h),
        (mid_x, mid_y, w, h),
    )
    # Horizontal position of each tile inside the output strip.
    offsets = (0, mid_x, w, w + mid_x)

    strip = Image.new('RGB', (w * 2, mid_y))
    for box, x in zip(boxes, offsets):
        strip.paste(img.crop(box), (x, 0))

    return strip

# Build one 1x4 strip per source image. Each source is opened in a `with`
# block so its file handle is closed as soon as the strip has been built;
# the strip is an independent in-memory image and stays valid afterwards.
images = []
for img_name in all_images:
    with Image.open(os.path.join(image_path, img_name)) as source_img:
        images.append(split_2x2_to_row(source_img))

In [92]:
# Write every strip next to the source images as 0.png, 1.png, ...
for idx, image in enumerate(images):
    target_file = os.path.join(output_dir, f"{idx}.png")
    image.save(target_file)

In [69]:
# Walk through the video dir and convert every .jpg to .png (the .jpg is deleted)
for root, dirs, files in os.walk('./video'):
    for file in files:
        if not file.endswith('.jpg'):
            continue
        img_path = os.path.join(root, file)
        png_path = os.path.splitext(img_path)[0] + '.png'
        # Close the source before deleting it: an open handle leaks one file
        # descriptor per image and makes os.remove() fail on Windows.
        with Image.open(img_path) as img:
            img.save(png_path)
        os.remove(img_path)

# Make Videos

In [103]:
import cv2
import numpy as np
from pathlib import Path
from natsort import natsorted

def blend_frames(frame1, frame2, alpha):
    """Cross-fade two frames; ``alpha`` (0-1) is the weight given to ``frame2``."""
    weight_second = alpha
    weight_first = 1 - weight_second
    return cv2.addWeighted(frame1, weight_first, frame2, weight_second, 0)

def create_video_from_images(image_dir, output_path, 
                            display_time=1.0, last_frame_time=3.0, 
                            transition_time=0.5, output_fps=30):
    """
    Build a slideshow video from the PNG images found in ``image_dir``.

    Images are used in natural-sort order. Each image is held for
    ``display_time`` seconds and then cross-faded into the next one over
    ``transition_time`` seconds; the final image is held for
    ``last_frame_time`` seconds. The video is written with the 'avc1'
    (H.264) FourCC, chosen for browser compatibility.

    Args:
        image_dir: Directory containing the ``*.png`` frames.
        output_path: Path of the video file to write.
        display_time: Seconds each image (except the last) is held.
        last_frame_time: Seconds the last image is held.
        transition_time: Seconds of cross-fade between consecutive images.
        output_fps: Frame rate of the output video.

    Returns:
        None. Progress and error messages are printed.

    Raises:
        ValueError: If an image file cannot be decoded.
    """
    images = natsorted(Path(image_dir).glob("*.png"))
    
    if not images:
        print(f"No images found in {image_dir}")
        return

    def load_frame(path, size=None):
        """Decode one image, optionally resizing it to ``size`` = (width, height)."""
        frame = cv2.imread(str(path))
        if frame is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise ValueError(f"Cannot read image: {path}")
        if size is not None and (frame.shape[1], frame.shape[0]) != size:
            # The writer and addWeighted both need frames of one fixed size.
            frame = cv2.resize(frame, size)
        return frame
    
    # The first image defines the video resolution.
    first_frame = load_frame(images[0])
    h, w = first_frame.shape[:2]
    
    # 'avc1' selects H.264. If it is unavailable in the local OpenCV build,
    # 'H264' or 'X264' are possible alternatives.
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    
    out = cv2.VideoWriter(str(output_path), fourcc, output_fps, (w, h))
    
    if not out.isOpened():
        print(f"错误: 无法创建视频文件 {output_path}")
        print("提示: 可能需要安装 opencv-python 的完整版本")
        return
    
    # Durations expressed in output frames.
    display_frames = int(display_time * output_fps)
    transition_frames = int(transition_time * output_fps)
    last_frames = int(last_frame_time * output_fps)
    
    try:
        # Carry the decoded "next" frame over as the following "current"
        # frame so every image is read from disk exactly once.
        curr_frame = first_frame
        for next_path in images[1:]:
            next_frame = load_frame(next_path, (w, h))
            
            # Hold the current image.
            for _ in range(display_frames):
                out.write(curr_frame)
            
            # Cross-fade towards the next image; alpha ends at exactly 1.0.
            for t in range(1, transition_frames + 1):
                alpha = t / transition_frames
                out.write(blend_frames(curr_frame, next_frame, alpha))
            
            curr_frame = next_frame
        
        # Hold the last image.
        for _ in range(last_frames):
            out.write(curr_frame)
    finally:
        # Always finalize the container, even if a frame failed to load.
        out.release()
    
    print(f"✓ Created: {output_path}")

def batch_create_videos(root_dir="video", output_dir="videos_output", 
                        display_time=1.0, last_frame_time=3.0, 
                        transition_time=0.5, output_fps=30):
    """Create one video per sub-directory of ``root_dir``.

    Each sub-directory ``<name>`` is rendered to ``<output_dir>/<name>.mp4``
    via ``create_video_from_images``; plain files in ``root_dir`` are ignored.

    Args:
        root_dir: Directory whose sub-directories each hold one image sequence.
        output_dir: Directory that receives the videos; created if missing,
            including any missing parent directories.
        display_time: Forwarded to ``create_video_from_images``.
        last_frame_time: Forwarded to ``create_video_from_images``.
        transition_time: Forwarded to ``create_video_from_images``.
        output_fps: Forwarded to ``create_video_from_images``.
    """
    root = Path(root_dir)
    output = Path(output_dir)
    # parents=True so nested targets such as 'static/videos' work even when
    # the parent directory does not exist yet.
    output.mkdir(parents=True, exist_ok=True)
    
    for subdir in sorted(root.iterdir()):
        if subdir.is_dir():
            video_path = output / f"{subdir.name}.mp4"
            create_video_from_images(subdir, video_path, display_time, 
                                    last_frame_time, transition_time, output_fps)

# Example run: render every sequence into static/videos
batch_create_videos(
    output_dir='static/videos',
    output_fps=30,  # back to 30 fps for smoother playback
    display_time=0.8,
    transition_time=0.3,
    last_frame_time=2.0,
)

✓ Created: static/videos/abs.mp4
✓ Created: static/videos/cartoon.mp4
✓ Created: static/videos/cook.mp4
✓ Created: static/videos/dentist.mp4
✓ Created: static/videos/draw.mp4
✓ Created: static/videos/fatter.mp4
✓ Created: static/videos/honey.mp4
✓ Created: static/videos/kid.mp4
✓ Created: static/videos/laugh.mp4
✓ Created: static/videos/pan.mp4
✓ Created: static/videos/recipe.mp4
✓ Created: static/videos/stronger.mp4


In [11]:
import cv2
import numpy as np
from pathlib import Path
from natsort import natsorted

def draw_step_label(frame, step):
    """Return a copy of ``frame`` with a centered "RL Step: N" banner across the top.

    Args:
        frame: Image array to annotate; it is not modified.
        step: Value shown after "RL Step: ".

    Returns:
        The annotated copy of ``frame``.
    """
    # Banner and font settings.
    banner_h = 80
    face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 1.5
    stroke = 3
    caption = f"RL Step: {step}"

    _, frame_w = frame.shape[:2]
    canvas = frame.copy()

    # Semi-transparent banner: blend a black-filled strip (75%) with the
    # untouched picture (25%), writing the result back into the canvas.
    shaded = canvas.copy()
    cv2.rectangle(shaded, (0, 0), (frame_w, banner_h), (0, 0, 0), -1)
    cv2.addWeighted(shaded, 0.75, canvas, 0.25, 0, canvas)

    # Measure the caption so it can be centered inside the banner.
    (caption_w, caption_h), _ = cv2.getTextSize(caption, face, scale, stroke)
    origin_x = (frame_w - caption_w) // 2
    origin_y = (banner_h + caption_h) // 2

    # Black drop shadow, offset by 2 px, for readability.
    cv2.putText(canvas, caption, (origin_x + 2, origin_y + 2),
                face, scale, (0, 0, 0), stroke + 1)

    # White anti-aliased caption on top.
    cv2.putText(canvas, caption, (origin_x, origin_y),
                face, scale, (255, 255, 255), stroke,
                lineType=cv2.LINE_AA)

    return canvas

def blend_frames(frame1, frame2, alpha):
    """Cross-fade two frames; ``alpha`` is the weight of ``frame2``, ``1 - alpha`` that of ``frame1``."""
    weight_second = alpha
    weight_first = 1 - weight_second
    return cv2.addWeighted(frame1, weight_first, frame2, weight_second, 0)

def create_video_from_images(image_dir, output_path, 
                            display_time=1.0, last_frame_time=3.0, 
                            transition_time=0.5, output_fps=30,
                            step_stride=10
                            ):
    """Build a labelled slideshow video from the PNG images in ``image_dir``.

    Images are used in natural-sort order. Each image is held for
    ``display_time`` seconds and then cross-faded into the next one over
    ``transition_time`` seconds; the final image is held for
    ``last_frame_time`` seconds. Every output frame carries an "RL Step"
    banner showing ``image_index * step_stride``; during a cross-fade the
    banner keeps the step of the image being faded out.

    Args:
        image_dir: Directory containing the ``*.png`` frames.
        output_path: Path of the video file to write ('avc1' / H.264 FourCC).
        display_time: Seconds each image (except the last) is held.
        last_frame_time: Seconds the last image is held.
        transition_time: Seconds of cross-fade between consecutive images.
        output_fps: Frame rate of the output video.
        step_stride: Step increment between consecutive images.

    Returns:
        None. Progress and error messages are printed.

    Raises:
        ValueError: If an image file cannot be decoded.
    """
    images = natsorted(Path(image_dir).glob("*.png"))
    
    if not images:
        print(f"No images found in {image_dir}")
        return

    def load_frame(path, size=None):
        """Decode one image, optionally resizing it to ``size`` = (width, height)."""
        frame = cv2.imread(str(path))
        if frame is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise ValueError(f"Cannot read image: {path}")
        if size is not None and (frame.shape[1], frame.shape[0]) != size:
            # The writer and addWeighted both need frames of one fixed size.
            frame = cv2.resize(frame, size)
        return frame
    
    # The first image defines the video resolution.
    first_frame = load_frame(images[0])
    h, w = first_frame.shape[:2]
    
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    out = cv2.VideoWriter(str(output_path), fourcc, output_fps, (w, h))
    
    if not out.isOpened():
        print(f"错误: 无法创建视频文件 {output_path}")
        return
    
    # Durations expressed in output frames.
    display_frames = int(display_time * output_fps)
    transition_frames = int(transition_time * output_fps)
    last_frames = int(last_frame_time * output_fps)
    total_images = len(images)
    
    try:
        # Carry the decoded "next" frame over as the following "current"
        # frame so every image is read from disk exactly once.
        curr_frame = first_frame
        for i, next_path in enumerate(images[1:]):
            next_frame = load_frame(next_path, (w, h))
            
            # Step shown for image i: 0, step_stride, 2 * step_stride, ...
            step = i * step_stride
            
            # The held frame is identical for the whole hold period, so the
            # banner is rendered once and the result written repeatedly.
            held_frame = draw_step_label(curr_frame, step)
            for _ in range(display_frames):
                out.write(held_frame)
            
            # Cross-fade towards the next image; alpha ends at exactly 1.0.
            for t in range(1, transition_frames + 1):
                alpha = t / transition_frames
                blended = blend_frames(curr_frame, next_frame, alpha)
                out.write(draw_step_label(blended, step))
            
            curr_frame = next_frame
        
        # Hold the last image with its own step value.
        last_step = (total_images - 1) * step_stride
        closing_frame = draw_step_label(curr_frame, last_step)
        for _ in range(last_frames):
            out.write(closing_frame)
    finally:
        # Always finalize the container, even if a frame failed to load.
        out.release()
    
    print(f"✓ Created: {output_path}")

def batch_create_videos(root_dir="video", output_dir="videos_output", 
                        display_time=1.0, last_frame_time=3.0, 
                        transition_time=0.5, output_fps=30, step_stride=10
                        ):
    """Create one labelled video per sub-directory of ``root_dir``.

    Each sub-directory ``<name>`` is rendered to ``<output_dir>/<name>.mp4``
    via ``create_video_from_images``; plain files in ``root_dir`` are ignored.

    Args:
        root_dir: Directory whose sub-directories each hold one image sequence.
        output_dir: Directory that receives the videos; created if missing,
            including any missing parent directories.
        display_time: Forwarded to ``create_video_from_images``.
        last_frame_time: Forwarded to ``create_video_from_images``.
        transition_time: Forwarded to ``create_video_from_images``.
        output_fps: Forwarded to ``create_video_from_images``.
        step_stride: Forwarded to ``create_video_from_images``.
    """
    root = Path(root_dir)
    output = Path(output_dir)
    # parents=True so nested targets such as 'static/videos' work even when
    # the parent directory does not exist yet.
    output.mkdir(parents=True, exist_ok=True)
    
    for subdir in sorted(root.iterdir()):
        if subdir.is_dir():
            video_path = output / f"{subdir.name}.mp4"
            create_video_from_images(subdir, video_path, display_time, 
                                    last_frame_time, transition_time, output_fps, step_stride)

# Run: render every sequence into static/videos, labelling steps 0, 20, 40, ...
batch_create_videos(
    output_dir='static/videos',
    output_fps=30,
    step_stride=20,
    display_time=0.8,
    transition_time=0.3,
    last_frame_time=2.0,
)

✓ Created: static/videos/abs.mp4
✓ Created: static/videos/cartoon.mp4
✓ Created: static/videos/cook.mp4
✓ Created: static/videos/dentist.mp4
✓ Created: static/videos/draw.mp4
✓ Created: static/videos/fatter.mp4
✓ Created: static/videos/honey.mp4
✓ Created: static/videos/kid.mp4
✓ Created: static/videos/laugh.mp4
✓ Created: static/videos/pan.mp4
✓ Created: static/videos/pumpkin.mp4
✓ Created: static/videos/recipe.mp4
✓ Created: static/videos/stronger.mp4


# Compress

In [4]:
from pathlib import Path
from PIL import Image
import os

In [5]:
def compress_by_quality(input_path, output_path, quality=85):
    """Re-encode an image with stronger compression, keeping its pixel size.

    The output format follows the suffix of ``output_path``: ``.jpg`` /
    ``.jpeg`` is written as JPEG with the given ``quality``; any other suffix
    is written as PNG with ``optimize=True`` and ``compress_level=9``
    (``quality`` is not used for PNG).

    Transparency is flattened onto a white background only for JPEG output,
    because JPEG has no alpha channel. PNG output keeps the source mode, so
    transparent PNGs stay transparent and palette PNGs are not expanded to
    RGB.

    Args:
        input_path: Path of the image to read (str or Path).
        output_path: Path of the image to write (str or Path).
        quality: JPEG quality (1-100); ignored for PNG output.
    """
    # Accept plain strings as well as Path objects.
    output_path = Path(output_path)
    to_jpeg = output_path.suffix.lower() in ('.jpg', '.jpeg')

    with Image.open(input_path) as img:
        if not to_jpeg:
            img.save(output_path, 'PNG', optimize=True, compress_level=9)
            return

        has_alpha = img.mode in ('RGBA', 'LA', 'PA') or (
            img.mode == 'P' and 'transparency' in img.info
        )
        if has_alpha:
            # Composite over white, using the alpha band as the paste mask.
            rgba_img = img.convert('RGBA')
            rgb_img = Image.new('RGB', rgba_img.size, (255, 255, 255))
            rgb_img.paste(rgba_img, mask=rgba_img.split()[-1])
            img = rgb_img
        elif img.mode not in ('RGB', 'L', 'CMYK'):
            # Remaining modes (opaque palette, 1-bit, 16-bit, ...) are
            # converted to RGB before JPEG encoding.
            img = img.convert('RGB')

        img.save(output_path, 'JPEG', quality=quality, optimize=True)

In [6]:
def compress_by_resize(input_path, output_path, max_size=(1920, 1080), quality=85):
    """Shrink an image to fit ``max_size`` and re-encode it (aggressive method).

    The image is scaled down, preserving aspect ratio, so that it fits inside
    ``max_size``; images that already fit are not enlarged. The output format
    follows the suffix of ``output_path``: ``.jpg`` / ``.jpeg`` is written as
    JPEG with the given ``quality``; any other suffix is written as PNG with
    ``optimize=True`` and ``compress_level=9`` (``quality`` is not used for
    PNG).

    Transparency is flattened onto a white background only for JPEG output;
    PNG output keeps its alpha channel.

    Args:
        input_path: Path of the image to read (str or Path).
        output_path: Path of the image to write (str or Path).
        max_size: Maximum (width, height) of the result.
        quality: JPEG quality (1-100); ignored for PNG output.
    """
    # Accept plain strings as well as Path objects.
    output_path = Path(output_path)
    to_jpeg = output_path.suffix.lower() in ('.jpg', '.jpeg')

    with Image.open(input_path) as img:
        # Pillow resamples palette images with NEAREST regardless of the
        # requested filter, so expand them to true colour before resizing.
        if img.mode in ('P', 'PA'):
            keeps_alpha = img.mode == 'PA' or 'transparency' in img.info
            img = img.convert('RGBA' if keeps_alpha else 'RGB')

        # Aspect-ratio-preserving downscale, done in place.
        img.thumbnail(max_size, Image.Resampling.LANCZOS)

        if not to_jpeg:
            img.save(output_path, 'PNG', optimize=True, compress_level=9)
            return

        if img.mode in ('RGBA', 'LA'):
            # Composite over white, using the alpha band as the paste mask.
            rgba_img = img.convert('RGBA')
            rgb_img = Image.new('RGB', rgba_img.size, (255, 255, 255))
            rgb_img.paste(rgba_img, mask=rgba_img.split()[-1])
            img = rgb_img
        elif img.mode not in ('RGB', 'L', 'CMYK'):
            # Remaining modes (1-bit, 16-bit, ...) are converted to RGB
            # before JPEG encoding.
            img = img.convert('RGB')

        img.save(output_path, 'JPEG', quality=quality, optimize=True)

In [7]:
# Source tree to scan and the mirror directory that receives the compressed copies.
current_dir = Path('static')
output_dir = Path('./compressed')
output_dir.mkdir(parents=True, exist_ok=True)

# Collect every image below current_dir, recursively.
image_extensions = {'.png', '.jpg', '.jpeg'}
image_files = []
for f in current_dir.rglob('*'):
    if not f.is_file():
        continue
    if f.suffix.lower() in image_extensions:
        image_files.append(f)
print(f"找到 {len(image_files)} 个图像文件")

找到 55 个图像文件


In [9]:
# Compress every image found above and report the size change
for img_path in image_files:
    # Mirror the source directory layout below output_dir.
    relative_path = img_path.relative_to(current_dir)
    output_path = output_dir / relative_path
    output_path.parent.mkdir(parents=True, exist_ok=True)

    original_size = img_path.stat().st_size

    # Option 1: quality-only compression (keeps the pixel size)
    # compress_by_quality(img_path, output_path, quality=75)

    # Option 2: resize + quality compression (currently active)
    compress_by_resize(img_path, output_path, max_size=(1920, 1080), quality=85)

    compressed_size = output_path.stat().st_size
    ratio = (1 - compressed_size / original_size) * 100

    original_kb = original_size / 1024
    compressed_kb = compressed_size / 1024
    report = (f"{relative_path}: {original_kb:.1f}KB -> {compressed_kb:.1f}KB "
              f"(压缩 {ratio:.1f}%)")
    print(report)

images/good_images/66.png: 1176.9KB -> 1013.8KB (压缩 13.9%)
images/good_images/115.png: 1728.0KB -> 1494.6KB (压缩 13.5%)
images/good_images/114.png: 1452.4KB -> 1260.6KB (压缩 13.2%)
images/good_images/128.png: 1497.1KB -> 1314.4KB (压缩 12.2%)
images/good_images/103.png: 1577.0KB -> 1385.9KB (压缩 12.1%)
images/good_images/111.png: 1402.9KB -> 1206.5KB (压缩 14.0%)
images/good_images/37.png: 1399.0KB -> 1230.1KB (压缩 12.1%)
images/good_images/122.png: 1334.5KB -> 1176.3KB (压缩 11.9%)
images/good_images/127.png: 1889.9KB -> 1631.5KB (压缩 13.7%)
images/good_images/18.png: 1561.7KB -> 1369.5KB (压缩 12.3%)
images/good_images/140.png: 1581.2KB -> 1365.0KB (压缩 13.7%)
images/good_images/69.png: 1694.4KB -> 1500.5KB (压缩 11.4%)
images/good_images/79.png: 1448.6KB -> 1280.1KB (压缩 11.6%)
images/good_images/92.png: 1526.1KB -> 1338.8KB (压缩 12.3%)
images/good_images/147.png: 1490.5KB -> 1285.1KB (压缩 13.8%)
images/method/dataset_pipeline.png: 5163.3KB -> 1099.6KB (压缩 78.7%)
images/method/RL_pipeline.png: 580.9KB