# Ditto Training Pipeline

This notebook sets up the environment and runs the training pipeline for Ditto on Google Colab Pro.

## 1. Mount Google Drive
Mount your Google Drive to save checkpoints and access the dataset.

In [None]:
# Attach Google Drive to the Colab filesystem; the training output path
# configured later in this notebook lives underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

## 2. Setup Environment
Clone the repository and install the necessary dependencies.

In [None]:
!git clone https://github.com/dhruv0000/ditto-vace-fork.git
%cd ditto-vace-fork
!pip install -r requirements.txt
!pip install accelerate

## 3. Data Setup
Download the dataset metadata, source captions, training CSVs, and the mini test videos from Hugging Face.

In [None]:
#@title Data Download/Setup
import os
from huggingface_hub import snapshot_download

# Both downloads pull from the same Hugging Face dataset repository into the
# same local folder; only the file patterns differ between them.
ditto_repo_kwargs = dict(
    repo_id="QingyanBai/Ditto-1M",
    repo_type="dataset",
    local_dir="./Ditto-1M",
)

# Metadata, source captions and the CSV index files.
metadata_patterns = [
    "source_video_captions/*",
    "training_metadata/*",
    "csvs_for_DiffSynth/*",
]

# Only the mini test videos are fetched by default.
# See the README for the patterns needed to download more data.
video_patterns = ["mini_test_videos/*"]

# Fetch metadata first, then the videos, one snapshot request per group.
for pattern_group in (metadata_patterns, video_patterns):
    snapshot_download(allow_patterns=pattern_group, **ditto_repo_kwargs)

print("Data setup complete.")

## 4. Configuration
Configure the training parameters. The default model is `Wan-AI/Wan2.1-VACE-1.3B`.

In [None]:
#@title Training Configuration

# Each assignment below is a Colab form field (`#@param`); edit the values in
# the form or directly in the code. They are consumed by the training cell.
dataset_base_path = "./Ditto-1M/mini_test_videos" #@param {type:"string"}
dataset_metadata_path = "./Ditto-1M/csvs_for_DiffSynth/ditto_1m_mini.csv" #@param {type:"string"}
output_path = "/content/drive/MyDrive/exps/ditto" #@param {type:"string"}
model_id = "Wan-AI/Wan2.1-VACE-1.3B" #@param {type:"string"}
num_epochs = 5 #@param {type:"integer"}
learning_rate = "1e-4" #@param {type:"string"}

# Echo the effective settings so they are recorded in the cell output.
config_summary = {
    "Dataset Base Path": dataset_base_path,
    "Metadata Path": dataset_metadata_path,
    "Output Path": output_path,
    "Model ID": model_id,
    "Epochs": num_epochs,
    "Learning Rate": learning_rate,
}

print("Configuration:")
for setting_label, setting_value in config_summary.items():
    print(f"  {setting_label}: {setting_value}")

## 5. Run Training
Execute the training script with the configured parameters.

In [None]:
# Make the training script executable, then launch it. `train.sh` is resolved
# relative to the current working directory.
# IPython replaces each {name} placeholder below with the value of the Python
# variable of that name (assigned in the Training Configuration cell) before
# handing the command to the shell; the surrounding double quotes keep a value
# that contains spaces together as a single argument.
!chmod +x train.sh
!./train.sh \
  --dataset_base_path "{dataset_base_path}" \
  --dataset_metadata_path "{dataset_metadata_path}" \
  --output_path "{output_path}" \
  --model_id "{model_id}" \
  --num_epochs "{num_epochs}" \
  --learning_rate "{learning_rate}"