From 52fdfe3802648b74afcc5867c08c45fd832e39ad Mon Sep 17 00:00:00 2001 From: Judith van Stegeren Date: Mon, 25 Mar 2024 15:52:48 +0000 Subject: [PATCH] Choose 16x512x512 model instead of 64x512x512 model --- .dockerignore | 1 - predict.py | 32 +++++++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/.dockerignore b/.dockerignore index 7bb610c2..1c954cfe 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,4 +17,3 @@ __pycache__ /venv pretrained_models/ output*.mp4 -.cog diff --git a/predict.py b/predict.py index 8f27431f..effb1aeb 100644 --- a/predict.py +++ b/predict.py @@ -22,7 +22,7 @@ from opensora.datasets import save_sample from opensora.registry import MODELS, SCHEDULERS, build_module -from opensora.utils.config_utils import parse_configs +from opensora.utils.config_utils import merge_args from opensora.utils.misc import to_torch_dtype from opensora.acceleration.parallel_states import set_sequence_parallel_group from colossalai.cluster import DistCoordinator @@ -95,12 +95,30 @@ def setup(self) -> None: # extra config: ckpt_path = "pretrained_models/Open-Sora/OpenSora-v1-HQ-16x512x512.pth" config_file = "configs/opensora/inference/64x512x512.py" + config_file = "configs/opensora/inference/16x512x512.py" # load config file - self.cfg = cog_config() - self.cfg.model["from_pretrained"] = ckpt_path - if "multi_resolution" not in self.cfg: - self.cfg["multi_resolution"] = False + # option 1: manually + #self.cfg = cog_config() + #self.cfg.model["from_pretrained"] = ckpt_path + #if "multi_resolution" not in self.cfg: + # self.cfg["multi_resolution"] = False + + # command line arguments from config_utils + extra_args = Config({ + 'seed': 42, + 'ckpt_path': ckpt_path, + 'batch-size': None, + 'prompt-path': None, + 'save-dir': None, + 'num-sampling-steps': None, + 'cfg_scale': None, + }) + + # option 2: use config_utils + self.cfg = Config.fromfile(config_file) + self.cfg = merge_args(self.cfg, args=extra_args, training=False) + # from scripts/inference @@ -134,8 +152,12 @@ def setup(self) -> None: # ====================================================== # 3.1. build model input_size = (self.cfg.num_frames, *self.cfg.image_size) + print(f"number of frames: {self.cfg.num_frames}, image_size: {self.cfg.image_size}") + print(f"resulting input size: {input_size}") self.vae = build_module(self.cfg.vae, MODELS) + print("vae", self.vae) self.latent_size = self.vae.get_latent_size(input_size) + print("latent size:", self.latent_size) self.text_encoder = build_module(self.cfg.text_encoder, MODELS, device=self.device) # T5 must be fp32 self.model = build_module( self.cfg.model,