
Commit

version: 14.0.0
brycedrennan committed Jan 3, 2024
1 parent 57dc27d commit ed40a12
Showing 5 changed files with 25 additions and 6 deletions.
16 changes: 13 additions & 3 deletions README.md
@@ -18,8 +18,6 @@ AI imagined images. Pythonic generation of stable diffusion images **and videos**
# be sure to use Python 3.10; Python 3.11 is not supported at the moment
>> pip install imaginairy
>> imagine "a scenic landscape" "a photo of a dog" "photo of a fruit bowl" "portrait photo of a freckled woman" "a bluejay"
# Make an animation showing the generation process
>> imagine --gif "a flower"
# Make an AI video
>> aimg videogen --start-image rocket.png
```
@@ -84,15 +82,27 @@ Options:
- This was a huge rewrite, which is why some features are not yet supported. On the plus side, refiners supports
  cutting-edge features (SDXL, image prompts, etc.) that will be added to imaginairy soon.
- [self-attention guidance](https://github.com/SusungHong/Self-Attention-Guidance), which makes image details more accurate
- 🎉 feature: larger image generations now work MUCH better and stay faithful to the same image as it looks at a smaller size.
  For example, `--size 720p --seed 1` and `--size 1080p --seed 1` will produce the same image for SD15.
- 🎉 feature: loading diffusers-based models is now supported. Example: `--model https://huggingface.co/ainz/diseny-pixar --model-architecture sd15`
- 🎉 feature: qrcode controlnet!
- feature: generate word images automatically. Great for use with the qrcode controlnet: `imagine "flowers" --gif --size hd --control-mode qrcode --control-image "textimg='JOY' font_color=white background_color=gray" -r 10`
- feature: opendalle 1.1 added. `--model opendalle` to use it
- feature: added `--size` parameter for more intuitive sizing (e.g. 512, 256x256, 4k, uhd, FHD, VGA, etc.); see the sizing sketch after this list
- feature: detect if the wrong torch version is installed and provide instructions on how to install the proper version
- feature: better logging output: color, error handling
- feature: support for pytorch 2.0
- feature: command line output significantly cleaned up and easier to read
- feature: adds `--composition-strength` parameter to the CLI (#416)
- performance: lower memory usage for upscaling
- performance: lower memory usage at startup
- performance: add sliced attention to several models (lowers memory use)
- fix: simpler memory management that avoids some of the previous bugs
- deprecated: support for python 3.8, 3.9
- deprecated: support for torch 1.13
- deprecated: support for Stable Diffusion versions 1.4, 2.0, and 2.1
- deprecated: image training
- broken: most samplers, tile/details controlnet, and model memory management
- broken: samplers other than ddim
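
Purely as an illustration (not code from this commit), here is a minimal sketch of how named sizes like those listed above might resolve to pixel dimensions. The `parse_size` helper and the preset table are assumptions, not imaginairy's actual parser:

```python
# Hypothetical illustration of "intuitive sizing" lookup; imaginairy's real parser may differ.
NAMED_SIZES = {
    "vga": (640, 480),
    "hd": (1280, 720),
    "720p": (1280, 720),
    "fhd": (1920, 1080),
    "1080p": (1920, 1080),
    "uhd": (3840, 2160),
    "4k": (3840, 2160),
}


def parse_size(size: str) -> tuple[int, int]:
    """Resolve a size string like '512', '256x256', or 'FHD' to (width, height)."""
    size = size.strip().lower()
    if size in NAMED_SIZES:
        return NAMED_SIZES[size]
    if "x" in size:
        width, height = size.split("x", 1)
        return int(width), int(height)
    side = int(size)  # a single number means a square image
    return side, side


print(parse_size("512"))      # (512, 512)
print(parse_size("256x256"))  # (256, 256)
print(parse_size("FHD"))      # (1920, 1080)
```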

### Run API server and StableStudio web interface (alpha)
Generate images via the API or web interface. Much smaller feature set compared to the command-line tool.
3 changes: 2 additions & 1 deletion imaginairy/api/generate_refiners.py
@@ -336,6 +336,7 @@ def latent_logger(latents):
    condition_scale=prompt.prompt_strength,
    **text_conditioning_kwargs,
)
lc.progress_latent_callback(x)
# trying to clear memory. not sure if this helps
sd.unet.set_context(context="self_attention_map", value={})
sd.unet._reset_context()
@@ -423,7 +424,7 @@ def latent_logger(latents):
safety_score=safety_score,
result_images=result_images,
performance_stats=lc.get_performance_stats(),
progress_latents=[], # todo
progress_latents=progress_latents,
)

_most_recent_result = result
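The two hunks above wire intermediate latents through to the final result: each denoising step hands its latent to `lc.progress_latent_callback`, and the collected list replaces the old `progress_latents=[],  # todo` placeholder. Below is a minimal standalone sketch of that pattern; `ProgressLogger` and `run_denoising_loop` are made-up names for illustration, not imaginairy's real classes:

```python
import torch


class ProgressLogger:
    """Collects intermediate latents so the caller can build previews or animations later."""

    def __init__(self):
        self.progress_latents: list[torch.Tensor] = []

    def progress_latent_callback(self, latent: torch.Tensor) -> None:
        # store a detached CPU copy so we don't hold GPU memory or the autograd graph
        self.progress_latents.append(latent.detach().cpu())


def run_denoising_loop(steps: int, logger: ProgressLogger) -> list[torch.Tensor]:
    latent = torch.randn(1, 4, 64, 64)  # stand-in for the initial noise latent
    for _ in range(steps):
        latent = latent * 0.9  # stand-in for one denoising step
        logger.progress_latent_callback(latent)
    return logger.progress_latents


logger = ProgressLogger()
progress_latents = run_denoising_loop(steps=5, logger=logger)
print(len(progress_latents))  # 5 intermediate latents, oldest first
```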
2 changes: 1 addition & 1 deletion imaginairy/api/video_sample.py
@@ -37,7 +37,7 @@ def generate_video(
output_folder: str | None = None,
num_frames: int = 6,
num_steps: int = 30,
model_name: str = "svd_xt",
model_name: str = "svd-xt",
fps_id: int = 6,
output_fps: int = 6,
motion_bucket_id: int = 127,
8 changes: 8 additions & 0 deletions imaginairy/utils/animations.py
@@ -38,11 +38,19 @@ def make_bounce_animation(

# convert from latents
converted_frames = []

for frame in frames:
    if isinstance(frame, torch.Tensor):
        frame = model_latents_to_pillow_imgs(frame)[0]
    converted_frames.append(frame)
frames = converted_frames

# resize every frame to the largest frame size so the animation is uniform
max_size = max([frame.size for frame in frames])
converted_frames = []
for frame in frames:
    if frame.size != max_size:
        frame = frame.resize(max_size)
    converted_frames.append(frame)
frames = converted_frames

durations = (
    [start_pause_duration_ms]
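The hunk above normalizes the animation frames: latent tensors are decoded to PIL images, then every frame is resized to the largest frame size so the resulting GIF has uniform dimensions. A self-contained sketch of just the resizing step with plain Pillow (hypothetical frame data; note that `max()` over `(width, height)` tuples compares lexicographically, so the widest frame wins):

```python
from PIL import Image

frames = [
    Image.new("RGB", (256, 256), "red"),
    Image.new("RGB", (512, 384), "green"),
    Image.new("RGB", (384, 512), "blue"),
]

# (width, height) tuples compare lexicographically, so this picks (512, 384)
max_size = max(frame.size for frame in frames)

# resize any frame that doesn't already match the chosen size
frames = [f if f.size == max_size else f.resize(max_size) for f in frames]

print([f.size for f in frames])  # [(512, 384), (512, 384), (512, 384)]
```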
2 changes: 1 addition & 1 deletion setup.py
@@ -44,7 +44,7 @@ def get_git_revision_hash() -> str:
name="imaginAIry",
author="Bryce Drennan",
# author_email="b r y p y d o t io",
version="14.0.0b9",
version="14.0.0",
description="AI imagined images. Pythonic generation of images.",
long_description=readme,
long_description_content_type="text/markdown",
