Merge pull request #139 from edgenai/feat/image-generation

Feat/image generation
edgenai · Apr 19, 2024 · ada21e1 · ada21e1
2 parents 40fe88d + 95c6dd3
commit ada21e1
Show file tree

Hide file tree

Showing 18 changed files with 1,998 additions and 122 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -4,17 +4,19 @@ members = [
     "crates/edgen_core",
     "crates/edgen_server",
     "crates/edgen_async_compat",
+    "crates/edgen_rt_image_generation_candle",
     "crates/edgen_rt_llama_cpp",
     "crates/edgen_rt_whisper_cpp",
     "crates/edgen_rt_chat_faker",
-    "edgen/src-tauri"
+    "edgen/src-tauri",
 ]
 
 [workspace.dependencies]
 ahash = "0.8.6"
 argh = "0.1.12"
 async-executor = "1.6.0"
 async-task = "4.5.0"
+async-trait = "0.1.80"
 axum = "0.7.1"
 blake3 = "1.5.0"
 cfg-if = "1.0.0"
@@ -28,7 +30,7 @@ either = "1.9.0"
 flume = "0.11.0"
 futures = "0.3.29"
 futures-lite = "2.0.1"
-glommio = "0.8.0"
+glommio = "0.9.0"
 hyper = "1.0.1"
 hyper-util = "0.1.1"
 link-cplusplus = "1.0.9"
@@ -38,7 +40,7 @@ once_cell = "1.18.0"
 pin-project = "1.1.3"
 prost = "0.12.2"
 prost-build = "0.12.2"
-reqwest = { version = "0.11.22", default-features = false }
+reqwest = { version = "0.12.3", default-features = false }
 serde = "1.0.193"
 serde_derive = "1.0.193"
 serde_json = "1.0.108"

diff --git a/crates/edgen_core/Cargo.toml b/crates/edgen_core/Cargo.toml
@@ -5,18 +5,19 @@ edition = "2021"
 
 [dependencies]
 ahash = { workspace = true }
+async-trait = { workspace = true }
 dashmap = { workspace = true }
 directories = { workspace = true }
 derive_more = { workspace = true }
 edgen_async_compat = { path = "../edgen_async_compat", features = ["runtime-tokio"] }
 notify = { workspace = true }
 num_cpus = { workspace = true }
 once_cell = { workspace = true }
-rubato = "0.14.1"
+rubato = "0.15.0"
 serde = { workspace = true, features = ["derive"] }
 serde_yaml = { workspace = true }
 smol = { workspace = true }
-symphonia = { version = "0.5.3", features = ["all-codecs", "all-formats"] }
+symphonia = { version = "0.5.4", features = ["all-codecs", "all-formats"] }
 time = { workspace = true }
 tracing = { workspace = true }
 futures = { workspace = true }

diff --git a/crates/edgen_core/src/image_generation.rs b/crates/edgen_core/src/image_generation.rs
@@ -0,0 +1,44 @@
+use serde::Serialize;
+use std::path::PathBuf;
+use thiserror::Error;
+
+/// Per-request arguments passed to [`ImageGenerationEndpoint::generate_image`].
+///
+/// NOTE(review): the field notes below are inferred from the field names and types only;
+/// confirm them against the runtime implementation before relying on them.
+#[derive(Debug, Clone)]
+pub struct ImageGenerationArgs {
+    /// The text prompt.
+    pub prompt: String,
+    /// Presumably the unconditional ("negative") prompt — TODO confirm.
+    pub uncond_prompt: String,
+    /// Optional width; what `None` resolves to is decided by the endpoint implementation.
+    pub width: Option<usize>,
+    /// Optional height; what `None` resolves to is decided by the endpoint implementation.
+    pub height: Option<usize>,
+    /// Presumably the number of inference steps — TODO confirm.
+    pub steps: usize,
+    /// Presumably the number of images requested — TODO confirm.
+    pub images: u32,
+    /// Optional seed; how `None` is handled is decided by the endpoint implementation.
+    pub seed: Option<u64>,
+    /// Guidance scale factor (semantics defined by the endpoint implementation).
+    pub guidance_scale: f64,
+    /// VAE scale factor (semantics defined by the endpoint implementation).
+    pub vae_scale: f64,
+}
+
+/// Filesystem paths of the files that make up an image generation model, passed to
+/// [`ImageGenerationEndpoint::generate_image`].
+#[derive(Debug, Clone)]
+pub struct ModelFiles {
+    /// Path to the tokenizer file.
+    pub tokenizer: PathBuf,
+    /// Path to the CLIP weights.
+    pub clip_weights: PathBuf,
+    /// Path to a second set of CLIP weights, if any.
+    ///
+    /// NOTE(review): presumably only set for models that use two text encoders — confirm.
+    pub clip2_weights: Option<PathBuf>,
+    /// Path to the VAE weights.
+    pub vae_weights: PathBuf,
+    /// Path to the UNet weights.
+    pub unet_weights: PathBuf,
+}
+
+/// Errors returned by [`ImageGenerationEndpoint::generate_image`].
+///
+/// Every variant carries a `String` describing the underlying failure, and that detail is
+/// included in the rendered (`Display`) message.
+#[derive(Serialize, Error, Debug)]
+pub enum ImageGenerationEndpointError {
+    /// The model could not be loaded.
+    #[error("Could not load model: {0}")]
+    Load(String),
+    /// The prompts could not be tokenized.
+    #[error("Failed to tokenize prompts: {0}")]
+    Decoding(String),
+    /// The image generation itself failed.
+    #[error("Failed to generate image: {0}")]
+    Generation(String),
+    /// The output tensor could not be converted into an encoded image.
+    // The detail payload is interpolated here like in every other variant; previously it
+    // was carried by the variant but never shown in the message.
+    #[error("Could not convert the output tensor into an encoded image: {0}")]
+    Encoding(String),
+}
+
+/// An endpoint capable of generating images from a set of model files and request arguments.
+#[async_trait::async_trait]
+pub trait ImageGenerationEndpoint {
+    /// Generates images using the given `model` files and generation `args`.
+    ///
+    /// On success, returns one byte buffer per produced image.
+    /// NOTE(review): the buffers are presumably encoded image files (the error type has an
+    /// `Encoding` variant about producing "an encoded image") — confirm the format with the
+    /// implementation.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`ImageGenerationEndpointError`] if the request fails.
+    async fn generate_image(
+        &self,
+        model: ModelFiles,
+        args: ImageGenerationArgs,
+    ) -> Result<Vec<Vec<u8>>, ImageGenerationEndpointError>;
+}
diff --git a/crates/edgen_core/src/lib.rs b/crates/edgen_core/src/lib.rs
@@ -26,6 +26,7 @@ pub mod whisper;
 
 pub mod settings;
 
+pub mod image_generation;
 pub mod perishable;
 
 /// A generic [`Box`]ed [`Future`], used to emulate `async` functions in traits.

diff --git a/crates/edgen_core/src/settings.rs b/crates/edgen_core/src/settings.rs
@@ -33,7 +33,7 @@ const FILE_NAME: &str = "edgen.conf";
 // TODO look for a better way to do this, since [Settings] already uses a lock
 pub static SETTINGS: Lazy<RwLock<StaticSettings>> = Lazy::new(Default::default);
 
-/// The configuration, and data directories for Edgen.
+/// The configuration and data directories for Edgen.
 pub static PROJECT_DIRS: Lazy<ProjectDirs> =
     Lazy::new(|| ProjectDirs::from("com", "EdgenAI", "Edgen").unwrap());
 pub static CONFIG_FILE: Lazy<PathBuf> = Lazy::new(build_config_file_path);
@@ -145,6 +145,18 @@ pub async fn embeddings_dir() -> String {
         .to_string()
 }
 
+/// Returns the configured image generation models directory, with surrounding whitespace
+/// trimmed.
+pub async fn image_generation_dir() -> String {
+    // The settings sit behind two nested async read locks: take the outer one first, then
+    // the inner one, and copy the value out while both guards are alive.
+    let static_settings = SETTINGS.read().await;
+    let params = static_settings.read().await;
+
+    params.image_generation_models_dir.trim().to_owned()
+}
+
 /// Helper to get the chat completions model name.
 pub async fn chat_completions_name() -> String {
     SETTINGS
@@ -288,6 +300,8 @@ pub struct SettingsParams {
     /// The embeddings repo that Edgen will use for downloads
     pub embeddings_model_repo: String,
 
+    pub image_generation_models_dir: String,
+
     /// The policy used to decide if models/sessions should be allocated and run on acceleration
     /// hardware.
     pub gpu_policy: DevicePolicy,
@@ -325,13 +339,16 @@ impl Default for SettingsParams {
             "transcriptions",
         ]));
         let embeddings_dir = data_dir.join(&join_path_components(&["models", "embeddings"]));
+        let image_generation_dir =
+            data_dir.join(&join_path_components(&["models", "image", "generation"]));
 
         let chat_completions_str = chat_completions_dir.into_os_string().into_string().unwrap();
         let audio_transcriptions_str = audio_transcriptions_dir
             .into_os_string()
             .into_string()
             .unwrap();
         let embeddings_str = embeddings_dir.into_os_string().into_string().unwrap();
+        let image_generation_str = image_generation_dir.into_os_string().into_string().unwrap();
 
         let cpus = num_cpus::get_physical();
         let threads = if cpus > 1 { cpus - 1 } else { 1 };
@@ -349,6 +366,7 @@ impl Default for SettingsParams {
             embeddings_model_name: "nomic-embed-text-v1.5.f16.gguf".to_string(),
             embeddings_model_repo: "nomic-ai/nomic-embed-text-v1.5-GGUF".to_string(),
             embeddings_models_dir: embeddings_str,
+            image_generation_models_dir: image_generation_str,
             // TODO detect if the system has acceleration hardware to decide the default
             gpu_policy: DevicePolicy::AlwaysDevice {
                 overflow_to_cpu: true,

diff --git a/crates/edgen_rt_image_generation_candle/Cargo.toml b/crates/edgen_rt_image_generation_candle/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "edgen_rt_image_generation_candle"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+async-trait = { workspace = true }
+candle-core = "0.4.1"
+candle-transformers = "0.4.1"
+edgen_core = { path = "../edgen_core" }
+image = "0.25.1"
+rand = "0.8.5"
+thiserror = { workspace = true }
+# https://github.com/huggingface/tokenizers/issues/1454
+tokenizers = { version = "0.19.1", default-features = false, features = ["progressbar", "onig"] }
+tokio = { workspace = true, features = ["sync", "rt", "fs"] }
+tracing = { workspace = true }
+
+[features]
+cuda = ["candle-core/cuda", "candle-transformers/cuda"]