Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix long answer bug in non-stream requests #96

Merged
merged 1 commit into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 3 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions crates/edgen_core/src/perishable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@
//! Lazily self-destructing types.

use core::time::Duration;
use futures::executor::block_on;
use std::future::Future;
use std::ops::{Deref, DerefMut};
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use std::time::Instant;

use futures::executor::block_on;
use tokio::select;
use tokio::sync::{oneshot, RwLock, RwLockReadGuard, RwLockWriteGuard};
use tracing::{span, Level};
use tracing::{info, span, Level};

/// An asynchronous `OnceCell` with expiration semantics.
///
Expand All @@ -39,6 +39,7 @@ pub struct Perishable<T> {
/// Channel used to signal that this [`Perishable`] has been dropped.
_drop_tx: oneshot::Sender<()>,
}

struct PerishableInner<T> {
current_value: RwLock<Option<T>>,
active_signal: ActiveSignal,
Expand Down Expand Up @@ -172,8 +173,8 @@ where
select! {
_ = &mut drop_rx => break,
_ = yield_until(check_date) => {
if watched_inner.state.read().await.last_accessed == accessed {
let _ = watched_inner.current_value.write().await.take();
if watched_inner.state.read().await.last_accessed == accessed && watched_inner.current_value.write().await.take().is_some() {
info!("A {} has perished", std::any::type_name::<T>());
}
}
}
Expand Down Expand Up @@ -219,6 +220,7 @@ impl<T: 'static> Perishable<T> {
// Value isn't initialized. Acquire a write lock and initialize it.
let mut guard = self.inner.current_value.write().await;

info!("(Re)Creating a new {}", std::any::type_name::<T>());
*guard = Some(constructor.construct().await);

(signal, PerishableReadGuard(guard.downgrade()))
Expand All @@ -245,6 +247,7 @@ impl<T: 'static> Perishable<T> {
// Value isn't initialized. Acquire a write lock and initialize it.
let mut guard = self.inner.current_value.write().await;

info!("(Re)Creating a new {}", std::any::type_name::<T>());
*guard = Some(constructor.construct().await?);

Ok((signal, PerishableReadGuard(guard.downgrade())))
Expand All @@ -261,6 +264,7 @@ impl<T: 'static> Perishable<T> {
let mut guard = self.inner.current_value.write().await;

if guard.is_none() {
info!("(Re)Creating a new {}", std::any::type_name::<T>());
*guard = Some(constructor.construct().await);
}

Expand All @@ -279,6 +283,7 @@ impl<T: 'static> Perishable<T> {
let mut guard = self.inner.current_value.write().await;

if guard.is_none() {
info!("(Re)Creating a new {}", std::any::type_name::<T>());
*guard = Some(constructor.construct().await?);
}

Expand Down
4 changes: 0 additions & 4 deletions crates/edgen_rt_llama_cpp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,9 @@ blake3 = { workspace = true }
dashmap = { workspace = true }
derive_more = { workspace = true }
edgen_core = { path = "../edgen_core" }
flume = { workspace = true }
futures = { workspace = true }
llama_cpp = { git = "https://github.com/edgenai/llama_cpp-rs", branch = "main", features = ["native"] }
num_cpus = { workspace = true }
smol = { workspace = true }
thiserror = { workspace = true }
tinyvec = { workspace = true, features = ["alloc"] }
tokio = { workspace = true, features = ["sync", "rt", "fs"] }
tracing = { workspace = true }

Expand Down
11 changes: 4 additions & 7 deletions crates/edgen_rt_llama_cpp/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use llama_cpp::standard_sampler::StandardSampler;
use llama_cpp::{
CompletionHandle, LlamaModel, LlamaParams, LlamaSession, SessionParams, TokensToStrings,
};
use smol::stream::StreamExt;
use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};
use tokio::task::JoinHandle;
use tokio::time::{interval, MissedTickBehavior};
Expand Down Expand Up @@ -243,7 +242,7 @@ impl UnloadingModel {
let sampler = StandardSampler::default();
let handle = session.start_completing_with(sampler, SINGLE_MESSAGE_LIMIT);

Ok(model_guard.decode_tokens(handle))
Ok(handle.into_string_async().await)
} else {
let (session, mut id, new_context) = self.take_chat_session(&args.prompt).await;

Expand All @@ -263,7 +262,7 @@ impl UnloadingModel {
(session_signal, handle)
};

let res = model_guard.decode_tokens(handle);
let res = handle.into_string_async().await;

self.sessions.insert(id, session);

Expand Down Expand Up @@ -591,11 +590,9 @@ impl Stream for CompletionStream {
type Item = String;

fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let stream = std::ops::DerefMut::deref_mut(&mut self);

match stream.handle.poll_next(cx) {
match std::pin::pin!(&mut self.handle).poll_next(cx) {
Poll::Ready(Some(val)) => {
if let Some(id) = &mut stream.session_id {
if let Some(id) = &mut self.session_id {
id.advance(&val);
}
Poll::Ready(Some(val))
Expand Down
Loading