Skip to content

Commit

Permalink
Optimze SourceBundle/DebugSession (#787)
Browse files Browse the repository at this point in the history
This gives each `SourceBundleDebugSession` its own `archive`, so multiple sessions do not contend on the same lock.
Also creates the manifest index eagerly and shares that across debug sessions. While previously the complete `SourceBundle` was cached in memory and shared, each `DebugSession` would still create its own index on each access.
Another driveby improvement is to provide a consuming `SourceFileDescriptor::into_contents` to avoid a clone.
  • Loading branch information
Swatinem committed May 15, 2023
1 parent 3b2f1f6 commit c44c475
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 50 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

**Fixes**:

- Optimze SourceBundle/DebugSession ([#787](https://github.com/getsentry/symbolic/pull/787))

## 12.1.3

**Fixes**:
Expand Down
127 changes: 77 additions & 50 deletions symbolic-debuginfo/src/sourcebundle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ use std::io::{BufReader, BufWriter, Read, Seek, Write};
use std::path::Path;
use std::sync::Arc;

use once_cell::sync::OnceCell;
use parking_lot::Mutex;
use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize};
Expand Down Expand Up @@ -378,6 +377,14 @@ impl<'a> SourceFileDescriptor<'a> {
self.contents.as_deref()
}

/// The contents of the source file as string, if it's available.
///
/// This unwraps the [`SourceFileDescriptor`] directly and might avoid a copy of `contents`
/// later on.
pub fn into_contents(self) -> Option<Cow<'a, str>> {
self.contents
}

/// If available returns the URL of this source.
///
/// For certain files this is the canoncial URL of where the file is placed. This
Expand Down Expand Up @@ -515,6 +522,47 @@ struct SourceBundleManifest {
pub attributes: BTreeMap<String, String>,
}

struct SourceBundleIndex<'data> {
manifest: SourceBundleManifest,
indexed_files: HashMap<FileKey<'data>, Arc<String>>,
}

impl<'data> SourceBundleIndex<'data> {
pub fn parse(
archive: &mut zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>,
) -> Result<Self, SourceBundleError> {
let manifest_file = archive
.by_name("manifest.json")
.map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
let manifest: SourceBundleManifest = serde_json::from_reader(manifest_file)
.map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadManifest, e))?;

let files = &manifest.files;
let mut indexed_files = HashMap::with_capacity(files.len());

for (zip_path, file_info) in files {
let zip_path = Arc::new(zip_path.clone());
if !file_info.path.is_empty() {
indexed_files.insert(
FileKey::Path(file_info.path.clone().into()),
zip_path.clone(),
);
}
if !file_info.url.is_empty() {
indexed_files.insert(FileKey::Url(file_info.url.clone().into()), zip_path.clone());
}
if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) {
indexed_files.insert(FileKey::DebugId(debug_id, ty), zip_path.clone());
}
}

Ok(Self {
manifest,
indexed_files,
})
}
}

/// A bundle of source code files.
///
/// To create a source bundle, see [`SourceBundleWriter`]. For more information, see the [module
Expand All @@ -523,9 +571,9 @@ struct SourceBundleManifest {
/// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
/// [module level documentation]: index.html
pub struct SourceBundle<'data> {
manifest: Arc<SourceBundleManifest>,
archive: Arc<Mutex<zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>>>,
data: &'data [u8],
archive: zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>,
index: Arc<SourceBundleIndex<'data>>,
}

impl fmt::Debug for SourceBundle<'_> {
Expand Down Expand Up @@ -555,15 +603,13 @@ impl<'data> SourceBundle<'data> {
pub fn parse(data: &'data [u8]) -> Result<SourceBundle<'data>, SourceBundleError> {
let mut archive = zip::read::ZipArchive::new(std::io::Cursor::new(data))
.map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
let manifest_file = archive
.by_name("manifest.json")
.map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadZip, e))?;
let manifest = serde_json::from_reader(manifest_file)
.map_err(|e| SourceBundleError::new(SourceBundleErrorKind::BadManifest, e))?;

let index = Arc::new(SourceBundleIndex::parse(&mut archive)?);

Ok(SourceBundle {
manifest: Arc::new(manifest),
archive: Arc::new(Mutex::new(archive)),
archive,
data,
index,
})
}

Expand All @@ -585,7 +631,8 @@ impl<'data> SourceBundle<'data> {
/// [`ObjectLike`]: ../trait.ObjectLike.html
/// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
pub fn code_id(&self) -> Option<CodeId> {
self.manifest
self.index
.manifest
.attributes
.get("code_id")
.and_then(|x| x.parse().ok())
Expand All @@ -599,7 +646,8 @@ impl<'data> SourceBundle<'data> {
/// [`ObjectLike`]: ../trait.ObjectLike.html
/// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
pub fn debug_id(&self) -> DebugId {
self.manifest
self.index
.manifest
.attributes
.get("debug_id")
.and_then(|x| x.parse().ok())
Expand All @@ -614,7 +662,8 @@ impl<'data> SourceBundle<'data> {
/// [`ObjectLike`]: ../trait.ObjectLike.html
/// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
pub fn name(&self) -> Option<&str> {
self.manifest
self.index
.manifest
.attributes
.get("object_name")
.map(|x| x.as_str())
Expand All @@ -628,7 +677,8 @@ impl<'data> SourceBundle<'data> {
/// [`ObjectLike`]: ../trait.ObjectLike.html
/// [`SourceBundleWriter`]: struct.SourceBundleWriter.html
pub fn arch(&self) -> Arch {
self.manifest
self.index
.manifest
.attributes
.get("arch")
.and_then(|s| s.parse().ok())
Expand Down Expand Up @@ -679,10 +729,14 @@ impl<'data> SourceBundle<'data> {
/// efficient access to various records in the debug information. Since this can be quite a
/// costly process, try to reuse the debugging session as long as possible.
pub fn debug_session(&self) -> Result<SourceBundleDebugSession<'data>, SourceBundleError> {
// NOTE: The `SourceBundleDebugSession` still needs interior mutability, so it still needs
// to carry its own Mutex. However that is still preferable to sharing the Mutex of the
// `SourceBundle`, which might be shared by multiple threads.
// The only thing here that really needs to be `mut` is the `Cursor` / `Seek` position.
let archive = Mutex::new(self.archive.clone());
Ok(SourceBundleDebugSession {
manifest: self.manifest.clone(),
archive: self.archive.clone(),
indexed_files: OnceCell::new(),
index: Arc::clone(&self.index),
archive,
})
}

Expand All @@ -708,7 +762,7 @@ impl<'data> SourceBundle<'data> {

/// Returns true if this source bundle contains no source code.
pub fn is_empty(&self) -> bool {
self.manifest.files.is_empty()
self.index.manifest.files.is_empty()
}
}

Expand Down Expand Up @@ -806,16 +860,15 @@ enum FileKey<'a> {

/// Debug session for SourceBundle objects.
pub struct SourceBundleDebugSession<'data> {
manifest: Arc<SourceBundleManifest>,
archive: Arc<Mutex<zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>>>,
indexed_files: OnceCell<HashMap<FileKey<'data>, Arc<String>>>,
archive: Mutex<zip::read::ZipArchive<std::io::Cursor<&'data [u8]>>>,
index: Arc<SourceBundleIndex<'data>>,
}

impl<'data> SourceBundleDebugSession<'data> {
/// Returns an iterator over all source files in this debug file.
pub fn files(&self) -> SourceBundleFileIterator<'_> {
SourceBundleFileIterator {
files: self.manifest.files.values(),
files: self.index.manifest.files.values(),
}
}

Expand All @@ -824,32 +877,6 @@ impl<'data> SourceBundleDebugSession<'data> {
std::iter::empty()
}

/// Get the indexed file mapping.
fn indexed_files(&self) -> &HashMap<FileKey, Arc<String>> {
self.indexed_files.get_or_init(|| {
let files = &self.manifest.files;
let mut rv = HashMap::with_capacity(files.len());

for (zip_path, file_info) in files {
let zip_path = Arc::new(zip_path.clone());
if !file_info.path.is_empty() {
rv.insert(
FileKey::Path(file_info.path.clone().into()),
zip_path.clone(),
);
}
if !file_info.url.is_empty() {
rv.insert(FileKey::Url(file_info.url.clone().into()), zip_path.clone());
}
if let (Some(debug_id), Some(ty)) = (file_info.debug_id(), file_info.ty()) {
rv.insert(FileKey::DebugId(debug_id, ty), zip_path.clone());
}
}

rv
})
}

/// Get source by the path of a file in the bundle.
fn source_by_zip_path(&self, zip_path: &str) -> Result<Option<String>, SourceBundleError> {
let mut archive = self.archive.lock();
Expand All @@ -868,13 +895,13 @@ impl<'data> SourceBundleDebugSession<'data> {
&self,
key: FileKey,
) -> Result<Option<SourceFileDescriptor<'_>>, SourceBundleError> {
let zip_path = match self.indexed_files().get(&key) {
let zip_path = match self.index.indexed_files.get(&key) {
Some(zip_path) => zip_path.as_str(),
None => return Ok(None),
};

let content = self.source_by_zip_path(zip_path)?;
let info = self.manifest.files.get(zip_path);
let info = self.index.manifest.files.get(zip_path);
Ok(content.map(|opt| SourceFileDescriptor::new_embedded(Cow::Owned(opt), info)))
}

Expand Down

0 comments on commit c44c475

Please sign in to comment.