Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Can handle corrupt dbs #1039

Closed
wants to merge 14 commits into from
61 changes: 60 additions & 1 deletion crates/holochain_sqlite/src/db.rs
Expand Up @@ -112,6 +112,46 @@ impl DbWrite {
std::fs::create_dir_all(parent)
.map_err(|_e| DatabaseError::DatabaseMissing(parent.to_owned()))?;
}
// Check if the database is valid and take the appropriate
// action if it isn't.
match Connection::open(&path)
// For some reason calling pragma_update is necessary to prove the database file is valid.
.and_then(|c| c.pragma_update(None, "synchronous", &"0".to_string()))
{
Ok(_) => (),
// These are the two errors that can
// occur if the database is not valid.
err
@
Err(Error::SqliteFailure(
rusqlite::ffi::Error {
code: ErrorCode::DatabaseCorrupt,
..
},
..,
))
| err
@
Err(Error::SqliteFailure(
rusqlite::ffi::Error {
code: ErrorCode::NotADatabase,
..
},
..,
)) => {
// Check if this database kind requires wiping.
if kind.if_corrupt_wipe() {
std::fs::remove_file(&path)?;
} else {
// If we don't wipe we need to return an error.
err?;
}
}
// Another error has occurred when trying to open the db.
Err(e) => return Err(e.into()),
}

// Now we know the database file is valid we can open a connection pool.
let pool = new_connection_pool(&path, kind.clone(), sync_level);
let mut conn = pool.get()?;
// set to faster write-ahead-log mode
Expand Down Expand Up @@ -197,7 +237,7 @@ pub enum DbKind {

impl DbKind {
/// Construct a partial Path based on the kind
fn filename(&self) -> PathBuf {
pub fn filename(&self) -> PathBuf {
let mut path: PathBuf = match self {
DbKind::Cell(cell_id) => ["cell", &cell_id.to_string()].iter().collect(),
DbKind::Cache(dna) => ["cache", &format!("cache-{}", dna)].iter().collect(),
Expand All @@ -213,6 +253,25 @@ impl DbKind {
path.set_extension("sqlite3");
path
}

/// Reports whether a corrupt database of this kind should be wiped.
///
/// Returns `true` for kinds that are safe to wipe when corrupt because
/// they can be refilled from the network, and `false` for kinds that
/// cannot be refilled and therefore require manual intervention.
fn if_corrupt_wipe(&self) -> bool {
    match self {
        // Safe to wipe when corrupt.
        DbKind::Cache(_) | DbKind::P2pAgentStore(_) | DbKind::P2pMetrics(_) => true,
        // Not safe to wipe when corrupt.
        // TODO: When splitting the source chain and authority db the
        // authority db can be wiped but the source chain db cannot.
        DbKind::Cell(_) | DbKind::Wasm | DbKind::Conductor => false,
    }
}
}

/// Implementors are able to create a new read-only DB transaction
Expand Down
1 change: 1 addition & 0 deletions crates/holochain_state/CHANGELOG.md
Expand Up @@ -3,6 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## \[Unreleased\]
- Some databases can handle corruption by wiping the db file and starting again. [#1039](https://github.com/holochain/holochain/pull/1039).

## 0.0.10

Expand Down
90 changes: 90 additions & 0 deletions crates/holochain_state/tests/corrupt_db.rs
@@ -0,0 +1,90 @@
use std::path::Path;

use contrafact::arbitrary;
use contrafact::arbitrary::Arbitrary;
use holo_hash::{AgentPubKey, DnaHash};
use holochain_sqlite::rusqlite::Connection;
use holochain_state::prelude::{fresh_reader_test, mutations_helpers, test_keystore, DbKind};
use holochain_types::{
dht_op::{DhtOp, DhtOpHashed},
env::EnvWrite,
};
use holochain_zome_types::{CellId, Header, Signature};
use tempdir::TempDir;

#[tokio::test(flavor = "multi_thread")]
/// A corrupt cache db must open successfully, with its previous contents gone.
async fn corrupt_cache_creates_new_db() {
    let mut noise = arbitrary::Unstructured::new(&holochain_zome_types::NOISE);
    observability::test_run().ok();

    let cache_kind = DbKind::Cache(DnaHash::arbitrary(&mut noise).unwrap());

    // Build a cache db holding one op, then corrupt the file on disk.
    let corrupt_dir = create_corrupt_db(&cache_kind, &mut noise);

    // Opening must succeed even though the file is corrupt.
    let env = EnvWrite::test(&corrupt_dir, cache_kind, test_keystore()).unwrap();

    // The op inserted before the corruption must no longer be present.
    let op_count: usize = fresh_reader_test(env, |txn| {
        txn.query_row("SELECT COUNT(rowid) FROM DhtOp", [], |row| row.get(0))
            .unwrap()
    });
    assert_eq!(op_count, 0);
}

#[tokio::test(flavor = "multi_thread")]
/// Opening a corrupt cell db must return an error.
async fn corrupt_source_chain_panics() {
    let mut noise = arbitrary::Unstructured::new(&holochain_zome_types::NOISE);
    observability::test_run().ok();

    let dna = DnaHash::arbitrary(&mut noise).unwrap();
    let agent = AgentPubKey::arbitrary(&mut noise).unwrap();
    let cell_kind = DbKind::Cell(CellId::new(dna, agent));

    // Build a cell db holding one op, then corrupt the file on disk.
    let corrupt_dir = create_corrupt_db(&cell_kind, &mut noise);

    // Opening the corrupt cell db must be reported as an error.
    assert!(EnvWrite::test(&corrupt_dir, cell_kind, test_keystore()).is_err());
}

/// Corrupts the file at `path` by zeroing every even-indexed byte among
/// its first 200 bytes; all other bytes are written back unchanged.
///
/// Panics if the file cannot be read or written.
fn corrupt_db(path: &Path) {
    let mut bytes = std::fs::read(path).unwrap();

    // Visit offsets 0, 2, 4, ... within the first 200 bytes only.
    for byte in bytes.iter_mut().take(200).step_by(2) {
        *byte = 0;
    }

    std::fs::write(path, bytes).unwrap();
}

/// Builds a database for `kind` inside a fresh temporary directory, inserts
/// one op into it, closes the connection and then corrupts the file.
///
/// Returns the temporary directory that holds the corrupted database file.
fn create_corrupt_db(kind: &DbKind, u: &mut arbitrary::Unstructured) -> TempDir {
    let tmp = tempdir::TempDir::new("corrupt_source_chain").unwrap();
    let db_path = tmp.path().join(kind.filename());
    std::fs::create_dir_all(db_path.parent().unwrap()).unwrap();

    // Create the database file and set up its schema.
    let mut conn = Connection::open(&db_path).unwrap();
    holochain_sqlite::schema::SCHEMA_CELL
        .initialize(&mut conn, Some(&kind))
        .unwrap();

    // Insert a single op so the database holds some data.
    let signature = Signature::arbitrary(u).unwrap();
    let header = Header::arbitrary(u).unwrap();
    let op = DhtOpHashed::from_content_sync(DhtOp::RegisterAgentActivity(signature, header));
    let mut txn = conn
        .transaction_with_behavior(holochain_sqlite::rusqlite::TransactionBehavior::Exclusive)
        .unwrap();
    mutations_helpers::insert_valid_integrated_op(&mut txn, op).unwrap();
    txn.commit().unwrap();

    // Close the connection before touching the file on disk.
    conn.close().unwrap();
    corrupt_db(db_path.as_ref());
    tmp
}