Skip to content

Commit

Permalink
feat(compaction): implement the classic strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
graelo committed Aug 18, 2022
1 parent 7a38e56 commit b79e274
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 22 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ thiserror = "1"

regex = "1.4"

# waiting for https://doc.rust-lang.org/std/primitive.slice.html#method.group_by
itertools = "0.10.3"

futures = "0.3"
async-std = { version = "1", features = ["unstable"] }

Expand Down
9 changes: 7 additions & 2 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,13 @@ pub struct Config {
)]
strategy_most_recent: Option<u16>,

/// Apply a classic backup strategy (keep last hour, then last day, then last week, then last
/// month).
/// Apply a classic backup strategy.
///
/// Keep
/// the latest per hour for the past 24 hours,
/// the latest per day for the past 7 days,
/// the latest per week for the past 4 weeks,
/// the latest per month for the past year.
#[clap(
group = "strategy",
short = 'l',
Expand Down
1 change: 1 addition & 0 deletions src/management/backup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use clap::ValueEnum;
///
/// This is sufficient for the [`Catalog`](crate::management::catalog::Catalog) to list backups
/// and decide whether or not a backup should be deleted or kept.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Backup {
/// Path to the backup file.
pub filepath: PathBuf,
Expand Down
11 changes: 4 additions & 7 deletions src/management/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,11 @@ impl Catalog {
self.backups.is_empty()
}

/// Filepath of the current backup.
/// Filepath of the most recent backup.
///
/// This is usually the most recent backup.
/// Because backups are sorted from oldest to most recent, both strategies agree on this.
pub fn latest(&self) -> Option<&Backup> {
match self.strategy {
Strategy::KeepMostRecent { .. } => self.backups.last(),
Strategy::Classic => unimplemented!(),
}
self.backups.last()
}

/// Simulate the compaction strategy: list the backup files to delete, and the ones to keep.
Expand Down Expand Up @@ -250,7 +247,7 @@ impl Catalog {
return "1 minute ago".into();
}

return format!("{} seconds ago", duration_secs);
format!("{} seconds ago", duration_secs)
}

async fn full_list(&self, details_flag: bool) {
Expand Down
127 changes: 114 additions & 13 deletions src/management/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

use std::fmt;

use chrono::{Datelike, Timelike};
use chrono::{Duration, Local};
use itertools::Itertools;

use super::backup::{Backup, BackupStatus};

/// Backups compaction strategy.
Expand All @@ -17,11 +21,8 @@ pub enum Strategy {

/// Classic backup strategy.
///
/// This keeps
/// - the latest backup in the hour,
/// - the latest backup of the previous day,
/// - the latest backup of the previous week,
/// - the latest backup of the previous month,
/// This is only useful if you save _very_ often, probably in an automated manner. See
/// the method [`Strategy::plan`] for details.
Classic,
}

Expand All @@ -34,11 +35,30 @@ impl Strategy {
/// Determine which backup files should be kept.
///
/// The `backup_files` are assumed to be sorted from oldest to newest.
pub fn plan<'a>(&self, backup_files: &'a [Backup]) -> Plan<'a> {
///
/// # KeepMostRecent strategy
///
/// Simply splits the list of all backups into 2 lists: the `k` most recent ones (or fewer
/// if the catalog does not contain that many), which are kept, and the remaining ones,
/// which are considered outdated (disposable).
///
/// # Classic strategy
///
/// Its goal is to keep
///
/// - the latest backup per hour for the past 24 hours (max 23 backups - exclude the past hour),
/// - the latest backup per day for the past 7 days (max 6 backups - exclude the past 24 hours),
/// - the latest backup per week for the past 4 weeks (max 3 backups - exclude the past week),
/// - the latest backup per month for the past year (max 11 backups - exclude the past month).
///
/// The time windows above form a partition: they do not overlap. Within each window, backups
/// are grouped by hour, day, week or month respectively, and only the most recent backup of
/// each group is kept.
///
pub fn plan<'a>(&self, backups: &'a [Backup]) -> Plan<'a> {
match self {
Strategy::KeepMostRecent { k } => {
let index = std::cmp::max(0, backup_files.len() - k);
let (outdated_backups, recent_backups) = backup_files.split_at(index);
let index = std::cmp::max(0, backups.len() - k);
let (outdated_backups, recent_backups) = backups.split_at(index);

let mut statuses = vec![];
statuses.extend(
Expand All @@ -53,13 +73,94 @@ impl Strategy {
);

Plan {
disposable: outdated_backups,
retainable: recent_backups,
disposable: outdated_backups.iter().collect(),
retainable: recent_backups.iter().collect(),
statuses,
}
}

Strategy::Classic => unimplemented!(),
Strategy::Classic => {
let now = Local::now().naive_local();
let _24h_ago = now - Duration::days(1);
let _7d_ago = now - Duration::days(7);
let _4w_ago = now - Duration::weeks(4);
let _year_ago = now - Duration::days(365);

// Last 24 h, grouped by hour
let last_24h_per_hour: Vec<_> = backups
.iter()
.filter(|&b| b.creation_date > _24h_ago)
.group_by(|&b| b.creation_date.hour())
.into_iter()
.map(|(_key, group)| group.collect::<Vec<_>>())
.filter_map(|group| group.last().cloned())
.collect();

// Last 7 days excluding the last 24 h, grouped by day
let last_7d_per_day: Vec<_> = backups
.iter()
.filter(|&b| _24h_ago > b.creation_date && b.creation_date >= _7d_ago)
.group_by(|&b| b.creation_date.day())
.into_iter()
.map(|(_key, group)| group.collect::<Vec<_>>())
.filter_map(|group| group.last().cloned())
.collect();

// Last 4 weeks excluding the last 7 days, grouped by week number
let last_4w_per_isoweek: Vec<_> = backups
.iter()
.filter(|&b| _7d_ago > b.creation_date && b.creation_date >= _4w_ago)
.group_by(|&b| b.creation_date.iso_week())
.into_iter()
.map(|(_key, group)| group.collect::<Vec<_>>())
.filter_map(|group| group.last().cloned())
.collect();

// Last year (365 days) excluding the last 4 weeks, grouped by month
let last_year_per_month: Vec<_> = backups
.iter()
.filter(|&b| _4w_ago > b.creation_date && b.creation_date >= _year_ago)
.group_by(|&b| b.creation_date.month())
.into_iter()
.map(|(_key, group)| group.collect::<Vec<_>>())
.filter_map(|group| group.last().cloned())
.collect();

let retainable: Vec<_> = vec![
last_year_per_month,
last_4w_per_isoweek,
last_7d_per_day,
last_24h_per_hour,
]
.into_iter()
.flatten()
.collect();

let retain_set: std::collections::HashSet<&Backup> =
retainable.iter().copied().collect();

let disposable: Vec<_> = backups
.iter()
.filter(|&b| !retain_set.contains(b))
.collect();

let statuses: Vec<_> = backups
.iter()
.map(|b| {
if retain_set.contains(b) {
(b, BackupStatus::Retainable)
} else {
(b, BackupStatus::Disposable)
}
})
.collect();

Plan {
disposable,
retainable,
statuses,
}
}
}
}
}
Expand All @@ -78,10 +179,10 @@ impl fmt::Display for Strategy {
/// Describes what the strategy would do.
pub struct Plan<'a> {
/// List of backup files that should be deleted.
pub disposable: &'a [Backup],
pub disposable: Vec<&'a Backup>,

/// List of backup files that should be kept.
pub retainable: &'a [Backup],
pub retainable: Vec<&'a Backup>,

/// Sorted list of backup files along with their status (disposable/retainable).
pub statuses: Vec<(&'a Backup, BackupStatus)>,
Expand Down

0 comments on commit b79e274

Please sign in to comment.