Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(Deployment-Wise) Retention Period for Scheduled Task Cleanup #2520

Merged
merged 7 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions jobs/director/spec
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ properties:
director.max_tasks:
description: Max number of tasks per each type to keep in disk
default: 2000
director.tasks_retention_period:
description: the retention period for tasks and their log files (days)
director.tasks_deployments_retention_period:
description: the retention period for tasks and their log files of specific deployments (days)
example: |
- deployment_name: "deployment-name"
retention_period: 14
director.max_threads:
description: Max number of director concurrent threads
default: 32
Expand Down
8 changes: 8 additions & 0 deletions jobs/director/templates/director.yml.erb
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ params['config_server'] = config_server

params['scheduled_jobs'] = []

if_p('director.tasks_retention_period') do |tasks_retention_period|
params['tasks_retention_period'] = tasks_retention_period
end

if_p('director.tasks_deployments_retention_period') do |tasks_deployments_retention_period|
params['tasks_deployments_retention_period'] = tasks_deployments_retention_period
end

if_p('director.snapshot_schedule') do |snapshot_schedule|
params['scheduled_jobs'] << {
'command' => 'SnapshotDeployments',
Expand Down
42 changes: 38 additions & 4 deletions src/bosh-director/lib/bosh/director/api/task_remover.rb
Original file line number Diff line number Diff line change
@@ -1,15 +1,33 @@
require 'date'

module Bosh::Director::Api
class TaskRemover
def initialize(max_tasks)
def initialize(max_tasks, retention_period, deployment_retention_period)
@max_tasks = max_tasks
@retention_period = retention_period
@deployment_retention_period = deployment_retention_period
end

def remove(type)
tasks_removed = 0
removal_candidates_dataset(type).paged_each(strategy: :filter, stream: false) do |task|
removal_max_tasks_candidates_dataset(type).paged_each(strategy: :filter, stream: false) do |task|
tasks_removed += 1
remove_task(task)
end
unless @retention_period.nil?
removal_retention_candidates_dataset(type).paged_each(strategy: :filter, stream: false) do |task|
tasks_removed += 1
remove_task(task)
end
end
unless @deployment_retention_period.nil?
@deployment_retention_period.each do |d|
removal_deployment_retention_candidates_dataset(type, d).paged_each(strategy: :filter, stream: false) do |task|
tasks_removed += 1
remove_task(task)
end
end
end
tasks_removed
end

Expand All @@ -29,14 +47,30 @@ def remove_task(task)

private

def removal_candidates_dataset(type)
def removal_max_tasks_candidates_dataset(type)
base_filter = Bosh::Director::Models::Task.where(type: type)
.exclude(state: %w[processing queued])
.select(:id, :output).order { Sequel.desc(:id) }

starting_id = base_filter.limit(1, @max_tasks).first&.id || 0

base_filter.where { id <= starting_id }
end

def removal_retention_candidates_dataset(type)
retention_time = DateTime.now - @retention_period.to_i
Bosh::Director::Models::Task.where(type: type)
.where { checkpoint_time < retention_time }
.exclude(state: %w[processing queued])
.select(:id, :output)
end

def removal_deployment_retention_candidates_dataset(type, deployment_with_retention_period)
retention_time = DateTime.now - deployment_with_retention_period['retention_period'].to_i
Bosh::Director::Models::Task.where(type: type)
.where(deployment_name: deployment_with_retention_period['deployment_name'])
.where { checkpoint_time < retention_time }
.exclude(state: %w[processing queued])
.select(:id, :output)
end
end
end
6 changes: 6 additions & 0 deletions src/bosh-director/lib/bosh/director/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ class << self
:local_dns,
:logger,
:max_tasks,
:tasks_retention_period,
:tasks_deployments_retention_period,
:max_threads,
:max_vm_create_tries,
:name,
Expand Down Expand Up @@ -131,6 +133,10 @@ def configure(config)
# by default keep only last 2000 tasks of each type in disk
@max_tasks = config.fetch('max_tasks', 2000).to_i

# by default keep all tasks of each type in disk if retention period is not set
@tasks_retention_period = config.fetch('tasks_retention_period', nil)
@tasks_deployments_retention_period = config.fetch('tasks_deployments_retention_period', nil)

@max_threads = config.fetch('max_threads', 32).to_i

@revision = get_revision
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ def self.schedule_message
end

def initialize(_param = {})
@task_remover = Bosh::Director::Api::TaskRemover.new(Config.max_tasks)
@task_remover = Bosh::Director::Api::TaskRemover.new(
Config.max_tasks,
Config.tasks_retention_period,
Config.tasks_deployments_retention_period
)
end

def update_orphaned_tasks_with_state_error(result)
Expand Down
6 changes: 6 additions & 0 deletions src/bosh-director/spec/assets/test-director-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ port: 8081
logging:
level: ERROR

max_tasks: 2000
tasks_retention_period: 20
tasks_deployments_retention_period:
- deployment: fake-deployment
retention_period: 30

dir: /tmp/boshdir
director_certificate_expiry_json_path: /tmp/boshdir/certificate_expiry.json
db:
Expand Down
105 changes: 94 additions & 11 deletions src/bosh-director/spec/unit/api/task_remover_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@ module Bosh::Director::Api
describe TaskRemover do
include FakeFS::SpecHelpers

def make_n_tasks(num_tasks, type = default_type)
def make_n_tasks(num_tasks, task_type: default_type, checkpoint_time: inside_retention, deployment: 'deployment1')
num_tasks.times do |i|
task = Bosh::Director::Models::Task.make(state: 'done', output: "/director/tasks/#{type}_#{i}", type: type)
task = Bosh::Director::Models::Task.make(state: 'done', output: "/director/tasks/#{task_type}_#{i}", checkpoint_time: checkpoint_time, deployment_name: deployment, type: task_type)
FileUtils.mkpath(task.output)
end
end

subject(:remover) { TaskRemover.new(3) }
subject(:remover) { TaskRemover.new(3, nil, nil) }
let(:default_type) { 'type' }
let(:inside_retention) { '2024-05-12 15:35:45.834392' }
let(:outside_retention) { '2023-05-12 15:35:45.834392' }
let(:second_type) { 'type1' }

def tasks
Expand All @@ -24,7 +26,7 @@ def tasks
describe '#remove' do
context 'when there are fewer than max_tasks task of the given type in the database' do
before do
make_n_tasks(1, second_type)
make_n_tasks(1, task_type: second_type)
make_n_tasks(2)
end

Expand All @@ -37,7 +39,7 @@ def tasks

context 'when there are =max_tasks of the given type in the database' do
before do
make_n_tasks(1, second_type)
make_n_tasks(1, task_type: second_type)
make_n_tasks(3)
end

Expand All @@ -50,7 +52,7 @@ def tasks

context 'when there is one more than max_tasks tasks of the given type in the database' do
before do
make_n_tasks(1, second_type)
make_n_tasks(1, task_type: second_type)
make_n_tasks(4)
end

Expand All @@ -63,7 +65,7 @@ def tasks

context 'when there are 2 more than max_tasks task of the given type in the database' do
before do
make_n_tasks(1, second_type)
make_n_tasks(1, task_type: second_type)
make_n_tasks(5)
end

Expand All @@ -77,7 +79,7 @@ def tasks

context 'when there are 10 more than max_tasks task of the given type in the database' do
before do
make_n_tasks(1, second_type)
make_n_tasks(1, task_type: second_type)
make_n_tasks(13)
end

Expand All @@ -92,7 +94,7 @@ def tasks

context 'when there are 2 types with more than max_tasks tasks in the database' do
before do
make_n_tasks(4, second_type)
make_n_tasks(4, task_type: second_type)
make_n_tasks(4)
end

Expand All @@ -107,7 +109,7 @@ def tasks

context 'when specific states should be ignored from removal' do
before do
make_n_tasks(1, second_type)
make_n_tasks(1, task_type: second_type)
make_n_tasks(5)
running_task = tasks[1]
running_task.update(state: state)
Expand Down Expand Up @@ -135,7 +137,7 @@ def tasks
end

context 'when task output is nil' do
subject(:remover) { described_class.new(0) }
subject(:remover) { described_class.new(0, nil, nil) }

before do
Bosh::Director::Models::Task.make(state: 'done', output: nil)
Expand Down Expand Up @@ -168,6 +170,87 @@ def tasks
}.from(4).to(3)
end
end

context 'when there are tasks exceeding the retention period 1 day in the database' do
subject(:remover) do
TaskRemover.new(2000, 1, nil)
end

before do
make_n_tasks(1, checkpoint_time: inside_retention)
make_n_tasks(1, checkpoint_time: outside_retention)
end

it 'it removes the task outside retention' do
expect(remover).to_not receive(:remove_task).with(tasks[0])
expect(remover).to receive(:remove_task).with(tasks[1])

Timecop.freeze(Time.parse(inside_retention) + 60 * 60) do
remover.remove(default_type)
end
end
end

context 'when there is task exceeding the retention period 1 day in the database and the deployment is configured in deployment_retention_period' do
subject(:remover) do
TaskRemover.new(2000, nil, [{ 'deployment_name' => 'deployment1', 'retention_period' => 1 }])
end

before do
make_n_tasks(1, checkpoint_time: outside_retention, deployment: 'deployment1')
make_n_tasks(1, checkpoint_time: outside_retention, deployment: 'deployment2')
end

it 'it removes the task of specific deployment outside retention' do
expect(remover).to receive(:remove_task).with(tasks[0])
expect(remover).to_not receive(:remove_task).with(tasks[1])

Timecop.freeze(Time.parse(inside_retention) + 60 * 60) do
remover.remove(default_type)
end
end
end

context 'when the deployment configured in deployment_retention_period does not exist' do
subject(:remover) do
TaskRemover.new(2000, nil, [{ 'deployment_name' => 'deployment3', 'retention_period' => 1 }])
end

before do
make_n_tasks(1, checkpoint_time: outside_retention, deployment: 'deployment1')
make_n_tasks(1, checkpoint_time: outside_retention, deployment: 'deployment2')
end

it 'it does nothing' do
expect(remover).to_not receive(:remove_task)

Timecop.freeze(Time.parse(inside_retention) + 60 * 60) do
remover.remove(default_type)
end
end
end

context 'when both retention_period and deployment_retention_period configured' do
subject(:remover) do
TaskRemover.new(2000, 2, [{ 'deployment_name' => 'deployment1', 'retention_period' => 1 }])
end

before do
make_n_tasks(1, checkpoint_time: outside_retention, deployment: 'deployment1')
make_n_tasks(1, checkpoint_time: outside_retention, deployment: 'deployment2')
make_n_tasks(1, checkpoint_time: inside_retention, deployment: 'deployment1')
end

it 'it removes the task outside retention' do
expect(remover).to receive(:remove_task).at_least(1).times.with(tasks[0])
expect(remover).to receive(:remove_task).with(tasks[1])
expect(remover).to_not receive(:remove_task).with(tasks[2])

Timecop.freeze(Time.parse(inside_retention) + 60 * 60) do
remover.remove(default_type)
end
end
end
end
Malsourie marked this conversation as resolved.
Show resolved Hide resolved

describe '#remove_task' do
Expand Down
9 changes: 9 additions & 0 deletions src/bosh-director/spec/unit/config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,15 @@
expect(described_class.nats_config_fingerprint).to eq(Digest::SHA1.hexdigest("client_ca_certificate_pathclient_ca_private_key_pathserver_ca_path"))
end
end

describe 'task remover configurations' do
it 'returns the task remover configurations' do
described_class.configure(test_config)
expect(described_class.max_tasks).to eq(2000)
expect(described_class.tasks_retention_period).to eq(20)
expect(described_class.tasks_deployments_retention_period).to eq([{'deployment' => 'fake-deployment', 'retention_period' => 30}])
end
end
end

describe '#identity_provider' do
Expand Down