From d9726786573677429a953b1d8b8ccab81c42ae1b Mon Sep 17 00:00:00 2001
From: Matthew Smith
Date: Thu, 11 Feb 2016 10:52:19 -0800
Subject: [PATCH] enabled scaling without bounces when editing soa_configs

---
 paasta_itests/setup_marathon_job.feature | 17 +++-
 .../steps/setup_marathon_job_steps.py     | 24 +++++-
 paasta_tools/bounce_lib.py                |  5 +-
 paasta_tools/graceful_app_drain.py        | 16 +++-
 paasta_tools/marathon_tools.py            |  4 +-
 paasta_tools/setup_marathon_job.py        | 78 +++++++++++++------
 6 files changed, 113 insertions(+), 31 deletions(-)

diff --git a/paasta_itests/setup_marathon_job.feature b/paasta_itests/setup_marathon_job.feature
index 478b28ded7..6b9548f7a4 100644
--- a/paasta_itests/setup_marathon_job.feature
+++ b/paasta_itests/setup_marathon_job.feature
@@ -2,6 +2,21 @@ Feature: setup_marathon_job can create a "complete" app

   Scenario: complete apps can be deployed
     Given a working paasta cluster
-    When we create a complete app
+    When we run setup_marathon_job for 5s
     Then we should see it in the list of apps
     Then we can run get_app on it
+    Then we should see the number of instances become 3
+
+  Scenario: marathon apps can be scaled up
+    Given a working paasta cluster
+    When we run setup_marathon_job for 5s
+    And we change the number of instances to 5
+    And we run setup_marathon_job for 5s
+    Then we should see the number of instances become 5
+
+  Scenario: marathon apps can be scaled down
+    Given a working paasta cluster
+    When we run setup_marathon_job for 5s
+    And we change the number of instances to 1
+    And we run setup_marathon_job for 5s
+    Then we should see the number of instances become 1
diff --git a/paasta_itests/steps/setup_marathon_job_steps.py b/paasta_itests/steps/setup_marathon_job_steps.py
index 18d67fe8e3..c561beb522 100644
--- a/paasta_itests/steps/setup_marathon_job_steps.py
+++ b/paasta_itests/steps/setup_marathon_job_steps.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

 import contextlib
+import time
 import mock
 from behave import then
@@ -51,14 +52,14 @@
             {'hostPath': u'/nail/srv', 'containerPath': '/nail/srv', 'mode': 'RO'},
             {'hostPath': u'/etc/boto_cfg', 'containerPath': '/etc/boto_cfg', 'mode': 'RO'}]
     },
-    'instances': 1,
+    'instances': 3,
     'mem': 300,
     'args': [],
     'backoff_factor': 2,
     'cpus': 0.25,
     'uris': ['file:///root/.dockercfg'],
     'backoff_seconds': 1,
-    'constraints': None
+    'constraints': None,
 }


@@ -69,6 +70,7 @@ def create_complete_app(context):
         mock.patch('paasta_tools.marathon_tools.load_marathon_config', return_value=context.marathon_config),
         mock.patch('paasta_tools.marathon_tools.load_system_paasta_config', return_value=context.system_paasta_config),
         mock.patch('paasta_tools.bounce_lib.load_system_paasta_config', return_value=context.system_paasta_config),
+        mock.patch('paasta_tools.setup_marathon_job.send_sensu_bounce_keepalive', autospec=True),
         mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config',
                    return_value=context.system_paasta_config),
     ) as (
@@ -76,6 +78,7 @@ def create_complete_app(context):
         _,
         _,
         _,
+        _,
         mock_load_system_paasta_config,
     ):
         mock_create_complete_config.return_value = fake_service_config
@@ -93,6 +96,18 @@ def create_complete_app(context):
     assert 'deployed' in return_tuple[1]


+@when(u'we change the number of instances to {number}')
+def change_number_of_context_instances(context, number):
+    fake_service_config['instances'] = int(number)
+
+
+@when(u'we run setup_marathon_job for {number}s')
+def run_setup_marathon_job(context, number):
+    for _ in xrange(int(number)):
+        create_complete_app(context)
+        time.sleep(1)
+
+
 @then(u'we should see it in the list of apps')
 def see_it_in_list(context):
     assert fake_appid in marathon_tools.list_all_marathon_app_ids(context.marathon_client)
@@ -101,3 +116,8 @@ def see_it_in_list(context):
 @then(u'we can run get_app on it')
 def can_run_get_app(context):
     assert context.marathon_client.get_app(fake_appid)
+
+
+@then(u'we should see the number of instances become {number}')
+def assert_instances_equals(context, number):
+    assert context.marathon_client.get_app(fake_appid).instances == int(number)
diff --git a/paasta_tools/bounce_lib.py b/paasta_tools/bounce_lib.py
index 1d54e36f3e..5f105d5f9a 100644
--- a/paasta_tools/bounce_lib.py
+++ b/paasta_tools/bounce_lib.py
@@ -394,7 +394,8 @@ def down_bounce(
     new_config,
     new_app_running,
     happy_new_tasks,
-    old_app_live_tasks,
+    old_app_live_happy_tasks,
+    old_app_live_unhappy_tasks,
 ):
     """
     Stops old apps, doesn't start any new apps.
@@ -402,7 +403,7 @@ def down_bounce(
     """
     return {
         "create_app": False,
-        "tasks_to_drain": set(*old_app_live_tasks.values()),
+        "tasks_to_drain": flatten_tasks(old_app_live_happy_tasks) | flatten_tasks(old_app_live_unhappy_tasks),
     }

 # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
diff --git a/paasta_tools/graceful_app_drain.py b/paasta_tools/graceful_app_drain.py
index a126f41fac..722a396e6b 100755
--- a/paasta_tools/graceful_app_drain.py
+++ b/paasta_tools/graceful_app_drain.py
@@ -9,7 +9,7 @@
 from paasta_tools import drain_lib
 from paasta_tools import marathon_tools
 from paasta_tools.setup_marathon_job import do_bounce
-from paasta_tools.setup_marathon_job import get_old_live_draining_tasks
+from paasta_tools.setup_marathon_job import get_old_happy_unhappy_draining_tasks
 from paasta_tools.utils import decompose_job_id
 from paasta_tools.utils import load_system_paasta_config

@@ -72,14 +72,24 @@ def main():

     while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
         app_to_kill = client.get_app(full_appid)
-        old_app_live_tasks, old_app_draining_tasks = get_old_live_draining_tasks([app_to_kill], drain_method)
+        (old_app_live_happy_tasks,
+         old_app_live_unhappy_tasks,
+         old_app_draining_tasks,
+         ) = get_old_happy_unhappy_draining_tasks(
+            other_apps=[app_to_kill],
+            drain_method=drain_method,
+            service=service,
+            nerve_ns=nerve_ns,
+            bounce_health_params=service_instance_config.get_bounce_health_params(service_namespace_config),
+        )
         do_bounce(
             bounce_func=bounce_func,
             drain_method=drain_method,
             config=complete_config,
             new_app_running='',
             happy_new_tasks=[],
-            old_app_live_tasks=old_app_live_tasks,
+            old_app_live_happy_tasks=old_app_live_happy_tasks,
+            old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
             old_app_draining_tasks=old_app_draining_tasks,
             serviceinstance="{0}.{1}".format(service, instance),
             bounce_method='down',
diff --git a/paasta_tools/marathon_tools.py b/paasta_tools/marathon_tools.py
index ae89efe068..5bdde73201 100644
--- a/paasta_tools/marathon_tools.py
+++ b/paasta_tools/marathon_tools.py
@@ -884,6 +884,8 @@ def wait_for_app_to_launch_tasks(client, app_id, expected_tasks, exact_matches_o

 def create_complete_config(service, instance, marathon_config, soa_dir=DEFAULT_SOA_DIR):
     """Generates a complete dictionary to be POST'ed to create an app on Marathon"""
+    CONFIG_HASH_BLACKLIST = ['instances']
+
     system_paasta_config = load_system_paasta_config()
     partial_id = format_job_id(service=service, instance=instance)
     instance_config = load_marathon_service_config(
@@ -907,7 +909,7 @@ def create_complete_config(service, instance, marathon_config, soa_dir=DEFAULT_S
     )
     code_sha = get_code_sha_from_dockerurl(docker_url)
     config_hash = get_config_hash(
-        complete_config,
+        {key: value for key, value in complete_config.items() if key not in CONFIG_HASH_BLACKLIST},
         force_bounce=instance_config.get_force_bounce(),
     )
     full_id = format_job_id(service, instance, code_sha, config_hash)
diff --git a/paasta_tools/setup_marathon_job.py b/paasta_tools/setup_marathon_job.py
index f79fa352ef..f78dbbe983 100755
--- a/paasta_tools/setup_marathon_job.py
+++ b/paasta_tools/setup_marathon_job.py
@@ -288,6 +288,26 @@ def log_bounce_action(line, level='debug'):
     )


+def get_old_happy_unhappy_draining_tasks_for_app(app, drain_method, service, nerve_ns, bounce_health_params):
+    tasks_by_state = {
+        'happy': set(),
+        'unhappy': set(),
+        'draining': set(),
+    }
+
+    happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, **bounce_health_params)
+    for task in app.tasks:
+        if drain_method.is_draining(task):
+            state = 'draining'
+        elif task in happy_tasks:
+            state = 'happy'
+        else:
+            state = 'unhappy'
+        tasks_by_state[state].add(task)
+
+    return tasks_by_state
+
+
 def get_old_happy_unhappy_draining_tasks(other_apps, drain_method, service, nerve_ns, bounce_health_params):
     """Split tasks from old apps into 3 categories:
       - live (not draining) and happy (according to get_happy_tasks)
@@ -300,21 +320,9 @@ def get_old_happy_unhappy_draining_tasks(other_apps, drain_method, service, nerv
     old_app_draining_tasks = {}

     for app in other_apps:
-        tasks_by_state = {
-            'happy': set(),
-            'unhappy': set(),
-            'draining': set(),
-        }
-
-        happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, **bounce_health_params)
-        for task in app.tasks:
-            if drain_method.is_draining(task):
-                state = 'draining'
-            elif task in happy_tasks:
-                state = 'happy'
-            else:
-                state = 'unhappy'
-            tasks_by_state[state].add(task)
+
+        tasks_by_state = get_old_happy_unhappy_draining_tasks_for_app(
+            app, drain_method, service, nerve_ns, bounce_health_params)

         old_app_live_happy_tasks[app.id] = tasks_by_state['happy']
         old_app_live_unhappy_tasks[app.id] = tasks_by_state['unhappy']
@@ -400,17 +408,43 @@ def log_deploy_error(errormsg, level='event'):
         bounce_health_params
     )

+    if new_app_running:
+        protected_draining_tasks = set()
+        if new_app.instances < config['instances']:
+            client.scale_app(app_id=new_app.id, instances=config['instances'], force=True)
+        elif new_app.instances > config['instances']:
+            num_tasks_to_scale = new_app.instances - config['instances']
+            task_dict = get_old_happy_unhappy_draining_tasks_for_app(
+                new_app,
+                drain_method,
+                service,
+                nerve_ns,
+                bounce_health_params,
+            )
+            scaling_app_happy_tasks = list(task_dict['happy'])
+            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
+            scaling_app_draining_tasks = list(task_dict['draining'])
+            tasks_to_move_draining = max(min(len(scaling_app_draining_tasks), num_tasks_to_scale), 0)
+            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining
+            tasks_to_move_unhappy = max(min(len(scaling_app_unhappy_tasks), num_tasks_to_scale), 0)
+            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy
+            tasks_to_move_happy = max(min(len(scaling_app_happy_tasks), num_tasks_to_scale), 0)
+            protected_draining_tasks.update(scaling_app_draining_tasks[:tasks_to_move_draining])
+            old_app_draining_tasks.update({new_app.id: set(scaling_app_draining_tasks[:tasks_to_move_draining])})
+            old_app_live_unhappy_tasks.update({new_app.id: set(scaling_app_unhappy_tasks[:tasks_to_move_unhappy])})
+            old_app_live_happy_tasks.update({new_app.id: set(scaling_app_happy_tasks[:tasks_to_move_happy])})
+            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]
+        # If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
+        # `paasta mark-for-deployment`), then we should undrain them.
+        for task in new_app.tasks:
+            if task not in protected_draining_tasks:
+                drain_method.stop_draining(task)
+
     # Re-drain any already draining tasks on old apps
     for tasks in old_app_draining_tasks.values():
         for task in tasks:
             drain_method.drain(task)

-    # If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
-    # `paasta mark-for-deployment`), then we should undrain them.
-    if new_app_running:
-        for task in new_app.tasks:
-            drain_method.stop_draining(task)
-
     # log all uncaught exceptions and raise them again
     try:
         try:
@@ -544,7 +578,7 @@ def main():
         sys.exit(0)
     except NoConfigurationForServiceError:
         error_msg = "Could not read marathon configuration file for %s in cluster %s" % \
-                    (args.service_instance, load_system_paasta_config().get_cluster())
+            (args.service_instance, load_system_paasta_config().get_cluster())
         log.error(error_msg)
         send_event(service, instance, soa_dir, pysensu_yelp.Status.CRITICAL, error_msg)
         sys.exit(1)
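
A note on why blacklisting 'instances' from the config hash makes this work: the hash is baked into the
Marathon app id, so before this patch any soa_configs edit produced a new app id and therefore a full
bounce. With 'instances' excluded, a config edit that only changes the instance count maps to the app id
that is already running, and setup_marathon_job can scale that app in place. A minimal sketch of the
idea, where config_hash() is a hypothetical stand-in for paasta's get_config_hash() (which hashes
differently), not the patch's actual code:

    import hashlib
    import json

    # Mirrors CONFIG_HASH_BLACKLIST in create_complete_config above: keys in
    # the blacklist do not influence the hash, so configs that differ only in
    # instance count map to the same Marathon app id.
    CONFIG_HASH_BLACKLIST = ['instances']


    def config_hash(config):
        # Hypothetical stand-in for paasta's get_config_hash().
        hashable = {k: v for k, v in config.items() if k not in CONFIG_HASH_BLACKLIST}
        return hashlib.md5(json.dumps(hashable, sort_keys=True).encode('utf-8')).hexdigest()[:8]


    # Scaling from 3 to 5 instances no longer changes the hash, so the app id is stable.
    assert config_hash({'cpus': 0.25, 'instances': 3}) == config_hash({'cpus': 0.25, 'instances': 5})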
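
A note on the scale-down path in deploy_service: when the running app has more tasks than the target
count, the excess tasks handed to the bounce are chosen in priority order: already-draining tasks first,
then unhappy ones, and happy tasks only once the other pools are exhausted, so healthy capacity is the
last thing sacrificed. A self-contained sketch of just that selection logic (a hypothetical helper, not
the function the patch adds):

    def pick_tasks_to_drain(draining, unhappy, happy, num_to_scale):
        # Take from the least valuable pool first; healthy tasks are only
        # drained once the draining and unhappy pools are exhausted.
        to_drain = []
        for pool in (draining, unhappy, happy):
            take = max(min(len(pool), num_to_scale), 0)
            to_drain.extend(pool[:take])
            num_to_scale -= take
        return to_drain


    # Scaling from 6 instances down to 3 with one draining and one unhappy
    # task: only one happy task needs to be drained.
    assert pick_tasks_to_drain(['d1'], ['u1'], ['h1', 'h2', 'h3', 'h4'], 3) == ['d1', 'u1', 'h1']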