enabled scaling without bounces when editing soa_configs
mjksmith committed Feb 12, 2016
1 parent 76e7c72 commit d972678
Showing 6 changed files with 113 additions and 31 deletions.
17 changes: 16 additions & 1 deletion paasta_itests/setup_marathon_job.feature
@@ -2,6 +2,21 @@ Feature: setup_marathon_job can create a "complete" app

Scenario: complete apps can be deployed
Given a working paasta cluster
When we create a complete app
When we run setup_marathon_job for 5s
Then we should see it in the list of apps
Then we can run get_app on it
Then we should see the number of instances become 3

Scenario: marathon apps can be scaled up
Given a working paasta cluster
When we run setup_marathon_job for 5s
And we change the number of instances to 5
And we run setup_marathon_job for 5s
Then we should see the number of instances become 5

Scenario: marathon apps can be scaled down
Given a working paasta cluster
When we run setup_marathon_job for 5s
And we change the number of instances to 1
And we run setup_marathon_job for 5s
Then we should see the number of instances become 1
24 changes: 22 additions & 2 deletions paasta_itests/steps/setup_marathon_job_steps.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import time

import mock
from behave import then
@@ -51,14 +52,14 @@
{'hostPath': u'/nail/srv', 'containerPath': '/nail/srv', 'mode': 'RO'},
{'hostPath': u'/etc/boto_cfg', 'containerPath': '/etc/boto_cfg', 'mode': 'RO'}]
},
'instances': 1,
'instances': 3,
'mem': 300,
'args': [],
'backoff_factor': 2,
'cpus': 0.25,
'uris': ['file:///root/.dockercfg'],
'backoff_seconds': 1,
'constraints': None
'constraints': None,
}


@@ -69,13 +70,15 @@ def create_complete_app(context):
mock.patch('paasta_tools.marathon_tools.load_marathon_config', return_value=context.marathon_config),
mock.patch('paasta_tools.marathon_tools.load_system_paasta_config', return_value=context.system_paasta_config),
mock.patch('paasta_tools.bounce_lib.load_system_paasta_config', return_value=context.system_paasta_config),
mock.patch('paasta_tools.setup_marathon_job.send_sensu_bounce_keepalive', autospec=True),
mock.patch('paasta_tools.setup_marathon_job.load_system_paasta_config',
return_value=context.system_paasta_config),
) as (
mock_create_complete_config,
_,
_,
_,
_,
mock_load_system_paasta_config,
):
mock_create_complete_config.return_value = fake_service_config
@@ -93,6 +96,18 @@ def create_complete_app(context):
assert 'deployed' in return_tuple[1]


@when(u'we change the number of instances to {number}')
def change_number_of_context_instances(context, number):
fake_service_config['instances'] = int(number)


@when(u'we run setup_marathon_job for {number}s')
def run_setup_marathon_job(context, number):
for _ in xrange(int(number)):
create_complete_app(context)
time.sleep(1)


@then(u'we should see it in the list of apps')
def see_it_in_list(context):
assert fake_appid in marathon_tools.list_all_marathon_app_ids(context.marathon_client)
@@ -101,3 +116,8 @@ def see_it_in_list(context):
@then(u'we can run get_app on it')
def can_run_get_app(context):
assert context.marathon_client.get_app(fake_appid)


@then(u'we should see the number of instances become {number}')
def assert_instances_equals(context, number):
assert context.marathon_client.get_app(fake_appid).instances == int(number)
5 changes: 3 additions & 2 deletions paasta_tools/bounce_lib.py
@@ -394,15 +394,16 @@ def down_bounce(
new_config,
new_app_running,
happy_new_tasks,
old_app_live_tasks,
old_app_live_happy_tasks,
old_app_live_unhappy_tasks,
):
"""
Stops old apps, doesn't start any new apps.
Used for the graceful_app_drain script.
"""
return {
"create_app": False,
"tasks_to_drain": set(*old_app_live_tasks.values()),
"tasks_to_drain": flatten_tasks(old_app_live_happy_tasks) | flatten_tasks(old_app_live_unhappy_tasks),
}

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
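Why down_bounce changed: with more than one old app live, the previous expression set(*old_app_live_tasks.values()) unpacks several task sets into set() and raises a TypeError, and it never distinguished task health. The new signature takes the happy/unhappy split and drains everything live. A minimal sketch of the new behavior, assuming flatten_tasks() unions a {app_id: set(tasks)} mapping into one flat set (its implementation is not shown in this diff):

# Illustrative stand-in for bounce_lib.flatten_tasks (assumption: it unions
# a {app_id: set(tasks)} dict into a single set).
def flatten_tasks(tasks_by_app_id):
    return set.union(set(), *tasks_by_app_id.values())

old_app_live_happy_tasks = {'/app.one': {'task_a'}, '/app.two': {'task_b'}}
old_app_live_unhappy_tasks = {'/app.one': {'task_c'}, '/app.two': set()}

# New down_bounce result: every live task is drained, happy or not.
tasks_to_drain = (flatten_tasks(old_app_live_happy_tasks) |
                  flatten_tasks(old_app_live_unhappy_tasks))
assert tasks_to_drain == {'task_a', 'task_b', 'task_c'}

# The old expression fails as soon as two or more old apps are live:
# set(*old_app_live_happy_tasks.values())  # TypeError: set expected at most 1 argument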
16 changes: 13 additions & 3 deletions paasta_tools/graceful_app_drain.py
@@ -9,7 +9,7 @@
from paasta_tools import drain_lib
from paasta_tools import marathon_tools
from paasta_tools.setup_marathon_job import do_bounce
from paasta_tools.setup_marathon_job import get_old_live_draining_tasks
from paasta_tools.setup_marathon_job import get_old_happy_unhappy_draining_tasks
from paasta_tools.utils import decompose_job_id
from paasta_tools.utils import load_system_paasta_config

@@ -72,14 +72,24 @@ def main():

while marathon_tools.is_app_id_running(app_id=full_appid, client=client):
app_to_kill = client.get_app(full_appid)
old_app_live_tasks, old_app_draining_tasks = get_old_live_draining_tasks([app_to_kill], drain_method)
(old_app_live_happy_tasks,
old_app_live_unhappy_tasks,
old_app_draining_tasks,
) = get_old_happy_unhappy_draining_tasks(
other_apps=[app_to_kill],
drain_method=drain_method,
service=service,
nerve_ns=nerve_ns,
bounce_health_params=service_instance_config.get_bounce_health_params(service_namespace_config),
)
do_bounce(
bounce_func=bounce_func,
drain_method=drain_method,
config=complete_config,
new_app_running='',
happy_new_tasks=[],
old_app_live_tasks=old_app_live_tasks,
old_app_live_happy_tasks=old_app_live_happy_tasks,
old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
old_app_draining_tasks=old_app_draining_tasks,
serviceinstance="{0}.{1}".format(service, instance),
bounce_method='down',
4 changes: 3 additions & 1 deletion paasta_tools/marathon_tools.py
@@ -884,6 +884,8 @@ def wait_for_app_to_launch_tasks(client, app_id, expected_tasks, exact_matches_o

def create_complete_config(service, instance, marathon_config, soa_dir=DEFAULT_SOA_DIR):
"""Generates a complete dictionary to be POST'ed to create an app on Marathon"""
CONFIG_HASH_BLACKLIST = ['instances']

system_paasta_config = load_system_paasta_config()
partial_id = format_job_id(service=service, instance=instance)
instance_config = load_marathon_service_config(
@@ -907,7 +909,7 @@ def create_complete_config(service, instance, marathon_config, soa_dir=DEFAULT_S
)
code_sha = get_code_sha_from_dockerurl(docker_url)
config_hash = get_config_hash(
complete_config,
{key: value for key, value in complete_config.items() if key not in CONFIG_HASH_BLACKLIST},
force_bounce=instance_config.get_force_bounce(),
)
full_id = format_job_id(service, instance, code_sha, config_hash)
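Why this hunk is the core of the fix: config_hash feeds into full_id, which becomes the Marathon app id. Before this change, editing instances in soa_configs changed the hash, which changed the app id, which triggered a full bounce. With 'instances' blacklisted from the hashed dict, a pure scaling edit keeps the same app id and setup_marathon_job can scale the existing app in place. A minimal sketch of the idea, using a stand-in hash function (the real get_config_hash also folds in force_bounce; its exact implementation is not reproduced here):

import hashlib
import json

CONFIG_HASH_BLACKLIST = ['instances']

# Stand-in for paasta's get_config_hash, for illustration only.
def get_config_hash(config, force_bounce=None):
    payload = json.dumps(config, sort_keys=True) + str(force_bounce)
    return hashlib.md5(payload.encode('utf-8')).hexdigest()[:8]

def hashable(config):
    return {key: value for key, value in config.items()
            if key not in CONFIG_HASH_BLACKLIST}

before = {'cpus': 0.25, 'mem': 300, 'instances': 1}
after = dict(before, instances=5)  # only the instance count changed

# Same hash -> same Marathon app id -> scale in place instead of bouncing.
assert get_config_hash(hashable(before)) == get_config_hash(hashable(after))
# Hashing the raw config would change the id and force a bounce:
assert get_config_hash(before) != get_config_hash(after)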
78 changes: 56 additions & 22 deletions paasta_tools/setup_marathon_job.py
@@ -288,6 +288,26 @@ def log_bounce_action(line, level='debug'):
)


def get_old_happy_unhappy_draining_tasks_for_app(app, drain_method, service, nerve_ns, bounce_health_params):
tasks_by_state = {
'happy': set(),
'unhappy': set(),
'draining': set(),
}

happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, **bounce_health_params)
for task in app.tasks:
if drain_method.is_draining(task):
state = 'draining'
elif task in happy_tasks:
state = 'happy'
else:
state = 'unhappy'
tasks_by_state[state].add(task)

return tasks_by_state


def get_old_happy_unhappy_draining_tasks(other_apps, drain_method, service, nerve_ns, bounce_health_params):
"""Split tasks from old apps into 3 categories:
- live (not draining) and happy (according to get_happy_tasks)
@@ -300,21 +320,9 @@ def get_old_happy_unhappy_draining_tasks(other_apps, drain_method, service, nerv
old_app_draining_tasks = {}

for app in other_apps:
tasks_by_state = {
'happy': set(),
'unhappy': set(),
'draining': set(),
}

happy_tasks = bounce_lib.get_happy_tasks(app, service, nerve_ns, **bounce_health_params)
for task in app.tasks:
if drain_method.is_draining(task):
state = 'draining'
elif task in happy_tasks:
state = 'happy'
else:
state = 'unhappy'
tasks_by_state[state].add(task)

tasks_by_state = get_old_happy_unhappy_draining_tasks_for_app(
app, drain_method, service, nerve_ns, bounce_health_params)

old_app_live_happy_tasks[app.id] = tasks_by_state['happy']
old_app_live_unhappy_tasks[app.id] = tasks_by_state['unhappy']
@@ -400,17 +408,43 @@ def log_deploy_error(errormsg, level='event'):
bounce_health_params
)

if new_app_running:
protected_draining_tasks = set()
if new_app.instances < config['instances']:
client.scale_app(app_id=new_app.id, instances=config['instances'], force=True)
elif new_app.instances > config['instances']:
num_tasks_to_scale = new_app.instances - config['instances']
task_dict = get_old_happy_unhappy_draining_tasks_for_app(
new_app,
drain_method,
service,
nerve_ns,
bounce_health_params,
)
scaling_app_happy_tasks = list(task_dict['happy'])
scaling_app_unhappy_tasks = list(task_dict['unhappy'])
scaling_app_draining_tasks = list(task_dict['draining'])
tasks_to_move_draining = max(min(len(scaling_app_draining_tasks), num_tasks_to_scale), 0)
num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining
tasks_to_move_unhappy = max(min(len(scaling_app_unhappy_tasks), num_tasks_to_scale), 0)
num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy
tasks_to_move_happy = max(min(len(scaling_app_happy_tasks), num_tasks_to_scale), 0)
protected_draining_tasks.update(scaling_app_draining_tasks[:tasks_to_move_draining])  # update() mutates in place; union() returns a new set that would be discarded
old_app_draining_tasks.update({new_app.id: set(scaling_app_draining_tasks[:tasks_to_move_draining])})
old_app_live_unhappy_tasks.update({new_app.id: set(scaling_app_unhappy_tasks[:tasks_to_move_unhappy])})
old_app_live_happy_tasks.update({new_app.id: set(scaling_app_happy_tasks[:tasks_to_move_happy])})
happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]
# If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
# `paasta mark-for-deployment`), then we should undrain them.
for task in new_app.tasks:
if task not in protected_draining_tasks:
drain_method.stop_draining(task)

# Re-drain any already draining tasks on old apps
for tasks in old_app_draining_tasks.values():
for task in tasks:
drain_method.drain(task)

# If any tasks on the new app happen to be draining (e.g. someone reverts to an older version with
# `paasta mark-for-deployment`), then we should undrain them.
if new_app_running:
for task in new_app.tasks:
drain_method.stop_draining(task)

# log all uncaught exceptions and raise them again
try:
try:
@@ -544,7 +578,7 @@ def main():
sys.exit(0)
except NoConfigurationForServiceError:
error_msg = "Could not read marathon configuration file for %s in cluster %s" % \
(args.service_instance, load_system_paasta_config().get_cluster())
(args.service_instance, load_system_paasta_config().get_cluster())
log.error(error_msg)
send_event(service, instance, soa_dir, pysensu_yelp.Status.CRITICAL, error_msg)
sys.exit(1)
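The scale-down branch above removes excess tasks in a deliberate order: tasks that are already draining go first, then unhappy tasks, then happy ones, and only the survivors remain counted in happy_new_tasks. The repeated max(min(...), 0) lines are a simple waterfall over those three buckets; here is a worked sketch with illustrative numbers (pure arithmetic, no Marathon client):

# Suppose new_app.instances == 7 and config['instances'] == 3,
# so 4 tasks have to go.
draining = ['d1']
unhappy = ['u1', 'u2']
happy = ['h1', 'h2', 'h3', 'h4']
num_tasks_to_scale = 4

tasks_to_move_draining = max(min(len(draining), num_tasks_to_scale), 0)  # 1
num_tasks_to_scale -= tasks_to_move_draining                             # 3 left
tasks_to_move_unhappy = max(min(len(unhappy), num_tasks_to_scale), 0)    # 2
num_tasks_to_scale -= tasks_to_move_unhappy                              # 1 left
tasks_to_move_happy = max(min(len(happy), num_tasks_to_scale), 0)        # 1

assert (tasks_to_move_draining, tasks_to_move_unhappy, tasks_to_move_happy) == (1, 2, 1)

# The selected slices get merged into the old-app pools that do_bounce
# already knows how to drain and kill; the rest stay happy and running.
happy_new_tasks = happy[tasks_to_move_happy:]
assert happy_new_tasks == ['h2', 'h3', 'h4']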
