Skip to content

Commit

Permalink
Update to use threshold from /etc/paasta/monitoring.json
Browse files Browse the repository at this point in the history
  • Loading branch information
Domenic Del Nano committed Aug 14, 2018
1 parent 28c95a6 commit a601120
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 34 deletions.
14 changes: 6 additions & 8 deletions paasta_tools/autoscaling/autoscaling_cluster_lib.py
Expand Up @@ -97,9 +97,6 @@

AWS_SPOT_MODIFY_TIMEOUT = 30
MISSING_SLAVE_PANIC_THRESHOLD = .3
# Minimum age, in seconds, that an ASG or SFR must reach before it is
# checked for slave registration.
CHECK_REGISTERED_SLAVE_THRESHOLD = 3600

log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())
Expand Down Expand Up @@ -153,18 +150,19 @@ def __init__(
self.enable_maintenance_reservation = enable_maintenance_reservation

self.log.info('Initialized with utilization error %s' % self.utilization_error)
self.setup_metrics()
config = load_system_paasta_config()
self.slave_newness_threshold = config.get_monitoring_config().get('check_registered_slave_threshold')
self.setup_metrics(config)

@property
def log(self) -> logging.Logger:
resource_id = self.resource.get("id", "unknown")
name = '.'.join([__name__, self.__class__.__name__, resource_id])
return logging.getLogger(name)

def setup_metrics(self) -> None:
def setup_metrics(self, config: SystemPaastaConfig) -> None:
if not self.enable_metrics:
return None
config = load_system_paasta_config()
dims = {
'paasta_cluster': config.get_cluster(),
'region': self.resource.get('region', 'unknown'),
Expand Down Expand Up @@ -817,7 +815,7 @@ def is_new_autoscaling_resource(self) -> bool:
return True

now = datetime.now(timezone.utc)
return (now - self.sfr['CreateTime']).total_seconds() < CHECK_REGISTERED_SLAVE_THRESHOLD
return (now - self.sfr['CreateTime']).total_seconds() < self.slave_newness_threshold

def get_spot_fleet_instances(
self,
Expand Down Expand Up @@ -1000,7 +998,7 @@ def is_new_autoscaling_resource(self) -> bool:
return True

now = datetime.now(timezone.utc)
return (now - self.asg['CreatedTime']).total_seconds() < CHECK_REGISTERED_SLAVE_THRESHOLD
return (now - self.asg['CreatedTime']).total_seconds() < self.slave_newness_threshold

def get_asg(self, asg_name: str, region: Optional[str]=None) -> Optional[Dict[str, Any]]:
asg_client = boto3.client('autoscaling', region_name=region)
Expand Down
7 changes: 4 additions & 3 deletions paasta_tools/contrib/check_registered_slaves_aws.py
Expand Up @@ -4,7 +4,6 @@

from a_sync import block

from paasta_tools.autoscaling.autoscaling_cluster_lib import CHECK_REGISTERED_SLAVE_THRESHOLD
from paasta_tools.autoscaling.autoscaling_cluster_lib import get_scaler
from paasta_tools.mesos.exceptions import MasterNotAvailableException
from paasta_tools.mesos_tools import get_mesos_master
Expand All @@ -18,7 +17,8 @@ def check_registration(threshold_percentage):
print("Could not find Mesos Master: %s" % e.message)
sys.exit(1)

autoscaling_resources = load_system_paasta_config().get_cluster_autoscaling_resources()
config = load_system_paasta_config()
autoscaling_resources = config.get_cluster_autoscaling_resources()
for resource in autoscaling_resources.values():
print("Checking %s" % resource['id'])
try:
Expand All @@ -39,8 +39,9 @@ def check_registration(threshold_percentage):
continue
elif scaler.is_new_autoscaling_resource():
# See OPS-13784
threshold = config.get_monitoring_config().get('check_registered_slave_threshold')
print(
f"Autoscaling resource was created within last {CHECK_REGISTERED_SLAVE_THRESHOLD}"
f"Autoscaling resource was created within last {threshold}"
" seconds and would probably fail this check",
)
continue
Expand Down
63 changes: 40 additions & 23 deletions tests/autoscaling/test_autoscaling_cluster_lib.py
Expand Up @@ -31,6 +31,7 @@
from paasta_tools.autoscaling import autoscaling_cluster_lib
from paasta_tools.mesos_tools import SlaveTaskCount
from paasta_tools.metrics.metastatus_lib import ResourceInfo
from paasta_tools.utils import SystemPaastaConfig
from paasta_tools.utils import TimeoutError


Expand Down Expand Up @@ -498,22 +499,29 @@ def setUp(self):
self.autoscaler = self.create_autoscaler()

def create_autoscaler(self, utilization_error=0.3, resource=None, asg=None):
config = SystemPaastaConfig({'monitoring_config': {'check_registered_slave_threshold': 3600}}, '/etc/paasta')
with mock.patch(
'paasta_tools.autoscaling.autoscaling_cluster_lib.AsgAutoscaler.get_asg',
autospec=True,
return_value=asg or {},
):
autoscaler = autoscaling_cluster_lib.AsgAutoscaler(
resource=resource or self.mock_resource,
pool_settings=self.mock_pool_settings,
config_folder=self.mock_config_folder,
dry_run=False,
utilization_error=utilization_error,
max_increase=0.2,
max_decrease=0.1,
)
autoscaler.instances = []
return autoscaler
with mock.patch(
'paasta_tools.autoscaling.autoscaling_cluster_lib.load_system_paasta_config',
autospec=True,
return_value=config,
):
print(config.get_monitoring_config())
autoscaler = autoscaling_cluster_lib.AsgAutoscaler(
resource=resource or self.mock_resource,
pool_settings=self.mock_pool_settings,
config_folder=self.mock_config_folder,
dry_run=False,
utilization_error=utilization_error,
max_increase=0.2,
max_decrease=0.1,
)
autoscaler.instances = []
return autoscaler

def create_mock_resource(self, **kwargs):
mock_resource = self.mock_resource.copy()
Expand Down Expand Up @@ -542,7 +550,7 @@ def test_is_new_autoscaling_resource_when_asg_is_above_threshold(self):
asg = {
'Instances': [mock.Mock()],
'CreatedTime': datetime.now(timezone.utc) - timedelta(
seconds=autoscaling_cluster_lib.CHECK_REGISTERED_SLAVE_THRESHOLD + 60,
seconds=3600 + 60,
),
}
autoscaler = self.create_autoscaler(asg=asg)
Expand Down Expand Up @@ -764,16 +772,25 @@ def create_autoscaler(self, utilization_error=0.3, resource=None, sfr=None):
) as mock_get_spot_fleet_instances:
mock_get_sfr.return_value = sfr or {}
mock_get_spot_fleet_instances.return_value = []

return autoscaling_cluster_lib.SpotAutoscaler(
resource=resource or self.mock_resource,
pool_settings=self.mock_pool_settings,
config_folder=self.mock_config_folder,
dry_run=False,
utilization_error=utilization_error,
max_increase=0.2,
max_decrease=0.1,
)
with mock.patch(
'paasta_tools.autoscaling.autoscaling_cluster_lib.load_system_paasta_config',
autospec=True,
return_value=SystemPaastaConfig(
{
'monitoring_config': {'check_registered_slave_threshold': 3600},
}, '/etc/paasta',
),
):

return autoscaling_cluster_lib.SpotAutoscaler(
resource=resource or self.mock_resource,
pool_settings=self.mock_pool_settings,
config_folder=self.mock_config_folder,
dry_run=False,
utilization_error=utilization_error,
max_increase=0.2,
max_decrease=0.1,
)

def create_mock_resource(self, **kwargs):
mock_resource = self.mock_resource.copy()
Expand Down Expand Up @@ -881,7 +898,7 @@ def test_is_new_autoscaling_resource_when_sfr_is_above_threshold(self):
'SpotFleetRequestState': 'active',
'Instances': [mock.Mock()],
'CreateTime': datetime.now(timezone.utc) - timedelta(
seconds=autoscaling_cluster_lib.CHECK_REGISTERED_SLAVE_THRESHOLD + 60,
seconds=3600 + 60,
),
}
autoscaler = self.create_autoscaler(sfr=sfr)
Expand Down

0 comments on commit a601120

Please sign in to comment.