From e3f12bc82463a36132bad596fd2d7629e53576cc Mon Sep 17 00:00:00 2001 From: Jeremy Gibson Date: Tue, 4 Oct 2022 17:27:59 -0400 Subject: [PATCH 1/4] WIP: restore NR infra to the servers --- docs/maintenance.rst | 2 +- fabulaws/library/wsgiautoscale/api.py | 52 +- .../templates/newrelic_infra.yml | 927 ++++++++++++++++++ 3 files changed, 978 insertions(+), 3 deletions(-) create mode 100644 fabulaws/library/wsgiautoscale/templates/newrelic_infra.yml diff --git a/docs/maintenance.rst b/docs/maintenance.rst index 13a7b70..1c86877 100644 --- a/docs/maintenance.rst +++ b/docs/maintenance.rst @@ -87,7 +87,7 @@ On production the process is similarly, but the secrets should be updated with a full deployment, whenever possible, to avoid any unnecessary downtime:: fab production update_server_passwords - fab production upload_newrelic_sysmon_conf + fab production install_newrelic_infrastructure_agent fab production upload_newrelic_conf fab deploy_serial:myproject,production diff --git a/fabulaws/library/wsgiautoscale/api.py b/fabulaws/library/wsgiautoscale/api.py index 27837cb..418d9e0 100644 --- a/fabulaws/library/wsgiautoscale/api.py +++ b/fabulaws/library/wsgiautoscale/api.py @@ -503,7 +503,7 @@ def _new( avail_zone=None, count=1, terminate_on_failure=False, - **kwargs + **kwargs, ): """create new server on AWS using the given deployment, environment, and role""" if deployment not in env.deployments: @@ -554,7 +554,7 @@ def _new( volume_type=vol_type, deploy_user=env.deploy_user, security_groups=sec_grps, - **extra_args + **extra_args, ) try: server.setup() @@ -576,6 +576,7 @@ def _new( env.roledefs[role] = [server.hostname for server in servers] env.servers[role] = servers executel("update_server_passwords", hosts=env.roledefs[role]) + executel("install_newrelic_infrastructure_agent", hosts=env.roledefs[role]) executel("install_munin", hosts=env.roledefs[role]) if env.gelf_log_host: executel("install_logstash", hosts=env.roledefs[role]) @@ -1535,6 +1536,7 @@ def 
promote_replica(index=0, override_servers=None): if override_servers is None: override_servers = {} _setup_env(override_servers=override_servers) + executel("upload_newrelic_infrastructure_agent", roles=["db-master"]) if env.gelf_log_host: executel("install_logstash", roles=["db-primary"]) if env.syslog_server: @@ -1633,6 +1635,7 @@ def mount_encrypted(drive_letter="f"): ) if exists(current_server.default_swap_file): sudo("swapon %s" % current_server.default_swap_file) + upload_newrelic_infrastructure_conf() if "db-primary" in _current_roles() or "db-replica" in _current_roles(): sudo("service postgresql start") if "cache" in _current_roles(): @@ -1676,6 +1679,50 @@ def mount_encrypted(drive_letter="f"): sudo("service awslogs restart") +@task +@parallel +def install_newrelic_infrastructure_agent(): + require("environment", provided_by=env.environments) + release = run("cat /etc/lsb-release").strip() + sudo( + "curl -s https://download.newrelic.com/infrastructure_agent/gpg/newrelic-infra.gpg | apt-key add -", + shell=True, + ) + sudo( + f'printf "deb https://download.newrelic.com/infrastructure_agent/linux/apt/ {release} main" | tee -a /etc/apt/sources.list.d/newrelic-infra.list', + shell=True, + ) + with settings(warn_only=True): + sudo("apt-get -qq update || apt-get -qq update") + sudo("apt-get install newrelic-infra -y") + upload_newrelic_infrastructure_conf() + + +@task +@parallel +def upload_newrelic_infrastructure_conf(): + require("environment", provided_by=env.environments) + _load_passwords(["newrelic_license_key"]) + context = dict(env) + context["current_role"] = _current_roles()[0] + hostname = "_".join([_instance_name(_current_roles()[0]), run("hostname")]) + context["hostname"] = hostname + # main, official monitoring agent + template = "newrelic_infra.yml" + destination = f"/etc/{template}" + upload_template( + template, + destination, + context=context, + use_sudo=True, + use_jinja=True, + template_dir=env.templates_dir, + ) + # leave the hostname 
the same for the system monitoring so the servers + # can be linked up properly with the apps by New Relic + sudo("systemctl restart newrelic-infra") + + # TESTING and USAGE EXAMPLES @@ -1787,6 +1834,7 @@ def update_newrelic_keys(deployment_tag, environment): if answer != "y": abort("Aborted.") executel("upload_newrelic_conf") + executel("upload_newrelic_infrastructure_conf") executel("supervisor", "restart", "celery", roles=["worker"]) executel("begin_upgrade") executel("supervisor", "restart", "web", roles=["web"]) diff --git a/fabulaws/library/wsgiautoscale/templates/newrelic_infra.yml b/fabulaws/library/wsgiautoscale/templates/newrelic_infra.yml new file mode 100644 index 0000000..463da3a --- /dev/null +++ b/fabulaws/library/wsgiautoscale/templates/newrelic_infra.yml @@ -0,0 +1,927 @@ +# +# New Relic infrastructure agent configuration file +# +# This file overrides the agent defaults. Lines that begin with # are comments, +# which are ignored by the infrastructure agent. Uncomment the desired options +# to enable them. Restart the agent or the server after changing settings. +# +# To use this file, change its name to newrelic-infra.yml and move it to: +# - Linux: /etc/newrelic-infra.yml +# - Windows: C:\Program Files\New Relic\newrelic-infra\newrelic-infra.yml +# +# The infrastructure agent only requires the license key to be +# configured; the rest of the default values represent best practices. +# +# If options have command line equivalents, New Relic uses the command line +# options to override values set in this file. +# +# Environment variables (documented here as "Env var") always override the +# values set in the configuration file. We recommend setting any sensitive +# information through environment variables. 
+# +# For more information on each setting, see https://docs.newrelic.com/docs/infrastructure/install-configure-manage-infrastructure/configuration/infrastructure-configuration-settings +# + +# +# Option : license_key +# Value : 40-character hexadecimal string provided by New Relic. This is the +# only required value in your config file. +# Default: none +# +license_key: {{newrelic_license_key}} + +# +# Option : fedramp +# Value : true in case you want to use Fedramp endpoints. +# Default: false +# +#fedramp: false + + +# +# Option : payload_compression_level +# Env var : NRIA_PAYLOAD_COMPRESSION_LEVEL +# Value : Sets the compression level of the agent requests payload. +# Range : 0-9 +# Default : 6 +# +#payload_compression_level: 6 +# + +# +# Option : display_name +# Env var : NRIA_DISPLAY_NAME +# Value : Replaces the automatically generated hostname for +# reporting. +# Default : Automatically generated hostname +# Risk : Changing this value could create a different host entity, causing +# some alarms to trigger, since the previous host would appear +# disconnected. +#display_name: new_name +# + +# +# Option : passthrough_environment +# Env var : NRIA_PASSTHROUGH_ENVIRONMENT +# Value : A list of environment variables that will be passed to all +# integrations. If an integration already has an existing +# configuration option with the same name, the environment variable +# takes precedence. +# Default : Empty +# +#passthrough_environment: +# - HOST +# - PORT + +# +# Option : custom_attributes +# Env var : NRIA_CUSTOM_ATTRIBUTES +# Value : Use optional key-value pairs to build filter sets, group your +# results, annotate your data, etc. 
+# +#custom_attributes: + environment: {{ environment.title() }} + role: {{ current_role.title().replace('-','') }} + deployment: {{ deployment_tag.title().replace('-', '') }} +# service: login service +# team: alpha-team +# + +# +# Option : enable_process_metrics +# Env var : NRIA_ENABLE_PROCESS_METRICS +# Value : Enables the sending of process metrics to New Relic. If you +# want to send metric data about all the operating system's processes +# to New Relic, set enable_process_metrics to true. +# Tip : Sending all process data could increase the volume of data sent +# to New Relic. To fine-tune which processes you want to monitor, +# configure include_matching_metrics. +# +#enable_process_metrics: false +# + +# +# Option : include_matching_metrics +# Env var : NRIA_INCLUDE_MATCHING_METRICS +# Value : Use lists of metric attributes and values to only send to New Relic +# the metric data of matching entities. +# Note : Currently limited to process metrics (process.name and .executable). +# Tip : You can combine different attributes. +# +#include_matching_metrics: +# metric.attribute: +# - regex "pattern" +# - "string" +# - "string-with-wildcard*" +# + +# +# Option : log +# Env var : NRIA_LOG_FILE, NRIA_LOG_LEVEL, NRIA_LOG_FORMAT, NRIA_LOG_FORWARD, NRIA_LOG_STDOUT +# Value : Map configuration for the agent logging. The key-values can be any of the following: +# "file" Full path and file name of the log file. +# "format" Defines the log output format. Available values are text and json. One line per log entry. +# "level" Defines the log level (info, smart, debug, trace). +# "forward" Set to true to send logs to New Relic platform. +# "stdout" Set to false to disable logs in the standard output. +# "smart_level_entry_limit" Defines the number of entries that will be cached before being flushed. If smart level is enabled. +# "exclude_filters" A map to define the messages with a specific log field that must be excluded from the logs. 
+# "include_filters" A map to define the messages with a specific log field that must be included in the logs. +# If exclude_filters is set to wildcard. + +# Default : file: +# - Linux: /var/log/newrelic-infra/newrelic-infra.log +# - Windows: C:\Program Files\New Relic\newrelic-infra\newrelic-infra.log +# level: info +# format: text +# forward: false +# stdout: true +# smart_level_entry_limit: 1000 +# Risk : Providing a log file path that does not yet exist causes the agent +# to fail on startup. +# Tip : Use json format when forwarding the agent logs to New Relic logs for +# troubleshooting (verbose:3). +# Tip : Run the agent in debug mode only for troubleshooting. To disable +# debug logging, set the level to info and restart the agent. +# Tip : The following configuration will only log the entries with the field integration_name=nri-flex or integration_name=nri-powerdns +#log: +# file: /tmp/agent.log +# format: json +# level: smart +# forward: false +# stdout: false +# smart_level_entry_limit: 500 +# exclude_filters: +# "*": +# include_filters: +# integration_name: +# - nri-flex +# - nri-powerdns +# +# rotate: +# max_size_mb: 1000 +# max_files: 5 +# compression_enabled: true +# file_pattern: rotated.YYYY-MM-DD_hh-mm-ss.log + +# +# Option : network_interface_filters +# Env var : NRIA_NETWORK_INTERFACE_FILTERS +# Value : List of network interfaces to be filtered out. +# Default : Network interfaces that start with dummy, lo, vmnet, sit, tun, tap, +# or veth, or that contain tun or tap. +# Tip : Use the network interface filter configuration to hide network +# interfaces from the infrastructure agent. This helps reduce +# resource usage and noise in your data. +#network_interface_filters: +# prefix: +# - dummy +# - lo +# index-1: +# - tun +# + +# +# Option : disable_all_plugins +# Env var : NRIA_DISABLE_ALL_PLUGINS +# Value : To disable all the inventory, set to true. 
+# Default : false +# +#disable_all_plugins: false +# + +# +# Option : cloud_security_group_refresh_sec +# Env var : NRIA_CLOUD_SECURITY_GROUP_REFRESH_SEC +# Value : Sampling interval for the CloudSecurityGroups plugin, in seconds. Set +# to -1 to disable it. Minimum value is 30. This plugin is activated +# only if the agent is running in an AWS instance. +# Default : 60 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +#cloud_security_group_refresh_sec: 60 +# + +# +# Option : daemontools_interval_sec +# Env var : NRIA_DAEMONTOOLS_INTERVAL_SEC +# Value : Sampling interval for the daemontools plugin, in seconds. Set to -1 +# to disable it. Minimum value is 10. +# Default : 15 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#daemontools_interval_sec: 15 +# + +# +# Option : dpkg_interval_sec +# Env var : NRIA_DPKG_INTERVAL_SEC +# Value : Sampling interval for the dpkg plugin, in seconds. Set to -1 to +# disable it. Minimum value is 30. Only activated on Debian based +# distros in either root or privileged mode. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#dpkg_interval_sec: 30 +# + +# +# Option : facter_interval_sec +# Env var : NRIA_FACTER_INTERVAL_SEC +# Value : Sampling interval for the facter plugin, in seconds. Set to -1 +# to disable it. Minimum value is 30. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#facter_interval_sec: 30 +# + +# +# Option : kernel_modules_refresh_sec +# Env var : NRIA_KERNEL_MODULES_REFRESH_SEC +# Value : Sampling interval for the KernelModules plugin, in seconds. +# Set to -1 to disable it. Minimum value is 10. This plugin can be +# activated only in root or privileged mode. 
+# Default : 10 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#kernel_modules_refresh_sec: 10 +# + +# +# Option : network_interface_interval_sec +# Env var : NRIA_NETWORK_INTERFACE_INTERVAL_SEC +# Value : Sampling interval for the NetworkInterface plugin, in seconds. Set +# to -1 to disable it. Minimum value is 10. +# Default : 60 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#network_interface_interval_sec: 60 +# + +# +# Option : rpm_interval_sec +# Env var : NRIA_RPM_INTERVAL_SEC +# Value : Sampling interval for the Rpm plugin, in seconds. Set to -1 +# to disable it. Minimum value is 30. Can be activated only for +# RedHat, RedHat AWS, and SuSE in root or privileged modes. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#rpm_interval_sec: 30 +# + +# +# Option : selinux_interval_sec +# Env var : NRIA_SELINUX_INTERVAL_SEC +# Value : Sampling interval for the SELinux plugin, in seconds. Set to -1 to +# disable it. Minimum value is 30. Can be activated only in root mode. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#selinux_interval_sec: 30 +# + +# +# Option : sshd_config_refresh_sec +# Env var : NRIA_SSHD_CONFIG_REFRESH_SEC +# Value : Sampling interval for the sshd plugin, in seconds. Set to -1 +# to disable it. Minimum value is 10. +# Default : 15 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#sshd_config_refresh_sec: 15 +# + +# +# Option : supervisor_interval_sec +# Env var : NRIA_SUPERVISOR_INTERVAL_SEC +# Value : Sampling interval for the Supervisor plugin, in seconds. Set to -1 +# to disable it. Minimum value is 10. 
+# Default : 15 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#supervisor_interval_sec: 15 +# + +# +# Option : sysctl_interval_sec +# Env var : NRIA_SYSCTL_INTERVAL_SEC +# Value : Sampling interval for the sysctl plugin, in seconds. Set to -1 +# to disable it. Minimum value is 30. Can only be activated in root +# or privileged modes. +# Default : 60 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#sysctl_interval_sec: 60 +# + +# +# Option : systemd_interval_sec +# Env var : NRIA_SYSTEMD_INTERVAL_SEC +# Value : Sampling interval for the systemd plugin, in seconds. Set to -1 +# to disable it. Minimum value is 10. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#systemd_interval_sec: 30 +# + +# +# Option : sysvinit_interval_sec +# Env var : NRIA_SYSVINIT_INTERVAL_SEC +# Value : Sampling interval for the SysV plugin, in seconds. Set to -1 +# to disable it. Minimum value is 10. Can only be activated in root +# or privileged modes. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#sysvinit_interval_sec: 30 +# + +# +# Option : upstart_interval_sec +# Env var : NRIA_UPSTART_INTERVAL_SEC +# Value : Sampling interval for the upstart plugin, in seconds. Set to -1 +# to disable it. Minimum value is 10. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#upstart_interval_sec: 30 +# + +# +# Option : users_refresh_sec +# Env var : NRIA_USERS_REFRESH_SEC +# Value : Sampling interval for the Users plugin, in seconds. Set to -1 +# to disable it. Minimum value is 10. 
+# Default : 15 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#users_refresh_sec: 15 +# + +# +# Option : windows_services_refresh_sec +# Env var : NRIA_WINDOWS_SERVICES_REFRESH_SEC +# Value : Sampling interval for the Windows services plugin, in seconds. Set +# to -1 to disable it. Minimum value is 10. +# Default : 30 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#windows_services_refresh_sec: 30 +# + +# +# Option : windows_updates_refresh_sec +# Env var : NRIA_WINDOWS_UPDATES_REFRESH_SEC +# Value : Sampling interval for the Windows Updates plugin, in seconds. Set +# to -1 to disable it. Minimum value is 10. +# Default : 60 +# Tip : If not explicitly set in the config file, this option can be +# disabled by setting DisableAllPlugins to true. +# +#windows_updates_refresh_sec: 60 +# + +# +# Option : metrics_network_sample_rate +# Env var : NRIA_METRICS_NETWORK_SAMPLE_RATE +# Value : Sampling interval of network samples, in seconds. Set to -1 +# to disable it. Minimum value is 10. +# Default : 10 +# +#metrics_network_sample_rate: 10 +# + +# +# Option : metrics_process_sample_rate +# Env var : NRIA_METRICS_PROCESS_SAMPLE_RATE +# Value : Sampling interval of process samples, in seconds. Set to -1 +# to disable it. Minimum value is 20. +# Default : 20 +# +#metrics_process_sample_rate: 20 +# + +# +# Option : metrics_storage_sample_rate +# Env var : NRIA_METRICS_STORAGE_SAMPLE_RATE +# Value : Sampling interval of storage samples, in seconds. Set to -1 +# to disable it. Minimum value is 5. +# Default : 20 +# +#metrics_storage_sample_rate: 20 +# + +# +# Option : metrics_system_sample_rate +# Env var : NRIA_METRICS_SYSTEM_SAMPLE_RATE +# Value : Sampling interval of system samples, in seconds. Set to -1 +# to disable it. Minimum value is 5. 
+# Default : 5 +# +#metrics_system_sample_rate: 5 +# + +# +# Option : selinux_enable_semodule +# Env var : NRIA_SELINUX_ENABLE_SEMODULE +# Value : Enable to retrieve the versions of policy modules installed using +# semodule. If disabled, the plugin only retrieves the status using +# sestatus. +# Default : true +# +#selinux_enable_semodule: true +# + +# +# Option : http_server_enabled +# Env var : NRIA_HTTP_SERVER_ENABLED +# Value : Enable to receive data from the New Relic StatsD backend +# (https://github.com/newrelic/statsd-infra-backend). The agent opens +# an HTTP port (by default, 8001) to receive the data. +# Default : false +# +#http_server_enabled: true +# + +# +# Option : http_server_host +# Env var : NRIA_HTTP_SERVER_HOST +# Value : The HTTP server used by the StatsD integration. +# Default : localhost +# +#http_server_host: localhost +# + +# +# Option : http_server_port +# Env var : NRIA_HTTP_SERVER_PORT +# Value : HTTP port of http_server_host used by the StatsD integration. +# Default : 8001 +# +#http_server_port: 8001 +# + +# +# Option : ca_bundle_dir +# Env var : NRIA_CA_BUNDLE_DIR +# Value : If the proxy config option references a proxy with self-signed +# certificates, this option lets you specify the certificate +# directory. The certificates in the directory must have the .pem +# extension. +# +#ca_bundle_dir: /etc/my-certificates +# + +# +# Option : ca_bundle_file +# Env var : NRIA_CA_BUNDLE_FILE +# Value : If the proxy config option references a proxy with self-signed +# certificates, this option lets you specify the certificate +# filename. +# +#ca_bundle_file: /etc/my-certificates/secureproxy.pem +# + +# +# Option : ignore_system_proxy +# Env var : NRIA_IGNORE_SYSTEM_PROXY +# Value : When ignore_system_proxy is set to true, the HTTPS_PROXY and +# HTTP_PROXY environment variables are ignored. +# Default : false +# Tip : Use this option when the agent connects directly to the New Relic +# metrics collector, bypassing the system proxy. 
+# +#ignore_system_proxy: false +# + +# +# Option : proxy +# Env var : NRIA_PROXY +# Value : The proxy URL. +# Default : none +# Tip : Useful if your firewall rules require the agent to use a +# proxy (HTTP or HTTPS) to communicate with New Relic. +# +#proxy: https://user:password@hostname:port +# + +# +# Option : proxy_validate_certificates +# Env var : NRIA_PROXY_VALIDATE_CERTIFICATES +# Value : Set to True to validate the proxy certificates (HTTPS connections). +# Certificates must have been issued by a valid Certificate Authority +# or defined in the ca_bundle_file or ca_bundle_dir properties. +# Default : false +# +#proxy_validate_certificates: false +# + +# +# Option : max_procs +# Env var : NRIA_MAX_PROCS +# Value : The number of logical processors available to the agent. Default is +# 1. When set to -1 the agent reads the GOMAXPROCS environment +# variable and defaults to the total number of cores +# available in the host if the environment variable is not set. +# Default : 1 +# Tip : Increasing this value can help to distribute the load between different +# cores. +# +#max_procs: 1 +# + + +# +# Option : agent_dir +# Env var : NRIA_AGENT_DIR +# Value : Directory where the agent stores files such as cache, inventory, +# integrations, etc. +# Default : Linux: /var/db/newrelic-infra +# : Windows: C:\\Program Files\NewRelic\newrelic-infra\ +# +#agent_dir: +# + +# +# Option : plugin_dir +# Env var : NRIA_PLUGIN_DIR +# Value : Directory containing integrations’ configuration files. Each +# integration has its own configuration file, named +# <integration_name>-config.yml by default, and placed in a +# predefined location. 
+# Default : Linux: /etc/newrelic-infra/integrations.d/ +# : Windows: C:\Program Files\New Relic\newrelic-infra\integrations.d +# +#plugin_dir: +# + +# +# Option : entityname_integrations_v2_update +# Env var : NRIA_ENTITYNAME_INTEGRATIONS_V2_UPDATE +# Value : Set to True to enable automatic replacement of the +# loopback-addresses in entity names when using v2 of the integration +# protocol. +# Default : false +# Risk : Enabling this flag causes all integrations run by the agent using +# the v2 protocol to have their names replaced when carrying a local +# address. If this option is not set, services reporting from +# different machines may collide. +# +#entityname_integrations_v2_update: false +# + +# +# Option : pid_file +# Env var : NRIA_PID_FILE +# Value : Location of the pid file of the agent process on Linux. Used at +# startup to ensure that no other instances of the agent are running. +# Default : /var/run/newrelic-infra/newrelic-infra.pid +# Risk : If the agent detects that the pid file already exists at startup, +# the following error will be raised: "Existing pid-file, can't +# guarantee no other newrelic-infra agent is running". +# +#pid_file: +# + +# +# Option : app_data_dir +# Env var : NRIA_APP_DATA_DIR +# Value : Path to store cache data other than the program files directory. +# This setting is for Windows only. +# Default : Windows: %PROGRAMDATA%\New Relic\newrelic-infra +# : Linux: Not applicable +# +#app_data_dir: +# + +# +# Option : cloud_max_retry_count +# Env var : NRIA_CLOUD_MAX_RETRY_COUNT +# Value : The number of retries if cloud detection fails. If cloud +# detection fails during agent initialization, the agent retries +# after waiting for a number of seconds as defined in +# cloud_retry_backoff_sec. +# Default : 10 +# Info : When the agent runs in a cloud instance, it tries to detect the +# source for fetching metadata, such as: instanceID, instanceType, +# cloudSource, hostType. 
+# +#cloud_max_retry_count: 10 +# + +# +# Option : cloud_retry_backoff_sec +# Env var : NRIA_CLOUD_RETRY_BACKOFF_SEC +# Value : The delay, in seconds, between cloud detection retries if +# cloud detection failed. If cloud detection fails during +# initialization the agent retries as many times as defined in +# cloud_max_retry_count. +# Default : 60 +# +#cloud_retry_backoff_sec: 60 +# + +# +# Option : cloud_metadata_expiry_sec +# Env var : NRIA_CLOUD_METADATA_EXPIRY_SEC +# Value : The time interval for metadata expiration and re-fetching. +# Default : 300 +# Info : When the agent runs in a cloud instance, it tries to detect the +# source for fetching metadata, such as: instanceID, instanceType, +# cloudSource, hostType. +# +#cloud_metadata_expiry_sec: 300 +# + +# +# Option : disable_cloud_metadata +# Env var : NRIA_DISABLE_CLOUD_METADATA +# Value : Set to True to disable cloud metadata collection. +# Default : false +# Risk : Disabling cloud metadata could cause APM linkage to break if the +# hosts are allocated on a cloud provider. +# Info : When the agent runs in a cloud instance, it tries to detect the +# source for fetching metadata, such as: instanceID, instanceType, +# cloudSource, hostType. +# +#disable_cloud_metadata: false +# + +# +# Option : disable_cloud_instance_id +# Env var : NRIA_DISABLE_CLOUD_INSTANCE_ID +# Value : Set to True to disable cloud metadata collection for the hostalias +# plugin. +# Default : false +# +#disable_cloud_instance_id: false +# + +# +# Option : startup_connection_retries +# Env var : NRIA_STARTUP_CONNECTION_RETRIES +# Value : Number of times the agent retries the request for checking +# New Relic’s platform availability on startup before throwing an +# error. When set to a negative value, the agent keeps checking until +# the check succeeds. 
+# Default : 6 +# +#startup_connection_retries: 6 +# + +# +# Option : startup_connection_retry_time +# Env var : NRIA_STARTUP_CONNECTION_RETRY_TIME +# Value : Time to wait before the agent retries the request for checking New +# Relic’s platform availability at startup, in seconds. +# Default : 5s +# +#startup_connection_retry_time: 5s +# + +# +# Option : startup_connection_timeout +# Env var : NRIA_STARTUP_CONNECTION_TIMEOUT +# Value : Time to wait, in seconds, before expiring the check for New Relic’s +# platform availability made at startup. +# Default : 10s +# +#startup_connection_timeout: 10s +# + +# +# Option : container_cache_metadata_limit +# Env var : NRIA_CONTAINER_CACHE_METADATA_LIMIT +# Value : Time before cached containers metadata expires and must be fetched +# again, in seconds. +# Default : 60 +# +#container_cache_metadata_limit: 60 +# + +# +# Option : docker_api_version +# Env var : NRIA_DOCKER_API_VERSION +# Value : The Docker API version to use for the Docker client. +# Default : 1.24 +# +#docker_api_version: 1.24 +# + +# +# Option : custom_supported_file_systems +# Env var : NRIA_CUSTOM_SUPPORTED_FILESYSTEMS +# Value : List of filesystem types supported by the agent. This value should +# be a subset of the default list. Items not in the default list are +# discarded. +# Default : Linux: ["xfs", "btrfs", "ext", "ext2", "ext3", "ext4", "hfs", +# "vxfs"] +# : Windows: ["NTFS", "ReFS"] +# +#custom_supported_file_systems: +# - xfs +# - btrfs +# + +# +# Option : file_devices_ignored +# Env var : NRIA_FILE_DEVICES_IGNORED +# Value : List of storage devices to be ignored by the agent when gathering +# storage samples. +# Default : [] +# +#file_devices_ignored: +# - sda1 +# - sda2 +# + +# +# Option : ignored_inventory +# Env var : NRIA_IGNORED_INVENTORY +# Value : List of inventory paths to be ignored by the agent. 
+# Default : [] +# +#ignored_inventory: +# - files/config/stuff.bar +# - files/config/stuff.foo +# + +# +# Option : ignore_reclaimable +# Env var : NRIA_IGNORE_RECLAIMABLE +# Value : When True, the calculation of the host virtual memory considers +# SReclaimable as available memory; otherwise SReclaimable is +# considered part of the used memory. +# Default : false +# +#ignore_reclaimable: false +# + +# +# Option : supervisor_rpc_sock +# Env var : NRIA_SUPERVISOR_RPC_SOCK +# Value : Location of the supervisor (http://supervisord.org/) socket. +# Default : /var/run/supervisor.sock +# +#supervisor_rpc_sock: +# + +# +# Option : proxy_config_plugin +# Env var : NRIA_PROXY_CONFIG_PLUGIN +# Value : Sends the following proxy configuration information as inventory: +# HTTPS_PROXY, HTTP_PROXY, proxy, ca_bundle_dir, ca_bundle_file, +# ignore_system_proxy, proxy_validate_certificates. +# Default : true +# +#proxy_config_plugin: true +# + +# +# Option : facter_home_dir +# Env var : NRIA_FACTER_HOME_DIR +# Value : Sets the HOME environment variable for Facter. If unset, it +# defaults to the current user's home directory. +# Default : +# +#facter_home_dir: +# + +# +# Option : strip_command_line +# Env var : NRIA_STRIP_COMMAND_LINE +# Value : When true, the agent removes the command arguments from the +# commandLine attribute of ProcessSample. +# Default : true +# Risk : Disabling this option causes all the command line arguments passed +# to commands to be sent to, and stored by, New Relic. This might +# include usernames, passwords, API keys, etc. +# Tip : Use this as a security measure to prevent leaking sensitive +# information. +# +#strip_command_line: true +# + +# +# Option : dns_hostname_resolution +# Env var : NRIA_DNS_HOSTNAME_RESOLUTION +# Value : When true, the full hostname is resolved by performing a reverse +# lookup of the host's address; otherwise, it’s retrieved using the +# hostname command on Linux, and from the TCP/IP Registry parameters +# on Windows. 
+# Default : true +# Risk : Changing this value could create a different host entity, causing +# some alarms to trigger, since the previous host would appear +# disconnected. +# +#dns_hostname_resolution: true +# + +# +# Option : override_hostname +# Env var : NRIA_OVERRIDE_HOSTNAME +# Value : Value to be reported for the full hostname; otherwise, the agent +# performs a standard lookup. +# Default : +# Risk : Changing this value could create a different host entity, causing +# some alarms to trigger, since the previous host would appear +# disconnected. +# +#override_hostname: custom.hostname.org +# + +# +# Option : override_hostname_short +# Env var : NRIA_OVERRIDE_HOSTNAME_SHORT +# Value : Value to be reported for the hostname; otherwise, the agent +# performs a standard lookup. +# Default : +# Risk : Changing this value could create a different host entity, causing +# some alarms to trigger, since the previous host would appear +# disconnected. +# +#override_hostname_short: custom-hostname +# + +# +# Option : remove_entities_period +# Env var : NRIA_REMOVE_ENTITIES_PERIOD +# Value : Frequency for engaging the process of deleting entities that +# haven't reported information during the defined time interval. +# Valid time units are: "s" (seconds), "m" (minutes), "h" (hours). +# Default : 48h +# +#remove_entities_period: 48h +# + +# +# Option : enable_win_update_plugin +# Env var : NRIA_ENABLE_WIN_UPDATE_PLUGIN +# Value : Enables the Windows Updates plugin, which retrieves the lists of +# hotfixes installed on the host. +# Default : false +# +#enable_win_update_plugin: false +# + +# +# Option : legacy_storage_sampler +# Env var : NRIA_LEGACY_STORAGE_SAMPLER +# Value : Set to True to force the agent to use windows WMI, the legacy +# method for collecting metrics on Windows (such as StorageSampler) +# instead of the PDH library. 
+# Default : Depending on the Windows version: +# : false for amd64 +# : true for 386 +# +#legacy_storage_sampler: false +# + +# +# Option : win_process_priority_class +# Env var : NRIA_WIN_PROCESS_PRIORITY_CLASS +# Value : Priority of the newrelic-infra.exe process. Possible values are: +# Normal, Idle, High, RealTime, BelowNormal, AboveNormal. +# Default : +# +#win_process_priority_class: Normal +# + +# +# Option : win_removable_drives +# Env var : NRIA_WIN_REMOVABLE_DRIVES +# Value : Enables the agent to report drives A: and B: when mapped to +# removable drives. +# Default : true +# +#win_removable_drives: true +# + +# +# Option : disable_zero_mem_process_filter +# Env var : NRIA_DISABLE_ZERO_MEM_PROCESS_FILTER +# Value : The ZeroRSSFilter excludes processes that are not using memory +# from being sampled. Set to true to disable the filter, so that +# the agent includes those processes in the ProcessSample. +# Default : false +# +#disable_zero_mem_process_filter: false +# From 89428880a52311b0d9f621a327ae91dcedd877c1 Mon Sep 17 00:00:00 2001 From: Jeremy Gibson Date: Thu, 6 Oct 2022 11:56:47 -0400 Subject: [PATCH 2/4] refs #sc-16888 Adds tasks to install new-relic infrastructure agent --- fabulaws/__init__.py | 2 +- fabulaws/library/wsgiautoscale/api.py | 38 ++++++++++++++----- ...{newrelic_infra.yml => newrelic-infra.yml} | 6 +-- 3 files changed, 32 insertions(+), 14 deletions(-) rename fabulaws/library/wsgiautoscale/templates/{newrelic_infra.yml => newrelic-infra.yml} (99%) diff --git a/fabulaws/__init__.py b/fabulaws/__init__.py index bd538f7..66c607f 100644 --- a/fabulaws/__init__.py +++ b/fabulaws/__init__.py @@ -1 +1 @@ -__version__ = "1.0.12" +__version__ = "1.0.13" diff --git a/fabulaws/library/wsgiautoscale/api.py b/fabulaws/library/wsgiautoscale/api.py index 418d9e0..b0914d5 100644 --- a/fabulaws/library/wsgiautoscale/api.py +++ b/fabulaws/library/wsgiautoscale/api.py @@ -8,6 +8,7 @@ import sys import time from getpass import getpass +from io import BytesIO from 
runpy import run_path from tempfile import mkstemp @@ -16,6 +17,7 @@ from boto.ec2.autoscale import AutoScaleConnection, LaunchConfiguration, Tag from boto.ec2.elb import ELBConnection from boto.exception import BotoServerError +from fabric import operations from fabric.api import ( abort, cd, @@ -55,6 +57,20 @@ logger.setLevel(logging.INFO) +def render_template(filename, context, template_dir): + # Pulled from Fabric's files.upload_template method. + from fabric.utils import apply_lcwd + + template_dir = apply_lcwd(template_dir, env) + from jinja2 import Environment, FileSystemLoader + + jenv = Environment( + loader=FileSystemLoader(template_dir), keep_trailing_newline=False + ) + text = jenv.get_template(filename).render(**context or {}) + return text + + def _reset_hosts(): """Reset the roledefs and servers environment variables to their default values.""" # roledefs must be defined, even with empty lists, for Fabric to run @@ -1683,13 +1699,13 @@ def mount_encrypted(drive_letter="f"): @parallel def install_newrelic_infrastructure_agent(): require("environment", provided_by=env.environments) - release = run("cat /etc/lsb-release").strip() + release = run("lsb_release -c -s").strip() sudo( "curl -s https://download.newrelic.com/infrastructure_agent/gpg/newrelic-infra.gpg | apt-key add -", shell=True, ) sudo( - f'printf "deb https://download.newrelic.com/infrastructure_agent/linux/apt/ {release} main" | tee -a /etc/apt/sources.list.d/newrelic-infra.list', + f'printf "deb https://download.newrelic.com/infrastructure_agent/linux/apt/ {release} main" | tee /etc/apt/sources.list.d/newrelic-infra.list', shell=True, ) with settings(warn_only=True): @@ -1708,15 +1724,17 @@ def upload_newrelic_infrastructure_conf(): hostname = "_".join([_instance_name(_current_roles()[0]), run("hostname")]) context["hostname"] = hostname # main, official monitoring agent - template = "newrelic_infra.yml" - destination = f"/etc/{template}" - upload_template( - template, - destination, - 
context=context, + + # This workaround is required because of an error encountered when uploading the + # yaml with Fabric's upload_template, which seems to run into this particular paramiko issue. + # https://github.com/paramiko/paramiko/issues/1133 + template = render_template( + "newrelic-infra.yml", context=context, template_dir=env.templates_dir + ) + operations.put( + local_path=BytesIO(bytes(template, encoding="utf-8")), + remote_path="/etc/newrelic-infra.yml", use_sudo=True, - use_jinja=True, - template_dir=env.templates_dir, ) # leave the hostname the same for the system monitoring so the servers # can be linked up properly with the apps by New Relic diff --git a/fabulaws/library/wsgiautoscale/templates/newrelic_infra.yml b/fabulaws/library/wsgiautoscale/templates/newrelic-infra.yml similarity index 99% rename from fabulaws/library/wsgiautoscale/templates/newrelic_infra.yml rename to fabulaws/library/wsgiautoscale/templates/newrelic-infra.yml index 463da3a..803964b 100644 --- a/fabulaws/library/wsgiautoscale/templates/newrelic_infra.yml +++ b/fabulaws/library/wsgiautoscale/templates/newrelic-infra.yml @@ -79,10 +79,10 @@ license_key: {{newrelic_license_key}} # Value : Use optional key-value pairs to build filter sets, group your # results, annotate your data, etc. 
# -#custom_attributes: +custom_attributes: environment: {{ environment.title() }} - role: {{ current_role.title().replace('-','') }} - deployment: {{ deployment_tag.title().replace('-', '') }} + role: {{ current_role.title().replace("-","") }} + deployment: {{ deployment_tag.title().replace("-", "") }} # service: login service # team: alpha-team # From 0eb8787b69f28c6b124450aec3dd4359332ccb6f Mon Sep 17 00:00:00 2001 From: Jeremy Gibson Date: Thu, 6 Oct 2022 12:16:10 -0400 Subject: [PATCH 3/4] refs #16888 rename infra template to avoid yaml pre-commit failures --- fabulaws/library/wsgiautoscale/api.py | 2 +- .../{newrelic-infra.yml => newrelic-infra.yml_template} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename fabulaws/library/wsgiautoscale/templates/{newrelic-infra.yml => newrelic-infra.yml_template} (100%) diff --git a/fabulaws/library/wsgiautoscale/api.py b/fabulaws/library/wsgiautoscale/api.py index b0914d5..0218272 100644 --- a/fabulaws/library/wsgiautoscale/api.py +++ b/fabulaws/library/wsgiautoscale/api.py @@ -1729,7 +1729,7 @@ def upload_newrelic_infrastructure_conf(): # yaml with Fabric's upload_template, which seems to run into this particular paramiko issue. 
# https://github.com/paramiko/paramiko/issues/1133 template = render_template( - "newrelic-infra.yml", context=context, template_dir=env.templates_dir + "newrelic-infra.yml_template", context=context, template_dir=env.templates_dir ) operations.put( local_path=BytesIO(bytes(template, encoding="utf-8")), diff --git a/fabulaws/library/wsgiautoscale/templates/newrelic-infra.yml b/fabulaws/library/wsgiautoscale/templates/newrelic-infra.yml_template similarity index 100% rename from fabulaws/library/wsgiautoscale/templates/newrelic-infra.yml rename to fabulaws/library/wsgiautoscale/templates/newrelic-infra.yml_template From 5bd8d87605a9101c11fbd1510380748acdf2dc88 Mon Sep 17 00:00:00 2001 From: Jeremy Gibson Date: Fri, 7 Oct 2022 08:30:11 -0400 Subject: [PATCH 4/4] refs #16888 removes newrelic python-agent logging. --- fabulaws/library/wsgiautoscale/templates/newrelic.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fabulaws/library/wsgiautoscale/templates/newrelic.ini b/fabulaws/library/wsgiautoscale/templates/newrelic.ini index 6fce2ee..d6e58a6 100644 --- a/fabulaws/library/wsgiautoscale/templates/newrelic.ini +++ b/fabulaws/library/wsgiautoscale/templates/newrelic.ini @@ -52,7 +52,7 @@ monitor_mode = true # write out a log file, it is also possible to say "stderr" and # output to standard error output. This would normally result in # output appearing in your web server log. -log_file = %(log_dir)s/newrelic-python-agent-%(app_type)s.log +# log_file = %(log_dir)s/newrelic-python-agent-%(app_type)s.log # Sets the level of detail of messages sent to the log file, if # a log file location has been provided. Possible values, in @@ -63,7 +63,7 @@ log_file = %(log_dir)s/newrelic-python-agent-%(app_type)s.log # of information very quickly, so it is best not to keep the # agent at this level for longer than it takes to reproduce the # problem you are experiencing. 
-log_level = info +# log_level = info # The Python Agent communicates with the New Relic service using # SSL by default. Note that this does result in an increase in