Skip to content

Commit

Permalink
T35 Issue #108 - Swarm com monitoramento
Browse files Browse the repository at this point in the history
  • Loading branch information
Rdadamos committed Jun 24, 2019
1 parent 6874802 commit ae1be1e
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 10 deletions.
6 changes: 3 additions & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ script:
- docker-compose run web3 coverage run --omit="*/migrations/*,manage.py" manage.py test
- docker-compose run web3 coverage report
- cd front-end/pret-event && docker build -f Dockerfile-prod -t pretevent/app . && cd ../..
- zip -r latest appspec.yml prod-update.sh docker-compose-prod.yml
- mkdir -p prod_scripts
- mv latest.zip prod_scripts/latest.zip
- zip -r latest appspec.yml prod-update.sh docker-compose.prod.yml monitoring/
- mkdir prod
- mv latest.zip prod/latest.zip
after_script:
- coverage combine --append --rcfile=.coveragerc-mc1 service/mc1/.coverage
- coverage combine --append --rcfile=.coveragerc-mc2 service/mc2/.coverage
Expand Down
6 changes: 0 additions & 6 deletions docker-compose-prod.yml → docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@ version: '3'
services:
db:
image: postgres
environment:
POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}"
POSTGRES_DB: "${POSTGRES_DB}"
POSTGRES_USER: "${POSTGRES_USER}"
ports:
- 5000:5432
app:
image: pretevent/app
ports:
Expand Down
10 changes: 10 additions & 0 deletions monitoring/conf/alertmanager/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Alertmanager configuration: a single root route that hands every alert to
# the `slack` receiver defined below.
#
# NOTE(review): `api_url` is a Slack incoming-webhook URL (a credential) and it
# is committed to version control. Treat it as leaked: rotate the webhook and
# supply it at deploy time instead of keeping it in the repository.

# Receivers that routes may reference by name.
receivers:
  - name: 'slack'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/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'
        channel: '#swarm'
        username: 'Rodrigo Dadamos'
        # Also notify when an alert is resolved.
        send_resolved: true

# Root route: no matchers or child routes are declared, only the receiver.
route:
  receiver: 'slack'
40 changes: 40 additions & 0 deletions monitoring/conf/prometheus/alert.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
# Prometheus alerting rules for the monitoring stack.
# Two groups: `targets` (scrape availability) and `host` (CPU load, memory and
# storage pressure). Every rule must hold for 30s before it fires.
groups:
  - name: targets
    rules:
      # Fires for each scrape target whose `up` metric is 0.
      - alert: service_down
        expr: up == 0
        for: 30s
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds."

  - name: host
    rules:
      # 1-minute load average above the fixed threshold of 1.5.
      - alert: high_cpu_load
        expr: node_load1 > 1.5
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Server under high load"
          description: "Docker host is under high load, the avg load 1m is at {{ $value}}. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."

      # Used memory (total - free - buffers - cached) above 85%, evaluated
      # per instance. The series are deliberately NOT wrapped in sum():
      # aggregating without a `by` clause drops the `instance` and `job`
      # labels that the description interpolates, and would average a
      # saturated host together with idle ones.
      - alert: high_memory_load
        expr: >-
          (node_memory_MemTotal_bytes -
          (node_memory_MemFree_bytes + node_memory_Buffers_bytes +
          node_memory_Cached_bytes)) /
          node_memory_MemTotal_bytes * 100 > 85
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Server memory is almost full"
          description: "Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."

      # Used space above 85% on filesystems reported with fstype="aufs".
      # NOTE(review): this only matches hosts whose Docker storage driver is
      # aufs; confirm the driver in use (e.g. overlay2 would never match).
      - alert: high_storage_load
        expr: >-
          (node_filesystem_size_bytes{fstype="aufs"} -
          node_filesystem_free_bytes{fstype="aufs"}) /
          node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Server storage is almost full"
          description: "Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
39 changes: 39 additions & 0 deletions monitoring/conf/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
---
# Prometheus server configuration for the monitoring stack.

global:
  # Default cadence; individual jobs below override scrape_interval.
  scrape_interval: 10s
  evaluation_interval: 10s
  external_labels:
    monitor: 'monitoring'

# Alert definitions, resolved relative to this configuration file.
rule_files:
  - 'alert.rules'

# Firing alerts are pushed to Alertmanager over plain HTTP.
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - 'alertmanager:9093'

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'localhost:9090'

  # node-exporter targets come from DNS A records of `tasks.node-exporter`.
  - job_name: 'node-exporter'
    dns_sd_configs:
      - names:
          - 'tasks.node-exporter'
        type: 'A'
        port: 9100

  # cAdvisor targets come from DNS A records of `tasks.cadvisor`.
  - job_name: 'cadvisor'
    dns_sd_configs:
      - names:
          - 'tasks.cadvisor'
        type: 'A'
        port: 8080

  # Netdata, scraped at /api/v1/allmetrics?format=prometheus; labels sent by
  # the target are kept as-is (honor_labels).
  - job_name: 'netdata'
    metrics_path: '/api/v1/allmetrics'
    params:
      format:
        - prometheus
    honor_labels: true
    scrape_interval: 5s
    static_configs:
      - targets:
          - 'pretevent.cf:19999'
70 changes: 70 additions & 0 deletions monitoring/docker-compose.monitoring.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
---
# Swarm stack for monitoring: Prometheus, node-exporter, Alertmanager,
# cAdvisor and Grafana. Deployed with `docker stack deploy`.
#
# Port mappings are quoted so they are always read as strings.
# NOTE(review): the `./conf/...` bind mounts refer to paths on the host that
# runs the task; on a multi-node swarm confirm Prometheus and Alertmanager are
# scheduled on a node where these files exist.
version: '3.3'

services:

  prometheus:
    image: linuxtips/prometheus_alpine
    volumes:
      - ./conf/prometheus/:/etc/prometheus/
      - prometheus_d:/var/lib/prometheus
    networks:
      - backend
    ports:
      - '9090:9090'

  node-exporter:
    image: linuxtips/node-exporter_alpine
    # Hostname is templated from the swarm node ID.
    hostname: '{{.Node.ID}}'
    volumes:
      - /proc:/usr/proc
      - /sys:/usr/sys
      - /:/rootfs
    networks:
      - backend
    ports:
      - '9100:9100'
    # One task per swarm node. Prometheus discovers this service through
    # `tasks.node-exporter`; with the default single replica only one node
    # would ever be monitored.
    deploy:
      mode: global

  alertmanager:
    image: linuxtips/alertmanager_alpine
    volumes:
      - ./conf/alertmanager/:/etc/alertmanager/
    networks:
      - backend
    ports:
      - '9093:9093'

  cadvisor:
    image: google/cadvisor
    # Hostname is templated from the swarm node ID.
    hostname: '{{.Node.ID}}'
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - backend
    ports:
      - '8080:8080'
    # One task per swarm node, for the same reason as node-exporter
    # (discovered through `tasks.cadvisor`).
    deploy:
      mode: global

  grafana:
    image: grafana/grafana
    # NOTE(review): `depends_on` is not honoured by `docker stack deploy`
    # with a version 3 file; kept for plain docker-compose runs.
    depends_on:
      - prometheus
    volumes:
      - grafana_d:/var/lib/grafana
    env_file:
      - grafana.config
    networks:
      - backend
      - frontend
    ports:
      - '3000:3000'

networks:
  frontend:
  backend:

volumes:
  prometheus_d:
  grafana_d:
2 changes: 2 additions & 0 deletions monitoring/grafana.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Environment file for the Grafana service (referenced via `env_file`).
# NOTE(review): the admin password below is weak and committed in plain text;
# rotate it and supply it at deploy time instead of keeping it in the repo.
GF_SECURITY_ADMIN_PASSWORD=q1w2e3
# Comma-separated plugin IDs; presumably installed by the Grafana image on
# start-up (per the variable name) - confirm against the image documentation.
GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-piechart-panel,camptocamp-prometheus-alertmanager-datasource,vonage-status-panel
4 changes: 3 additions & 1 deletion prod-update.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/bash
# Deploys the production Swarm stacks: the application stack (`pretevent`)
# followed by the monitoring stack (`monitoring`).

# Stack files are expected under /home/ubuntu/. Abort if the directory is
# missing rather than deploying from whatever the current directory is.
cd /home/ubuntu/ || exit 1

# The application compose file is docker-compose.prod.yml, the name that is
# packaged into the deployment bundle by CI.
docker stack deploy -c docker-compose.prod.yml pretevent

# The monitoring compose file uses paths relative to monitoring/.
cd monitoring/ || exit 1
docker stack deploy -c docker-compose.monitoring.yml monitoring

0 comments on commit ae1be1e

Please sign in to comment.