Patch summary (free text ahead of the first "diff --git" header; patch tools skip it):

Makefile:  derive ROOT_DIR with the built-in $(dir ...) instead of shelling out
           to dirname; remove the `run` target; split `coverage` into real file
           targets (coverage.out rebuilt whenever a Go source changes, then
           coverage.html from it); declare the command-style targets .PHONY.
README.md: add "Dependencies" and "Usage" sections and point the configuration
           notes at run.sh and config.json.
run.sh:    switch to /bin/sh with `set -e`, build through `make hound`, then
           start ./hound with the HOUND_* settings supplied as a per-command
           environment and stdout/stderr redirected to hound.out / hound.err.

NOTE(review): the post-image blob ids on the "index" lines were not regenerated
after the review fixes; confirm before relying on them (e.g. for 3-way merges).

diff --git a/Makefile b/Makefile
index a9f5025..df6bb2d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-ROOT_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+ROOT_DIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST))))
 
 all: hound
 
@@ -8,14 +8,15 @@ hound: hound.go smtp.go alert.go alertscollection.go config.go
 fmt:
 	go fmt *.go
 
-run: hound
-	./run.sh
-
 test:
 	go test .
 
-coverage:
+coverage: coverage.html
+
+coverage.out: $(wildcard *.go)
 	go test . -coverprofile=coverage.out
+
+coverage.html: coverage.out
 	go tool cover -html=coverage.out -o coverage.html
 
 build:
@@ -23,3 +24,5 @@ build:
 
 push: build
 	docker push ccnmtl/hound
+
+.PHONY: all fmt test coverage build push
diff --git a/README.md b/README.md
index d0ede1e..cbbf933 100644
--- a/README.md
+++ b/README.md
@@ -12,15 +12,33 @@ minutes after that, one hour after that, 2 hours after that, 4 hours, 8
 hours, then every 24 hours thereafter. Finally, you will get an email
 when the metric has recovered.
 
+### Dependencies
+
+1. Obviously enough, Hound needs a running graphite server, accessible via
+   network.
+2. In addition, an SMTP host is necessary (without authentication or
+   encryption) to send the emails out.
+
+### Usage
+
+1. Edit the file `run.sh` with the correct values for your use.
+2. Run `./run.sh` (preferably in a detachable session or with `nohup`).
+
 ### Configuration
 
-* `GraphiteBase`, `EmailFrom`, and `EmailTo` should all be obvious
+The main configuration happens (as noted above) in `run.sh`.
+
 * `CheckInterval` is how many minutes to wait between checks
-* `GlobalThrottle` is the maximum number of alerts that Hound will
-  send in a cycle. Ie, if there's a major network outage and all the
-  metrics start failing, you want to stop it once you've figured that
-  out. Once this threshold is passed, Hound sends just one more message
-  saying how many metrics are failing.
+* `GlobalThrottle` is the maximum number of alerts that Hound will send in a
+  cycle. I.e., if there's a major network outage and all the metrics start
+  failing, you want to stop it once you've figured that out. Once this
+  threshold is passed, Hound sends just one more message saying how many
+  metrics are failing.
+
+The rest of the values in this file should be self-explanatory.
+
+The alerts configuration is set in `config.json` (by default - it is passed as
+an argument to `hound` in `run.sh`).
 
 Each Alert has:
 
diff --git a/run.sh b/run.sh
index 8f79c1a..2d4bdd9 100755
--- a/run.sh
+++ b/run.sh
@@ -1,17 +1,23 @@
-#!/bin/bash
+#!/bin/sh
 
-export HOUND_GRAPHITE_BASE="http://nanny.cul.columbia.edu/render/"
-export HOUND_CARBON_BASE="nanny.cul.columbia.edu:2003"
-export HOUND_METRIC_BASE="ccnmtl.app.gauges.hounddev."
-export HOUND_EMAIL_FROM="hound@thraxil.org"
-export HOUND_EMAIL_TO="anders@columbia.edu"
-export HOUND_CHECK_INTERVAL=1
-export HOUND_GLOBAL_THROTTLE=10
-export HOUND_HTTP_PORT=9998
-export HOUND_TEMPLATE_FILE="index.html"
-export HOUND_EMAIL_ON_ERROR=false
-export HOUND_SMTP_SERVER=postgres
-export HOUND_SMTP_PORT=25
-export HOUND_LOG_LEVEL=DEBUG
+set -e
 
-./hound -config=config.json
+make hound
+
+echo "Running hound..."
+
+HOUND_GRAPHITE_BASE="http://nanny.cul.columbia.edu/render/" \
+HOUND_CARBON_BASE="nanny.cul.columbia.edu:2003" \
+HOUND_METRIC_BASE="ccnmtl.app.gauges.hounddev." \
+HOUND_EMAIL_FROM="hound@thraxil.org" \
+HOUND_EMAIL_TO="anders@columbia.edu" \
+HOUND_CHECK_INTERVAL=1 \
+HOUND_GLOBAL_THROTTLE=10 \
+HOUND_HTTP_PORT=9998 \
+HOUND_TEMPLATE_FILE="index.html" \
+HOUND_EMAIL_ON_ERROR=false \
+HOUND_SMTP_SERVER=postgres \
+HOUND_SMTP_PORT=25 \
+HOUND_LOG_LEVEL=DEBUG \
+./hound -config=config.json \
+>hound.out 2>hound.err