This repository has been archived by the owner on Jan 21, 2022. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is a complete re-write of the vcap/health_manager component. It seeks to be 100% backward compatible behaviorally. See README.md file for details. Change-Id: I892fa1532955431c11f2cdfda8d4d8b6f9d2728d
- Loading branch information
Bob Nugmanov
committed
Apr 20, 2012
1 parent
861f58f
commit 0e2bf2e
Showing
30 changed files
with
2,410 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
/vendor/cache/* | ||
*~ | ||
.idea | ||
\#*\# | ||
.\#* | ||
.bundle | ||
bundler | ||
spec_reports | ||
spec_coverage | ||
ci-artifacts-dir | ||
ci-working-dir | ||
*.rbc | ||
*.swp | ||
.rvmrc | ||
*.pid |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Bundler manifest for the HealthManager component.
source "http://rubygems.org"

# Build and test tooling.
gem "rake"
gem "rspec"

# Shared Cloud Foundry libraries.
gem "vcap_common", ">= 1.0.8"
gem "vcap_logging"

# EventMachine is pulled from the Cloud Foundry fork on a pinned release branch
# rather than from the gem source above.
gem "eventmachine",
    :git    => "git://github.com/cloudfoundry/eventmachine.git",
    :branch => "release-0.12.11-cf"

# JSON parsing and HTTP clients.
gem "yajl-ruby"
gem "rest-client"
gem "em-http-request"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
GIT | ||
remote: git://github.com/cloudfoundry/eventmachine.git | ||
revision: 2806c630d8631d5dcf9fb2555f665b829052aabe | ||
branch: release-0.12.11-cf | ||
specs: | ||
eventmachine (0.12.11.cloudfoundry.3) | ||
|
||
GEM | ||
remote: http://rubygems.org/ | ||
specs: | ||
addressable (2.2.7) | ||
daemons (1.1.8) | ||
diff-lcs (1.1.3) | ||
em-http-request (0.3.0) | ||
addressable (>= 2.0.0) | ||
escape_utils | ||
eventmachine (>= 0.12.9) | ||
escape_utils (0.2.4) | ||
json_pure (1.6.6) | ||
mime-types (1.18) | ||
nats (0.4.22) | ||
daemons (>= 1.1.4) | ||
eventmachine (>= 0.12.10) | ||
json_pure (>= 1.6.1) | ||
thin (>= 1.3.1) | ||
posix-spawn (0.3.6) | ||
rack (1.4.1) | ||
rake (0.9.2.2) | ||
rest-client (1.6.7) | ||
mime-types (>= 1.16) | ||
rspec (2.9.0) | ||
rspec-core (~> 2.9.0) | ||
rspec-expectations (~> 2.9.0) | ||
rspec-mocks (~> 2.9.0) | ||
rspec-core (2.9.0) | ||
rspec-expectations (2.9.1) | ||
diff-lcs (~> 1.1.3) | ||
rspec-mocks (2.9.0) | ||
thin (1.3.1) | ||
daemons (>= 1.0.9) | ||
eventmachine (>= 0.12.6) | ||
rack (>= 1.0.0) | ||
vcap_common (1.0.10) | ||
eventmachine (~> 0.12.11.cloudfoundry.3) | ||
nats (~> 0.4.22.beta.8) | ||
posix-spawn (~> 0.3.6) | ||
thin (~> 1.3.1) | ||
yajl-ruby (~> 0.8.3) | ||
vcap_logging (0.1.4) | ||
rake | ||
yajl-ruby (0.8.3) | ||
|
||
PLATFORMS | ||
ruby | ||
|
||
DEPENDENCIES | ||
em-http-request | ||
eventmachine! | ||
rake | ||
rest-client | ||
rspec | ||
vcap_common (>= 1.0.8) | ||
vcap_logging | ||
yajl-ruby |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
require "rspec/core/rake_task"
require "rspec/core/version"

# Defines the `spec` rake task, which runs the RSpec examples with coloured,
# documentation-formatted output.
desc "Run all examples"
RSpec::Core::RakeTask.new(:spec) do |task|
  task.rspec_opts = ["--color", "--format", "documentation"]
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/usr/bin/env ruby
# Debug helper: connects to NATS, builds a BulkBasedExpectedStateProvider and
# prints every droplet the provider yields, then shuts down after 5 seconds.
#
# Environment overrides:
#   NATS_URI  - message bus URI (a hard-coded development URI is used otherwise)
#   BULK_URL  - host passed to the provider as config['bulk']['host']
#   LOG_LEVEL - logging level (defaults to 'debug')
home = File.join(File.dirname(__FILE__), '..')
ENV['BUNDLE_GEMFILE'] = "#{home}/Gemfile"

require 'rubygems'
require 'bundler/setup'
require File.join(home, 'lib', 'health_manager')

# Single shutdown path shared by the signal handlers and the exit timer:
# stop NATS first and stop the EventMachine reactor from its completion block.
shutdown = lambda { NATS.stop { EM.stop } }

trap('INT') { shutdown.call }
trap('SIGTERM') { shutdown.call }

EM.run do
  NATS.start :uri => ENV['NATS_URI'] || 'nats://nats:nats@192.168.24.128:4222' do
    config = {
      'bulk' => { 'host' => ENV['BULK_URL'] || 'api.vcap.me', 'batch_size' => '2' },
    }
    VCAP::Logging.setup_from_config({ 'level' => ENV['LOG_LEVEL'] || 'debug' })

    prov = HealthManager::BulkBasedExpectedStateProvider.new(config)
    prov.each_droplet do |id, droplet|
      puts "Droplet #{id}:"
      puts droplet.inspect
    end

    # The previous form, EM.stop { NATS.stop }, passed a block to EM.stop,
    # which does not take one, so NATS.stop was never invoked on this path.
    EM.add_timer(5) { shutdown.call }
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/usr/bin/env ruby
# Launcher for the HealthManager: points Bundler at the component's Gemfile,
# loads the library, installs signal handlers and starts the manager.
home = File.join(File.dirname(__FILE__), '..')

ENV['BUNDLE_GEMFILE'] = "#{home}/Gemfile"
require 'bundler/setup'

require File.join(home, 'lib', 'health_manager')

manager = HealthManager::Manager.new

# Both interrupt and terminate requests trigger the manager's shutdown.
%w[INT SIGTERM].each do |signal_name|
  trap(signal_name) { manager.shutdown }
end

manager.start
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
--- | ||
# local_route is the IP address of a well-known server on your network. It | ||
# is used to choose the right IP address (think of hosts that have multiple NICs | ||
# and IP addresses assigned to them) of the host running the Health Manager. The default | ||
# value of nil should work in most cases. | ||
# local_route: 127.0.0.1 | ||
|
||
# NATS message bus URI | ||
mbus: nats://nats:nats@192.168.24.128:4222/ | ||
logging: | ||
level: warn | ||
pid: /var/vcap/sys/run/healthmanager.pid | ||
|
||
queue_batch_size: 10 | ||
|
||
intervals: | ||
# Interval for collecting statistics about this cloudfoundry instance. | ||
# Amongst other things, data collected includes number of users, number of | ||
# applications and memory usage. | ||
database_scan: 10 | ||
# Time to wait before starting analysis for stopped applications. | ||
droplet_lost: 30 | ||
# Interval between scans for analysis of applications. | ||
droplets_analysis: 5 | ||
# An application is deemed to be flapping if it is found to be in a crashed | ||
# state (after a restart following every crash) for more than "flapping_death" | ||
# number of times in an interval that is "flapping_timeout" long. | ||
flapping_death: 2 | ||
flapping_timeout: 180 | ||
# Time to wait before trying to restart an application after a crash is | ||
# detected | ||
restart_timeout: 20 | ||
# Time to wait before analyzing the state of an application that has been | ||
# started/restarted | ||
stable_state: 60 | ||
|
||
# Number of start requests sent each second (subject to EM timer limitations). | ||
# The default value is 50. | ||
dequeueing_rate: 50 | ||
|
||
# Used for /healthz and /varz endpoints. If not provided, random | ||
# values will be generated on component start. Static values are | ||
# configured below; comment them out to use random values instead. | ||
status: | ||
port: 54321 | ||
user: thin | ||
password: thin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
# HealthManager 2.0. (c) 2011-2012 VMware, Inc. | ||
$:.unshift(File.dirname(__FILE__)) | ||
|
||
require 'yaml' | ||
require 'yajl' | ||
require 'optparse' | ||
require 'time' | ||
require 'nats/client' | ||
|
||
require 'vcap/common' | ||
require 'vcap/component' | ||
require 'vcap/logging' | ||
require 'vcap/priority_queue' | ||
|
||
require 'health_manager/constants' | ||
require 'health_manager/common' | ||
require 'health_manager/app_state' | ||
require 'health_manager/app_state_provider' | ||
require 'health_manager/nats_based_known_state_provider' | ||
require 'health_manager/bulk_based_expected_state_provider' | ||
require 'health_manager/scheduler' | ||
require 'health_manager/nudger' | ||
require 'health_manager/harmonizer' | ||
require 'health_manager/varz_common' | ||
require 'health_manager/varz' | ||
require 'health_manager/reporter' | ||
|
||
module HealthManager
  # Entry point of the HealthManager process. Builds the component objects
  # (varz, reporter, scheduler, state providers, nudger, harmonizer) from the
  # configuration and drives their start-up inside the NATS/EventMachine loop.
  #
  # The helpers parse_args, logger, sanitized_config and register_hm_components
  # are not defined in this class; presumably they come from
  # HealthManager::Common -- confirm there.
  class Manager
    include HealthManager::Common

    #primarily for testing
    attr_reader :scheduler
    attr_reader :known_state_provider
    attr_reader :expected_state_provider

    # Loads the configuration file, applies +config+ on top of it, sets up
    # logging and instantiates the component objects.
    #
    # config - Hash of settings merged over the values read from the file.
    def initialize(config={})
      args = parse_args
      @config = read_config_from_file(args[:config_file]).merge(config)

      # Precedence: LOG_LEVEL from the environment replaces the whole
      # 'logging' section; with neither present, log at 'info'.
      @logging_config = @config['logging']
      @logging_config = {'level' => ENV['LOG_LEVEL']} if ENV['LOG_LEVEL'] #ENV override
      @logging_config ||= {'level' => 'info'} #fallback value

      VCAP::Logging.setup_from_config(@logging_config)

      logger.info("HealthManager: initializing")

      @varz = Varz.new(@config)
      @reporter = Reporter.new(@config)
      @scheduler = Scheduler.new(@config)
      @known_state_provider = AppStateProvider.get_known_state_provider(@config)
      @expected_state_provider = AppStateProvider.get_expected_state_provider(@config)
      @nudger = Nudger.new(@config)
      @harmonizer = Harmonizer.new(@config)

      register_hm_components
    end

    # Registers this process with VCAP::Component as type 'HealthManager'.
    # Port, user and password are taken from the 'status' config section and
    # are passed as nil when that section (or a key in it) is absent.
    def register_as_vcap_component
      logger.info("registering VCAP component")
      logger.debug("config: #{sanitized_config}")

      status_config = @config['status'] || {}
      VCAP::Component.register(:type => 'HealthManager',
                               :host => VCAP.local_ip(@config['local_route']),
                               :index => @config['index'],
                               :config => sanitized_config,
                               :port => status_config['port'],
                               :user => status_config['user'],
                               :password => status_config['password'])
    end

    # Writes the current process id to the file named by config['pid'],
    # creating the parent directory first. Terminates the process with
    # status 1 if the directory cannot be created.
    def create_pid_file
      @pid_file = @config['pid']
      begin
        FileUtils.mkdir_p(File.dirname(@pid_file))
      rescue => e
        logger.fatal("Can't create pid directory, exiting: #{e}")
        # Actually exit, as the message states; otherwise the File.open
        # below would fail with a less descriptive error.
        exit 1
      end
      File.open(@pid_file, 'wb') { |f| f.puts "#{Process.pid}" }
      logger.debug("pid file written: #{@pid_file}")
    end

    # Connects to NATS and, once connected, prepares and starts the
    # components, registers the VCAP component, writes the pid file when
    # config['pid'] is set, and finally starts the scheduler.
    def start
      logger.info("starting...")

      EM.epoll
      NATS.start :uri => get_nats_uri do
        @varz.prepare
        @reporter.prepare
        @harmonizer.prepare
        @expected_state_provider.start
        @known_state_provider.start

        # The Shadower is created unless the environment variable named by
        # HM_SHADOW is exactly 'false'.
        # NOTE(review): HM_SHADOW is a constant not defined in this file
        # (presumably in health_manager/constants), and no require for a
        # Shadower class is visible in this file -- confirm both are loaded.
        unless ENV[HM_SHADOW]=='false'
          logger.info("creating Shadower")
          @shadower = Shadower.new(@config)
          @shadower.subscribe
        end

        register_as_vcap_component
        create_pid_file if @config['pid']

        @scheduler.start #blocking call
      end
    end

    # Stops the NATS client and then the EventMachine reactor.
    def shutdown
      logger.info("shutting down...")
      NATS.stop { EM.stop }
      logger.info("...good bye.")
    end

    # Reads the YAML configuration and returns it as a Hash.
    #
    # config_file - path to the file; when nil, 'health_manager.yml' is
    #               looked up in CLOUD_FOUNDRY_CONFIG_PATH or, failing that,
    #               in '../config' relative to this file.
    #
    # Prints a message to stderr and exits with status 1 when the file cannot
    # be loaded or does not contain a mapping at the top level.
    def read_config_from_file(config_file)
      config_path = ENV['CLOUD_FOUNDRY_CONFIG_PATH'] || File.join(File.dirname(__FILE__),'../config')
      config_file ||= File.join(config_path, 'health_manager.yml')
      begin
        config = YAML.load_file(config_file)
      rescue => e
        $stderr.puts "Could not read configuration file #{config_file}: #{e}"
        exit 1
      end
      # An empty or scalar-only YAML file does not load as a Hash; the caller
      # merges into the result, so reject anything else here with a clear error.
      unless config.is_a?(Hash)
        $stderr.puts "Could not read configuration file #{config_file}: top-level YAML mapping expected"
        exit 1
      end
      config
    end

    # Returns the NATS URI: the environment variable named by the NATS_URI
    # constant when set, otherwise config['mbus'].
    # NOTE(review): NATS_URI is a constant not defined in this file
    # (presumably in health_manager/constants) -- confirm.
    def get_nats_uri
      ENV[NATS_URI] || @config['mbus']
    end

    # Current time as integer seconds since the epoch.
    def self.now
      Time.now.to_i
    end
  end
end
Oops, something went wrong.