Skip to content

Commit

Permalink
admin server health check
Browse files Browse the repository at this point in the history
  • Loading branch information
lepdou committed May 18, 2016
1 parent fae4343 commit 56c36b7
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 21 deletions.
Expand Up @@ -3,6 +3,6 @@
/**
* @author Jason Song(song_s@ctrip.com)
*/
public enum Env {
public enum Env {
  // Declaration order determines ordinal() and values(); keep it stable.
  LOCAL,
  DEV,
  FWS,
  FAT,
  UAT,
  LPT,
  PRO,
  TOOLS;
}
@@ -1,35 +1,165 @@
package com.ctrip.apollo.portal;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import javax.annotation.PostConstruct;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.actuate.health.Health;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Component;

import com.ctrip.apollo.core.enums.Env;
import com.ctrip.apollo.portal.api.AdminServiceAPI;

@Component
public class PortalSettings {

// Logger shared by this class and its inner HealthCheckTask.
private Logger logger = LoggerFactory.getLogger(PortalSettings.class);

// Delay between two health-check runs; passed to scheduleWithFixedDelay with TimeUnit.MILLISECONDS.
private static final int HEALTH_CHECK_INTERVAL = 5000;

// SpEL expression: reads the apollo.portal.env property and splits it on ','.
// NOTE(review): both `env` and `allStrEnvs` follow this annotation here; this looks like
// pre-change and post-change lines of a diff shown together -- confirm which field is live.
@Value("#{'${apollo.portal.env}'.split(',')}")
private List<String> env;
private List<String> allStrEnvs;

// Used in postConstruct() to build the HealthCheckTask, which looks up the HealthAPI bean.
@Autowired
ApplicationContext applicationContext;

// Every configured environment, parsed from allStrEnvs in postConstruct().
private List<Env> allEnvs = new ArrayList<Env>();

// Written by HealthCheckTask.run() at the end of each run; getActiveEnvs() rebuilds its
// cache while this is true. Starts as true.
private volatile boolean updatedFromLastHealthCheck = true;

// NOTE(review): only referenced by lines that look like the pre-change version
// (getEnvs/getFirstEnv) -- confirm whether this field still exists after the commit.
private List<Env> envs = new ArrayList<Env>();
// Cached result of refreshActiveEnvs(); starts empty and is assigned in getActiveEnvs().
private List<Env> activeEnvs = new LinkedList<>();

// Per-environment status flag: true = up, false = down. Initialised to true for every
// environment in postConstruct() and updated by HealthCheckTask.
private Map<Env, Boolean> envStatusMark = new ConcurrentHashMap<>();

// Scheduler that runs HealthCheckTask; created in postConstruct().
private ScheduledExecutorService healthCheckService;

// Held by getActiveEnvs() while it replaces the activeEnvs cache.
private Lock lock = new ReentrantLock();

/**
 * Initialisation hook (annotated with @PostConstruct): parses the configured environment
 * names into Env values, marks every environment as up, and schedules the periodic admin
 * server health check.
 */
@PostConstruct
private void postConstruct() {
// NOTE(review): the next two lines open a loop that is never closed and redeclare `e`;
// they look like the pre-change version of the loop shown alongside its replacement.
for (String e : env) {
envs.add(Env.valueOf(e.toUpperCase()));
// Parse the originally configured environment names (upper-cased to match the enum constants).
for (String e : allStrEnvs) {
allEnvs.add(Env.valueOf(e.toUpperCase()));
}

// Every environment starts out marked as up.
for (Env env : allEnvs) {
envStatusMark.put(env, true);
}

// Scheduler created with a pool size of 1 for the health-check task.
healthCheckService = Executors.newScheduledThreadPool(1);

// First run after 1000 ms, then HEALTH_CHECK_INTERVAL ms between the end of one run
// and the start of the next.
healthCheckService
.scheduleWithFixedDelay(new HealthCheckTask(applicationContext), 1000, HEALTH_CHECK_INTERVAL,
TimeUnit.MILLISECONDS);

}

public List<Env> getEnvs() {
/**
 * Returns the environments currently marked as up.
 *
 * <p>The list is cached in {@code activeEnvs} and rebuilt from {@code envStatusMark} whenever
 * {@code updatedFromLastHealthCheck} is set. Several callers may rebuild the cache while the
 * flag is set; that is redundant but harmless because every rebuild reads the same status map.
 *
 * @return the cached list of active environments, in the order of {@code allEnvs}
 */
public List<Env> getActiveEnvs() {
  if (updatedFromLastHealthCheck) {
    lock.lock();
    try {
      activeEnvs = refreshActiveEnvs();
    } finally {
      // Always release the lock, even if the refresh throws, so later callers cannot block forever.
      lock.unlock();
    }
  }
  return activeEnvs;
}

/**
 * Builds a fresh list of the environments whose entry in {@code envStatusMark} is true,
 * keeping the order of {@code allEnvs}.
 *
 * @return a new list containing only the environments marked as up
 */
private List<Env> refreshActiveEnvs() {
  final List<Env> alive = new LinkedList<>();
  for (Env candidate : allEnvs) {
    final boolean up = envStatusMark.get(candidate);
    if (!up) {
      continue;
    }
    alive.add(candidate);
  }
  logger.info("refresh active envs");
  return alive;
}

public Env getFirstEnv(){
return envs.get(0);
/**
 * Returns the first environment that is currently marked as up.
 *
 * <p>Goes through {@link #getActiveEnvs()} instead of reading the cache field directly:
 * the cache starts empty and is only filled by {@code getActiveEnvs()}, so reading it
 * directly could fail before the first refresh or return a stale entry.
 *
 * @return the first active environment, in the order of {@code allEnvs}
 * @throws IndexOutOfBoundsException if no environment is currently marked as up
 */
public Env getFirstAliveEnv() {
  return getActiveEnvs().get(0);
}


/**
 * Periodic task that probes the admin service of every configured environment and keeps
 * {@code envStatusMark} up to date.
 *
 * <p>An environment is marked down once {@link #ENV_DIED_THREADHOLD} consecutive checks have
 * failed, and is marked up again on the first successful check.
 */
class HealthCheckTask implements Runnable {

  /** Number of consecutive failed checks after which an environment is marked down. */
  private static final int ENV_DIED_THREADHOLD = 2;

  /** Consecutive failed checks per environment; only touched by the health-check task. */
  private Map<Env, Long> healthCheckFailCnt = new HashMap<>();

  private AdminServiceAPI.HealthAPI healthAPI;

  /**
   * @param context application context used to look up the {@code HealthAPI} bean
   */
  public HealthCheckTask(ApplicationContext context) {
    healthAPI = context.getBean(AdminServiceAPI.HealthAPI.class);
    for (Env env : allEnvs) {
      healthCheckFailCnt.put(env, 0L);
    }
  }

  /**
   * Checks every environment once, updates its status mark, and records whether anything
   * noteworthy happened in {@code updatedFromLastHealthCheck}.
   */
  @Override
  public void run() {
    logger.info("admin server health check start...");
    boolean hasUpdateStatus = false;

    for (Env env : allEnvs) {
      try {
        if (isUp(env)) {
          // Reset on every success so the threshold counts consecutive failures only;
          // otherwise isolated transient failures add up and eventually mark a healthy env down.
          healthCheckFailCnt.put(env, 0L);
          //revive
          if (!envStatusMark.get(env)) {
            envStatusMark.put(env, true);
            hasUpdateStatus = true;
            logger.info("env up again [env:{}]", env);
          }
        } else {
          //maybe meta server up but admin server down
          handleEnvDown(env);
          hasUpdateStatus = true;
        }

      } catch (Exception e) {
        //maybe meta server down
        // One placeholder per argument: the message was previously dropped because the
        // format string had a single {} for two arguments.
        logger.warn("health check fail. [env:{}, cause:{}]", env, e.getMessage());
        handleEnvDown(env);
        hasUpdateStatus = true;
      }
    }

    if (!hasUpdateStatus) {
      logger.info("admin server health check OK");
    }
    // NOTE(review): the flag is overwritten on every run, so a status change can be missed
    // if getActiveEnvs() is not called before the next run sets it back to false -- confirm.
    updatedFromLastHealthCheck = hasUpdateStatus;
  }

  /**
   * @return true when the health endpoint of {@code env} reports the status code "UP"
   */
  private boolean isUp(Env env) {
    Health health = healthAPI.health(env);
    return "UP".equals(health.getStatus().getCode());
  }

  /**
   * Records one more failed check for {@code env} and marks it down once the failure
   * threshold is reached.
   */
  private void handleEnvDown(Env env) {
    long failCnt = healthCheckFailCnt.get(env);
    healthCheckFailCnt.put(env, ++failCnt);

    if (failCnt >= ENV_DIED_THREADHOLD) {
      envStatusMark.put(env, false);
      logger.error("env down [env:{}]", env);
    }
  }

}
}
Expand Up @@ -10,6 +10,7 @@
import com.ctrip.apollo.core.dto.NamespaceDTO;
import com.ctrip.apollo.core.dto.ReleaseDTO;

import org.springframework.boot.actuate.health.Health;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
Expand All @@ -25,6 +26,14 @@
@Service
public class AdminServiceAPI {

/**
 * Client for the health endpoint of an environment's admin service.
 */
@Service
public static class HealthAPI extends API {

  /**
   * Fetches {@code <admin service host>/health} for the given environment via
   * {@code restTemplate.getForObject} and returns the response as a {@link Health}.
   *
   * @param env environment whose admin service is queried
   * @return the health information returned by the endpoint
   */
  public Health health(Env env) {
    final String healthUrl = getAdminServiceHost(env) + "/health";
    return restTemplate.getForObject(healthUrl, Health.class);
  }
}

@Service
public static class AppAPI extends API {

Expand Down
Expand Up @@ -47,7 +47,7 @@ public MultiResponseEntity<EnvClusterInfo> nav(@PathVariable String appId) {
throw new BadRequestException("app id can not be empty.");
}
MultiResponseEntity<EnvClusterInfo> response = MultiResponseEntity.ok();
List<Env> envs = portalSettings.getEnvs();
List<Env> envs = portalSettings.getActiveEnvs();
for (Env env : envs) {
try {
response.addResponseEntity(RichResponseEntity.ok(appService.createEnvNavNode(env, appId)));
Expand Down Expand Up @@ -84,7 +84,7 @@ public AppDTO load(@PathVariable String appId){
@RequestMapping(value = "/{appId}/miss_envs")
public MultiResponseEntity<Env> findMissEnvs(@PathVariable String appId) {
MultiResponseEntity<Env> response = MultiResponseEntity.ok();
for (Env env : portalSettings.getEnvs()) {
for (Env env : portalSettings.getActiveEnvs()) {
try {
appService.load(env, appId);
} catch (Exception e) {
Expand Down
Expand Up @@ -19,7 +19,7 @@ public class PortalEnvController {

@RequestMapping(value = "", method = RequestMethod.GET)
public List<Env> envs(){
return portalSettings.getEnvs();
return portalSettings.getActiveEnvs();
}

}
Expand Up @@ -41,7 +41,7 @@ public AppDTO load(String appId) {
//轮询环境直到能找到此app的信息
AppDTO app = null;
boolean isCallAdminServiceError = false;
for (Env env : portalSettings.getEnvs()) {
for (Env env : portalSettings.getActiveEnvs()) {
try {
app = appAPI.loadApp(env, appId);
break;
Expand Down Expand Up @@ -71,7 +71,7 @@ public AppDTO load(Env env, String appId){
}

public void createAppInAllEnvs(AppDTO app) {
List<Env> envs = portalSettings.getEnvs();
List<Env> envs = portalSettings.getActiveEnvs();
for (Env env : envs) {
try {
appAPI.createApp(env, app);
Expand Down
@@ -1,8 +1,5 @@
package com.ctrip.apollo.portal.service;

import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -12,21 +9,25 @@
import org.springframework.web.client.HttpClientErrorException;

import com.ctrip.apollo.common.utils.BeanUtils;
import com.ctrip.apollo.core.enums.Env;
import com.ctrip.apollo.core.dto.ItemChangeSets;
import com.ctrip.apollo.core.dto.ItemDTO;
import com.ctrip.apollo.core.dto.NamespaceDTO;
import com.ctrip.apollo.core.dto.ReleaseDTO;
import com.ctrip.apollo.core.enums.Env;
import com.ctrip.apollo.core.exception.BadRequestException;
import com.ctrip.apollo.core.exception.NotFoundException;
import com.ctrip.apollo.core.exception.ServiceException;
import com.ctrip.apollo.portal.api.AdminServiceAPI;
import com.ctrip.apollo.portal.entity.ItemDiffs;
import com.ctrip.apollo.portal.entity.NamespaceIdentifer;
import com.ctrip.apollo.portal.entity.form.NamespaceReleaseModel;
import com.ctrip.apollo.portal.entity.form.NamespaceTextModel;
import com.ctrip.apollo.portal.entity.form.NamespaceReleaseModel;
import com.ctrip.apollo.portal.service.txtresolver.ConfigTextResolver;

import java.util.LinkedList;
import java.util.List;
import java.util.Map;

@Service
public class PortalConfigService {

Expand Down
Expand Up @@ -48,15 +48,15 @@ public class PortalNamespaceService {


public List<AppNamespaceDTO> findPublicAppNamespaces(){
return namespaceAPI.findPublicAppNamespaces(portalSettings.getFirstEnv());
return namespaceAPI.findPublicAppNamespaces(portalSettings.getFirstAliveEnv());
}

/**
 * Creates the given namespace in one environment by delegating to
 * {@code namespaceAPI.createNamespace}.
 *
 * @param env environment in which to create the namespace
 * @param namespace namespace to create
 * @return the value returned by the namespace API
 */
public NamespaceDTO createNamespace(Env env, NamespaceDTO namespace) {
  final NamespaceDTO created = namespaceAPI.createNamespace(env, namespace);
  return created;
}

public void createAppNamespace(AppNamespaceDTO appNamespace) {
for (Env env : portalSettings.getEnvs()) {
for (Env env : portalSettings.getActiveEnvs()) {
try {
namespaceAPI.createAppNamespace(env, appNamespace);
} catch (HttpStatusCodeException e) {
Expand Down
2 changes: 1 addition & 1 deletion apollo-portal/src/main/resources/portal.properties
Expand Up @@ -2,4 +2,4 @@ spring.application.name= apollo-portal
apollo.portal.env= dev,fat,uat
ctrip.appid= 100003173
server.port= 8080
logging.file= /opt/logs/100003173/apollo-portal.log
logging.file= /opt/logs/100003173/apollo-portal.log

0 comments on commit 56c36b7

Please sign in to comment.