Skip to content

Commit

Permalink
Merge pull request #668 from WeBankPartners/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
pobu168 committed Oct 30, 2020
2 parents a4483af + 61ba749 commit 6633aec
Show file tree
Hide file tree
Showing 52 changed files with 2,507 additions and 601 deletions.
2 changes: 1 addition & 1 deletion build/conf/ping_exporter.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"weight": 1
},
"file": {
"enabled" : true,
"enabled" : false,
"path": "ip.txt",
"weight": 2
},
Expand Down
38 changes: 35 additions & 3 deletions build/register.xml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@

<!-- 6.运行资源 - 描述部署运行本插件包需要的基础资源(如主机、虚拟机、容器、数据库等) -->
<resourceDependencies>
<docker imageName="open-monitor:{{PLUGIN_VERSION}}" containerName="open-monitor-{{PLUGIN_VERSION}}" portBindings="19091:19091,14241:14241,{{ALLOCATE_PORT}}:8080" volumeBindings="{{BASE_MOUNT_PATH}}/prometheus/logs:/app/monitor/prometheus/logs,{{BASE_MOUNT_PATH}}/prometheus/data:/app/monitor/prometheus/data,{{BASE_MOUNT_PATH}}/prometheus/rules:/app/monitor/prometheus/rules,{{BASE_MOUNT_PATH}}/alertmanager/logs:/app/monitor/alertmanager/logs,{{BASE_MOUNT_PATH}}/alertmanager/data:/app/monitor/alertmanager/data,{{BASE_MOUNT_PATH}}/consul/logs:/app/monitor/consul/logs,{{BASE_MOUNT_PATH}}/consul/data:/app/monitor/consul/data,{{BASE_MOUNT_PATH}}/monitor/logs:/app/monitor/monitor/logs,{{BASE_MOUNT_PATH}}/agent_deploy:/app/deploy,{{BASE_MOUNT_PATH}}/transgateway/logs:/app/monitor/transgateway/logs,{{BASE_MOUNT_PATH}}/transgateway/data:/app/monitor/transgateway/data,/etc/localtime:/etc/localtime" envVariables="MONITOR_DB_HOST={{DB_HOST}},MONITOR_DB_PORT={{DB_PORT}},MONITOR_DB_SCHEMA={{DB_SCHEMA}},MONITOR_DB_USER={{DB_USER}},MONITOR_DB_PWD={{DB_PWD}},CORE_ADDR={{CORE_ADDR}},GATEWAY_URL={{GATEWAY_URL}},MONITOR_HOST_IP={{ALLOCATE_HOST}},MONITOR_CHECK_EVENT_KEY={{MONITOR_CHECK_EVENT_KEY}},MONITOR_CHECK_EVENT_TO_MAIL={{MONITOR_CHECK_EVENT_TO_MAIL}},MONITOR_CHECK_EVENT_INTERVAL_MIN={{MONITOR_CHECK_EVENT_INTERVAL_MIN}},MONITOR_ARCHIVE_ENABLE={{MONITOR_ARCHIVE_ENABLE}},MONITOR_ARCHIVE_MYSQL_HOST={{MONITOR_ARCHIVE_MYSQL_HOST}},MONITOR_ARCHIVE_MYSQL_PORT={{MONITOR_ARCHIVE_MYSQL_PORT}},MONITOR_ARCHIVE_MYSQL_USER={{MONITOR_ARCHIVE_MYSQL_USER}},MONITOR_ARCHIVE_MYSQL_PWD={{MONITOR_ARCHIVE_MYSQL_PWD}},MONITOR_LOG_LEVEL={{MONITOR_LOG_LEVEL}},JWT_SIGNING_KEY={{JWT_SIGNING_KEY}},ALARM_FIRING_CALLBACK={{MONITOR_ALARM_FIRING_CALLBACK}},ALARM_RECOVER_CALLBACK={{MONITOR_ALARM_RECOVER_CALLBACK}}"/>
<docker imageName="open-monitor:{{PLUGIN_VERSION}}" containerName="open-monitor-{{PLUGIN_VERSION}}" portBindings="19091:19091,14241:14241,{{ALLOCATE_PORT}}:8080" volumeBindings="{{BASE_MOUNT_PATH}}/prometheus/logs:/app/monitor/prometheus/logs,{{BASE_MOUNT_PATH}}/prometheus/data:/app/monitor/prometheus/data,{{BASE_MOUNT_PATH}}/prometheus/rules:/app/monitor/prometheus/rules,{{BASE_MOUNT_PATH}}/alertmanager/logs:/app/monitor/alertmanager/logs,{{BASE_MOUNT_PATH}}/alertmanager/data:/app/monitor/alertmanager/data,{{BASE_MOUNT_PATH}}/consul/logs:/app/monitor/consul/logs,{{BASE_MOUNT_PATH}}/consul/data:/app/monitor/consul/data,{{BASE_MOUNT_PATH}}/monitor/logs:/app/monitor/monitor/logs,{{BASE_MOUNT_PATH}}/agent_deploy:/app/deploy,{{BASE_MOUNT_PATH}}/transgateway/logs:/app/monitor/transgateway/logs,{{BASE_MOUNT_PATH}}/transgateway/data:/app/monitor/transgateway/data,/etc/localtime:/etc/localtime,{{BASE_MOUNT_PATH}}/certs:/data/certs" envVariables="MONITOR_DB_HOST={{DB_HOST}},MONITOR_DB_PORT={{DB_PORT}},MONITOR_DB_SCHEMA={{DB_SCHEMA}},MONITOR_DB_USER={{DB_USER}},MONITOR_DB_PWD={{DB_PWD}},CORE_ADDR={{CORE_ADDR}},GATEWAY_URL={{GATEWAY_URL}},MONITOR_HOST_IP={{ALLOCATE_HOST}},MONITOR_CHECK_EVENT_KEY={{MONITOR_CHECK_EVENT_KEY}},MONITOR_CHECK_EVENT_TO_MAIL={{MONITOR_CHECK_EVENT_TO_MAIL}},MONITOR_CHECK_EVENT_INTERVAL_MIN={{MONITOR_CHECK_EVENT_INTERVAL_MIN}},MONITOR_ARCHIVE_ENABLE={{MONITOR_ARCHIVE_ENABLE}},MONITOR_ARCHIVE_MYSQL_HOST={{MONITOR_ARCHIVE_MYSQL_HOST}},MONITOR_ARCHIVE_MYSQL_PORT={{MONITOR_ARCHIVE_MYSQL_PORT}},MONITOR_ARCHIVE_MYSQL_USER={{MONITOR_ARCHIVE_MYSQL_USER}},MONITOR_ARCHIVE_MYSQL_PWD={{MONITOR_ARCHIVE_MYSQL_PWD}},MONITOR_LOG_LEVEL={{MONITOR_LOG_LEVEL}},JWT_SIGNING_KEY={{JWT_SIGNING_KEY}},ALARM_FIRING_CALLBACK={{MONITOR_ALARM_FIRING_CALLBACK}},ALARM_RECOVER_CALLBACK={{MONITOR_ALARM_RECOVER_CALLBACK}}"/>
<mysql schema="monitor" initFileName="init.sql" upgradeFileName="upgrade.sql"/>
<s3 bucketName="wecube-agent"/>
</resourceDependencies>
Expand Down Expand Up @@ -541,7 +541,8 @@
<inputParameters>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">guid</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">host_ip</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">process_tag</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">process_name</parameter>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">process_tag</parameter>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">display_name</parameter>
</inputParameters>
<outputParameters>
Expand All @@ -554,7 +555,8 @@
<inputParameters>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">guid</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">host_ip</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">process_tag</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">process_name</parameter>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">process_tag</parameter>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">display_name</parameter>
</inputParameters>
<outputParameters>
Expand All @@ -564,5 +566,35 @@
</outputParameters>
</interface>
</plugin>
<plugin name="log_monitor" targetPackage="" targetEntity="" registerName="" targetEntityFilterRule="">
<interface action="add" path="/wecube-monitor/api/v1/agent/export/log_monitor/add" filterRule="">
<inputParameters>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">guid</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">host_ip</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">path</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">keyword</parameter>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">priority</parameter>
</inputParameters>
<outputParameters>
<parameter datatype="string" sensitiveData="N" mappingType="context">errorCode</parameter>
<parameter datatype="string" sensitiveData="N" mappingType="context">errorMessage</parameter>
<parameter datatype="string" sensitiveData="N" mappingType="context">guid</parameter>
</outputParameters>
</interface>
<interface action="delete" path="/wecube-monitor/api/v1/agent/export/log_monitor/delete" filterRule="">
<inputParameters>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">guid</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">host_ip</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">path</parameter>
<parameter datatype="string" required="Y" sensitiveData="N" mappingType="entity" mappingEntityExpression="">keyword</parameter>
<parameter datatype="string" required="N" sensitiveData="N" mappingType="entity" mappingEntityExpression="">priority</parameter>
</inputParameters>
<outputParameters>
<parameter datatype="string" sensitiveData="N" mappingType="context">errorCode</parameter>
<parameter datatype="string" sensitiveData="N" mappingType="context">errorMessage</parameter>
<parameter datatype="string" sensitiveData="N" mappingType="context">guid</parameter>
</outputParameters>
</interface>
</plugin>
</plugins>
</package>
2 changes: 1 addition & 1 deletion monitor-agent/node_exporter/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.7.1.16
1.9.0
75 changes: 54 additions & 21 deletions monitor-agent/node_exporter/collector/process_monitor_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ func (c *processMonitorCollector) Update(ch chan<- prometheus.Metric) error {
for _,v := range ProcessCacheObj.get() {
ch <- prometheus.MustNewConstMetric(c.processMonitor,
prometheus.GaugeValue,
v.Value, v.Name, v.Command)
v.Value, v.DisplayName, v.Command)
ch <- prometheus.MustNewConstMetric(c.processCpuMonitor,
prometheus.GaugeValue,
v.CpuUsedPercent, v.Name, v.Command)
v.CpuUsedPercent, v.DisplayName, v.Command)
ch <- prometheus.MustNewConstMetric(c.processMemMonitor,
prometheus.GaugeValue,
v.MemUsedByte, v.Name, v.Command)
v.MemUsedByte, v.DisplayName, v.Command)
}
return nil
}
Expand Down Expand Up @@ -69,6 +69,7 @@ func NewProcessMonitorCollector() (Collector, error) {

type processMonitorObj struct {
Name string
DisplayName string
Value float64
Command string
CpuUsedPercent float64
Expand Down Expand Up @@ -113,29 +114,41 @@ func (c *processCache) start() {
c.Lock.Lock()
processUsedList := getProcessUsedResource()
if len(processUsedList) > 0 {
for _,v := range c.ProcessMonitor {
tmpName := v.Name
//tmpTag := ""
for _,tmpProcessMonitorObj := range c.ProcessMonitor {
tmpNameList := strings.Split(tmpProcessMonitorObj.Name, "^")
tmpName := tmpNameList[0]
tmpTag := ""
if len(tmpNameList) > 1 {
tmpTag = tmpNameList[1]
}
nameSplit := strings.Split(tmpName, ",")
if tmpTag != "" {
tmpProcessMonitorObj.DisplayName = fmt.Sprintf("%s(%s)", tmpName, tmpTag)
}else{
tmpProcessMonitorObj.DisplayName = tmpName
}
var tmpCount float64 = 0
//if strings.Contains(v.Name, "(") {
// tmpNameList := strings.Split(v.Name, "(")
// tmpName = tmpNameList[0]
// tmpTag = strings.Replace(tmpNameList[1], ")", "", -1)
//}
for _,vv := range processUsedList {
//if vv.Name == tmpName && strings.Contains(vv.Cmd, tmpTag) {
if strings.Contains(vv.Name, tmpName) || strings.Contains(vv.Cmd, tmpName) {
nameMatch := ""
for _,nameSplitObj := range nameSplit {
if vv.Name == strings.ToLower(nameSplitObj) {
nameMatch = nameSplitObj
break
}
}
if nameMatch != "" && strings.Contains(vv.Cmd, tmpTag) {
tmpProcessMonitorObj.DisplayName = fmt.Sprintf("%s(%s)", nameMatch, tmpTag)
tmpCount = tmpCount + 1
if len(vv.Cmd) > 100 {
v.Command = vv.Cmd[:100]
tmpProcessMonitorObj.Command = vv.Cmd[:100]
}else{
v.Command = vv.Cmd
tmpProcessMonitorObj.Command = vv.Cmd
}
v.CpuUsedPercent = vv.Cpu
v.MemUsedByte = vv.Mem
tmpProcessMonitorObj.CpuUsedPercent = vv.Cpu
tmpProcessMonitorObj.MemUsedByte = vv.Mem
}
}
v.Value = tmpCount
tmpProcessMonitorObj.Value = tmpCount
}
}
c.Lock.Unlock()
Expand Down Expand Up @@ -209,8 +222,22 @@ func (c *processCache) checkNum(names []string) []int {
var result []int
for _,v := range names {
count := 0
tmpNameList := strings.Split(v, "^")
tmpName := tmpNameList[0]
tmpTag := ""
if len(tmpNameList) > 1 {
tmpTag = tmpNameList[1]
}
nameSplit := strings.Split(tmpName, ",")
for _,vv := range processUseList {
if strings.Contains(vv.Name, v) || strings.Contains(vv.Cmd, v) {
nameMatch := ""
for _,nameSplitObj := range nameSplit {
if vv.Name == strings.ToLower(nameSplitObj) {
nameMatch = nameSplitObj
break
}
}
if nameMatch != "" && strings.Contains(vv.Cmd, tmpTag) {
count = count + 1
}
}
Expand Down Expand Up @@ -254,7 +281,13 @@ func ProcessMonitorHttpHandle(w http.ResponseWriter, r *http.Request) {
for i,v := range checkNumResult {
if v != 1 {
w.WriteHeader(http.StatusBadRequest)
w.Write([]byte(fmt.Sprintf("Process %s num = %d", param.Process[i], v)))
tmpProcessName := param.Process[i]
if strings.HasSuffix(tmpProcessName, "^") {
tmpProcessName = tmpProcessName[:len(tmpProcessName)-1]
}else{
tmpProcessName = strings.Replace(tmpProcessName, "^", "(", -1) + ")"
}
w.Write([]byte(fmt.Sprintf("Process %s num = %d", tmpProcessName, v)))
illegalFlag = true
break
}
Expand Down Expand Up @@ -292,7 +325,7 @@ func getProcessUsedResource() []processUsedResource {
tmpProcessObj.Pid = tmpPid
}
}else if tmpIndex == 2 {
tmpProcessObj.Name = vv
tmpProcessObj.Name = strings.ToLower(vv)
}else if tmpIndex == 3 {
tmpCpu,_ := strconv.ParseFloat(vv, 64)
tmpProcessObj.Cpu = tmpCpu
Expand Down
1 change: 1 addition & 0 deletions monitor-agent/ping_exporter/funcs/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ func DebugLog(msg string, v ...interface{}){
}

// PingResultObj groups an IP address with an integer up/down code and a
// floating-point elapsed time.
// NOTE(review): presumably one instance describes the outcome of a single
// ping probe against one target — confirm against the code that fills it.
type PingResultObj struct {
// Ip is the address this result belongs to (presumably the probed target — TODO confirm).
Ip string
// UpDown is an integer status code; the meaning of each value is not
// defined here — verify against the producer of this struct.
UpDown int
// UseTime is the elapsed time of the probe; the unit is not stated here
// (presumably milliseconds — TODO confirm).
UseTime float64
}
9 changes: 8 additions & 1 deletion monitor-agent/ping_exporter/funcs/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,14 @@ func InitSourceList() {
log.Printf("read file %s error: %v \n", Config().Source.File.Path, err)
}else{
for _,v := range strings.Split(string(ips), "\n") {
sourceMap[strings.TrimSpace(v)] = weight
tmpMessage := strings.TrimSpace(v)
if strings.Contains(tmpMessage, "^") {
tmpSplit := strings.Split(tmpMessage, "^")
sourceMap[tmpSplit[0]] = weight
sourceRemoteData = append(sourceRemoteData, &PingExportSourceObj{Ip:tmpSplit[0], Guid:tmpSplit[1]})
}else {
sourceMap[tmpMessage] = weight
}
}
}
}
Expand Down
24 changes: 22 additions & 2 deletions monitor-agent/ping_exporter/http_check/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,17 @@ func httpCheckTask() {
clearHttpCheckResult()
startTime := time.Now()
httpCheckList := funcs.GetHttpCheckList()
http.DefaultClient.CloseIdleConnections()
wg := sync.WaitGroup{}
//var successCounter int
for _,v := range httpCheckList {
wg.Add(1)
go func(method string,url string) {
defer wg.Done()
b := doHttpCheck(method, url)
//b := doHttpCheck(method, url)
b := doHttpCheckNew(method, url)
writeHttpCheckResult(method, url, b)
funcs.DebugLog("http check %s:%s result %d ", method, url, b)
wg.Done()
}(v.Method,v.Url)
}
wg.Wait()
Expand Down Expand Up @@ -68,6 +70,24 @@ func doHttpCheck(method,url string) int {
return resp.StatusCode
}

// httpCheckTimeout bounds a single HTTP probe issued by doHttpCheckNew,
// covering connection, redirects and reading the response headers. Without a
// bound, one unresponsive endpoint would block its goroutine (and anything
// waiting on it) forever, because http.DefaultClient has no timeout.
var httpCheckTimeout = 10 * time.Second

// doHttpCheckNew probes url once and reports the HTTP status code.
//
// When method is exactly "post" an empty JSON POST is sent; any other value
// results in a GET. If the request fails (connection error, timeout, invalid
// URL, ...) the error is logged and the sentinel value 2 is returned instead
// of a status code.
func doHttpCheckNew(method, url string) int {
	// A nil Transport makes the client use http.DefaultTransport, so idle
	// connections are still shared with (and closed via) http.DefaultClient.
	client := &http.Client{Timeout: httpCheckTimeout}

	var (
		resp *http.Response
		err  error
	)
	if method == "post" {
		resp, err = client.Post(url, "application/json", strings.NewReader(""))
	} else {
		resp, err = client.Get(url)
	}
	if err != nil {
		log.Printf("do http check -> method:%s url:%s response error: %v \n", method, url, err)
		return 2
	}
	// Only the status code matters; release the connection right away.
	if resp.Body != nil {
		resp.Body.Close()
	}
	return resp.StatusCode
}

func writeHttpCheckResult(method,url string,statusCode int) {
resultLock.Lock()
httpCheckResultList = append(httpCheckResultList, &funcs.HttpCheckObj{Method:method, Url:url, StatusCode:statusCode})
Expand Down
50 changes: 27 additions & 23 deletions monitor-agent/ping_exporter/icmpping/icmp.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,14 @@ func InitIcmpBytes() {
icmpBytes = buffer.Bytes()
}

func StartPing(distIp string, timeout int) (int,float64) {
func StartPing(distIp string, timeout int) (int,float64,bool) {
var raddr net.IPAddr = net.IPAddr{IP: net.ParseIP(distIp)}
isConfused := false
//如果你要使用网络层的其他协议还可以设置成 ip:ospf、ip:arp 等
conn, err := net.DialIP("ip4:icmp", &localAddress, &raddr)
if err != nil {
fmt.Println(err.Error())
return 3,0
return 3,0,isConfused
}
defer conn.Close()
re := 0
Expand All @@ -83,33 +84,36 @@ func StartPing(distIp string, timeout int) (int,float64) {
useTime := float64(time.Now().Sub(startTime).Nanoseconds()) / 1e6
if re >= 3 { // 发4个ICMP包,如果有3个回复成功则算ping通
addSuccessIp(distIp)
return 0,useTime/float64(re)
return 0,useTime/float64(re),isConfused
}else{
isConfused = true
if tq == 4 {
addSuccessIp(distIp)
return 0,useTime
//addSuccessIp(distIp)
return 0,useTime,isConfused
}
if re == 1 && tq == 3 {
addSuccessIp(distIp)
return 0,useTime
//addSuccessIp(distIp)
return 0,useTime,isConfused
}
if re == 2 && tq == 2{ // 如果有2个回复成功和2个太快回复(下面把这当做了一种异常,有时候主机不通也会出现这种情况),也算主机是通的
addSuccessIp(distIp)
return 0,useTime/2
}
if useTime < 6100 { // 如果4个包不是全部2秒超时,则算异常需要重试
funcs.DebugLog("%s ping retry,%.3f ms, renum : %d ## ", distIp, useTime, re)
t := GetRetryMap(distIp, re)
if t>=4{ // 如果这几次检测中有总数超过4个成功的包返回,也算是成功,经测试在网络流量高的时候会有大概5%-10%的测试IP会只返回2个响应成功的包
addSuccessIp(distIp)
return 0,useTime/float64(re)
}
addRetryIp(distIp)
return 2,0
}else {
funcs.DebugLog("%s ping fail,%.3f ms, renum : %d ## ", distIp, useTime, re)
return 1,useTime
if re == 2 && tq == 2 { // 如果有2个回复成功和2个太快回复(下面把这当做了一种异常,有时候主机不通也会出现这种情况),也算主机是通的
//addSuccessIp(distIp)
return 0,useTime/2,isConfused
}
funcs.DebugLog("%s ping fail,%.3f ms, renum : %d ## ", distIp, useTime, re)
return 1,useTime,false
//if useTime < 6100 { // 如果4个包不是全部2秒超时,则算异常需要重试
// funcs.DebugLog("%s ping retry,%.3f ms, renum : %d ## ", distIp, useTime, re)
// t := GetRetryMap(distIp, re)
// if t>=4{ // 如果这几次检测中有总数超过4个成功的包返回,也算是成功,经测试在网络流量高的时候会有大概5%-10%的测试IP会只返回2个响应成功的包
// addSuccessIp(distIp)
// return 0,useTime/float64(re)
// }
// addRetryIp(distIp)
// return 2,0
//}else {
// funcs.DebugLog("%s ping fail,%.3f ms, renum : %d ## ", distIp, useTime, re)
// return 1,useTime
//}
}
}

Expand Down

0 comments on commit 6633aec

Please sign in to comment.