In [194]:
import pandas as pd
import numpy as np
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
#%matplotlib notebook
from matplotlib import font_manager, rc
import scipy.cluster.hierarchy as spc
# Korean-capable font for matplotlib labels. The path is machine specific, so
# the notebook falls back to matplotlib's default font instead of failing in
# its very first cell when the file is not installed.
import os
FONT_PATH = "/usr/share/fonts/nanum/nanumGothic.ttf"
if os.path.exists(FONT_PATH):
    font_name = font_manager.FontProperties(fname=FONT_PATH).get_name()
    plt.rc('font', family=font_name)
else:
    font_name = None
    print('Font file not found: {} (Korean labels may not render in matplotlib)'.format(FONT_PATH))
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation and chained-assignment warnings.
warnings.filterwarnings(action='ignore')
# Show floats with two decimals and do not truncate long text columns.
pd.options.display.float_format = '{:.2f}'.format
pd.set_option('display.max_colwidth', 1000)

In [195]:
from IPython.utils.path import get_ipython_dir
# Print the IPython directory reported by get_ipython_dir().
print(get_ipython_dir())

/root/.ipython


In [196]:
import os

import plotly.plotly as py
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# SECURITY: the Plotly username/API key used to be written in this cell. A key
# that has been stored in a notebook should be treated as leaked and rotated.
# Credentials are now read from the environment; the sign-in is skipped when
# they are absent because the charts below are rendered with the offline
# `iplot`, which is imported from plotly.offline.
_plotly_username = os.environ.get('PLOTLY_USERNAME')
_plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if _plotly_username and _plotly_api_key:
    py.sign_in(_plotly_username, _plotly_api_key)

init_notebook_mode(connected=True)

In [197]:
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several DataFrames next to each other in a single output.

    Each argument is converted with ``to_html()``; the opening ``<table`` tag
    of every rendered frame gets ``style="display:inline"`` and the combined
    markup is passed to ``display_html(..., raw=True)``.

    Parameters
    ----------
    *args
        Objects exposing ``to_html()`` (pandas DataFrames).
    """
    html_str = ''.join(df.to_html() for df in args)
    # Patch only the opening tag. Replacing the bare word 'table' would also
    # rewrite the closing '</table>' tags and any cell text that contains
    # "table" (for example "tablespace").
    styled = html_str.replace('<table', '<table style="display:inline"')
    display_html(styled, raw=True)

In [198]:
import os
# os.listdir() returns entries in arbitrary order; sorting makes the load order
# (and therefore the row order after concatenation) reproducible.
file_list = sorted(os.listdir('../busan/DATA'))

## 로드 데이터

In [199]:
data_list = []
for i in file_list:
    print(i)
    # Fields are separated by a literal '||'. The separator is a regular
    # expression, so it is written as a raw string (the non-raw "\|\|" relies
    # on invalid escape sequences) and the python parser engine, which regex
    # separators require, is requested explicitly.
    # NOTE(review): `error_bad_lines` is deprecated in newer pandas releases in
    # favour of `on_bad_lines`; kept here to match the pandas version this
    # notebook was written for.
    data = pd.read_csv(
        '../busan/DATA/'+i,
        encoding='UTF-8',
        sep=r"\|\|",
        header=None,
        engine='python',
        error_bad_lines=False,
    )
    data_list.append(data)


201704.lst
201707.lst
201710.lst
201801.lst
201804.lst
201807.lst
201810.lst
201901.lst


In [200]:
# Stack the per-file frames. Each file was read with its own 0..n-1 index, so
# the labels are renumbered to keep the combined index unique; duplicate labels
# make index-aligned column assignments ambiguous.
data = pd.concat(data_list, axis=0, ignore_index=True)

## 1. 알람 데이터 로드 및 전처리

In [201]:
# Name the 15 positional columns; the next cell replaces these names with the
# lower-case ones used throughout the analysis.
data.columns = [
    "ALARM_LEVEL",
    "ALARM_NAME",
    "CTIME",
    "DTIME",
    "HOSTNAME",
    "PATH1",
    "PATH2",
    "PATH3",
    "PATH4",
    "PATH5",
    "PATH6",
    "PATH7",
    "NAME",
    "CONDITIONLOGTEXT",
    "CURRENTALARMSTATUS",
]

In [202]:
# Rename the columns to the lower-case names used by the analysis below.
data.columns = [
    'degree',
    'alarm_name',
    'time',
    'clear_time',
    'hostname',
    'group1',
    'group2',
    'group3',
    'group4',
    'group5',
    'group6',
    'group7',
    'resource_name',
    'condition',
    'status',
]

In [203]:
# Derive additional columns.
data.degree = data.degree.astype(str)
data['time'] = pd.to_datetime(data['time'], format='%Y-%m-%d %H:%M:%S')
data['clear_time'] = pd.to_datetime(data['clear_time'], format='%Y-%m-%d %H:%M:%S')
# Alarm duration in seconds. `.dt.seconds` only returns the seconds component
# (0..86399) and silently drops whole days, so `.dt.total_seconds()` is used to
# get the full duration; rows without a clear time get 0.
data['last_time'] = (data['clear_time'] - data['time']).dt.total_seconds()
data['last_time'] = data['last_time'].fillna(0)
# Calendar breakdowns of the alarm time, used as grouping keys by the charts.
data['time_month'] = data['time'].dt.month
data['time_day'] = data['time'].dt.day
data['time_hour'] = data['time'].dt.hour
data['time_date']= data['time'].dt.strftime('%Y-%m-%d')
data['time_date_hour']= data['time'].dt.strftime('%Y-%m-%d %H')
data['time_weekday'] = data['time'].dt.weekday
# NOTE(review): `.dt.week` is deprecated in newer pandas releases in favour of
# `.dt.isocalendar().week`; kept to match the pandas version in use.
data['time_week'] = data['time'].dt.week

In [204]:
# Join the seven hierarchy levels into one '>'-separated path; a missing level
# contributes an empty string.
data['group'] = (
    data['group1'].fillna('')
    + '>' + data['group2'].fillna('')
    + '>' + data['group3'].fillna('')
    + '>' + data['group4'].fillna('')
    + '>' + data['group5'].fillna('')
    + '>' + data['group6'].fillna('')
    + '>' + data['group7'].fillna('')
)

In [205]:
# Remove runs of consecutive '>' separators left behind by empty levels.
# NOTE(review): the first replace deletes every non-overlapping '>>' pair, which
# leaves each run of separators with zero or one '>'. The '>>>' and '>>>>'
# replacements below therefore can no longer match anything.
# NOTE(review): an empty level between two non-empty ones ('A>>C') is collapsed
# to 'AC', and an odd-length trailing run leaves one trailing '>' — confirm
# that this is the intended result.
data['group'] = data['group'].str.replace('>>','')
data['group'] = data['group'].str.replace('>>>','')
data['group'] = data['group'].str.replace('>>>>','')

In [206]:
# Strip the '부산은행' element from the group path: first where it sits between
# two separators, then where it is followed by a separator.
data['group'] = (
    data['group']
    .str.replace('>부산은행>', '')
    .str.replace('부산은행>', '')
)

In [207]:
# Split the cleaned path once and write the first six levels back. The frame is
# reindexed to exactly columns 0..5 so that data with fewer than six levels
# yields missing values instead of a KeyError, and the split over the whole
# column is no longer repeated six times.
data[['group1', 'group2', 'group3', 'group4', 'group5', 'group6']] = (
    data['group'].str.split('>', expand=True).reindex(columns=range(6))
)

In [208]:
# Keep only rows whose severity code is '1', '2' or '3'.
data = data[data['degree'].isin(['1', '2', '3'])]

In [209]:
# Replace the severity codes with their Korean labels.
data['degree'] = data['degree'].replace({
    "1": "주의",
    "2": "경고",
    "3": "심각",
})

## 2. 알람 발생 현황 분석

#### (1) 전체 알람 데이터 현황

In [210]:
# Split the alarms into two periods around 2018-04-01. `time_date` holds
# 'YYYY-MM-DD' strings, so string comparison follows date order.
# `.copy()` makes both frames independent of `data`: later cells assign columns
# on them, which is a chained assignment when done on a plain filtered slice.
data_after = data[(data.time_date > '2018-04-01')].copy()
data_before = data[(data.time_date <= '2018-04-01')].copy()
data_after_label = '2018년 4월 2일 ~ 2019년 4월 18일'
data_before_label = '2017년 4월 1일 ~ 2018년 4월 1일'
display(
    '총 데이터: {}건'.format(len(data)),
    '{}: {}건'.format(data_after_label, len(data_after)),
    '{}: {}건'.format(data_before_label, len(data_before)),
)

'총 데이터: 914711건'

'2018년 4월 2일 ~ 2019년 4월 18일: 450717건'

'2017년 4월 1일 ~ 2018년 4월 1일: 463994건'

In [211]:
# Replace missing second-level group names with '' in both period frames so
# string matching on `group2` does not produce missing values.
data_before['group2'] = data_before['group2'].where(data_before['group2'].notna(), '')
data_after['group2'] = data_after['group2'].where(data_after['group2'].notna(), '')

In [212]:
def result_degree_group(degree,group):
    """Show the ten most frequent alarm names per group for one severity.

    For every pattern in `group`, the rows of `data_before` and `data_after`
    with the given `degree` whose `group2` matches the pattern are counted per
    `alarm_name`; the ten largest counts of each period are displayed under
    that period's label.

    Parameters
    ----------
    degree : str
        Value compared against the `degree` column.
    group : iterable of str
        Patterns passed to ``Series.str.contains`` on `group2`.
    """
    def top_alarm_names(frame, pattern):
        # Count rows per alarm name among rows whose group2 matches the pattern.
        matched = frame[frame['group2'].str.contains(pattern)]
        per_alarm = matched.groupby(['alarm_name'])['time'].count().reset_index()
        return per_alarm.sort_values('time', ascending=False)[:10]

    periods = (
        (data_before_label, data_before[data_before['degree'] == degree]),
        (data_after_label, data_after[data_after['degree'] == degree]),
    )
    print('>>>>>>{}'.format(degree))
    for pattern in group:
        print('>>>>>>>>>'+pattern)
        for period_label, period_frame in periods:
            print(period_label)
            display(top_alarm_names(period_frame, pattern))

In [None]:
# The patterns are regular expressions, so the escaped one is written as a raw
# string; '\(' and '\/' are invalid escape sequences in a normal string literal.
result_degree_group('심각',['외환','통합그룹웨어','신인터넷뱅킹','기타',r'\(구\)인터넷뱅킹\/SMS'])

In [None]:
# The patterns are regular expressions, so the escaped one is written as a raw
# string; '\/' is an invalid escape sequence in a normal string literal.
result_degree_group('경고',['기타','외환','단위업무',r'CRM\/COLD','로이터'])

In [None]:
# The patterns are regular expressions, so the escaped one is written as a raw
# string; '\/' is an invalid escape sequence in a normal string literal.
result_degree_group('주의',[r'CRM\/COLD','외환','단위업무','계정계','통합그룹웨어'])

In [213]:
def df_degree(df,title):
    """Draw a donut chart and a summary table of alarm counts per severity.

    Parameters
    ----------
    df : DataFrame
        Alarm rows with at least the `degree` and `time` columns.
    title : str
        Figure title.

    Notes
    -----
    A severity that does not occur in `df` is reported with a count of 0
    rather than a missing value. Slice colours are bound to the severity
    label, using the same colours as the bar charts in this notebook.
    """
    levels = ['주의','경고','심각']
    level_colors = {'주의': '#FFC90E', '경고': '#FF7F0E', '심각': '#D62728'}

    # Count rows per severity in a fixed order; absent severities become 0.
    degree_total = (
        df.groupby(['degree'])['time'].count()
        .reindex(levels, fill_value=0)
        .reset_index()
    )
    degree_total.columns = ['degree','count']
    total = degree_total['count'].sum()
    # Share of each severity in percent (0 when there are no rows at all).
    degree_total['per'] = degree_total['count'] / total * 100 if total else 0.0

    trace1 = {
      "name" : "",
      "domain": {
        "x": [0, 0.5],
        "y": [0.25, 1]
      },
      "labels": list(levels),
      "type": "pie",
      "values": list(degree_total['count']),
      "hole": .4,
      # Explicit per-label colours: without them the colours follow the order
      # in which the slices are drawn, not the severity.
      "marker": {"colors": [level_colors[level] for level in levels]},
    }
    trace2 = {
      "cells": {
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {
          "color": "rgb(48, 55, 66)",
          "family": "Arial",
          "size": 19
        },
        "height": 33,
        "line": {
          "color": "rgb(171, 191, 212)",
          "width": 1
        },
        "values": [
            list(degree_total['degree']),
            list(degree_total['count'].astype(str)+" 건"),
            list(degree_total['per'].round().astype(str)+" %"),
        ]
      },
      "domain": {
        "x": [0.5, 1],
        "y": [0.25, 0.75]
      },
      "header": {
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {
          "color": "rgb(255, 255, 255)",
          "size": 24
        },
        "height": 37,
        "line": {"color": "rgb(140, 177, 214)"},
        "values": ['등급','건수','비율']
      },
      "type": "table"
    }
    traces = Data([trace1, trace2])
    layout = {
      "autosize": True,
      "colorway": [level_colors[level] for level in levels],
      "font": {"size": 20},
      "legend": {
        "x": 0.3961578483357668,
        "y": 1.1245210727969348
      },
      "title": {"text": title},
      "xaxis": {
        "autorange": True,
        "range": [-1, 6]
      },
      "yaxis": {
        "autorange": True,
        "range": [-1, 4]
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)

In [None]:
# Severity pie chart and summary table for the whole `data` frame.
df_degree(data,'총 발생 알람')

In [214]:
# Earlier period: rows whose condition text contains '이벤트 탐지' go to the log
# frame, rows whose text does not contain it go to the performance frame.
data_log_before = data_before[data_before['condition'].str.contains('이벤트 탐지').eq(True)]
data_perf_before = data_before[data_before['condition'].str.contains('이벤트 탐지').eq(False)]
display(
    '2017년 4월 1일 ~ 2018년 4월 1일',
    '성능 데이터: {}건'.format(len(data_perf_before)),
    '로그 데이터: {}건'.format(len(data_log_before)),
)

'2017년 4월 1일 ~ 2018년 4월 1일'

'성능 데이터: 443141건'

'로그 데이터: 20853건'

In [215]:
# Later period: rows whose condition text contains '이벤트 탐지' go to the log
# frame, rows whose text does not contain it go to the performance frame.
data_log = data_after[data_after['condition'].str.contains('이벤트 탐지').eq(True)]
data_perf = data_after[data_after['condition'].str.contains('이벤트 탐지').eq(False)]
display(
    '2018년 4월 2일 ~ 2019년 4월 18일',
    '성능 데이터: {}건'.format(len(data_perf)),
    '로그 데이터: {}건'.format(len(data_log)),
)

'2018년 4월 2일 ~ 2019년 4월 18일'

'성능 데이터: 395021건'

'로그 데이터: 55696건'

#### (2) 일반 알람 등급별 발생 현황

In [None]:
# Severity breakdown of performance alarms, then of log alarms; within each
# kind the later period is drawn before the earlier one.
for title_template, later_frame, earlier_frame in (
    ('성능 알람 건수({})', data_perf, data_perf_before),
    ('로그 알람 건수({})', data_log, data_log_before),
):
    df_degree(later_frame, title_template.format(data_after_label))
    df_degree(earlier_frame, title_template.format(data_before_label))

In [275]:
def group_chart(df,group,num,degree,title):
    """Plot the top `num` values of `group` for one severity with a table.

    Rows of `df` are counted per (`group`, severity). The `num` groups with the
    highest count for `degree` are drawn as a horizontal bar chart next to a
    table of count and share, and the same rows are displayed as a DataFrame.

    Parameters
    ----------
    df : DataFrame
        Alarm rows with `degree`, `time` and the `group` column.
    group : str
        Column to group by.
    num : int
        Number of top groups to show.
    degree : str
        One of '주의', '경고', '심각'.
    title : str
        Figure title.

    Raises
    ------
    ValueError
        If `degree` is not one of the three known severities.
    """
    levels = ['주의','경고','심각']
    level_colors = {"주의": "#FFC90E", "경고": "#FF7F0E", "심각": "#D62728"}
    if degree not in level_colors:
        raise ValueError("degree must be one of {}, got {!r}".format(levels, degree))
    color = [level_colors[degree]]

    # One row per group, one column per severity. Reindexing to the fixed
    # severity list keeps the column set stable when `df` (for example a single
    # day) contains no alarm of some severity.
    counts = (
        df.groupby([group,'degree'])['time'].count()
        .unstack('degree', fill_value=0)
        .reindex(columns=levels, fill_value=0)
    )
    # Row total over the severity columns only (never over the group key).
    counts['총합'] = counts[levels].sum(axis=1)
    counts['비율'] = counts[degree]/counts[degree].sum(axis=0)*100
    counts.index.name = '그룹'
    counts.columns.name = None
    df = counts.reset_index().sort_values(degree,ascending=False).iloc[:num]

    trace1 = {
      "x": list(df[degree]),
      "y": list(df['그룹']),
      "name": degree,
      "orientation": "h",
      "type": "bar",
      "visible": True,
      "xaxis": "x"
    }
    trace2 = {
      "cells": {
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {
          "color": "rgb(48, 55, 66)",
          "family": "Arial",
          "size": 13
        },
        "height": 25,
        "line": {
          "color": "rgb(171, 191, 212)",
          "width": 1
        },
        "values": [
            list(df['그룹']),
            list(df[degree].astype(str)+" 건"),
            list(df['비율'].round().astype(str)+" %"),
        ]
      },
      "domain": {
        "x": [0.3, 1],
        "y": [0, 1]
      },
      "header": {
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {
          "color": "rgb(255, 255, 255)",
          "size": 15
        },
        "height": 25,
        "line": {"color": "rgb(140, 177, 214)"},
        "values": ['이름','건수','비율']
      },
      "columnwidth": [20,10,10],
      "type": "table"
    }
    traces = Data([trace1,trace2])
    layout = {
      "autosize": True,
      "colorway": color,
      "font": {
        "color": "rgb(34, 43, 59)",
        "size": 13
      },
      "legend": {
        "x": -0.11808242767074219,
        "y": 1.10727969348659,
        "font": {"size": 20}
      },
      "showlegend": True,
      "title": {
        "font": {"size": 20},
        "text": title
      },
      "xaxis": {
        "autorange": True,
        "domain": [0, 0.3],
        "exponentformat": "none",
        "range": [0, 501672.63157894736],
        "type": "linear"
      },
      # NOTE(review): the y-axis domain is written in descending order
      # ([1, 0.2]); confirm it has the intended effect.
      "yaxis": {
        "autorange": "reversed",
        "range": [-0.5, 2.5],
        "domain": [1, 0.2],
        "type": "category",
        "automargin": True
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)
    display(df[['그룹',degree,'총합','비율']])

In [217]:
def group_chart2(df,group,group2,num,degree,title):
    """Plot the top `num` `group` values for one severity with a detail table.

    The bar chart shows the `num` values of `group` with the most rows of the
    given `degree`. The table lists the `num` (`group`, `group2`) pairs with
    the most rows of that severity, with their share of all rows of that
    severity.

    Parameters
    ----------
    df : DataFrame
        Alarm rows with `degree`, `time`, `group` and `group2` columns.
    group, group2 : str
        Primary and secondary grouping columns.
    num : int
        Number of rows shown in the chart and in the table.
    degree : str
        One of '주의', '경고', '심각'.
    title : str
        Figure title.

    Raises
    ------
    ValueError
        If `degree` is not one of the three known severities.
    """
    level_colors = {"주의": "#FFC90E", "경고": "#FF7F0E", "심각": "#D62728"}
    if degree not in level_colors:
        raise ValueError("degree must be one of {}, got {!r}".format(list(level_colors), degree))
    color = [level_colors[degree]]
    source = df

    # Bar chart data: rows of this severity counted per `group`.
    df2 = source.groupby([group,'degree'])['time'].count().reset_index()
    df2 = df2[df2['degree']==degree]
    df2.columns = ['호스트명','등급','건수']
    df2 = df2.sort_values('건수',ascending=False).iloc[:num]

    # Table data: computed from the frame that was passed in. The previous
    # version read the global `data_perf` here and ignored the argument.
    df = source.groupby([group,group2,'degree'])['time'].count().reset_index()
    df = df[df['degree']==degree].copy()
    df['per'] = df['time']/df['time'].sum(axis=0)*100
    df.columns = ['호스트명','알람이름','등급','건수','비율']
    df = df.sort_values('건수',ascending=False).iloc[:num]

    trace1 = {
      "x": list(df2['건수']),
      "y": list(df2['호스트명']),
      "name": degree,
      "orientation": "h",
      "type": "bar",
      "visible": True,
      "xaxis": "x"
    }
    trace2 = {
      "cells": {
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {
          "color": "rgb(48, 55, 66)",
          "family": "Arial",
          "size": 13
        },
        "height": 25,
        "line": {
          "color": "rgb(171, 191, 212)",
          "width": 1
        },
        "values": [
            list(df['호스트명']),
            list(df['알람이름']),
            list(df['건수'].astype(str)+" 건"),
            list(df['비율'].round().astype(str)+" %"),
        ]
      },
      "domain": {
        "x": [0.3, 1],
        "y": [0, 1]
      },
      "header": {
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {
          "color": "rgb(255, 255, 255)",
          "size": 15
        },
        "height": 25,
        "line": {"color": "rgb(140, 177, 214)"},
        "values": ['호스트명','알람이름','건수','비율']
      },
      "columnwidth": [10,20,10,10],
      "type": "table"
    }
    traces = Data([trace1,trace2])
    layout = {
      "autosize": True,
      "colorway": color,
      "font": {
        "color": "rgb(34, 43, 59)",
        "size": 13
      },
      "legend": {
        "x": -0.11808242767074219,
        "y": 1.10727969348659,
        "font": {"size": 20}
      },
      "showlegend": True,
      "title": {
        "font": {"size": 20},
        "text": title
      },
      "xaxis": {
        "autorange": True,
        "domain": [0, 0.3],
        "exponentformat": "none",
        "range": [0, 501672.63157894736],
        "type": "linear"
      },
      "yaxis": {
        "autorange": "reversed",
        "range": [-0.5, 2.5],
        "domain": [1, 0.2],
        "type": "category",
        "automargin": True
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)

In [218]:
def group_chart_total(df,group,num,title):
    """Plot the top `num` values of `group` by total alarm count with a table.

    Parameters
    ----------
    df : DataFrame
        Alarm rows with `time` and the `group` column.
    group : str
        Column to group by.
    num : int
        Number of top groups to show.
    title : str
        Figure title.

    The displayed frame has the columns '그룹', '건수', '총합' and '비율'.
    """
    counts = df.groupby([group])['time'].count().reset_index()
    counts.columns = ['그룹','건수']
    # There is a single count column, so the row total equals the count. It is
    # assigned explicitly instead of summing across the row, which would also
    # run over the grouping key (a numeric key would be added to the total; a
    # text key cannot be summed with numbers).
    counts['총합'] = counts['건수']
    counts['비율'] = counts['건수']/counts['건수'].sum(axis=0)*100
    df = counts.sort_values('건수',ascending=False).iloc[:num]

    trace1 = {
      "x": list(df['건수']),
      "y": list(df['그룹']),
      "name": '총 건수',
      "orientation": "h",
      "type": "bar",
      "visible": True,
      "xaxis": "x"
    }
    trace2 = {
      "cells": {
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {
          "color": "rgb(48, 55, 66)",
          "family": "Arial",
          "size": 13
        },
        "height": 25,
        "line": {
          "color": "rgb(171, 191, 212)",
          "width": 1
        },
        "values": [
            list(df['그룹']),
            list(df['건수'].astype(str)+" 건"),
            list(df['비율'].round().astype(str)+" %"),
        ]
      },
      "domain": {
        "x": [0.3, 1],
        "y": [0, 1]
      },
      "header": {
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {
          "color": "rgb(255, 255, 255)",
          "size": 15
        },
        "height": 25,
        "line": {"color": "rgb(140, 177, 214)"},
        "values": ['이름','건수','비율']
      },
      "columnwidth": [20,10,10],
      "type": "table"
    }
    traces = Data([trace1,trace2])
    layout = {
      "autosize": True,
      "font": {
        "color": "rgb(34, 43, 59)",
        "size": 13
      },
      "legend": {
        "x": -0.11808242767074219,
        "y": 1.10727969348659,
        "font": {"size": 20}
      },
      "showlegend": True,
      "title": {
        "font": {"size": 20},
        "text": title
      },
      "xaxis": {
        "autorange": True,
        "domain": [0, 0.3],
        "exponentformat": "none",
        "range": [0, 501672.63157894736],
        "type": "linear"
      },
      "yaxis": {
        "autorange": "reversed",
        "range": [-0.5, 2.5],
        "domain": [1, 0.2],
        "type": "category",
        "automargin": True
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)
    display(df)

### 연도별 장비 전체 알람 TOP 10

In [None]:
# Top 10 hosts by performance-alarm count: earlier period, then later period.
for perf_frame, period_label in (
    (data_perf_before, data_before_label),
    (data_perf, data_after_label),
):
    group_chart_total(perf_frame, 'hostname', 10, period_label)

### 연도별 장비 등급별 알람 TOP 10

In [None]:
# Top 10 hosts per severity; for each severity the earlier period is drawn
# before the later one.
for level_name in ('주의', '경고', '심각'):
    group_chart(data_perf_before, 'hostname', 10, level_name, data_before_label)
    group_chart(data_perf, 'hostname', 10, level_name, data_after_label)

### 01월 20일 전체 TOP 10 / 02월 05일 전체 TOP 10

In [None]:
# Top 10 alarm names by count on each of the two selected days.
for target_day in ('2019-01-20', '2019-02-05'):
    group_chart_total(
        data_after[data_after.time_date == target_day],
        'alarm_name',
        10,
        target_day,
    )

In [None]:
# Top 10 alarm names by count among the `data_after` rows dated 2019-01-15.
group_chart_total(data_after[data_after.time_date == '2019-01-15'],'alarm_name',10,'2019-01-15')

### 01월 20일 등급별 TOP 10 / 02월 05일 등급별 TOP 10

In [None]:
# Top 10 alarm names per severity on the two selected days; for each severity
# 2019-01-20 is drawn before 2019-02-05.
for level_name in ('주의', '경고', '심각'):
    for target_day in ('2019-01-20', '2019-02-05'):
        group_chart(
            data_after[data_after.time_date == target_day],
            'alarm_name',
            10,
            level_name,
            '{}({})'.format(target_day, level_name),
        )

In [None]:
# Top 10 alarm names per severity on 2019-01-15.
for level_name in ('주의', '경고', '심각'):
    group_chart(
        data_after[data_after.time_date == '2019-01-15'],
        'alarm_name',
        10,
        level_name,
        '2019-01-15({})'.format(level_name),
    )

In [293]:
# Log-type alarms ('이벤트 탐지') dated 2019-01-10, counted per host and
# condition text.
(
    data_after[
        (data_after['time_date'] == '2019-01-10')
        & (data_after['condition'].str.contains('이벤트 탐지'))
    ]
    .groupby(['hostname', 'condition'])['time']
    .count()
    .reset_index()
)

Unnamed: 0,hostname,condition,time
0,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-00060: Deadlock detected. More info in file /db/trace/diag/rdbms/bdalmdb/BDALMDB/trace/BDALMDB_ora_5077.trc.]",1
1,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-00600: 내부 오류 코드, 인수: [13030], [20], [], [], [], [], [], [], [], [], [], []]",1
2,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1652: unable to extend temp segment by 8 in tablespace TS_ALM_D001 ]",2
3,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1683: unable to extend index STATASIS.I0TB_BS_원장계좌_수신 partition P20170430 by 1024 in tablespace TS_ALM_D001 ]",1
4,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1683: unable to extend index STATASIS.I0TB_BS_원장계좌_수신 partition P20170731 by 1024 in tablespace TS_ALM_D001 ]",1
5,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1683: unable to extend index STATASIS.I0TB_BS_원장계좌_수신 partition P20170831 by 1024 in tablespace TS_ALM_D001 ]",1
6,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1683: unable to extend index STATASIS.I0TB_BS_원장계좌_수신 partition P20170930 by 1024 in tablespace TS_ALM_D001 ]",1
7,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1683: unable to extend index STATASIS.I0TB_BS_원장계좌_여신 partition P20170430 by 128 in tablespace TS_ALM_D001 ]",1
8,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1683: unable to extend index STATASIS.I0TB_BS_원장계좌_여신 partition P20170531 by 128 in tablespace TS_ALM_D001 ]",1
9,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-1683: unable to extend index STATASIS.I0TB_BS_원장계좌_여신 partition P20170831 by 1024 in tablespace TS_ALM_D001 ]",1


In [292]:
# Log-type alarms ('이벤트 탐지') dated 2019-02-05, counted per host and
# condition text.
(
    data_after[
        (data_after['time_date'] == '2019-02-05')
        & (data_after['condition'].str.contains('이벤트 탐지'))
    ]
    .groupby(['hostname', 'condition'])['time']
    .count()
    .reset_index()
)

Unnamed: 0,hostname,condition,time
0,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: ORA-20000: ORA-01422: 실제 인출은 요구된 것보다 많은 수의 행을 추출합니다]",2880
1,bdalmdb1,"이벤트 탐지 [심각도: WARN, 내용: Thread 1 cannot allocate new log, sequence 54946]",1
2,datmdb1,"이벤트 탐지 [심각도: INFO, 내용: ERRPT HARDWARE FAULT MSG : 80D3764C 0205042819 U H LVDD PV NO LONGER RELOCATING NEW BAD BLOCKS]",1
3,pbakap1,"이벤트 탐지 [심각도: WARN, 내용: bnk_info backup error Backup psemdb1_ORA C (Full) ]",2
4,pbakap1,"이벤트 탐지 [심각도: WARN, 내용: bnk_info duplicate_job error Duplication of Job 693670: Backup paisdb1_ORA A (Full) ]",1
5,pbakap1,"이벤트 탐지 [심각도: WARN, 내용: bnk_info duplicate_job error Duplication of Job 693697: Backup pbprdb2_ORA A (Full) ]",1
6,pbakap1,"이벤트 탐지 [심각도: WARN, 내용: bnk_info duplicate_job error Duplication of Job 693703: Backup palmap1_ORA_00 D (Full) ]",1
7,pbakap1,"이벤트 탐지 [심각도: WARN, 내용: bnk_info duplicate_job error Duplication of Job 693725: Backup tj_svr_ORA C (Full) ]",1
8,pbakap1,"이벤트 탐지 [심각도: WARN, 내용: bnk_info duplicate_job error Duplication of Job 693865: Backup psttdb1_ORA A (Full) ]",1
9,pbakap2,"이벤트 탐지 [심각도: WARN, 이벤트 소스: [PLOG] SFM, 내용: Feb 5 00:05:01 2019 Target path state change.Pl... Feb 5 00:05:01 2019 Target path state change.Pl... Feb 5 00:05:00 2019 Target path state change.Pl... Feb 5 00:05:00 2019 Target path state change.Pl...",1


### 01월 20일 장비별 전체 등급 TOP 10 / 02월 05일 장비별 전체 등급 TOP 10

In [None]:
# Top 10 hosts by alarm count on each of the two selected days.
for target_day in ('2019-01-20', '2019-02-05'):
    group_chart_total(
        data_after[data_after.time_date == target_day],
        'hostname',
        10,
        target_day,
    )

### 01월 20일 장비별 등급별 TOP 10 / 02월 05일 장비별 등급별 TOP 10

In [None]:
# Top 10 hosts per severity on the two selected days; for each severity
# 2019-01-20 is drawn before 2019-02-05.
for level_name in ('주의', '경고', '심각'):
    for target_day in ('2019-01-20', '2019-02-05'):
        group_chart(
            data_after[data_after.time_date == target_day],
            'hostname',
            10,
            level_name,
            '{}({})'.format(target_day, level_name),
        )

In [None]:
# Alarm count and share per second-level group for the later period; the 20
# largest groups are written to an Excel file named after the period label.
test = (
    data_after.groupby(['group2'])['time']
    .count()
    .reset_index()
    .sort_values('time', ascending=False)
)
test['비율'] = test['time'].div(test['time'].sum()).mul(100)
test.iloc[:20].to_excel('{}.xlsx'.format(data_after_label))

In [None]:
# Alarm count and share per second-level group for the earlier period; the 20
# largest groups are written to an Excel file named after the period label.
test = (
    data_before.groupby(['group2'])['time']
    .count()
    .reset_index()
    .sort_values('time', ascending=False)
)
test['비율'] = test['time'].div(test['time'].sum()).mul(100)
test.iloc[:20].to_excel('{}.xlsx'.format(data_before_label))

In [None]:
# Top 10 second-level groups per severity over the whole `data` frame.
for level_name, chart_title in (
    ('주의', '주의_전체'),
    ('경고', '경고_전체'),
    ('심각', '심각_전체'),
):
    group_chart(data, 'group2', 10, level_name, chart_title)

In [None]:
# Top 10 alarm names by performance-alarm count: earlier period, then later.
for perf_frame, period_label in (
    (data_perf_before, data_before_label),
    (data_perf, data_after_label),
):
    group_chart_total(perf_frame, 'alarm_name', 10, period_label)

In [None]:
# Top 10 second-level groups per severity; for each severity the later period
# is drawn before the earlier one.
for level_name, title_template in (
    ('주의', '주의({})'),
    ('경고', '경고({})'),
    ('심각', '심각({})'),
):
    group_chart(data_perf, 'group2', 10, level_name, title_template.format(data_after_label))
    group_chart(data_perf_before, 'group2', 10, level_name, title_template.format(data_before_label))

In [None]:
# Top 10 alarm names per severity; for each severity the later period is drawn
# before the earlier one.
for level_name, title_template in (
    ('주의', '주의({})'),
    ('경고', '경고({})'),
    ('심각', '심각({})'),
):
    group_chart(data_perf, 'alarm_name', 10, level_name, title_template.format(data_after_label))
    group_chart(data_perf_before, 'alarm_name', 10, level_name, title_template.format(data_before_label))

In [None]:
# Top 10 hosts and top 10 host/alarm pairs per severity for the later period.
# group_chart2 requires a `title` argument; the calls previously omitted it and
# raised TypeError. The title follows the '<severity>(<period>)' pattern used
# by the other per-severity charts.
for level_name in ('주의', '경고', '심각'):
    group_chart2(
        data_perf,
        'hostname',
        'alarm_name',
        10,
        level_name,
        '{}({})'.format(level_name, data_after_label),
    )

In [None]:
def _rank_bar_trace(df, level, color):
    """Build the vertical bar trace for one severity column of `df`."""
    return {
      "x": list(df['호스트명']),
      "y": list(df[level]),
      "marker": {
        "color": color,
        "line": {"width": 1}
      },
      "name": level,
      "opacity": 0.6,
      "orientation": "v",
      "type": "bar",
    }


def _rank_table_trace(table, x_domain):
    """Build the ranking table trace for `table`, placed on `x_domain`."""
    return {
      "cells": {
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {
          "color": "rgb(48, 55, 66)",
          "family": "Arial",
          "size": 13
        },
        "height": 25,
        "line": {
          "color": "rgb(171, 191, 212)",
          "width": 1
        },
        "values": [
          list(table.index),
          list(table['호스트명']),
          list(table['주의']),
          list(table['경고']),
          list(table['심각']),
          list(table['총합']),
        ]
      },
      "domain": {
        "x": x_domain,
        "y": [0, 1]
      },
      "header": {
        "values": ["No", "HOSTNAME", "주의", "경고", "심각", "총합계"],
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {
          "color": "rgb(255, 255, 255)",
          "size": 15
        }
      },
      "columnwidth": [5,15,10,10,10],
      "type": "table"
    }


def _rank_layout():
    """Return a fresh copy of the layout shared by both ranking figures."""
    return {
      "dragmode": "zoom",
      "hovermode": "closest",
      "legend": {
        "x": 0.4534313725490196,
        "y": 1.0802211091654588,
        "orientation": "h",
        "xanchor": "auto"
      },
      "xaxis": {
        "autorange": True,
        "domain": [0, 1],
        "fixedrange": False,
        "nticks": 10,
        "range": [-0.5, 9.5],
        "rangeslider": {
          "autorange": True,
          "range": [-0.5, 9.5],
          "visible": False
        },
        "showline": False,
        "showspikes": False,
        "showticklabels": False,
        "ticks": "",
        "title": {"text": ""},
        "type": "category"
      },
      "yaxis": {
        "autorange": True,
        "domain": [0.3, 1],
        "exponentformat": "none",
        "range": [0, 70986.31578947368],
        "showspikes": False,
        "ticks": ""
      }
    }


def rank_chart(df,group):
    """Rank the values of `group` by alarm count and draw two figures.

    The first figure is a bar chart of the 15 highest-ranked groups with one
    bar series per severity plus the total. The second figure shows ranks
    1-10 and 11-20 as two tables side by side.

    Parameters
    ----------
    df : DataFrame
        Alarm rows with `degree`, `time` and the `group` column.
    group : str
        Column to rank (shown under the '호스트명' / HOSTNAME heading).
    """
    levels = ['주의','경고','심각']
    # One row per group, one column per severity. Reindexing to the fixed
    # severity list keeps all three columns present when a severity does not
    # occur in `df`.
    counts = (
        df.groupby([group,'degree'])['time'].count()
        .unstack('degree', fill_value=0)
        .reindex(columns=levels, fill_value=0)
    )
    counts['총합'] = counts.sum(axis=1)
    counts = counts.sort_values('총합',ascending=False)
    counts.index.name = '호스트명'
    counts.columns.name = None
    df = counts.reset_index()
    df.index = df.index + 1   # 1-based rank
    table_1 = df[:10]
    table_2 = df[10:20]
    df = df[:15]

    trace1 = _rank_bar_trace(df, '주의', "#FFC90E")
    trace1["textposition"] = "none"
    trace2 = _rank_bar_trace(df, '경고', "#FF7F0E")
    trace3 = _rank_bar_trace(df, '심각', "#D62728")
    trace4 = {
      "x": list(df['호스트명']),
      "y": list(df['총합']),
      "cliponaxis": True,
      "constraintext": "none",
      "name" : "총합",
      "marker": {
        "color": "#2B62AD",
        "line": {"width": 1}
      },
      "opacity": 0.63,
      "orientation": "v",
      "text": list(df['총합'].astype('str')),
      "textfont": {
        "color": "rgb(255, 0, 6)",
        "family": "Roboto",
        "size": 10
      },
      "textposition": "outside",
      "type": "bar"
    }
    bar_traces = Data([trace1, trace2, trace3, trace4])
    fig = Figure(data=bar_traces, layout=_rank_layout())
    iplot(fig)

    table_traces = Data([
        _rank_table_trace(table_1, [0, 0.5]),
        _rank_table_trace(table_2, [0.5, 1]),
    ])
    fig = Figure(data=table_traces, layout=_rank_layout())
    iplot(fig)

In [None]:
# Host ranking (bar chart and rank tables) for the `data_perf` frame.
rank_chart(data_perf,'hostname')

In [None]:
# Top 10 alarm names per severity for the later period.
# group_chart requires a `title` argument; the calls previously omitted it and
# raised TypeError. The title follows the '<severity>(<period>)' pattern used
# by the other per-severity charts.
for level_name in ('주의', '경고', '심각'):
    group_chart(
        data_perf,
        'alarm_name',
        10,
        level_name,
        '{}({})'.format(level_name, data_after_label),
    )

In [256]:
def line_chart(df,group,line_type,num,title):
    """Draw alarm counts per value of `group`, one scatter series per severity.

    Parameters
    ----------
    df : DataFrame
        Alarm rows with `degree`, `time` and the `group` column.
    group : str
        Column whose values form the x axis (for example 'time_hour').
    line_type : str
        Scatter mode passed to each trace (for example 'lines').
    num : int
        Value used for the x axis `nticks`.
    title : str
        Axis title; also inserted into the figure title.
    """
    levels = ['주의','경고','심각']
    # One row per x value, one column per severity. Reindexing to the fixed
    # severity list keeps all three series available when a severity does not
    # occur in `df`.
    counts = (
        df.groupby([group,'degree'])['time'].count()
        .unstack('degree', fill_value=0)
        .reindex(columns=levels, fill_value=0)
    )
    x_values = list(counts.index)
    traces = Data([
        {
          "name" : level,
          "x": x_values,
          "y": list(counts[level]),
          "mode": line_type,
          "type": "scatter"
        }
        for level in levels
    ])
    layout = {
      "autosize": True,
      "title": {
        "x": 0.5,
        "text": "{}별 알람 추이".format(title)
      },
      "xaxis": {
        "automargin": True,
        "autorange": True,
        "domain": [0, 1],
        "exponentformat": "none",
        "nticks": num,
        "side": "bottom",
        "tickformat": "",
        "title": {"text": "{}".format(title)},
        "type": "category"
      },
      "yaxis": {
        "autorange": True,
        "domain": [0.3, 1],
        "exponentformat": "none",
        "showticklabels": True,
        "title": {"text": "알람 수"},
        "type": "linear"
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)

In [None]:
import datetime

# Keep the rows dated from 2018-04-01 up to (not including) 2019-04-01, move
# every alarm time 90 days earlier and recompute the month from the shifted
# time.
# NOTE(review): this cell overwrites `data` and shifts `time` each time it
# runs, so running it twice shifts the times by 180 days.
data = data[(data['time_date'] >= '2018-04-01') & (data['time_date'] < '2019-04-01')]
data['time'] = data['time'] - datetime.timedelta(days=90)
data['time_month'] = data['time'].dt.month

In [None]:
# One trend chart per time granularity:
# (grouping column, plotly trace mode, x-axis nticks, axis/title label)
trend_specs = [
    ('time_date', "lines", 5, "일(Day)"),
    ('time_week', "markers+lines", 40, "주(Week)"),
    ('time_month', "markers+lines", 40, "월(Month)"),
    ('time_hour', "markers+lines", 23, "시간대"),
    ('time_day', "markers+lines", 40, "일(한달 기준)"),
    ('time_weekday', "markers+lines", 40, "요일(일주일 기준)"),
]
for column, mode, nticks, label in trend_specs:
    line_chart(data_after, column, mode, nticks, label)

In [None]:
def group_chart_log(df,group,num):
    """Show the `num` most frequent values of `group` as a bar chart plus table.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns `group` and 'time'.
    group : str
        Column to count alarms by.
    num : int
        Number of top groups to keep.

    The bar chart occupies the upper part of the figure and a ranked table
    (No / name / count / share) the lower part. The figure is rendered with
    ``iplot``; nothing is returned.
    """
    # Alarm count per group value and its share (%) of all counted alarms.
    summary = df.groupby([group])['time'].count().reset_index()
    summary.columns = ['그룹', '건수']
    summary['비율'] = summary['건수'] / summary['건수'].sum(axis=0) * 100

    # Largest groups first; the 1-based index doubles as the rank column.
    top = summary.sort_values('건수', ascending=False).iloc[:num].reset_index()
    top.index = top.index + 1

    names = list(top['그룹'])
    percent_labels = list(top['비율'].round(1).astype(str) + " %")
    count_labels = list(top['건수'].astype(str) + " 건")

    bar_trace = {
      "x": names,
      "y": list(top['건수']),
      "name": "",
      "orientation": "v",
      "type": "bar",
      "visible": True,
      "xaxis": "x",
      "text": percent_labels,
      "textfont": {
        "color": "rgb(255, 0, 6)",
        "family": "Roboto",
        "size": 15
      },
      "textposition": "auto",
    }
    table_trace = {
      "type": "table",
      "columnwidth": [10,20,10,10],
      "domain": {
        "x": [0, 1],
        "y": [0, 0.3]
      },
      "header": {
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {
          "color": "rgb(255, 255, 255)",
          "size": 15
        },
        "height": 25,
        "line": {"color": "rgb(140, 177, 214)"},
        "values": ['No','이름','건수','비율']
      },
      "cells": {
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {
          "color": "rgb(48, 55, 66)",
          "family": "Arial",
          "size": 13
        },
        "height": 25,
        "line": {
          "color": "rgb(171, 191, 212)",
          "width": 1
        },
        "values": [list(top.index), list(names), count_labels, list(percent_labels)]
      },
    }
    data = Data([bar_trace, table_trace])
    layout = {
      "autosize": True,
      "colorway": ['#5B9BD5'],
      "font": {
        "color": "rgb(34, 43, 59)",
        "size": 13
      },
      "showlegend": False,
      "title": {
        "font": {"size": 20},
        "text": ""
      },
      "xaxis": {
        "autorange": True,
        "domain": [0, 1],
        "type": "category"
      },
      "yaxis": {
        "autorange": True,
        "domain": [0.5, 1],
        "type": "linear",
        "exponentformat": "none",
        "automargin": True
      }
    }
    fig = Figure(data=data, layout=layout)
    iplot(fig)

### 로그 데이터 총 현황

In [None]:
# Distinct alarm names in the log data. This value is not shown because it is
# not the last expression of the cell.
data_log.alarm_name.unique()
# Number of alarms per alarm name, most frequent first.
(
    data_log
    .groupby(['alarm_name'])['time']
    .count()
    .reset_index()
    .sort_values('time', ascending=False)
)

### 1. 로그 모니터

In [None]:
# Alarms of type "로그 모니터" (log monitor), charted for the top 10 hostnames.
is_log_monitor = data_log['alarm_name'] == "로그 모니터"
df_log = data_log[is_log_monitor]
group_chart_log(df_log, 'hostname', 10)

### 로그모니터 발생 종류

In [None]:
def len_condition(name, frame=None):
    """Print how many rows have a 'condition' text containing `name`.

    Parameters
    ----------
    name : str
        Text to look for. It is matched literally (not as a regular
        expression), so characters such as '.', '[' or '(' in a message
        fragment match only themselves.
    frame : DataFrame, optional
        Frame with a 'condition' column. When omitted, the module-level `df`
        is used, which is how the existing cells call this function.

    Prints '<name> : <count> 건'; returns None.
    """
    target = df if frame is None else frame
    # na=False: rows with a missing condition text never count as a match.
    matches = target['condition'].str.contains(name, regex=False, na=False)
    print('{} : {} 건'.format(name, int(matches.sum())))

In [None]:
# Alarm count per (alarm_name, resource_name), narrowed to the "로그 모니터"
# alarm type; the five largest rows are displayed.
df = (
    data_log
    .groupby(['alarm_name', 'resource_name'])['time']
    .count()
    .reset_index()
    .loc[lambda counts: counts['alarm_name'] == '로그 모니터']
)
df.sort_values('time', ascending=False).head()

### 1) Alert_BDALMDB

In [None]:
# len_condition reads the module-level `df`, so it is rebound to the
# Alert_BDALMDB rows before counting each ORA error code.
df = data_log[data_log['resource_name'] == "Alert_BDALMDB"]
for ora_code in (
    'ORA-01422',
    'ORA-00600',
    'ORA-1688',
    'ORA-00060',
    'ORA-1652',
    'ORA-04030',
    'ORA-1683',
    'ORA-1653',
    'ORA-20011',
    'ORA-00604',
):
    len_condition(ora_code)

### 2) D-ERRLOG

In [None]:
# Rebind the module-level `df` (read by len_condition) to the D-ERRLOG rows,
# then count each message fragment.
df = data_log[data_log['resource_name'] == "D-ERRLOG"]
errlog_fragments = [
    '스레드 종료 또는 응용 프로그램 요청 때문에 I/O 작업이 취소되었습니다',
    'Error: 9954',
    '디스크 공간이 부족합니다',
    'ETW',
    '요청을 취소했습니다',
]
for fragment in errlog_fragments:
    len_condition(fragment)

### 3) [DBA_AL]

In [None]:
# Rebind the module-level `df` (read by len_condition) to the [DBA_AL] rows,
# then count each message fragment / ORA code.
df = data_log[data_log['resource_name'] == "[DBA_AL]"]
dba_al_fragments = [
    'Services Deadlock detected',
    'ORA-27037',
    'Thread 1 cannot allocate new log',
    'ORA-20011',
    'ORA-1688',
    'ORA-00060',
    'ORA-20000',
    'ORA-1652',
]
for fragment in dba_al_fragments:
    len_condition(fragment)

### 4) Jennifer

In [None]:
# Rows whose resource is "Jennifer Log"; len_condition reads this `df`.
is_jennifer = data_log['resource_name'] == "Jennifer Log"
df = data_log[is_jennifer]
len_condition('Que')

### 5) 백업에러

In [None]:
# Rows whose resource is "백업에러" (backup error); len_condition reads this `df`.
df = data_log[data_log['resource_name'] == "백업에러"]
for fragment in ('error Backup', 'duplicate_job error'):
    len_condition(fragment)

### 2. Syslog 모니터

In [None]:
# Alarms of type "Syslog 모니터" (syslog monitor), charted for the top 10 hostnames.
is_syslog_monitor = data_log['alarm_name'] == "Syslog 모니터"
df_log = data_log[is_syslog_monitor]
group_chart_log(df_log, 'hostname', 10)

In [None]:
# Syslog-monitor alarms; len_condition reads the module-level `df`.
# The former bare `df.sort_values(...)` statement was removed: its result was
# discarded, so it had no effect on `df` or on the counts below.
df = data_log[data_log['alarm_name'] == "Syslog 모니터"]
syslog_patterns = [
    'namespace root/cimv2',
    'NFS',
    'EMS Event Notification',
    'ERRPT',
    'cluster daemon',
    'ISCSI_ERR_TCP_CONN_CLOSE',
]
for pattern in syslog_patterns:
    len_condition(pattern)
# To list the alarms that match none of the patterns above:
# df[~df['condition'].str.contains('|'.join(syslog_patterns), na=True)]

### 3. SFM 체크

In [None]:
# Alarms of type "SFM 체크" (SFM check), charted for the top 10 hostnames.
is_sfm_check = data_log['alarm_name'] == "SFM 체크"
df_log = data_log[is_sfm_check]
group_chart_log(df_log, 'hostname', 10)

In [None]:
# Count per (alarm_name, condition) pair, narrowed to "SFM 체크" alarms.
# NOTE(review): `df` holds one row per distinct condition text here, so
# len_condition counts distinct messages rather than alarm occurrences
# (unlike the cells that filter data_log directly) — confirm this is intended.
df = (
    data_log
    .groupby(['alarm_name', 'condition'])['time']
    .count()
    .reset_index()
    .loc[lambda grouped: grouped['alarm_name'] == 'SFM 체크']
)
sfm_fragments = [
    'Port World-wide name for',
    'The legacy lun path registr',
    'script result nomatch',
    'Target path had gone offlin',
    'Async write failed',
    'Fibre Channel Driver receiv',
    'Target path state change',
]
for fragment in sfm_fragments:
    len_condition(fragment)

### 클러스터

In [None]:
# Performance-alarm count per hostname (column 'time' holds the count).
# The former bare `data_df.sort_values(...)` statement was removed: its
# result was neither assigned nor displayed, so it had no effect.
data_df = data_perf.groupby(['hostname'])['time'].count().reset_index()
# Number of hosts with at least 1 and fewer than 1000 alarms.
len(data_df[(data_df.time >= 1) & (data_df.time < 1000)])

In [None]:
# Performance-alarm count per hostname, stored in a column named 'count',
# followed by its summary statistics.
data_df = (
    data_perf
    .groupby(['hostname'])['time']
    .count()
    .rename('count')
    .reset_index()
)
data_df.describe()

In [250]:
# Hosts with at least one performance alarm, busiest first.
data_df = data_df.loc[data_df['count'] > 0]
node_names = data_df.sort_values('count', ascending=False)['hostname'].values

# Every alarm in `data` raised by one of those hosts.
c_nodes = data[data['hostname'].isin(node_names)]

# Alarm count per (day, host), reshaped into a day x hostname matrix with 0
# where a host raised no alarm on that day.
daily_counts = (
    c_nodes
    .groupby(['time_date', 'hostname'])['degree']
    .count()
    .reset_index()
)
data_df_date = daily_counts.pivot_table(
    values='degree',
    columns='hostname',
    index=daily_counts['time_date'],
    aggfunc='first',
    fill_value=0,
)

In [251]:
# Pairwise correlation between the hostname columns of the daily-count matrix
# (DataFrame.corr with its default settings).
corrmat = data_df_date.corr()

In [252]:
# For every host, the hosts whose daily alarm counts correlate >= 0.9 with it.
clusters = [corrmat[corrmat[node] >= 0.9].index for node in corrmat.columns]

# Keep groups with more than one member and drop duplicates. Members and
# groups are sorted so the result (and the cluster{i} numbering below) is the
# same on every run; tuple(set(...)) made it depend on string hash order.
clusters_new = sorted(
    {tuple(sorted(cluster)) for cluster in clusters if len(cluster) > 1}
)

dictt = {
    'cluster{}'.format(i): list(members)
    for i, members in enumerate(clusters_new)
}

# Move the time_date index into a column (chart_cluster melts on it). The
# guard keeps the cell safe to re-run: a second reset_index() would otherwise
# add a spurious 'index' column.
if 'time_date' not in data_df_date.columns:
    data_df_date = data_df_date.reset_index()

In [253]:
# Number of distinct host groups found above.
len(clusters_new)

77

In [254]:
def chart_cluster(clusters_new,df,min_count=100):
    """Plot the daily alarm trend of each host group and list its main alarms.

    Parameters
    ----------
    clusters_new : iterable
        Host groups. Each item is a collection of hostnames, or a single
        hostname string (some callers pass one).
    df : DataFrame
        Alarm rows with the columns 'hostname', 'alarm_name' and 'time'.
    min_count : int, default 100
        Only (alarm_name, hostname) pairs with more than this many alarms are
        listed in the table.

    For every group this prints a header, draws a seaborn point plot from the
    module-level `data_df_date` matrix, and displays the alarm-count table
    via display_side_by_side. Nothing is returned.
    """
    for i, cluster in enumerate(clusters_new):
        print('>>>>>>>>>cluster{}'.format(i))
        test_data = pd.melt(data_df_date, id_vars=['time_date'],value_vars=cluster)
        fig = plt.figure(figsize=(15, 7))
        sns.pointplot(x="time_date", y="value",hue=test_data.hostname, data=test_data)
        plt.xticks(rotation='vertical')
        plt.show()
        # Release the figure once it has been shown. plt.clf() after
        # plt.show() cleared a figure without closing it, so figures
        # accumulated across iterations.
        plt.close(fig)

        # A single hostname string keeps the substring match the old
        # `except:` fallback provided; collections use exact membership.
        # The explicit type check replaces a bare `except:` that would also
        # have hidden unrelated errors.
        if isinstance(cluster, str):
            in_cluster = df['hostname'].str.contains(cluster)
        else:
            in_cluster = df['hostname'].isin(cluster)

        # Alarm count per (alarm_name, hostname), largest first. Counting
        # directly gives the same totals as counting per day and summing.
        c_nodes = (
            df[in_cluster]
            .groupby(['alarm_name','hostname'])['time']
            .count()
            .reset_index()
            .sort_values('time', ascending=False)
        )
        c_nodes = c_nodes[c_nodes.time > min_count]
        display_side_by_side(c_nodes)

In [None]:
# Hosts whose daily alarm counts correlate above 0.6 with SCCLD301.
corrmat[corrmat['SCCLD301'] > 0.6]

In [None]:
# Daily alarm trend and per-alarm counts for the SCMSG004R / SCMSG006R group.
chart_cluster([('SCMSG004R', 'SCMSG006R')],data_perf)

In [None]:
# Daily alarm trend and per-alarm counts for the SCMSG005R / SCMSG003R / SCMSG002R group.
chart_cluster([('SCMSG005R', 'SCMSG003R', 'SCMSG002R')],data_perf)

In [None]:
# Daily alarm trend and per-alarm counts for the four SCFEM hosts.
chart_cluster([('SCFEM001R', 'SCFEM002R','SCFEM421R','SCFEM431R')],data_perf)

In [None]:
# Daily alarm trend and per-alarm counts for the pedwdb1 / pedwdb2 group.
chart_cluster([('pedwdb1', 'pedwdb2')],data_perf)

In [None]:
# Single-host group. The trailing comma makes it a 1-tuple like the other
# calls; ('SCCLD301') is just a string, which sent chart_cluster down its
# substring-matching fallback instead of exact hostname matching.
chart_cluster([('SCCLD301',)], data_perf)

In [None]:
# Single-host group. The trailing comma makes it a 1-tuple like the other
# calls; ('DRFEM001R') is just a string, which sent chart_cluster down its
# substring-matching fallback instead of exact hostname matching.
chart_cluster([('DRFEM001R',)], data_perf)

In [None]:
# Daily alarm trend and per-alarm counts for the ptrepap1 / ptrepap2 group.
chart_cluster([('ptrepap1', 'ptrepap2')],data_perf)

### ptrepap1, ptrepap2 중 ptrepap1에서만 NTP 알람이 발생함 → ptrepap1 서버의 NTP 설정 확인 필요

In [None]:
# Single-host group. The trailing comma makes it a 1-tuple; ('SCSAH202T') is
# just a string, which sent chart_cluster down its substring-matching
# fallback instead of exact hostname matching.
chart_cluster([('SCSAH202T',)], data_perf)
# Summary statistics of last_time for alarms whose name contains
# "B-UCServerMain120". na=False treats a missing alarm name as "no match".
data_perf[data_perf.alarm_name.str.contains("B-UCServerMain120", na=False)].last_time.describe()

### B-UCServerMain120X 관련 프로세스 다운 알람 47556건 중 75%가 평균 3~4분 이내에 복구되었음
### 따라서 일시적인 재기동 상황이라면 알람 발생 조건을 연속 발생 3~4회로 설정할 것을 권고

In [None]:
def scatter_chart(alarm_name, clusters_new,df):
    """Scatter-plot the measured value of one alarm type over time, per host.

    Parameters
    ----------
    alarm_name : str
        Alarm type to keep (exact match on the 'alarm_name' column).
    clusters_new : list of str
        Hostnames to plot; one figure and one Excel file per host.
    df : DataFrame
        Alarm rows with the columns 'alarm_name', 'hostname', 'condition',
        'time' and 'degree'.

    For the i-th host the per-timestamp values are written to '<i>.xlsx' in
    the working directory and plotted with seaborn. Nothing is returned.
    """
    # .copy(): columns are added below, which must not happen on a slice of
    # the caller's frame.
    df = df[(df.alarm_name == alarm_name) & (df['hostname'].isin(clusters_new))].copy()

    # The condition text is split on spaces and the numbers are read from
    # fixed token positions.
    # NOTE(review): positions 0/3/4/10 assume one specific message layout —
    # confirm for every alarm_name this is called with.
    condition  = df.condition.str.split(' ', expand=True)
    number = r'(\d+(?:\.\d+)?)'  # raw string: same pattern, no invalid-escape warning
    df['type'] = condition[0]
    df['threshold'] = condition[10].str.extract(number, expand=False).astype(float)
    df['current'] = condition[4].str.extract(number, expand=False).astype(float)
    # Values whose 4th token is 'Gbps' are multiplied by 1000.
    df['current'] = np.where(condition[3]=='Gbps',df['current']*1000,df['current'])

    df_raw = df.groupby(['time','hostname','degree'])['current'].mean().reset_index()
    for i,node in enumerate(clusters_new):
        print(node)
        df_2 = df_raw[df_raw['hostname'] == node]
        # One column per degree; np.nan instead of the removed np.NaN alias.
        df_2 = df_2.pivot_table(values='current', index=df_2.time, columns='degree', aggfunc='first',fill_value=np.nan).reset_index()
        # Sum only the per-degree value columns; the 'time' column is
        # excluded explicitly instead of relying on it being skipped.
        df_2['merge'] = df_2.drop(columns='time').sum(axis=1)
        # `encoding` is not passed: to_excel no longer accepts it in current
        # pandas, and .xlsx files do not need it.
        df_2.to_excel(str(i)+'.xlsx')

        fig = plt.figure(figsize=(15, 7))
        sns.scatterplot(x="time", y="merge" ,data=df_2)
        # Explicit timestamps so the limits do not depend on how the active
        # date converter parses strings.
        plt.xlim(pd.Timestamp('2018-03-30'), pd.Timestamp('2019-04-30'))
        plt.xticks(rotation='horizontal')
        plt.show()
        # Close rather than clear, so figures do not pile up across hosts.
        plt.close(fig)

In [None]:
# Values of the 'NTP 서버와 시간 차이' (time difference from the NTP server) alarm
# for SCMSG004R / SCMSG006R; also writes 0.xlsx and 1.xlsx.
scatter_chart('NTP 서버와 시간 차이',['SCMSG004R', 'SCMSG006R'],data_perf)

In [None]:
# Same alarm for SCMSG005R / SCMSG003R / SCMSG002R; writes 0.xlsx .. 2.xlsx,
# overwriting files of the same name from an earlier call.
scatter_chart('NTP 서버와 시간 차이',['SCMSG005R', 'SCMSG003R', 'SCMSG002R'],data_perf)

In [None]:
# NOTE(review): these read like proposed thresholds per severity — confirm
# which metric they apply to.
# caution (주의) > 90 (3 times)
# warning (경고) > 95
# critical (심각) > 98

In [None]:
# "Disk I/O 처리율" (disk I/O throughput) alarms for the four SCFEM hosts.
# .copy(): columns are added below, which must not happen on a slice of data_perf.
df = data_perf[(data_perf.alarm_name == "Disk I/O 처리율") & (data_perf['hostname'].isin(['SCFEM001R', 'SCFEM002R','SCFEM421R','SCFEM431R']))].copy()

# Numbers are read from fixed token positions of the condition text.
# NOTE(review): positions 3 and 6 assume one specific message layout — confirm.
condition  = df.condition.str.split(' ', expand=True)
df['threshold'] = condition[6].str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)
df['current'] = condition[3].str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)

df_raw = df.groupby(['time','last_time','hostname','degree'])['current'].mean().reset_index()
df_hour = df.groupby(['time_date_hour','hostname','degree'])['current'].agg(['mean', 'min', 'max']).reset_index()
# Alarm count per (day, host, degree), narrowed to SCFEM002R.
df_day = df.groupby(['time_date','hostname','degree'])['current'].count().reset_index()
df_day = df_day[df_day['hostname']=="SCFEM002R"]

# The scatter plot in the following cell reads `test`. It is defined here so
# that the notebook works when run top to bottom; previously the assignment
# was commented out in this cell.
test = df[['degree','time','clear_time','last_time','hostname','threshold','current']]

df_day.sort_values('time_date',ascending=False)[['degree','current']]


In [None]:
# Scatter of the measured value ('current') against 'last_time' for the rows
# in `test`, coloured by degree.
fig = plt.figure(figsize=(15, 7))
ax = sns.scatterplot(x="current", y="last_time",hue="degree" ,data=test)
#plt.xlim('2018-03-30', '2019-4-30')
plt.xticks(rotation='horizontal')
plt.show()
plt.clf()

In [None]:
# "Disk I/O 처리율" (disk I/O throughput) alarms for the four SCFEM hosts.
# .copy(): columns are added below, which must not happen on a slice of data_perf.
df = data_perf[(data_perf.alarm_name == "Disk I/O 처리율") & (data_perf['hostname'].isin(['SCFEM001R', 'SCFEM002R','SCFEM421R','SCFEM431R']))].copy()

# Numbers are read from fixed token positions of the condition text.
# NOTE(review): positions 3 and 6 assume one specific message layout — confirm.
condition  = df.condition.str.split(' ', expand=True)
df['threshold'] = condition[6].str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)
df['current'] = condition[3].str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)

df_raw = df.groupby(['time','last_time','hostname','degree'])['current'].mean().reset_index()
df_hour = df.groupby(['time_date_hour','hostname','degree'])['current'].agg(['mean', 'min', 'max']).reset_index()

# Removed from this cell: bare expressions whose values were never displayed
# (`df_raw`, `test`, `test.last_time`) and a triple-quoted string holding
# disabled plotting code; none of them had any effect.
test = df[['degree','time','clear_time','last_time','hostname','threshold','current']]

# Distribution of last_time for these alarms.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; switch
# to histplot/displot when upgrading.
sns.distplot(test.last_time)

In [None]:
def scatter_chart_new(df,group,num):
    trace1 = {
      "x": ["주의222222222222222222222", "경고", "심각"], 
      "y": ["316153", "476589", "121969"], 
      "mode": "markers", 
      "type": "scatter"
    }
    data = Data([trace1])
    layout = {
      "autosize": True, 
      "xaxis": {
        "autorange": True,
        "type": "category"
      }, 
      "yaxis": {
        "autorange": True, 
        "type": "linear"
      }
    }
    fig = Figure(data=data, layout=layout)
    plot_url = py.plot(fig)