In [None]:
import pandas as pd
import numpy as np
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
#%matplotlib notebook
from matplotlib import font_manager, rc
import scipy.cluster.hierarchy as spc
font_name = font_manager.FontProperties(fname="/usr/share/fonts/nanum/nanumGothic.ttf").get_name()
plt.rc('font', family=font_name)
plt.rc('style')
import warnings
warnings.filterwarnings(action='ignore') 
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_colwidth=3000

In [None]:
# Print the IPython directory used by this environment.
from IPython.utils.path import get_ipython_dir
print(get_ipython_dir())

In [None]:
# Plotly setup: optional cloud sign-in (used by the `py.plot` / `py.iplot` calls)
# and offline notebook mode (used by `iplot`).
import os

import plotly.plotly as py
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Credentials come from the environment so that no API key is stored in the notebook.
# NOTE(review): a key that was ever committed with the notebook should be regenerated.
PLOTLY_USERNAME = os.environ.get('PLOTLY_USERNAME')
PLOTLY_API_KEY = os.environ.get('PLOTLY_API_KEY')
if PLOTLY_USERNAME and PLOTLY_API_KEY:
    py.sign_in(PLOTLY_USERNAME, PLOTLY_API_KEY)
else:
    print('PLOTLY_USERNAME / PLOTLY_API_KEY are not set: skipping the Plotly cloud sign-in.')

init_notebook_mode(connected=True)

In [None]:
from IPython.display import display_html
def display_side_by_side(*args):
    """Render several DataFrames next to each other in the notebook output.

    Parameters
    ----------
    *args : pandas.DataFrame
        Frames to show; each one is converted with ``DataFrame.to_html()``.

    Every ``<table`` opening tag receives an inline ``display`` style so the
    tables flow horizontally. Only the opening tag is rewritten: closing tags
    and cell text that happens to contain the word "table" stay untouched.
    """
    html_str = ''.join(df.to_html() for df in args)
    display_html(html_str.replace('<table', '<table style="display:inline"'), raw=True)

## 로드 데이터

In [None]:
# Load the exported alarm list.
# `encoding` is not a documented `read_excel` parameter (recent pandas versions
# reject it with a TypeError), so it is not passed.
data = pd.read_excel('알람 목록.xls')

In [None]:
# Check the number of rows and columns
data.shape

In [None]:
# Remove the columns that are not needed for the analysis.
# `errors='ignore'` keeps the cell re-runnable: a second run no longer raises
# KeyError for columns that have already been removed.
cols = ['처리 현황', '담당자', '처리 이력', '확인 시간', '유지보수 상태']
data = data.drop(columns=cols, errors='ignore')

In [None]:
# Rename the columns to English identifiers (the frame must have exactly these 12 columns)
data.columns = ['degree', 'alarm_name','time','clear_time','last_time','group_path','system_name','hostname','ip','resource_name','full_path','condition']

## 1. 알람 데이터 로드 및 전처리

In [None]:
# Rename the columns to English identifiers.
# This repeats the assignment made in the cell just before this section heading.
data.columns = ['degree', 'alarm_name','time','clear_time','last_time','group_path','system_name','hostname','ip','resource_name','full_path','condition']

In [None]:
# Derive the columns used by the rest of the analysis
data['time'] = pd.to_datetime(data['time'], format='%Y-%m-%d %H:%M:%S')
data['clear_time'] = pd.to_datetime(data['clear_time'], format='%Y-%m-%d %H:%M:%S')

# Alarm duration in seconds. `total_seconds()` is used instead of `.dt.seconds`:
# the latter returns only the seconds component (0-86399) and silently drops
# whole days. Alarms without a clear time get 0.
data['last_time'] = (data['clear_time'] - data['time']).dt.total_seconds().fillna(0)

# Calendar breakdowns of the alarm start time
data['time_month'] = data['time'].dt.month
data['time_day'] = data['time'].dt.day
data['time_hour'] = data['time'].dt.hour
data['time_date']= data['time'].dt.strftime('%Y-%m-%d')
data['time_date_hour']= data['time'].dt.strftime('%Y-%m-%d %H')
data['time_weekday'] = data['time'].dt.weekday
# NOTE(review): `.dt.week` is deprecated in newer pandas releases; kept for the
# pandas version this notebook was written against.
data['time_week'] = data['time'].dt.week

# First two levels of the '>'-separated group path
full_group = data['group_path'].str.split('>', expand=True)
data['group1'] = full_group[0]
data['group2'] = full_group[1]
#data['group3'] = full_group[2]
#data['group4'] = full_group[3]

# Network prefixes built from the dotted IP address (last one / two octets set to 0)
ipaddr = data['ip'].str.split('.', expand=True)
data['C_class']= ipaddr[0] + '.' + ipaddr[1] + '.' + ipaddr[2] + '.0'
data['B_class']= ipaddr[0] + '.' + ipaddr[1] + '.0.0'

In [None]:
# Keep every row whose degree is not '해제'
data = data[data['degree'] != '해제']

In [None]:
# Preview the first rows of the prepared data
data.head()

### 2. 알람 발생 현황 분석

#### (1) 전체 알람 데이터 현황

In [None]:
def result_degree_group(degree,group):
    """Show, for each group pattern, the ten most frequent alarm names before and after.

    Reads the notebook-level frames ``data_before`` / ``data_after`` and the
    labels ``data_before_label`` / ``data_after_label``.
    NOTE(review): none of these four names is defined in the visible notebook
    cells; they must exist before this function is called.

    Parameters
    ----------
    degree : str
        Alarm degree to keep (compared with the ``degree`` column).
    group : iterable of str
        Regular-expression patterns matched against ``group2`` with ``str.contains``.
    """
    def top_alarms(frame, pattern):
        # Rows without a group2 value are treated as non-matching; without
        # na=False the NaN entries make the boolean mask unusable for indexing.
        matched = frame[frame['group2'].str.contains(pattern, na=False)]
        counts = matched.groupby(['alarm_name'])['time'].count().reset_index()
        return counts.sort_values('time',ascending=False)[:10]

    data_after_df = data_after[data_after['degree'] == degree]
    data_before_df = data_before[data_before['degree'] == degree]
    print('>>>>>>{}'.format(degree))
    for pattern in group:
        print('>>>>>>>>>'+pattern)
        print(data_before_label)
        display(top_alarms(data_before_df, pattern))
        print(data_after_label)
        display(top_alarms(data_after_df, pattern))

In [None]:
# The group names are regular expressions; the escaped one is a raw string so
# that '\(' and '\/' are not read as (invalid) string escape sequences.
result_degree_group('심각',['외환','통합그룹웨어','신인터넷뱅킹','기타',r'\(구\)인터넷뱅킹\/SMS'])

In [None]:
# The group names are regular expressions; the escaped one is a raw string so
# that '\/' is not read as an (invalid) string escape sequence.
result_degree_group('경고',['기타','외환','단위업무',r'CRM\/COLD','로이터'])

In [None]:
# The group names are regular expressions; the escaped one is a raw string so
# that '\/' is not read as an (invalid) string escape sequence.
result_degree_group('주의',[r'CRM\/COLD','외환','단위업무','계정계','통합그룹웨어'])

In [None]:
def df_degree(df,title):
    """Draw a donut chart and a summary table of the alarm count per degree.

    Parameters
    ----------
    df : pandas.DataFrame
        Alarm rows with at least the ``degree`` and ``time`` columns.
    title : str
        Figure title.

    The figure is rendered with the offline ``iplot``; nothing is returned.
    """
    levels = ['주의','경고','심각']
    # Same degree -> colour mapping as the bar charts of this notebook.
    level_colors = ['#FFC90E', '#FF7F0E', '#D62728']

    # Non-null `time` values per degree; a degree that does not occur is shown as 0.
    degree_total = df.groupby(['degree'])['time'].count().reindex(levels, fill_value=0).reset_index()
    degree_total.columns = ['degree','count']
    total = degree_total['count'].sum()
    # Guard against 0/0 when the frame holds no alarms at all.
    degree_total['per'] = degree_total['count'] / total * 100 if total else 0.0

    pie_trace = {
      "name" : "",
      "type": "pie",
      "labels": levels,
      "values": list(degree_total['count']),
      # Explicit per-sector colours, in the same order as `labels`.
      "marker": {"colors": level_colors},
      "hole": .4,
      "domain": {"x": [0, 0.5], "y": [0.25, 1]}
    }
    table_trace = {
      "type": "table",
      "domain": {"x": [0.5, 1], "y": [0.25, 0.75]},
      "header": {
        "values": ['등급','건수','비율'],
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {"color": "rgb(255, 255, 255)", "size": 24},
        "height": 37,
        "line": {"color": "rgb(140, 177, 214)"}
      },
      "cells": {
        "values": [
          list(degree_total['degree']),
          list(degree_total['count'].astype(str)+" 건"),
          list(degree_total['per'].round().astype(str)+" %")
        ],
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {"color": "rgb(48, 55, 66)", "family": "Arial", "size": 19},
        "height": 33,
        "line": {"color": "rgb(171, 191, 212)", "width": 1}
      }
    }
    traces = Data([pie_trace, table_trace])
    layout = {
      "autosize": True,
      "colorway": level_colors,
      "font": {"size": 20},
      "legend": {"x": 0.3961578483357668, "y": 1.1245210727969348},
      "title": {"text": title},
      "xaxis": {"autorange": True, "range": [-1, 6]},
      "yaxis": {"autorange": True, "range": [-1, 4]}
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)

In [None]:
# Degree breakdown of all alarms in `data`
df_degree(data,'총 발생 알람')

In [None]:
# Rows whose condition text contains '이벤트 탐지'; rows without a usable condition text are left out.
data_log = data[data.condition.str.contains('이벤트 탐지', na=False)]

#### (2) 일반 알람 등급별 발생 현황

In [None]:
# NOTE(review): the title reads "performance alarm count", but the full `data`
# frame is passed here - confirm whether a filtered frame was intended.
df_degree(data,'성능 알람 건수')

In [None]:
def group_chart(df,group,num,degree,title):
    """Bar chart plus table of the groups with the most alarms of one degree.

    Parameters
    ----------
    df : pandas.DataFrame
        Alarm rows; needs ``degree``, ``time`` and the column named by ``group``.
    group : str
        Column whose values are ranked (e.g. ``'system_name'``).
    num : int
        Number of top rows to show.
    degree : str
        One of ``'주의'``, ``'경고'``, ``'심각'``; selects the ranked column and the bar colour.
    title : str
        Figure title.

    Raises
    ------
    ValueError
        If ``degree`` is not one of the three known degrees.

    The figure is drawn with ``iplot`` and the ranked rows are shown with ``display``.
    """
    levels = ['주의','경고','심각']
    colors = {'주의': "#FFC90E", '경고': "#FF7F0E", '심각': "#D62728"}
    if degree not in colors:
        raise ValueError('degree must be one of {}, got {!r}'.format(levels, degree))

    # Rows: group value, columns: degree, cells: number of non-null `time` values.
    # reindex() guarantees all three degree columns, also for subsets (e.g. a
    # single day) in which one degree never occurs.
    counts = df.groupby([group,'degree'])['time'].count().unstack('degree', fill_value=0)
    counts = counts.reindex(columns=levels, fill_value=0)
    counts['총합'] = counts[levels].sum(axis=1)
    degree_total = counts[degree].sum()
    # Share of this group in all alarms of the selected degree (0 when there are none).
    counts['비율'] = counts[degree] / degree_total * 100 if degree_total else 0.0
    df = counts.reset_index()
    df.columns = ['그룹','주의','경고','심각','총합','비율']
    df = df.sort_values(degree,ascending=False).iloc[:num]

    bar_trace = {
      "x": list(df[degree]),
      "y": list(df['그룹']),
      "name": degree,
      "orientation": "h",
      "type": "bar",
      "visible": True,
      "xaxis": "x"
    }
    table_trace = {
      "type": "table",
      "columnwidth": [20,10,10],
      "domain": {"x": [0.3, 1], "y": [0, 1]},
      "header": {
        "values": ['이름','건수','비율'],
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {"color": "rgb(255, 255, 255)", "size": 15},
        "height": 25,
        "line": {"color": "rgb(140, 177, 214)"}
      },
      "cells": {
        "values": [
          list(df['그룹']),
          list(df[degree].astype(str)+" 건"),
          list(df['비율'].round().astype(str)+" %")
        ],
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {"color": "rgb(48, 55, 66)", "family": "Arial", "size": 13},
        "height": 25,
        "line": {"color": "rgb(171, 191, 212)", "width": 1}
      }
    }
    traces = Data([bar_trace, table_trace])
    layout = {
      "autosize": True,
      "colorway": [colors[degree]],
      "font": {"color": "rgb(34, 43, 59)", "size": 13},
      "legend": {"x": -0.11808242767074219, "y": 1.10727969348659, "font": {"size": 20}},
      "showlegend": True,
      "title": {"font": {"size": 20}, "text": title},
      "xaxis": {
        "autorange": True,
        "domain": [0, 0.3],
        "exponentformat": "none",
        "range": [0, 501672.63157894736],
        "type": "linear"
      },
      "yaxis": {
        "autorange": "reversed",
        "range": [-0.5, 2.5],
        "domain": [1, 0.2],
        "type": "category",
        "automargin": True
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)
    display(df[['그룹',degree,'총합','비율']])

In [None]:
def group_chart2(df,group,group2,num,degree,title):
    """Bar chart of the top ``group`` values plus a table of the top (group, group2) pairs.

    Only the rows of the given ``degree`` are counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Alarm rows; needs ``degree``, ``time`` and the columns named by ``group`` / ``group2``.
    group : str
        Column ranked in the bar chart (shown as '호스트명').
    group2 : str
        Second key of the table (shown as '알람이름').
    num : int
        Number of top rows to show in the chart and in the table.
    degree : str
        One of ``'주의'``, ``'경고'``, ``'심각'``.
    title : str
        Figure title.

    Raises
    ------
    ValueError
        If ``degree`` is not one of the three known degrees.
    """
    colors = {'주의': "#FFC90E", '경고': "#FF7F0E", '심각': "#D62728"}
    if degree not in colors:
        raise ValueError('degree must be one of {}, got {!r}'.format(list(colors), degree))

    selected = df[df['degree'] == degree]

    # Bar chart data: alarm count per `group` value.
    by_group = selected.groupby([group])['time'].count().reset_index()
    by_group.columns = ['호스트명','건수']
    by_group = by_group.sort_values('건수',ascending=False).iloc[:num]

    # Table data: alarm count per (group, group2) pair and its share of all
    # alarms of this degree (0 when there are none).
    by_pair = selected.groupby([group,group2])['time'].count().reset_index()
    by_pair.columns = ['호스트명','알람이름','건수']
    total = by_pair['건수'].sum()
    by_pair['비율'] = by_pair['건수'] / total * 100 if total else 0.0
    by_pair = by_pair.sort_values('건수',ascending=False).iloc[:num]

    bar_trace = {
      "x": list(by_group['건수']),
      "y": list(by_group['호스트명']),
      "name": degree,
      "orientation": "h",
      "type": "bar",
      "visible": True,
      "xaxis": "x"
    }
    table_trace = {
      "type": "table",
      "columnwidth": [10,20,10,10],
      "domain": {"x": [0.3, 1], "y": [0, 1]},
      "header": {
        "values": ['호스트명','알람이름','건수','비율'],
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {"color": "rgb(255, 255, 255)", "size": 15},
        "height": 25,
        "line": {"color": "rgb(140, 177, 214)"}
      },
      "cells": {
        "values": [
          list(by_pair['호스트명']),
          list(by_pair['알람이름']),
          list(by_pair['건수'].astype(str)+" 건"),
          list(by_pair['비율'].round().astype(str)+" %")
        ],
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {"color": "rgb(48, 55, 66)", "family": "Arial", "size": 13},
        "height": 25,
        "line": {"color": "rgb(171, 191, 212)", "width": 1}
      }
    }
    traces = Data([bar_trace, table_trace])
    layout = {
      "autosize": True,
      "colorway": [colors[degree]],
      "font": {"color": "rgb(34, 43, 59)", "size": 13},
      "legend": {"x": -0.11808242767074219, "y": 1.10727969348659, "font": {"size": 20}},
      "showlegend": True,
      "title": {"font": {"size": 20}, "text": title},
      "xaxis": {
        "autorange": True,
        "domain": [0, 0.3],
        "exponentformat": "none",
        "range": [0, 501672.63157894736],
        "type": "linear"
      },
      "yaxis": {
        "autorange": "reversed",
        "range": [-0.5, 2.5],
        "domain": [1, 0.2],
        "type": "category",
        "automargin": True
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)

In [None]:
def group_chart_total(df,group,num,title):
    """Bar chart plus table of the ``group`` values with the most alarms overall.

    Parameters
    ----------
    df : pandas.DataFrame
        Alarm rows; needs ``time`` and the column named by ``group``.
    group : str
        Column whose values are ranked.
    num : int
        Number of top rows to show.
    title : str
        Figure title.

    The figure is drawn with ``iplot`` and the ranked rows are shown with ``display``.
    """
    counts = df.groupby([group])['time'].count().reset_index()
    counts.columns = ['그룹','건수']
    # '총합' is the per-row total. It is copied from the count instead of being
    # summed across the row, so a numeric group column can never leak into it.
    counts['총합'] = counts['건수']
    total = counts['건수'].sum()
    # Share of all alarms (0 when the frame is empty).
    counts['비율'] = counts['건수'] / total * 100 if total else 0.0
    df = counts[['그룹','건수','총합','비율']].sort_values('건수',ascending=False).iloc[:num]

    bar_trace = {
      "x": list(df['건수']),
      "y": list(df['그룹']),
      "name": '총 건수',
      "orientation": "h",
      "type": "bar",
      "visible": True,
      "xaxis": "x"
    }
    table_trace = {
      "type": "table",
      "columnwidth": [20,10,10],
      "domain": {"x": [0.3, 1], "y": [0, 1]},
      "header": {
        "values": ['이름','건수','비율'],
        "fill": {"color": "rgb(31, 119, 180)"},
        "font": {"color": "rgb(255, 255, 255)", "size": 15},
        "height": 25,
        "line": {"color": "rgb(140, 177, 214)"}
      },
      "cells": {
        "values": [
          list(df['그룹']),
          list(df['건수'].astype(str)+" 건"),
          list(df['비율'].round().astype(str)+" %")
        ],
        "align": "center",
        "fill": {"color": "rgb(255, 255, 255)"},
        "font": {"color": "rgb(48, 55, 66)", "family": "Arial", "size": 13},
        "height": 25,
        "line": {"color": "rgb(171, 191, 212)", "width": 1}
      }
    }
    traces = Data([bar_trace, table_trace])
    layout = {
      "autosize": True,
      "font": {"color": "rgb(34, 43, 59)", "size": 13},
      "legend": {"x": -0.11808242767074219, "y": 1.10727969348659, "font": {"size": 20}},
      "showlegend": True,
      "title": {"font": {"size": 20}, "text": title},
      "xaxis": {
        "autorange": True,
        "domain": [0, 0.3],
        "exponentformat": "none",
        "range": [0, 501672.63157894736],
        "type": "linear"
      },
      "yaxis": {
        "autorange": "reversed",
        "range": [-0.5, 2.5],
        "domain": [1, 0.2],
        "type": "category",
        "automargin": True
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)
    display(df)

### 년도 별 장비 전체 알람 TOP10

In [None]:
# Ten systems with the most alarms overall
group_chart_total(data,'system_name',10,'장비 전체 알람 TOP10')

### 년도 별 장비 등급별 알람 TOP10

In [None]:
# Ten systems with the most alarms, one chart per degree
for level in ('주의', '경고', '심각'):
    group_chart(data, 'system_name', 10, level, '장비 등급별 알람 TOP10')

### 04월 11일 전체 TOP 10

In [None]:
# Ten most frequent alarm names on each of the two days
for day in ('2019-04-11', '2019-04-14'):
    group_chart_total(data[data.time_date == day], 'alarm_name', 10, day)

In [None]:
# Ten most frequent alarm names on 2019-04-06
group_chart_total(data[data.time_date == '2019-04-06'],'alarm_name',10,'2019-04-06')

### 04월 11일 등급별 TOP 10

In [None]:
# Ten most frequent alarm names on 2019-04-11 per degree.
# The '주의' chart was commented out in this cell and stays disabled.
for level in ('경고', '심각'):
    group_chart(data[data.time_date == '2019-04-11'], 'alarm_name', 10, level, '2019-04-11({})'.format(level))

In [None]:
# Ten most frequent '경고' alarm names on two further days.
# The title is built from the plotted day, so it can no longer disagree with the data.
for day in ('2019-03-31', '2019-04-03'):
    group_chart(data[data.time_date == day], 'alarm_name', 10, '경고', '{}(경고)'.format(day))

### 04월 11일 장비별 전체 등급 TOP 10

In [None]:
# Ten systems with the most alarms on 2019-04-11
group_chart_total(data[data.time_date == '2019-04-11'],'system_name',10,'2019-04-11')

In [None]:
# Ten `group2` values with the most alarms, one chart per degree
for level in ('주의', '경고', '심각'):
    group_chart(data, 'group2', 10, level, level + '_전체')

In [None]:
# Ten most frequent alarm names over the whole data set
group_chart_total(data,'alarm_name',10,'전체')

In [None]:
# Ten `group2` values with the most alarms, one chart per degree (title = degree)
for level in ('주의', '경고', '심각'):
    group_chart(data, 'group2', 10, level, level)

In [None]:
# Ten most frequent alarm names, one chart per degree
for level in ('주의', '경고', '심각'):
    group_chart(data, 'alarm_name', 10, level, level)

In [None]:
# Top hosts and top (host, alarm name) pairs, one chart per degree
for level in ('주의', '경고', '심각'):
    group_chart2(data, 'hostname', 'alarm_name', 10, level, level)

In [None]:
def rank_chart(df,group):
    """Rank the values of ``group`` by total alarm count and draw two figures.

    Figure 1: grouped bars (주의 / 경고 / 심각 / 총합) for the 15 highest-ranked values.
    Figure 2: two side-by-side tables holding ranks 1-10 and 11-20.

    Parameters
    ----------
    df : pandas.DataFrame
        Alarm rows; needs ``degree``, ``time`` and the column named by ``group``.
    group : str
        Column whose values are ranked (shown under the '호스트명' / HOSTNAME label).
    """
    levels = ['경고','심각','주의']
    # Rows: group value, columns: degree. reindex() guarantees the three degree
    # columns even when one of them does not occur in `df`.
    counts = (
        df.groupby([group,'degree'])['time'].count()
        .unstack('degree', fill_value=0)
        .reindex(columns=levels, fill_value=0)
    )
    counts['sum'] = counts.sum(axis=1)
    ranked = counts.sort_values('sum',ascending=False).reset_index()
    ranked.columns = ['호스트명','경고','심각','주의','총합']
    ranked.index = ranked.index + 1  # 1-based rank shown in the tables
    table_1 = ranked[:10]
    table_2 = ranked[10:20]
    top = ranked[:15]

    def bar(column, color, opacity=0.6, **extra):
        # One vertical bar series over the top-ranked rows.
        trace = {
          "x": list(top['호스트명']),
          "y": list(top[column]),
          "name": column,
          "type": "bar",
          "orientation": "v",
          "opacity": opacity,
          "marker": {"color": color, "line": {"width": 1}}
        }
        trace.update(extra)
        return trace

    def rank_table(rows, x_domain):
        # Table of rank, name and per-degree counts placed in the given horizontal domain.
        return {
          "type": "table",
          "columnwidth": [5,15,10,10,10],
          "domain": {"x": x_domain, "y": [0, 1]},
          "header": {
            "values": ["No", "HOSTNAME", "주의", "경고", "심각", "총합계"],
            "fill": {"color": "rgb(31, 119, 180)"},
            "font": {"color": "rgb(255, 255, 255)", "size": 15}
          },
          "cells": {
            "align": "center",
            "fill": {"color": "rgb(255, 255, 255)"},
            "font": {"color": "rgb(48, 55, 66)", "family": "Arial", "size": 13},
            "height": 25,
            "line": {"color": "rgb(171, 191, 212)", "width": 1},
            "values": [
              list(rows.index), list(rows['호스트명']), list(rows['주의']),
              list(rows['경고']), list(rows['심각']), list(rows['총합'])
            ]
          }
        }

    # Both figures use the same layout settings.
    layout = {
      "dragmode": "zoom",
      "hovermode": "closest",
      "legend": {
        "x": 0.4534313725490196,
        "y": 1.0802211091654588,
        "orientation": "h",
        "xanchor": "auto"
      },
      "xaxis": {
        "autorange": True,
        "domain": [0, 1],
        "fixedrange": False,
        "nticks": 10,
        "range": [-0.5, 9.5],
        "rangeslider": {
          "autorange": True,
          "range": [-0.5, 9.5],
          "visible": False
        },
        "showline": False,
        "showspikes": False,
        "showticklabels": False,
        "ticks": "",
        "title": {"text": ""},
        "type": "category"
      },
      "yaxis": {
        "autorange": True,
        "domain": [0.3, 1],
        "exponentformat": "none",
        "range": [0, 70986.31578947368],
        "showspikes": False,
        "ticks": ""
      }
    }

    bar_traces = Data([
        bar('주의', "#FFC90E", textposition="none"),
        bar('경고', "#FF7F0E"),
        bar('심각', "#D62728"),
        bar('총합', "#2B62AD", opacity=0.63,
            cliponaxis=True,
            constraintext="none",
            text=list(top['총합'].astype('str')),
            textfont={"color": "rgb(255, 0, 6)", "family": "Roboto", "size": 10},
            textposition="outside"),
    ])
    iplot(Figure(data=bar_traces, layout=layout))

    table_traces = Data([rank_table(table_1, [0, 0.5]), rank_table(table_2, [0.5, 1])])
    iplot(Figure(data=table_traces, layout=layout))

In [None]:
# Rank the systems by their total number of alarms
rank_chart(data,'system_name')

In [None]:
def line_chart(df,group,line_type,num,title):
    """Plot the alarm count per value of ``group`` as one line per degree.

    Parameters
    ----------
    df : pandas.DataFrame
        Alarm rows; needs ``degree``, ``time`` and the column named by ``group``.
    group : str
        Column used for the x axis (e.g. ``'time_date'``, ``'time_hour'``).
    line_type : str
        Plotly scatter ``mode`` (e.g. ``"lines"``, ``"markers+lines"``).
    num : int
        Passed to the x axis as ``nticks``.
    title : str
        Label used in the figure title and as the x-axis title.
    """
    levels = ['주의','경고','심각']
    # Rows: x value, columns: degree. reindex() guarantees all three degree
    # columns, so a degree that never occurs is drawn as a flat zero line
    # instead of breaking the column assignment.
    counts = (
        df.groupby([group,'degree'])['time'].count()
        .unstack('degree', fill_value=0)
        .reindex(columns=levels, fill_value=0)
    )
    x_values = list(counts.index)
    traces = Data([
        {
          "name": level,
          "x": x_values,
          "y": list(counts[level]),
          "mode": line_type,
          "type": "scatter"
        }
        for level in levels
    ])
    layout = {
      "autosize": True,
      "title": {
        "x": 0.5,
        "text": "{}별 알람 추이".format(title)
      },
      "xaxis": {
        "automargin": True,
        "autorange": True,
        "domain": [0, 1],
        "exponentformat": "none",
        "nticks": num,
        "side": "bottom",
        "tickformat": "",
        "title": {"text": "{}".format(title)},
        "type": "category"
      },
      "yaxis": {
        "autorange": True,
        "domain": [0.3, 1],
        "exponentformat": "none",
        "showticklabels": True,
        "title": {"text": "알람 수"},
        "type": "linear"
      }
    }
    fig = Figure(data=traces, layout=layout)
    iplot(fig)

In [None]:
# Alarm trend per degree along each time breakdown: (column, scatter mode, nticks, label)
for column, mode, ticks, label in (
    ('time_date', "lines", 5, "일(Day)"),
    ('time_week', "markers+lines", 40, "주(Week)"),
    ('time_month', "markers+lines", 40, "월(Month)"),
    ('time_hour', "markers+lines", 23, "시간대"),
    ('time_day', "markers+lines", 40, "일(한달 기준)"),
    ('time_weekday', "markers+lines", 40, "요일(일주일 기준)"),
):
    line_chart(data, column, mode, ticks, label)

In [None]:
# Systems that raised the '[DB]액티브 세션 수' alarm
data[data['alarm_name'] == '[DB]액티브 세션 수'].system_name.unique()

In [None]:
# Summary statistics of the alarm count per timestamp, one column per host
df = (
    data.groupby(['hostname','time'])['system_name'].count()
    .reset_index()
    .pivot_table(values='system_name', index='time', columns='hostname', aggfunc='first', fill_value=0)
    .reset_index()
    .describe()
)

In [None]:
# Summary statistics of the alarm count per timestamp for the system "npamis", one column per alarm name
df = (
    data[data.system_name=="npamis"]
    .groupby(['alarm_name','time'])['condition'].count()
    .reset_index()
    .pivot_table(values='condition', columns='alarm_name', index='time', aggfunc='first', fill_value=0)
    .describe()
)

### 클러스터

In [None]:
# Number of hosts that raised between 1 and 999 alarms.
# (The former `sort_values` line only built a sorted copy that was thrown away.)
data_df = data.groupby(['hostname'])['time'].count().reset_index()
len(data_df[(data_df.time >= 1)&(data_df.time < 1000)])

In [None]:
# Alarm count per host and summary statistics of those counts
data_df = (
    data.groupby(['hostname'])['time'].count()
    .reset_index()
    .rename(columns={'time': 'count'})
)
data_df.describe()

In [None]:
# Hosts with at least one alarm, most alarms first
data_df = data_df[data_df['count'] > 0]
node_names = data_df.sort_values('count',ascending=False).hostname.values
c_nodes = data[data['hostname'].isin(node_names)]
# Daily alarm count per host: rows are dates, columns are hosts
data_df_date = c_nodes.groupby(['time_date','hostname'])['degree'].count().reset_index()
data_df_date = data_df_date.pivot_table(values='degree', columns='hostname', index='time_date', aggfunc='first', fill_value=0)

In [None]:
# Pairwise correlation between the columns (hosts) of the daily alarm counts
corrmat = data_df_date.corr()

In [None]:
# Group hosts whose daily alarm counts are strongly correlated (correlation >= 0.7).
# For every host, collect the hosts (itself included) above the threshold.
clusters = [corrmat[corrmat[node] >= 0.7].index for node in corrmat.columns]

# Keep groups with more than one member. Each group is stored as a sorted
# tuple so that equal groups always compare equal, and the list is sorted so
# that the cluster numbering is the same on every run.
clusters_new = sorted({tuple(sorted(cluster)) for cluster in clusters if len(cluster) > 1})

dictt = {'cluster{}'.format(i): list(cluster) for i, cluster in enumerate(clusters_new)}

# The melt in chart_cluster() needs `time_date` as a regular column. The guard
# makes the cell safe to re-run (a second reset_index would add an 'index' column).
if 'time_date' not in data_df_date.columns:
    data_df_date = data_df_date.reset_index()

In [None]:
# Number of clusters found
len(clusters_new)

In [None]:
# Show the members of every cluster
clusters_new

In [None]:
def chart_cluster(clusters_new,df,daily_counts=None):
    """Plot the daily alarm counts of each host cluster and list its frequent alarms.

    For every cluster a point plot (one line per host) is drawn, followed by a
    table of (alarm name, host) pairs that occurred more than once.

    Parameters
    ----------
    clusters_new : iterable
        Clusters to chart. Each item is a tuple/list of host names; a bare
        string is treated as a single host name.
    df : pandas.DataFrame
        Alarm rows with the ``hostname``, ``alarm_name`` and ``time`` columns.
    daily_counts : pandas.DataFrame, optional
        Wide table with a ``time_date`` column and one column per host.
        Defaults to the notebook-level ``data_df_date``.

    NOTE: a later cell of this notebook defines another function with the same
    name; once that cell has run, this version is replaced.
    """
    if daily_counts is None:
        daily_counts = data_df_date
    for i, cluster in enumerate(clusters_new):
        # ('HOST') without a trailing comma is a plain string, not a tuple.
        # It is matched exactly, like the members of a real tuple.
        hosts = [cluster] if isinstance(cluster, str) else list(cluster)
        print('>>>>>>>>>cluster{}'.format(i))
        test_data = pd.melt(daily_counts, id_vars=['time_date'], value_vars=hosts, var_name='hostname')
        fig = plt.figure(figsize=(15, 7))
        sns.pointplot(x="time_date", y="value", hue="hostname", data=test_data)
        plt.xticks(rotation='vertical')
        plt.show()
        # Release the figure; otherwise one figure per cluster stays in memory.
        plt.close(fig)
        c_nodes = df[df['hostname'].isin(hosts)]
        c_nodes = c_nodes.groupby(['alarm_name','hostname'])['time'].count().reset_index().sort_values('time', ascending=False)
        c_nodes = c_nodes[c_nodes.time > 1]
        display_side_by_side(c_nodes)

In [None]:
# NOTE(review): `data_perf` is not defined in any cell visible in this notebook; it must exist before this cell runs.
chart_cluster([('SCMSG005R', 'SCMSG003R', 'SCMSG002R')],data_perf)

In [None]:
# NOTE(review): `data_perf` is not defined in any cell visible in this notebook; it must exist before this cell runs.
chart_cluster([('SCFEM001R', 'SCFEM002R','SCFEM421R','SCFEM431R')],data_perf)

In [None]:
# NOTE(review): `data_perf` is not defined in any cell visible in this notebook; it must exist before this cell runs.
chart_cluster([('pedwdb1', 'pedwdb2')],data_perf)

In [None]:
# Single-host cluster. The trailing comma makes it a real 1-tuple; ('SCCLD301')
# is just the string 'SCCLD301'.
# NOTE(review): `data_perf` is not defined in any cell visible in this notebook.
chart_cluster([('SCCLD301',)],data_perf)

In [None]:
# Single-host cluster. The trailing comma makes it a real 1-tuple; ('DRFEM001R')
# is just the string 'DRFEM001R'.
# NOTE(review): `data_perf` is not defined in any cell visible in this notebook.
chart_cluster([('DRFEM001R',)],data_perf)

In [None]:
# NOTE(review): `data_perf` is not defined in any cell visible in this notebook; it must exist before this cell runs.
chart_cluster([('ptrepap1', 'ptrepap2')],data_perf)

In [None]:
def scatter_chart_new(df,group,num):
    """Send a placeholder scatter chart to Plotly with ``py.plot`` and return the result.

    NOTE(review): ``df``, ``group`` and ``num`` are accepted but not used - the
    plotted points are hard-coded sample values.

    Returns
    -------
    The value returned by ``py.plot`` (it used to be computed and then discarded).
    """
    sample_trace = {
      "x": ["주의222222222222222222222", "경고", "심각"],
      "y": ["316153", "476589", "121969"],
      "mode": "markers",
      "type": "scatter"
    }
    layout = {
      "autosize": True,
      "xaxis": {"autorange": True, "type": "category"},
      "yaxis": {"autorange": True, "type": "linear"}
    }
    fig = Figure(data=Data([sample_trace]), layout=layout)
    plot_url = py.plot(fig)
    return plot_url

In [None]:
# Summary statistics of the alarm count per timestamp, one column per alarm name
df = (
    data.groupby(['time','alarm_name'])['condition'].count()
    .reset_index()
    .pivot_table(values='condition', index='time', columns='alarm_name', aggfunc='first', fill_value=0)
    .reset_index()
    .describe()
)

In [None]:
import plotly.graph_objs as go
# `df` holds describe() output: plot the mean (x) against the standard deviation (y)
# of every column. The rows are selected by label, so the chart does not depend
# on the row order of the describe() table.
trace = go.Scatter(
    x = df.loc['mean'].values,
    y = df.loc['std'].values,
    mode='markers+text',
    text= df.columns,
    textposition='top left'
)

# Not named `plot`: that would overwrite the `plot` function imported from plotly.offline.
scatter_traces = [trace]

# Plot and embed in ipython notebook!
py.iplot(scatter_traces)

In [None]:
import plotly.graph_objs as go

# The describe() tables are built first: the traces below read them, so they
# have to exist before plotting when the notebook runs top to bottom.
def _count_stats(frame, column_key):
    """describe() of the `condition` count per timestamp, one column per `column_key` value."""
    counts = frame.groupby(['time', column_key])['condition'].count().reset_index()
    return (
        counts.pivot_table(values='condition', index='time', columns=column_key, aggfunc='first', fill_value=0)
        .reset_index()
        .describe()
    )

df = _count_stats(data, 'alarm_name')
df_2 = _count_stats(data, 'system_name')
df_3 = _count_stats(data[data.degree=="주의"], 'time_date')
df_4 = _count_stats(data[data.degree=="경고"], 'time_date')
df_5 = _count_stats(data[data.degree=="심각"], 'time_date')

# One scatter trace per table: mean on x, standard deviation on y, each point
# labelled with its column name. The rows are selected by label, not by position.
stat_traces = [
    go.Scatter(
        name=name,
        x=stats.loc['mean'].values,
        y=stats.loc['std'].values,
        mode='markers+text',
        text=stats.columns,
        textposition='top left'
    )
    for name, stats in (("alarm", df), ("system", df_2), ("주의", df_3), ("경고", df_4), ("심각", df_5))
]

# Plot and embed in ipython notebook!
# (The list is not named `plot`, which would overwrite the function imported from plotly.offline.)
py.iplot(stat_traces)

In [None]:
# Combined "<system>_<alarm>" key. It is created before it is used as a
# grouping key, so the cell also works on a fresh kernel.
data['host_alarm'] = data.system_name+'_'+data.alarm_name

# Hourly alarm count per host_alarm key (rows: date-hour, columns: key)
df_3 = (
    data.groupby(['time_date_hour','host_alarm'])['condition'].count()
    .reset_index()
    .pivot_table(values='condition', index='time_date_hour', columns='host_alarm', aggfunc='first', fill_value=0)
    .reset_index()
)

In [None]:
# Daily alarm count per host_alarm key (rows: date, columns: key)
df_3 = (
    data.groupby(['time_date','host_alarm'])['condition'].count()
    .reset_index()
    .pivot_table(values='condition', index='time_date', columns='host_alarm', aggfunc='first', fill_value=0)
    .reset_index()
)

In [None]:
# Pairwise correlation between the host_alarm columns. The string `time_date`
# column is moved into the index first, so only count columns are correlated.
corrmat = df_3.set_index('time_date').corr()

In [None]:
# Display the correlation matrix
corrmat

In [None]:
# Group host_alarm keys whose daily counts are strongly correlated (correlation >= 0.9).
# For every key, collect the keys (itself included) above the threshold.
clusters = [corrmat[corrmat[node] >= 0.9].index for node in corrmat.columns]

# Keep groups with more than one member. Each group is stored as a sorted
# tuple so that equal groups always compare equal, and the list is sorted so
# that the cluster numbering is the same on every run.
clusters_new = sorted({tuple(sorted(cluster)) for cluster in clusters if len(cluster) > 1})

dictt = {'cluster{}'.format(i): list(cluster) for i, cluster in enumerate(clusters_new)}

In [None]:
# Number of clusters found
len(clusters_new)

In [None]:
# Show the members of every cluster
clusters_new

In [None]:
# Inspect the third cluster.
# NOTE(review): raises IndexError when fewer than three clusters were found.
clusters_new[2]

In [None]:
def chart_cluster(clusters_new,df,daily_counts=None):
    """Plot the daily counts of each host_alarm cluster and list its frequent alarms.

    For every cluster a point plot (one line per "<system>_<alarm>" key) is
    drawn, followed by a table of (alarm name, host) pairs with more than 100
    occurrences.

    Parameters
    ----------
    clusters_new : iterable
        Clusters to chart. Each item is a tuple/list of "<system>_<alarm>"
        keys; a bare string is treated as a single key.
    df : pandas.DataFrame
        Alarm rows with the ``system_name``, ``alarm_name``, ``hostname`` and
        ``time`` columns.
    daily_counts : pandas.DataFrame, optional
        Wide table with a ``time_date`` column and one column per key.
        Defaults to the notebook-level ``df_3``.

    NOTE: this definition replaces the earlier ``chart_cluster`` of this
    notebook, which works on host names instead of host_alarm keys.
    """
    if daily_counts is None:
        daily_counts = df_3
    # Key of every alarm row, built the same way as the columns of the daily table.
    host_alarm = df['system_name'] + '_' + df['alarm_name']
    for i, cluster in enumerate(clusters_new):
        keys = [cluster] if isinstance(cluster, str) else list(cluster)
        print('>>>>>>>>>cluster{}'.format(i))
        test_data = pd.melt(daily_counts, id_vars=['time_date'], value_vars=keys, var_name='host_alarm')
        fig = plt.figure(figsize=(15, 7))
        sns.pointplot(x="time_date", y="value", hue="host_alarm", data=test_data)
        plt.xticks(rotation='vertical')
        plt.show()
        # Release the figure; otherwise one figure per cluster stays in memory.
        plt.close(fig)
        # The cluster members are host_alarm keys, so the rows are selected by
        # that key; matching them against `hostname` never selected any row.
        c_nodes = df[host_alarm.isin(keys)]
        c_nodes = c_nodes.groupby(['alarm_name','hostname'])['time'].count().reset_index().sort_values('time', ascending=False)
        c_nodes = c_nodes[c_nodes.time > 100]
        display_side_by_side(c_nodes)

In [None]:
# Chart one hand-picked cluster of keys against the full alarm data
chart_cluster([ ('VM_연동서식_에이전트 다운',
  'VM_연동서식_서버 상태',
  'Oracle 11g (192.168.1.5 : ORA9)_[DB]액티브 세션 수',
  'eaudit_10GB',
  'Oracle 11g (192.168.1.18 : gw8orcl)_[DB]Invalid Object 수',
  'VM_연동서식_CPU 사용률')],data)

In [None]:
# All alarm rows of the system 'VM_연동서식'
data[data.system_name=='VM_연동서식']

In [None]:
# Number of alarms (non-null `condition` values) per day
data.groupby(['time_date'])['condition'].count().reindex().reset_index()

In [None]:
import statsmodels.api as sm
# Multiplicative seasonal decomposition of the daily alarm counts
daily_alarm_counts = data.groupby(['time_date'])['condition'].count().values
# NOTE(review): newer statsmodels releases name this argument `period`; `freq`
# is kept for the version this notebook was written against.
res = sm.tsa.seasonal_decompose(daily_alarm_counts,freq=40,model="multiplicative")
fig = res.plot()
# Resize the decomposition figure itself; a separate plt.figure(figsize=...)
# call only opened an additional empty figure.
fig.set_size_inches(30, 12)
plt.show()

In [None]:
# Number of days in the daily alarm series
len(data.groupby(['time_date'])['condition'].count().reindex().reset_index().condition.values)

In [None]:
# The "deaths" dataset of the R package "MASS", fetched through statsmodels.
# It gets its own name so that the alarm DataFrame `data` is not overwritten.
deaths_dataset = sm.datasets.get_rdataset("deaths", "MASS")

df = deaths_dataset.data
df.tail()

In [None]:
def yearfraction2datetime(yearfraction, startyear=0):
    """Convert a fractional year to the first day of the corresponding month.

    Parameters
    ----------
    yearfraction : float
        Year with the month as a fraction, e.g. ``1974.5`` for July 1974.
    startyear : int, optional
        Offset added to the integer part of ``yearfraction`` (e.g. ``1900``
        for two-digit years). Defaults to 0.

    Returns
    -------
    datetime.datetime
        Midnight on the first day of the resulting month.
    """
    import datetime
    whole_years = int(yearfraction)
    # The month comes from the fractional part only. It must not depend on
    # `startyear`, which merely shifts the resulting year.
    months = int(round(12 * (yearfraction - whole_years)))
    # A fraction that rounds up to 12 months rolls over into January of the next year.
    extra_years, month_index = divmod(months, 12)
    return datetime.datetime(whole_years + startyear + extra_years, month_index + 1, 1)

# Add a `datetime` column converted from the fractional-year `time` column
df["datetime"] = df.time.map(yearfraction2datetime)
df.tail()

In [None]:
# Number of rows and columns of the example series
df.shape

In [None]:
# Plot `value` against the converted `datetime` column
df.plot(x='datetime',y='value')

In [None]:
import statsmodels.api as sm
# Multiplicative seasonal decomposition of the example series
# NOTE(review): newer statsmodels releases name this argument `period`; `freq`
# is kept for the version this notebook was written against.
res = sm.tsa.seasonal_decompose(df.value.values,freq=5,model="multiplicative")
fig = res.plot()
# Resize the decomposition figure itself; a separate plt.figure(figsize=...)
# call only opened an additional empty figure.
fig.set_size_inches(30, 12)
plt.show()