In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import bokeh as bk

In [2]:
from bokeh.plotting import figure 
from bokeh.io import output_notebook, show

In [3]:
from collections import Counter

In [4]:
# Route Bokeh output to inline notebook rendering; must run before any show().
output_notebook()

In [5]:
# Sample data for the Bokeh smoke test: 100 evenly spaced points on [-6, 6]
# and the cosine evaluated at each of them.
x = np.linspace(start=-6, stop=6, num=100)
y = np.cos(x)

In [6]:
# Smoke test: draw the cosine samples as semi-transparent markers to confirm
# that Bokeh renders inline in this notebook.
p = figure(title="Test", width=500, height=500)
p.circle(x=x, y=y, size=7, alpha=0.5, color="firebrick")
show(p)

In [7]:
# Load the sample CSV into raw_df. The path is relative to the notebook's
# working directory, so the kernel must be started from this folder.
raw_df = pd.read_csv("../csv/sampledata2.csv")

In [8]:
# Display (n_rows, n_columns) of the freshly loaded frame.
raw_df.shape

(122, 18)

In [9]:
# Make the column labels attribute/query friendly by swapping every space for
# an underscore (e.g. "Product Component" -> "Product_Component").
raw_df.columns = [label.replace(' ', '_') for label in raw_df.columns]

In [10]:
# Show the column labels after the space-to-underscore rename.
raw_df.columns

Index(['Severity', 'Customer_Record_ID', 'Account_Name', 'Case_Number',
       'Status', 'Date/Time_Opened', 'Date/Time_Closed', 'Resolution_Time',
       'Ambari_Version', 'Stack_Version', 'Product_Component',
       'Apache_BugID_URL', 'Hortonworks_BugID_URL', 'HOTFIX_BugID_URL',
       'EAR_URL', 'Root_Cause', 'Enhancement_Request_Number', 'Environment'],
      dtype='object')

In [11]:
# Working frame: keep only the severity, status, open/close timing, version,
# component and root-cause columns; everything else stays behind in raw_df.
df = raw_df[[
    'Severity',
    'Status',
    'Date/Time_Opened',
    'Date/Time_Closed',
    'Resolution_Time',
    'Ambari_Version',
    'Stack_Version',
    'Product_Component',
    'Root_Cause',
]]

In [12]:
# Preview only the first rows instead of rendering the whole frame: a bounded
# preview keeps the saved notebook small and limits how much record-level
# data ends up in the stored output.
df.head(10)

Unnamed: 0,Severity,Status,Date/Time_Opened,Date/Time_Closed,Resolution_Time,Ambari_Version,Stack_Version,Product_Component,Root_Cause
0,S1 - Production Down,Closed,20170101 020003.000,20170101 020744.000,0 days 0 hours8 mn,1.4.4,2.0.6,Hbase,Duplicate Case
1,S1 - Production Down,Closed,20170101 020004.000,20170101 094851.000,0 days 7 hours15 mn,1.4.4,2.0.6,Hbase,Customer Environment (Non HDP)
2,S1 - Production Down,Closed,20170101 074012.000,20170103 061541.000,1 days 22 hours35 mn,1.7.0,2.2.4,Hive,Customer Environment (Non HDP)
3,S1 - Production Down,Closed,20170102 150813.000,20170102 194057.000,0 days 4 hours33 mn,2.2.0,2.3.2,HDFS,Education - Configuration
4,S1 - Production Down,Closed,20170102 180708.000,20170106 065125.000,3 days 0 hours11 mn,2.4.1.0,2.4.0,Hbase,Data Issue
5,S1 - Production Down,Closed,20170103 071607.000,20170103 073309.000,0 days 0 hours17 mn,2.4.2.0,2.4.2,HDFS,Education - Configuration
6,S1 - Production Down,Closed,20170103 091214.000,20170103 102540.000,0 days 1 hours13 mn,2.4.2.0,2.4.2,Solr (HDFS),Education - Configuration
7,S1 - Production Down,Closed,20170103 202933.000,20170103 225953.000,0 days 2 hours30 mn,2.4.2.0,2.5.3,Oozie,Education - Configuration
8,S1 - Production Down,Closed,20170104 002956.000,20170105 020358.000,0 days 2 hours16 mn,2.2.0,2.3.4,Hive,Customer Environment (Non HDP)
9,S1 - Production Down,Closed,20170104 060327.000,20170104 070857.000,0 days 1 hours4 mn,2.4.2.0,2.5.3,Hive,Education - Non Configuration


In [13]:
# Tally rows per product component once and derive both lists from the same
# Series, so labels and counts are guaranteed to stay positionally aligned
# (value_counts() orders by descending frequency).
component_tally = df['Product_Component'].value_counts()
values = component_tally.index.tolist()   # component names
counts = component_tally.tolist()         # occurrences, same order as `values`

In [14]:
# Component names, most frequent first (value_counts() ordering).
print(values)

['HDFS', 'Ambari', 'Hive', 'Hbase', 'YARN', 'Zookeeper', 'Ranger', 'NiFI', 'Kafka', 'Cloudbreak', 'Solr (HDFS)', 'Phoenix', 'Map Reduce', 'Oozie', 'Spark', 'Accumulo', 'Ambari (HDF)']


In [15]:
# Occurrence counts, in the same order as `values`.
print(counts)

[28, 25, 18, 16, 9, 6, 4, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1]


In [16]:
# Sanity check: number of distinct components.
len(values)

17

In [17]:
# Sanity check: must equal len(values) so the two lists can be paired up.
len(counts)

17

In [24]:
# Pair each component with its count and materialise the mapping right away.
# A bare zip() is a one-shot iterator: left in a notebook global it is empty
# after the first consumer touches it and has no .values()/len(), so any cell
# run out of order would fail or silently see no data.
# NOTE(review): `x` also names the linspace array from the smoke-test cell;
# the name is kept because later cells read `x`.
x = dict(zip(values, counts))

In [25]:
# Ensure x is a plain dict (component -> count); the chart cell below calls
# x.values() and len(x), which an iterator of pairs would not support.
x = dict(x)

In [26]:
# Inspect the component -> count mapping.
x

{'Accumulo': 1,
 'Ambari': 25,
 'Ambari (HDF)': 1,
 'Cloudbreak': 2,
 'HDFS': 28,
 'Hbase': 16,
 'Hive': 18,
 'Kafka': 3,
 'Map Reduce': 1,
 'NiFI': 3,
 'Oozie': 1,
 'Phoenix': 1,
 'Ranger': 4,
 'Solr (HDFS)': 2,
 'Spark': 1,
 'YARN': 9,
 'Zookeeper': 6}

In [52]:
from math import pi
from bokeh.io import output_file, show
from bokeh.palettes import Category20c
from bokeh.plotting import figure
from bokeh.transform import cumsum
from bokeh.models import Label, LabelSet

In [54]:
# Pie chart: share of rows per product component.
#
# Input:  `x`, the component -> count mapping built above.
# Output: `data` (one row per slice, with the angle/colour/label columns Bokeh
#         reads), `p` (the figure), `labels` (the LabelSet drawn on it).

# Snapshot the mapping once. This also tolerates `x` still being an iterable
# of (component, count) pairs, which can only be consumed a single time.
slice_counts = dict(x)

# Geometry in data units: the wedge glyph and the label positions must agree.
PIE_CENTER_X, PIE_CENTER_Y = 0, 1
PIE_RADIUS = 0.65
LABEL_RADIUS = PIE_RADIUS * 1.08   # just outside the rim
MIN_LABEL_SHARE = 0.03             # thinner slices rely on legend + hover

data = pd.DataFrame({
    'Component': list(slice_counts.keys()),
    'value': list(slice_counts.values()),
})
share = data['value'] / data['value'].sum()
data['angle'] = share * 2 * pi

# Category20c only defines palettes for 3..20 colours; clamp the lookup and
# cycle through the palette so fewer/more slices do not raise KeyError.
n_slices = len(data)
palette = Category20c[min(max(n_slices, 3), 20)]
data['color'] = [palette[i % len(palette)] for i in range(n_slices)]

# Anchor each label at the angular midpoint of its wedge. Wedges are laid out
# counter-clockwise from angle 0 using the running sum of 'angle', so the
# midpoint is (cumulative end angle) - (own angle) / 2.
mid_angle = data['angle'].cumsum() - data['angle'] / 2
data['label_x'] = PIE_CENTER_X + LABEL_RADIUS * np.cos(mid_angle)
data['label_y'] = PIE_CENTER_Y + LABEL_RADIUS * np.sin(mid_angle)
# Blank out labels of very thin slices; they would pile up on each other.
data['label_text'] = data['Component'].where(share >= MIN_LABEL_SHARE, '')

# width/height match the spelling used by the first figure in this notebook.
# match_aspect keeps x and y data units the same length on screen, so the
# label coordinates computed above line up with the circular wedge.
p = figure(height=750, width=900, title="Pie Chart", toolbar_location=None,
           tools="hover", tooltips="@Component: @value", match_aspect=True)

# legend_field groups legend entries by the column's values; it supersedes
# passing a column name through the deprecated `legend` keyword.
wedge_renderer = p.wedge(
    x=PIE_CENTER_X, y=PIE_CENTER_Y, radius=PIE_RADIUS,
    start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
    line_color="white", fill_color='color', legend_field='Component',
    source=data,
)

# A pie chart has no meaningful axes or grid.
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

# A LabelSet draws one label per row of its data source, so it must be given
# one; reuse the source Bokeh built for the wedges so both read the same rows.
labels = LabelSet(x='label_x', y='label_y', text='label_text',
                  text_align='center', text_baseline='middle',
                  source=wedge_renderer.data_source)
p.add_layout(labels)

show(p)

In [45]:
# Re-display whatever figure is currently bound to `p`.
# NOTE(review): this cell's execution count (45) predates the chart cell above
# (54), and that cell already calls show(p); on a fresh Run All this renders
# the same figure a second time.
show(p)