## Logs Analytics Dashboard
This dashboard can be used for performing visual analytics on the server log files. You can interact with any chart in the dashboard, like so:

1. Click on a bar in any bar chart to filter the whole dataset by that bar's value
2. Click on a slice of any pie chart to filter the whole dataset by that pie slice's value

Fun things to try:
1. Select any specific day in the `Daily Events` bar chart (e.g., why do the first and last days have fewer events?)
2. Find out which product queries resulted in a server error (click on the `SERVER ERROR` slice of the `Events By Status` pie chart)


In [None]:
import numpy as np
import pandas as pd

import ipywidgets as widgets
import bqplot as bq
import bqplot.pyplot as plt

In [None]:
def get_status_code(x):
    """
    Translate a numeric HTTP status into its status-class label.

    Returns "SUCCESS" for 200-299, "REDIRECT" for 300-399, "CLIENT ERROR"
    for 400-499 and "SERVER ERROR" for 500 and above. Any value below 200
    matches no range and yields None.
    """
    if 200 <= x < 300:
        return "SUCCESS"
    if 300 <= x < 400:
        return "REDIRECT"
    if 400 <= x < 500:
        return "CLIENT ERROR"
    if x >= 500:
        return "SERVER ERROR"
    # Nothing matched (e.g. 1xx codes): no label.
    return None

In [None]:
def get_events_by_hour(log_data_slice):
    """
    Count events for each hour of the day (0-23).

    The index values of `log_data_slice` are grouped by their `.hour`
    attribute and the non-null `status_code` entries are counted per group.
    The result always has 24 entries; hours with no events are reported as 0.
    """
    statuses = log_data_slice["status_code"]
    per_hour = statuses.groupby(lambda ts: ts.hour).count()
    # Hours without any event are missing after grouping; add them back as 0.
    every_hour = np.arange(24)
    return per_hour.reindex(every_hour).fillna(0)

In [None]:
# Load the space-separated access log into pandas and do some munging.
log_data = pd.read_csv("access.log", sep=" ", header=None)
# Columns 1 and 2 are discarded; the eight remaining columns are named below.
log_data.drop([1, 2], axis=1, inplace=True)
log_data.columns = [
    "ip_address",
    "timestamp",
    "request",
    "status",
    "col1",
    "url",
    "agent",
    "col2",
]

# Remove the square brackets around each timestamp before parsing it.
# `regex=True` is passed explicitly: pandas >= 2.0 treats the pattern as a
# literal string by default, which would leave the brackets in place and make
# the strict `format` parse below fail. The raw string avoids the invalid
# "\[" / "\]" escape sequences of a normal string literal.
log_data.index = pd.to_datetime(
    log_data["timestamp"].str.replace(r"[\[\]]", "", regex=True),
    format="%d/%b/%Y:%H:%M:%S",
)
log_data.drop("timestamp", axis=1, inplace=True)

# Add extra columns for easy querying.
log_data["status_code"] = log_data["status"].map(get_status_code)
# Capture only up to the next "&": a greedy ".*" would run to the LAST "&" in
# the request and swallow any query parameters that follow the id.
log_data["category"] = log_data["request"].str.extract(
    r"categoryId=([^&]*)&", expand=False
)
log_data["product"] = log_data["request"].str.extract(
    r"productId=([^&]*)&", expand=False
)
log_data["hour"] = log_data.index.hour
log_data["day"] = log_data.index.strftime("%Y-%m-%d")

In [None]:
# Complete set of values for each filterable field, taken from the unfiltered
# log. value_counts() orders the values by descending frequency.
all_days = log_data["day"].value_counts().index
all_hours = np.arange(24)
all_categories = log_data["category"].value_counts().index.tolist()
all_products = log_data["product"].value_counts().index.tolist()
all_status_codes = log_data["status_code"].value_counts().index.tolist()

# One colour per category. The palette is cycled so that every category gets
# a colour even when there are more categories than palette entries (a plain
# zip would silently leave the extra categories without one).
category_colors = {
    category: bq.CATEGORY10[position % len(bq.CATEGORY10)]
    for position, category in enumerate(all_categories)
}
# One colour per label that get_status_code can return. "REDIRECT" is included
# so that looking up the colour of a 3xx slice does not raise KeyError.
status_label_colors = {
    "SUCCESS": "#006d2c",
    "REDIRECT": "#6baed6",
    "CLIENT ERROR": "#fc8d59",
    "SERVER ERROR": "#a63603",
}

In [None]:
# Daily events bar chart: one clickable bar per day.
daily_events_fig = plt.figure(
    title="Daily Events",
    animation_duration=1000,
    layout=widgets.Layout(width="900px", height="500px"),
)

# Days are plotted on a date scale.
plt.scales(scales={"x": bq.DateScale()})

# Axis and interaction settings shared by the bar charts of the dashboard.
bar_axes_options = {"x": {"grid_lines": "none"}, "y": {"tick_format": ","}}
common_bar_options = {
    "interactions": {"click": "select"},
    "selected_style": {"stroke": "Red", "stroke-width": 4},
    "axes_options": bar_axes_options,
}

# Heights start empty; they are filled in when the plots are updated.
daily_events_bar = plt.bar(
    pd.to_datetime(all_days),
    [],
    colors=["dodgerblue"],
    opacities=[0.8] * len(all_days),
    **common_bar_options
)

# Second, non-clickable bar series on the same figure, initialised to zero.
num_days = len(all_days)
filtered_daily_events_bar = plt.bar(
    pd.to_datetime(all_days),
    np.zeros(num_days),
    colors=["lightgreen"],
    opacities=[0.8] * num_days,
    axes_options=bar_axes_options,
)

# Hourly events bar chart: one clickable bar per hour of the day.
hourly_events_fig = plt.figure(
    title="Hourly Events",
    animation_duration=1000,
    layout=widgets.Layout(width="600px", height="500px"),
)

# Hours are shown as discrete categories on an ordinal scale.
plt.scales(scales={"x": bq.OrdinalScale()})

# Heights start empty; they are filled in when the plots are updated.
hourly_events_bar = plt.bar(
    all_hours,
    [],
    colors=["goldenrod"],
    opacities=[0.8] * 24,
    padding=0.2,
    **common_bar_options
)

# Second, non-clickable bar series on the same figure.
filtered_hourly_events_bar = plt.bar(
    all_hours,
    [],
    colors=["lightgreen"],
    opacities=[0.8] * 24,
    padding=0.2,
    axes_options=bar_axes_options,
)

# Events-by-product chart: horizontal, clickable bars, one per product.
products_fig = plt.figure(
    title="Events By Product",
    animation_duration=1000,
    fig_margin={"top": 60, "bottom": 20, "left": 100, "right": 40},
    layout=widgets.Layout(width="550px", height="500px"),
)

# Heights start empty; they are filled in when the plots are updated.
products_bar = plt.bar(
    all_products,
    [],
    colors=["salmon"],
    orientation="horizontal",
    opacities=[0.8] * len(all_products),
    padding=0.2,
    **common_bar_options
)

# Settings shared by both pie charts: clickable slices drawn as a ring
# (non-zero inner radius) with labels outside.
common_pie_args = {
    "display_labels": "outside",
    "interactions": {"click": "select"},
    "selected_style": {"stroke": "white", "stroke-width": 3},
    "inner_radius": 80,
    "apply_clip": False,
}

# Categories pie chart; slice sizes are filled in when the plots are updated.
categories_fig = plt.figure(
    title="Events By Category",
    animation_duration=1000,
    layout=widgets.Layout(width="550px", height="500px"),
)
categories_pie = plt.pie([], labels=all_categories, **common_pie_args)

# Status codes pie chart; slice sizes are filled in when the plots are updated.
status_codes_fig = plt.figure(
    title="Events By Status",
    animation_duration=1000,
    layout=widgets.Layout(width="550px", height="500px"),
)
status_codes_pie = plt.pie([], labels=all_status_codes, **common_pie_args)

# Buttons: "Update" applies the current selections, "Reset" clears them.
update_btn = widgets.Button(
    description="Update",
    button_style="Success",
)
reset_btn = widgets.Button(
    description="Reset",
    button_style="Success",
)

In [None]:
# One row per filterable field:
# (column in log_data, all values of that column, mark used to select them).
_filter_table = [
    ("day", all_days, daily_events_bar),
    ("hour", all_hours, hourly_events_bar),
    ("product", all_products, products_bar),
    ("category", all_categories, categories_pie),
    ("status_code", all_status_codes, status_codes_pie),
]

# Parallel lists derived from the table; position i in each list refers to
# the same field.
fields = [row[0] for row in _filter_table]
field_vals = [row[1] for row in _filter_table]
plots = [row[2] for row in _filter_table]

In [None]:
def generate_filters():
    """
    Build the active filters from the current chart selections.

    Walks the module-level `fields`, `field_vals` and `plots` lists in
    lockstep. For every plot with a non-empty `selected` attribute, the
    selected positions are translated into the corresponding entries of
    `field_vals`.

    Returns:
        dict mapping a field name to the list of selected values. Fields
        whose plot has no selection are omitted.
    """
    filters_dict = {}
    for field, field_val, plot in zip(fields, field_vals, plots):
        selected = plot.selected
        # Test for emptiness explicitly instead of relying on truthiness:
        # when `selected` is a numpy array, a selection of just index 0 is
        # falsy (and would be ignored), and a multi-element array raises
        # ValueError when evaluated as a boolean.
        if selected is None or len(selected) == 0:
            continue
        filters_dict[field] = [field_val[i] for i in selected]
    return filters_dict

In [None]:
def apply_filters(df, filters):
    """
    Keep only the rows of `df` that satisfy every filter.

    `filters` maps a column name to the values allowed in that column.
    Entries whose value collection is empty or None are ignored. When no
    filter applies, `df` itself is returned.
    """
    result = df
    for column, allowed in filters.items():
        if not allowed:
            # Nothing selected for this column: no restriction.
            continue
        keep = result[column].isin(allowed)
        result = result[keep]
    return result

In [None]:
def update_plots(*args):
    """
    Recompute the filtered log data and push fresh values to every chart.

    The current selections are turned into filters with generate_filters,
    applied to `log_data` with apply_filters, and both results are stored in
    the module-level `filters` and `filtered_log_data`.

    Args:
        *args: ignored; accepted so callers may pass an argument (the
            callers in this file pass None).
    """
    global filters, filtered_log_data
    filters = generate_filters()
    filtered_log_data = apply_filters(log_data, filters)

    # Colour used for a pie label that has no entry in the colour maps, so an
    # unexpected label (e.g. a status class without a configured colour)
    # degrades to grey instead of raising KeyError.
    fallback_color = "#969696"

    # Events per calendar day, aligned to the full list of days so that days
    # with no matching events show as 0.
    daily_events = (
        filtered_log_data.resample("D")
        .count()["status_code"]
        .reindex(pd.to_datetime(all_days))
        .fillna(0)
    )
    daily_events_bar.y = daily_events

    hourly_events_bar.y = get_events_by_hour(filtered_log_data)

    # Events per product, aligned to the full product list.
    products_bar.y = (
        filtered_log_data["product"].value_counts().reindex(all_products).fillna(0)
    )

    # NOTE(review): the pie labels below are rebuilt from the filtered counts,
    # so slice positions can differ from all_categories / all_status_codes,
    # which generate_filters indexes with plot.selected. Confirm that a
    # selection made on an already-filtered pie maps to the intended value.
    events_by_category = filtered_log_data["category"].value_counts()

    # hold_sync groups the label/size/colour changes of one pie together.
    with categories_pie.hold_sync():
        categories_pie.labels = list(events_by_category.index)
        categories_pie.sizes = events_by_category
        categories_pie.colors = [
            category_colors.get(d, fallback_color) for d in categories_pie.labels
        ]

    events_by_status_code = filtered_log_data["status_code"].value_counts()

    with status_codes_pie.hold_sync():
        status_codes_pie.labels = list(events_by_status_code.index)
        status_codes_pie.sizes = events_by_status_code.values
        status_codes_pie.colors = [
            status_label_colors.get(d, fallback_color)
            for d in status_codes_pie.labels
        ]

In [None]:
def reset_filters(*args):
    """
    Clear the selection on every chart, then refresh all plots.

    Any positional arguments are accepted and ignored.
    """
    for mark in plots:
        mark.selected = None
    # With no selections left, this redraw uses no filters.
    update_plots(None)


def _on_update_click(btn):
    """Refresh all plots from the current selections."""
    update_plots(None)


def _on_reset_click(btn):
    """Clear all selections and refresh the plots."""
    reset_filters()


# Hook the handlers up to their buttons and stack the buttons vertically.
update_btn.on_click(_on_update_click)
reset_btn.on_click(_on_reset_click)
btns_layout = widgets.VBox(
    [update_btn, reset_btn],
    layout=widgets.Layout(overflow_x="hidden"),
)

In [None]:
# Populate every chart once before the dashboard is displayed.
update_plots(None)

# Top row: daily and hourly charts plus the buttons; bottom row: the
# product, category and status charts.
parent_plots = widgets.HBox([daily_events_fig, hourly_events_fig])
child_plots = widgets.HBox([products_fig, categories_fig, status_codes_fig])
top_row = widgets.HBox([parent_plots, btns_layout])
widgets.VBox([top_row, child_plots])