In [61]:
from IPython.display import clear_output

import panel as pn
import ipywidgets as widgets
import pandas as pd
import altair as alt
import pathlib
import datetime
import os

# Column labels. TIMESTAMP and IP_ADDR are looked up in `events_data`;
# COUNT is the label given to the per-IP event count column produced by
# the aggregation step.
LOCATION = "Location"
TIMESTAMP = "Timestamp"
COUNT = "COUNT"
IP_ADDR = "Client IP"

# Labels of the projection types offered by the toggle buttons.
EVENTS = "Events"
TIME_PLOT = "Time plot"


# The events CSV is located relative to the resolved current working directory.
prefix_file_path = os.path.join("Task 2", "536535_data", "536535_data")
events_file_path = os.path.join(prefix_file_path, "m365_user_sessions", "events.csv")
dir_path = pathlib.Path().resolve()

# Loaded once at cell execution; every helper below reads this frame.
events_data: pd.DataFrame = pd.DataFrame(pd.read_csv(dir_path / events_file_path))

# Load Panel's 'vega' extension.
pn.extension('vega')

# Shared widget style: let the description label take its natural width.
style = {'description_width': 'initial'}

# How many of the most frequent client IPs to show (converted to int on use).
top_n_widget = widgets.Dropdown(
    options=['10', '20', '30', '40', '50'],
    value='10',
    description='Top N stats',
    disabled=False,
    style = style,
)

# Selects which projection is rendered when the changes are applied.
data_type_widget = widgets.ToggleButtons(
    options=[EVENTS, TIME_PLOT],
    description='Projection type:',
    disabled=False,
    button_style='info', # 'success', 'info', 'warning', 'danger' or ''
    style = {'button_width':'200px'}, layout={'width': '520px'},
)

# Nothing is redrawn until this button is clicked.
apply_changes_widget = widgets.Button(
    description='Apply changes',
    tooltip='Apply changes',
    disabled=False,
    button_style='success', # 'success', 'info', 'warning', 'danger' or ''
    style = style,
)
# Time window as (start, end) hour offsets; 168 hours = one week.
period_widget = widgets.IntRangeSlider(
    min=0,
    max=168,
    value=[0, 168],
    step=1,
    description="Time range in hours (max 1 week from the first event)",
    disabled=False,
    style = style,
    layout={'width': '520px'}
)

# Detail table; starts as a header-only (zero-row) rendering of the events data.
table_widget = widgets.HTML(value=events_data.iloc[:0].to_html())

# Controls in the order in which they are displayed.
ui_widgets = [top_n_widget, data_type_widget, 
              period_widget, apply_changes_widget,
              ]

def display_widgets():
    """Render every control listed in ``ui_widgets``, in list order."""
    for control in ui_widgets:
        display(control)

def get_first_time_flow_string():
    """Return the minimum value of the ``TIMESTAMP`` column of ``events_data``."""
    timestamps = events_data[TIMESTAMP]
    return timestamps.min()

def get_last_time_flow_string():
    """Return the maximum value of the ``TIMESTAMP`` column of ``events_data``."""
    timestamps = events_data[TIMESTAMP]
    return timestamps.max()

def get_first_time_flow():
    """Return the earliest event timestamp parsed into a ``datetime.datetime``."""
    raw_first = get_first_time_flow_string()
    return datetime.datetime.fromisoformat(raw_first)

def get_last_time_flow():
    """Return the latest event timestamp parsed into a ``datetime.datetime``."""
    raw_last = get_last_time_flow_string()
    return datetime.datetime.fromisoformat(raw_last)

def to_datetime(x):
    """Parse the ISO-format string *x* into a ``datetime.datetime``."""
    parse_iso = datetime.datetime.fromisoformat
    return parse_iso(x)

def process_by_top_n(current_data, top_n):
    """Return the *top_n* rows of *current_data* with the largest ``COUNT`` values.

    An empty frame is handed back untouched.
    """
    if not current_data.empty:
        return current_data.nlargest(top_n, columns=COUNT)
    return current_data

def process_by_time_range(current_data, time_period):
    """Keep the rows of *current_data* whose timestamp lies in a time window.

    Args:
        current_data: Frame with a ``TIMESTAMP`` column of ISO-format strings.
        time_period: ``(start, end)`` pair of hour offsets, counted from the
            earliest timestamp in the module-level ``events_data``.

    Returns:
        The rows whose timestamp is within the window, both bounds inclusive.
        An empty frame is returned unchanged.
    """
    if current_data.empty:
        return current_data

    start_offset, end_offset = time_period

    # Resolve the reference point once: each lookup scans the whole event log.
    first_event = get_first_time_flow()
    start = first_event + datetime.timedelta(hours=start_offset)
    end = first_event + datetime.timedelta(hours=end_offset)

    # Parse the timestamp column a single time and reuse it for both bounds.
    timestamps = current_data[TIMESTAMP].apply(to_datetime)
    return current_data[(timestamps >= start) & (timestamps <= end)]

def process_data_event(current_data: pd.DataFrame, top_n, time_period):
    """Count events per client IP inside a time window and keep the top N.

    Args:
        current_data: Event rows with ``TIMESTAMP`` and ``IP_ADDR`` columns.
        top_n: Maximum number of IP addresses to keep.
        time_period: ``(start, end)`` hour offsets passed on to
            ``process_by_time_range``.

    Returns:
        A frame with one row per client IP (``IP_ADDR``) and its event count
        (``COUNT``), limited to the *top_n* largest counts.
    """
    in_range = process_by_time_range(current_data, time_period)
    # Aggregate the time-filtered rows (not the raw input); otherwise the
    # selected time range would have no effect on the result.
    per_ip_counts = in_range.groupby(IP_ADDR)[IP_ADDR].size().reset_index(name=COUNT)
    return process_by_top_n(per_ip_counts, top_n)

def make_events_plot(current_data):
    """Build an interactive pie chart of event counts per client IP.

    Args:
        current_data: Frame with ``IP_ADDR`` and ``COUNT`` columns.

    Returns:
        An ``alt.JupyterChart`` wrapping the chart. It carries a point
        selection named ``"brush"`` on the ``IP_ADDR`` field, which callers
        can observe to react to clicked slices.
    """
    total_events = current_data[COUNT].sum()
    brush = alt.selection_point(name="brush", fields=[IP_ADDR])
    plot = (
        alt.Chart(current_data)
        .mark_arc()
        .encode(
            theta=alt.Theta(COUNT),
            # Colour/legend order follows the event count, largest first.
            color=alt.Color(IP_ADDR, sort=alt.EncodingSortField(COUNT, order="descending")),
            tooltip=[
                alt.Tooltip(IP_ADDR, title="IP address"),
                alt.Tooltip(f"{COUNT}:Q", title="IP events count"),
            ],
        )
        .properties(
            title=f"Distribution of {total_events} recorded events by IP address"
        )
        .add_params(brush)
    )

    return alt.JupyterChart(plot)

def on_select_ip_addr(change):
    """Refresh the detail table when the chart's IP selection changes.

    Args:
        change: Change notification from the chart's ``brush`` selection;
            ``change.new.value`` is read as a sequence of mappings keyed by
            ``IP_ADDR``.

    The table shows the events of the selected IP addresses that fall inside
    the time range currently set on ``period_widget``. Rows keep their
    original order and index from ``events_data``. With no usable selection
    the table is reset to its header-only state.
    """
    sel = change.new.value
    # `not sel` covers both None and an empty selection; indexing sel[0] on an
    # empty list would raise IndexError.
    if not sel or IP_ADDR not in sel[0]:
        filtered = events_data.iloc[:0]
    else:
        ip_addresses = [x[IP_ADDR] for x in sel]
        # Drop rows without a client IP explicitly, then keep the selected IPs.
        has_ip = events_data[IP_ADDR].notna()
        filtered = events_data[has_ip & events_data[IP_ADDR].isin(ip_addresses)]
        filtered = process_by_time_range(filtered, period_widget.value)

    table_widget.value = filtered.to_html()

def on_change(v):
    """Redraw the output area from the current widget settings.

    Used as the click handler of ``apply_changes_widget``.

    Args:
        v: Argument supplied by the click callback; not used.
    """
    clear_output(wait=True)
    top_n = int(top_n_widget.value)
    # The output was just cleared, so the controls must be rendered again.
    display_widgets()

    if data_type_widget.value != EVENTS:
        # Only the events projection is handled here. Without this guard the
        # code below would fail on the unbound `processed_data` / `plot`.
        print(f"Log: Projection type '{data_type_widget.value}' is not supported yet")
        return

    processed_data = process_data_event(events_data, top_n, period_widget.value)

    if processed_data.empty:
        print("Log: The data set is empty")
        return

    plot: alt.JupyterChart = make_events_plot(processed_data)
    # Update the detail table whenever a pie slice is selected.
    plot.selections.observe(on_select_ip_addr, ["brush"])

    display(widgets.VBox([plot, table_widget]))


# Show the controls right away; the chart and table are only rendered by
# `on_change`, which runs when the "Apply changes" button is clicked.
display_widgets()
apply_changes_widget.on_click(on_change)

Dropdown(description='Top N stats', options=('10', '20', '30', '40', '50'), style=DescriptionStyle(description…

ToggleButtons(button_style='info', description='Projection type:', layout=Layout(width='520px'), options=('Eve…

IntRangeSlider(value=(0, 168), description='Time range in hours (max 1 week from the first event)', layout=Lay…

Button(button_style='success', description='Apply changes', style=ButtonStyle(), tooltip='Apply changes')

VBox(children=(JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}, 'data…