In [1]:
from IPython.display import clear_output

import panel as pn
import ipywidgets as widgets
import pandas as pd
import altair as alt
import pathlib
import datetime
from vega_datasets import data
import os
import pycountry

# Column names used to index the download log DataFrame.
TIMESTAMP_START = "First Downloaded At"
TIMESTAMP_END = "Last Downloaded At"
# NOTE(review): COUNT, IP_ADDR and URL are not referenced in the visible code;
# presumably they name further columns of the CSV — confirm before removing.
COUNT = "COUNT"
IP_ADDR = "Client IP"
FILE_SIZE = "File Size (MB)"
FILE_NAME = "Filename"
URL = "URL"
DOWNLOAD_COUNT = "Download Count"
APP = "Application"
FILE_EXT = "File Extension"
# Derived column added by process_data_download (not read from the CSV).
TOTAL_DATA_AMOUNT = "Total data amount"


# User options: the labels offered by the "Projection type" toggle buttons.
FILE_SIZE_DSC = "Downloaded data amount"
TIME_PLOT_DSC = "Time plot"


# Location of the download log CSV, resolved against the current working
# directory (pathlib.Path().resolve()).
prefix_file_path = os.path.join("Task 2", "536535_data", "536535_data")
download_file_path = os.path.join(prefix_file_path, "m365_exfiltration", "single_user_download.csv")
dir_path = pathlib.Path().resolve()

# pd.read_csv already returns a DataFrame, so it is used directly instead of
# being wrapped in a second pd.DataFrame(...) constructor call.
download_data: pd.DataFrame = pd.read_csv(dir_path / download_file_path)

# Load Panel's Vega extension for this notebook session.
pn.extension("vega")

# Style dictionary shared by the dropdown, the button and the range slider.
style = {"description_width": "initial"}

# Number of files to keep in the chart (values are strings; parsed with int() on use).
top_n_widget = widgets.Dropdown(
    options=["10", "20", "30", "40", "50"],
    value="10",
    description="Top N stats",
    disabled=False,
    style=style,
)

# Which projection of the data to draw.
data_type_widget = widgets.ToggleButtons(
    options=[FILE_SIZE_DSC, TIME_PLOT_DSC],
    description="Projection type:",
    disabled=False,
    button_style="info",  # one of: 'success', 'info', 'warning', 'danger', ''
    style={"button_width": "200px"},
    layout={"width": "520px"},
)

# Nothing is redrawn until this button is clicked.
apply_changes_widget = widgets.Button(
    description="Apply changes",
    tooltip="Apply changes",
    disabled=False,
    button_style="success",  # one of: 'success', 'info', 'warning', 'danger', ''
    style=style,
)

# Window of interest, expressed as hour offsets; 168 hours is one week.
period_widget = widgets.IntRangeSlider(
    min=0,
    max=168,
    value=[0, 168],
    step=1,
    description="Time range in hours (max 1 week from the first event)",
    disabled=False,
    style=style,
    layout={"width": "520px"},
)

# Detail table; starts as a header-only (zero-row) rendering of the data.
table_widget = widgets.HTML(value=download_data.iloc[:0].to_html())

# Controls in the order in which they are displayed.
ui_widgets = [
    top_n_widget,
    data_type_widget,
    period_widget,
    apply_changes_widget,
]

def display_widgets():
    """Display every control listed in ``ui_widgets``, in list order."""
    for control in ui_widgets:
        display(control)

def get_first_time_flow_string():
    """Return the smallest value found in the ``TIMESTAMP_START`` column.

    The value is returned as stored in ``download_data``; it is not parsed here.
    """
    start_column = download_data[TIMESTAMP_START]
    return start_column.min()

def get_last_time_flow_string():
    """Return the largest value found in the ``TIMESTAMP_END`` column.

    The value is returned as stored in ``download_data``; it is not parsed here.
    """
    end_column = download_data[TIMESTAMP_END]
    return end_column.max()

def get_first_time_flow():
    """Return the earliest ``TIMESTAMP_START`` value parsed as a ``datetime``."""
    earliest_text = get_first_time_flow_string()
    return datetime.datetime.fromisoformat(earliest_text)

def get_last_time_flow():
    """Return the latest ``TIMESTAMP_END`` value parsed as a ``datetime``."""
    latest_text = get_last_time_flow_string()
    return datetime.datetime.fromisoformat(latest_text)

def to_datetime(x):
    """Parse the ISO-8601 string *x* into a ``datetime.datetime``."""
    parse_iso = datetime.datetime.fromisoformat
    return parse_iso(x)

def process_by_top_n(current_data, top_n):
    """Keep the *top_n* rows with the largest ``FILE_SIZE`` value.

    An empty frame is handed back unchanged (the very same object).
    """
    has_rows = not current_data.empty
    if has_rows:
        return current_data.nlargest(top_n, columns=FILE_SIZE)
    return current_data

def process_by_time_range(current_data, time_period):
    """Keep the rows that lie entirely inside the selected time window.

    Args:
        current_data: DataFrame with ``TIMESTAMP_START`` and ``TIMESTAMP_END``
            columns holding ISO-8601 strings.
        time_period: ``(start_offset, end_offset)`` pair, in hours, counted
            from the earliest ``TIMESTAMP_START`` of the whole ``download_data``
            set (not of ``current_data``).

    Returns:
        The rows whose start is at or after the window start and whose end is
        at or before the window end. An empty frame is returned unchanged.
    """
    if current_data.empty:
        return current_data

    start_offset, end_offset = time_period

    # The reference instant requires a scan of the full data set plus a parse,
    # so resolve it once and reuse it for both window bounds.
    first_event = get_first_time_flow()
    window_start = first_event + datetime.timedelta(hours=start_offset)
    window_end = first_event + datetime.timedelta(hours=end_offset)

    started_at = current_data[TIMESTAMP_START].apply(to_datetime)
    finished_at = current_data[TIMESTAMP_END].apply(to_datetime)

    return current_data[(started_at >= window_start) & (finished_at <= window_end)]

def process_data_download(current_data: pd.DataFrame, top_n, time_period):
    """Aggregate the download log per file and keep the top entries.

    Args:
        current_data: Download log rows.
        top_n: Number of files to keep (passed to ``process_by_top_n``).
        time_period: ``(start_offset, end_offset)`` in hours (passed to
            ``process_by_time_range``).

    Returns:
        One row per ``FILE_NAME`` with summed ``FILE_SIZE`` and
        ``DOWNLOAD_COUNT``, the first ``FILE_EXT`` and ``APP`` seen, and
        ``TOTAL_DATA_AMOUNT``, reduced to the top *top_n* rows.
    """
    in_window = process_by_time_range(current_data, time_period)

    # Compute the transferred volume per log row *before* grouping. Multiplying
    # the summed sizes by the summed counts afterwards would overstate the total
    # whenever a file appears in more than one row. ``assign`` returns a new
    # frame, so the caller's data is never modified.
    in_window = in_window.assign(
        **{TOTAL_DATA_AMOUNT: in_window[FILE_SIZE] * in_window[DOWNLOAD_COUNT]}
    )

    per_file = in_window.groupby(FILE_NAME, as_index=False).aggregate({
        FILE_SIZE: 'sum',
        DOWNLOAD_COUNT: 'sum',
        FILE_EXT: 'first',
        APP: 'first',
        TOTAL_DATA_AMOUNT: 'sum',
    })
    return process_by_top_n(per_file, top_n)

def on_select_download_file(change):
    """Refresh the detail table when the chart's brush selection changes.

    Shows the ``download_data`` rows whose ``FILE_NAME`` is among the brushed
    values, limited to the currently chosen time range; with no usable
    selection the table shows only the column headers.
    """
    selection = change.new.value
    has_file_selection = selection is not None and FILE_NAME in selection

    if has_file_selection:
        is_selected = download_data[FILE_NAME].isin(selection[FILE_NAME])
        rows = process_by_time_range(download_data[is_selected], period_widget.value)
    else:
        rows = download_data.iloc[:0]

    table_widget.value = rows.to_html()

def make_download_plot(current_data):
    """Build the horizontal bar chart of total data amount per file.

    The chart carries an interval selection named ``"brush"`` and is wrapped in
    an ``alt.JupyterChart`` so the selection can be observed from Python.
    """
    interval = alt.selection_interval(name="brush")

    # Fields shown when hovering over a bar.
    hover_fields = [
        alt.Tooltip(FILE_NAME, title="File name"),
        alt.Tooltip(FILE_EXT, title="File extension"),
        alt.Tooltip(APP, title="Downloaded via"),
        alt.Tooltip(FILE_SIZE + ":Q", title="File size (MB)"),
        alt.Tooltip(DOWNLOAD_COUNT + ":Q", title="Download count"),
        alt.Tooltip(TOTAL_DATA_AMOUNT + ":Q", title="Total data amount (MB)"),
    ]
    green_ramp = alt.Scale(range=['lightgreen', 'green'])

    bars = alt.Chart(current_data).mark_bar().encode(
        x=alt.X(TOTAL_DATA_AMOUNT),
        y=alt.Y(FILE_NAME, sort='-x'),  # largest bar first
        color=alt.Color(TOTAL_DATA_AMOUNT, scale=green_ramp),
        tooltip=hover_fields,
    )
    sized = bars.properties(
        title="File size x Download count",
        width=800,
        height=500,
    )

    return alt.JupyterChart(sized.add_params(interval))
    
    
def on_change(v):
    """Redraw the output area when "Apply changes" is clicked.

    Args:
        v: The clicked button, as passed by ``Button.on_click``; unused.

    Clears the cell output, shows the controls again and then draws the chart
    and detail table for the selected projection. Only the
    ``FILE_SIZE_DSC`` projection is implemented; any other choice is reported
    with a log line instead of failing on an unbound variable.
    """
    clear_output(wait=True)
    display_widgets()

    projection = data_type_widget.value
    if projection != FILE_SIZE_DSC:
        # Without this guard the code below would reference names that were
        # never assigned and raise UnboundLocalError.
        print(f"Log: The projection '{projection}' is not supported yet")
        return

    top_n = int(top_n_widget.value)
    processed_data = process_data_download(download_data, top_n, period_widget.value)

    if processed_data.empty:
        print("Log: The data set is empty")
        return

    # A freshly built chart has no brush selection, so drop any rows left in
    # the table by a previous selection.
    table_widget.value = download_data.iloc[:0].to_html()

    plot: alt.JupyterChart = make_download_plot(processed_data)
    plot.selections.observe(on_select_download_file, ["brush"])

    display(widgets.VBox([plot, table_widget]))

# Show the controls once; the chart is only drawn after "Apply changes" is
# clicked, which invokes on_change.
display_widgets()
apply_changes_widget.on_click(on_change)

Dropdown(description='Top N stats', options=('10', '20', '30', '40', '50'), style=DescriptionStyle(description…

ToggleButtons(button_style='info', description='Projection type:', layout=Layout(width='520px'), options=('Dow…

IntRangeSlider(value=(0, 168), description='Time range in hours (max 1 week from the first event)', layout=Lay…

Button(button_style='success', description='Apply changes', style=ButtonStyle(), tooltip='Apply changes')

VBox(children=(JupyterChart(spec={'config': {'view': {'continuousWidth': 300, 'continuousHeight': 300}}, 'data…