# Импортируем необходимые модули

In [None]:
import pandas as pd

from tqdm.auto import tqdm
from IPython.display import display

from msticpy.nbtools import *
from msticpy.sectools import *
from msticpy.data import QueryProvider

# Вспомогательные функции

In [None]:
def strip_na(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` restricted to columns holding at least one non-null value.

    Args:
        df: Frame to filter.

    Returns:
        The selection of ``df`` that leaves out every column whose values
        are all null.
    """
    column_is_empty = df.isnull().all()
    kept_columns = df.columns[~column_is_empty]
    return df[kept_columns]

# Загружаем данные

In [None]:
# Create a "LocalData" query provider that looks for data files under
# the "dataset" folder, connect it, and load the host events.
dataset_dirs = ["dataset"]
events_query_name = "PHD2021_Threat_Research_Workshop_Host_Events.pkl"

local_provider = QueryProvider("LocalData", data_paths=dataset_dirs)
local_provider.connect()
events = local_provider.exec_query(events_query_name)

In [None]:
# Show the dimensions of the loaded events as the cell output.
events.shape

# Ищем сработку

## Сопоставляем поля с нужными названиями

In [None]:
# Normalise the raw event fields to the column names used by the rest of
# the notebook (TimeCreated, NewProcessName, SubjectLogonId, ...).
# NOTE(review): casting every column to str may turn missing values into
# literal strings (e.g. "nan"), in which case later null checks find
# nothing to strip - confirm against the pandas version in use.
events = events.astype(str)

# Parse the timestamp once and reuse it for both time columns.
event_time = pd.to_datetime(events['time'])

events['TimeCreated'] = event_time
events['Computer'] = events['event_src.host']
events['EventID'] = events['msgid'].apply(int)
events['TimeGenerated'] = event_time

# Plain column copies: target column -> source column. The dict order is
# the order in which the new columns are appended to the frame.
copied_fields = {
    'NewProcessName': 'object.name',
    'NewProcessId': 'object.id',
    'ParentProcessName': 'datafield4',
    'ProcessId': 'datafield2',
    'SubjectLogonId': 'datafield1',
    'TargetLogonId': 'datafield7',
    'CommandLine': 'datafield5',
    'cmd': 'datafield5',
    'ParentCommandLine': 'datafield9',
    'SubjectUserName': 'subject.name',
    'SubjectUserSid': 'subject.id',
}
for target_column, source_column in copied_fields.items():
    events[target_column] = events[source_column]

events['TenantId'] = ""

## Поиск подозрительного процесса

Среди всех событий ищем (по UUID) то, на котором сработал фильтр

> 00000006-09e5-0e8f-f000-0000338adaef

In [None]:
# UUID of the event the detection filter fired on; the value is the one
# quoted in the notebook text for this step. The original cell compared
# against an empty string, which cannot match that event.
SUSPICIOUS_EVENT_UUID = "00000006-09e5-0e8f-f000-0000338adaef"

proc_herpaderping_events = events[events.uuid == SUSPICIOUS_EVENT_UUID]

# Drop all-null columns, then keep only the columns worth showing.
displayed_columns = ["reason", "NewProcessName", "NewProcessId", "TimeCreated", "EventID"]
proc_herpaderping_events = strip_na(proc_herpaderping_events)[displayed_columns]
display(proc_herpaderping_events)

## Находим событие запуска подозрительного процесса среди событий Sysmon 1

In [None]:
# Take the process id from the first matched event and look up the
# EventID 1 record with the same NewProcessId.
susp_proc_pid = proc_herpaderping_events["NewProcessId"].values[0]
susp_proc_creation_event = events[(events.EventID == 1) & (events.NewProcessId == susp_proc_pid)]

# Drop all-null columns with the shared helper (previously duplicated
# inline here), then keep only the columns worth showing.
displayed_columns = ["Computer", "SubjectLogonId", "NewProcessId", "NewProcessName", "ProcessId", "ParentProcessName", "TimeCreated", "EventID"]
susp_proc_creation_event = strip_na(susp_proc_creation_event)[displayed_columns]
display(susp_proc_creation_event)

## Отображаем дерево процессов, к которому принадлежит найденный процесс

In [None]:
# Build the process tree for all events, locate the first row matching
# the suspicious pid, and plot the root tree that row belongs to.
full_tree = ptree.build_process_tree(events, show_progress=True)

# NOTE(review): the match is on ProcessId, while susp_proc_pid was read
# from the NewProcessId column - confirm this is the intended column.
matches_susp_pid = full_tree.ProcessId == susp_proc_pid
susp_proc_index = full_tree[matches_susp_pid].index.values[0]
susp_proc_tree = ptree.get_root_tree(full_tree, susp_proc_index)

nbdisplay.plot_process_tree(
    data=susp_proc_tree,
    legend_col="SubjectUserName",
    show_table=False,
)

# Посмотрим статистику по деревьям процессов

In [None]:
# Remove the limit on displayed column width.
pd.set_option('display.max_colwidth', None)

# Collect one summary record per root process: its descendant tree's size
# and depth, plus identifying fields of the root itself.
roots = ptree.get_roots(full_tree)
trees = []
for row_num, (ix, row) in enumerate(roots.iterrows()):
    tree = ptree.get_descendents(full_tree, row)
    # `row` is the roots row for label `ix`, so read the fields from it
    # directly instead of looking the label up again.
    trees.append({
        "Row": row_num,
        "PID": row.NewProcessId,
        "RootProcess": row.NewProcessName,
        "Index": ix,
        "TreeSize": len(tree),
        "TreeDepth": ptree.get_tree_depth(tree),
        "LogonId": row.SubjectLogonId,
    })

# Cell output: the five deepest trees.
pd.DataFrame(trees).sort_values('TreeDepth', ascending=False).head()

# Видим, что одно из деревьев значительно выделяется на фоне остальных

Визуализируем деревья процессов, чтобы детальнее понять, что из себя представляет аномальное дерево.

Фильтруем по идентификатору сессии, в рамках которой были запуски

> 10781108

In [None]:
import networkx as nx

from bokeh.io import show
from bokeh.models import (Circle, Arrow, NormalHead, EdgesAndLinkedNodes, HoverTool, WheelZoomTool, ResetTool, SaveTool, PanTool,
                          MultiLine, NodesAndLinkedEdges, Plot, Range1d, Label, LabelSet, ColumnDataSource,)
from bokeh.palettes import Spectral4, YlGnBu4
from bokeh.plotting import from_networkx, figure

# Logon session whose process launches are graphed; the value is the one
# quoted in the notebook text for this step. The original cell compared
# against an empty string, which cannot match that session.
SUSP_LOGON_ID = '10781108'
susp_processes = events[events['SubjectLogonId'] == SUSP_LOGON_ID]

# Directed graph with one edge per event, from ProcessId to NewProcessId;
# the listed columns are stored as edge attributes.
G = nx.from_pandas_edgelist(
    df=susp_processes,
    source="ProcessId",
    target="NewProcessId",
    edge_attr=["TimeCreated", "NewProcessName", "CommandLine", "NewProcessId", "ProcessId"],
    create_using=nx.DiGraph,
)


# Empty 1000x1000 canvas with fixed ranges slightly larger than the unit
# square.
plot = Plot(plot_width=1000, plot_height=1000,
            x_range=Range1d(-1.1, 1.1), y_range=Range1d(-1.1, 1.1))
plot.title.text = "Processes graph"

# Tooltip fields must name edge attributes: the graph edges run from
# ProcessId (edge source) to NewProcessId (edge target). The previous
# "@ParentProcessId" is not among the attributes stored on the edges.
hover_tool = HoverTool(tooltips=[
    ("CommandLine", "@CommandLine"),
    ("ParentPID", "@ProcessId"),
    ("PID", "@NewProcessId"),
])

plot.add_tools(
    hover_tool,
    WheelZoomTool(),
    ResetTool(),
    SaveTool(),
    PanTool(),
)

# Lay the graph out on a circle and create its bokeh renderer.
graph_renderer = from_networkx(G, nx.circular_layout, scale=1, center=(0, 0))

# Nodes: same size in every state, different palette colour per state.
node_renderer = graph_renderer.node_renderer
node_renderer.glyph = Circle(size=15, fill_color=Spectral4[0])
node_renderer.hover_glyph = Circle(size=15, fill_color=Spectral4[1])
node_renderer.selection_glyph = Circle(size=15, fill_color=Spectral4[2])

# Edges: grey by default, palette colours when hovered or selected.
edge_renderer = graph_renderer.edge_renderer
edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_alpha=0.8, line_width=5)
edge_renderer.hover_glyph = MultiLine(line_color=Spectral4[1], line_width=5)
edge_renderer.selection_glyph = MultiLine(line_color=Spectral4[2], line_width=5)

# Selection and inspection (hover) policies.
graph_renderer.inspection_policy = EdgesAndLinkedNodes()
graph_renderer.selection_policy = NodesAndLinkedEdges()

# Node positions: one row per entry of the layout provider's graph_layout
# dict; the key is labelled "NewProcessId" and the value "Positions".
positions_df = pd.DataFrame(list(graph_renderer.layout_provider.graph_layout.items()), columns=["NewProcessId", "Positions"])
# Split each position pair into separate x and y columns.
positions_df[['x', 'y']] = pd.DataFrame(positions_df['Positions'].tolist(), columns=['x', 'y'])
positions_df.set_index("NewProcessId", inplace=True)
positions_df.drop('Positions', axis='columns', inplace=True)

# Edge attributes as a frame indexed by NewProcessId.
data_df = graph_renderer.edge_renderer.data_source.to_df()
data_df.set_index("NewProcessId", inplace=True)

# ImageName is the last backslash-separated component of NewProcessName.
# Going through a dict keyed by the index leaves one entry per distinct
# NewProcessId.
image_df = pd.DataFrame(list(data_df.apply(lambda x: x.NewProcessName.split("\\")[-1], axis=1).to_dict().items()), columns=["NewProcessId", "ImageName"])
# NOTE(review): the result of set_index is not assigned, so image_df is
# left unchanged by this line.
image_df.set_index('NewProcessId')
# Attach ImageName to every edge row.
data_df = pd.merge(left=data_df, right=image_df, how='left', left_on=['NewProcessId'], right_on = ['NewProcessId'])

# `e`: edge rows plus the x/y position looked up by NewProcessId; used as
# the data source for the labels.
e = pd.merge(left=data_df, right=positions_df, how='left', left_on=['NewProcessId'], right_on = ['NewProcessId'])

# `ttt`: edge rows plus the positions of both endpoints. The first merge
# looks up the 'start' node, the second the 'end' node; the arrow loop
# reads the resulting coordinates as x_x/y_x and x_y/y_y.
ttt = pd.merge(left=data_df, right=positions_df, how='left', left_on=['start'], right_on = ['NewProcessId'])
ttt = pd.merge(left=ttt, right=positions_df, how='left', left_on=['end'], right_on = ['NewProcessId'])

# Draw one arrow per row of ttt, from the (x_x, y_x) coordinates to the
# (x_y, y_y) coordinates.
arrow_color = YlGnBu4[1]
for x_from, y_from, x_to, y_to in zip(ttt['x_x'], ttt['y_x'], ttt['x_y'], ttt['y_y']):
    arrow = Arrow(
        end=NormalHead(fill_color=arrow_color, size=10),
        line_color=arrow_color,
        x_start=x_from,
        y_start=y_from,
        x_end=x_to,
        y_end=y_to,
    )
    plot.add_layout(arrow)

# Label each row of `e` with its ImageName at that row's x/y position.
image_labels = LabelSet(
    x='x',
    y='y',
    text='ImageName',
    x_offset=5,
    y_offset=5,
    source=ColumnDataSource(e),
    render_mode='canvas',
)
plot.add_layout(image_labels)

plot.renderers.append(graph_renderer)


In [None]:
# Render the assembled process graph.
show(plot)

## Теперь взглянем на картину в целом, объединив разные типы событий

In [None]:
# Events with EventID 5140 or 5145, with the source address/port and
# datafield4 renamed to the column names used by the timeline below.
is_share_event = events["EventID"].isin([5140, 5145])
share_column_names = {
    "src.ip": "IpAddress",
    "src.port": "IpPort",
    "datafield4": "RelativeTargetName",
}
share_access = events[is_share_event].rename(columns=share_column_names)

In [None]:
# Events with EventID 4624 or 4672.
# NOTE(review): the rename maps TargetLogonId onto "subject.id", a column
# name the events frame already has, and names a "TargetUserName" source
# column that this notebook does not create - confirm the intended
# mapping.
is_logon_event = events["EventID"].isin([4624, 4672])
logon_column_names = {
    "TargetLogonId": "subject.id",
    "TargetUserName": "subject.name",
}
logons = events[is_logon_event].rename(columns=logon_column_names)

In [None]:
# Process creation events (EventID 1).
processes_on_host = events[events.EventID == 1]

# One timeline series per event family: the frame to plot and the columns
# passed as that series' source_columns.
procs_and_logons_and_share = {
    "Processes": {
        "data": processes_on_host,
        "source_columns": [
            "EventID",
            "NewProcessName",
            "SubjectUserName",
            "SubjectLogonId",
            "CommandLine",
            "ParentCommandLine",
        ],
    },
    "Logons": {
        "data": logons,
        "source_columns": [
            "EventID",
            "SubjectUserName",
            "SubjectLogonId",
            "logon_type",
            "Computer",
        ],
    },
    "ShareAccess": {
        "data": share_access,
        "source_columns": [
            "EventID",
            "SubjectUserName",
            "SubjectLogonId",
            "RelativeTargetName",
            "IpAddress",
            "IpPort",
        ],
    },
}

# The trailing semicolon is kept from the original cell.
nbdisplay.display_timeline(
    data=procs_and_logons_and_share,
    title="Logons, Processes and Share access groupped by EventID",
    legend="left",
    yaxis=False,
);

## Напоследок

Мы раскрутили всю цепочку действий на машине. Можно теперь просто отобразить в виджете весь граф процессов.
Может, что-то упустили.

In [None]:
# Rebuild the process tree from all events and plot it in full.
full_tree = ptree.build_process_tree(
    events,
    show_progress=True,
)
nbdisplay.plot_process_tree(
    data=full_tree,
    legend_col="SubjectUserName",
    show_table=False,
)