In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt

In [None]:
# Load the exported DNS activity log; dtype_backend="pyarrow" requests Arrow-backed dtypes.
df = pd.read_csv(
    "./data/24-06-20 @ 900 AM to 930 AM - All DNS Activity (America_Phoenix).csv",
    dtype_backend="pyarrow",
)

# Show the column names available in the export.
df.columns

## Time Range and Destination Value Counts (top 30)

In [None]:
# Largest value in the "Time" column (upper end of the capture window).
# NOTE(review): if "Time" was read as text, this comparison is lexicographic — confirm the dtype.
df.loc[:, "Time"].max()

In [None]:
# Smallest value in the "Time" column (lower end of the capture window).
# NOTE(review): if "Time" was read as text, this comparison is lexicographic — confirm the dtype.
df.loc[:, "Time"].min()

In [None]:
# The 30 most frequently queried destinations, most frequent first.
df.loc[:, "Destination"].value_counts().iloc[:30]

## Function to Split the Identities Column

In [None]:
def split_identities(identities: str) -> pd.Series:
    """Split a comma-separated ``Identities`` value into one field per identity kind.

    Every comma-separated part is stripped of surrounding whitespace and then
    classified by its content, checked in this order:

    * contains ``"Sites"`` -> ``Sites``
    * contains ``"@"``     -> ``ADUsers`` and ``Email``, parsed from ``"Name (email)"``
    * contains ``".org"``  -> ``ADComputers``, with ``".example.org"`` removed
    * anything else        -> ``Networks``

    A value holding a single identity is classified exactly like each part of a
    multi-identity value. If several parts fall into the same kind, the last
    one wins.

    Args:
        identities: Raw cell value. Anything that is not a ``str`` (``None``,
            ``pd.NA``, ``NaN``) is treated as "no identities".

    Returns:
        A ``pd.Series`` indexed by ``Sites``, ``ADUsers``, ``Email``,
        ``ADComputers`` and ``Networks``. Kinds that were not found are
        ``None``. A Series is returned for every input so that
        ``Series.apply`` always expands to these five columns.
    """
    fields = {
        "Sites": None,
        "ADUsers": None,
        "Email": None,
        "ADComputers": None,
        "Networks": None,
    }

    # Missing cells arrive as None / pd.NA / NaN depending on the dtype backend;
    # none of them can be split, so return the all-empty row.
    if not isinstance(identities, str):
        return pd.Series(fields, dtype=object)

    for part in identities.split(","):
        part = part.strip()
        if not part:
            continue

        if "Sites" in part:
            fields["Sites"] = part
        elif "@" in part:
            user_match = re.match(r"^([^\(]+)\s\(([\w\-@.]+)\)", part)
            if user_match:
                fields["ADUsers"] = user_match.group(1)
                fields["Email"] = user_match.group(2)
            else:
                # Not in "Name (email)" form; keep the raw text rather than
                # failing. NOTE(review): assumes such a value is a bare e-mail
                # address — confirm against the data.
                fields["Email"] = part
        elif ".org" in part:
            fields["ADComputers"] = part.replace(".example.org", "")
        else:
            fields["Networks"] = part

    return pd.Series(fields, dtype=object)




## Apply Split Func

In [None]:
# Expand each row's "Identities" value into five separate columns.
df[["Sites", "ADUsers", "Email", "ADComputers", "Networks"]] = (
    df["Identities"].apply(split_identities)
)

## Drop All-NA Columns and the Original Identity Columns

In [None]:
# Remove, in place, every column whose values are all missing.
df.dropna(axis="columns", how="all", inplace=True)

In [None]:
# Drop the raw "Identities" column (already split into separate columns) together
# with "Identity Types".
# errors="ignore" keeps this cell re-runnable and tolerates a column that is
# already gone, e.g. one removed earlier because it was entirely NA.
df.drop(columns=["Identities", "Identity Types"], errors="ignore", inplace=True)

## Top internal (source) offenders

In [None]:
# Count rows per (ADComputers, Internal IP) pair and write the 20 most frequent
# pairs to CSV without the index column.
# NOTE(review): the output name ".csv" has an empty stem (a hidden file on Unix) —
# confirm the intended file name.
(
    df[["ADComputers", "Internal IP"]]
    .value_counts()
    .head(20)
    .reset_index()
    .to_csv(".csv", index=False)
)

## Plot graphic

In [None]:
# Horizontal bar chart of the 50 most frequent (ADComputers, Internal IP) pairs,
# drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(20, 30))
df[["ADComputers", "Internal IP"]].value_counts().head(50).plot(
    ax=ax,
    kind='barh',
    title="DNS Traffic by Device",
    # NOTE(review): x / y look like DataFrame-plot arguments; they appear to have no
    # effect when plotting a Series — confirm before relying on them.
    x=['ADComputers', 'Internal IP'],
    y='counts',
    legend=True,
    fontsize=12,
)

# Fixed tick marks every 50 from 0 up to 550.
ax.xaxis.set_ticks(np.arange(0, 600, 50))

# NOTE(review): the output name ".png" has an empty stem — confirm the intended file name.
fig.savefig(".png")