# Using Univariate Outlier Detection Techniques to find C2 Beacons


Over the last week or so I've been working through simple univariate anomaly detection code projects from the book 'Finding Ghosts in Your Data: Anomaly Detection Techniques with Examples in Python'. To this point the book has introduced conceptual ideas that help find outliers in datasets by using single features, hence 'univariate'. 

Seeing these things work with test data helps solidify the understanding of whats going on behind the scenes, but applying it to domain specific problems is the ultimate goal. A commomn usecase for outlier detection is finding interesting connections in a sea of of firewall or netflow logs that says this flow of data is unlike the others based on any number of continuous data points that typical are contained in these kind of logs. I.E. the number of flows, the number of bytes transferred between the two hosts, the intervals of connections, the duration of the connections etc.

Coincidentally I recently came across a set of threat hunting/ beacon detection challenges provided by Active Counter Measures. This would be a good place to apply real world data to the outlier detection and see if it actually works.

---------

[**Challenge 1 - AsyncRat**](https://www.activecountermeasures.com/malware-of-the-day-asyncrat/)  

Detect the presence of a C2 beacon from a 24 hour collection of Zeek logs that contain post-exploitation communication traffic. The link above describes the lab setup including the C2 address and the beacon characteristics as:

- C2 Server: 172.208.51.75  
- Beacon Timing: 6.5s  
- Jitter: +/- 1.5s

**Lets give it a go**

In [1]:
import pandas as pd
import numpy as np
from IPython.display import display, Markdown, HTML
from datetime import datetime
import pandas as pd
import numpy as np
from statsmodels import robust


In [2]:
## Helper dictionary to help properly assign column names based on the type of Zeek logs being analyzed

zeek_cols = {'capture_loss': ['ts', 'ts_delta', 'peer', 'gaps', 'acks', 'percent_lost'],
 'conn': ['ts', 'uid', 'orig_h', 'orig_p', 'resp_h',
        'resp_p', 'proto', 'service', 'duration', 'orig_bytes', 'resp_bytes',
        'conn_state', 'local_orig', 'local_resp', 'missed_bytes', 'history',
        'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes',
        'tunnel_parents'],
 'dhcp': ['ts', 'uids', 'client_addr', 'server_addr', 'mac',
        'host_name', 'client_fqdn', 'domain', 'requested_addr', 'assigned_addr',
        'lease_time', 'client_message', 'server_message', 'msg_types',
        'duration'],
 'dns': ['ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h',
        'id.resp_p', 'proto', 'trans_id', 'rtt', 'query', 'qclass',
        'qclass_name', 'qtype', 'qtype_name', 'rcode', 'rcode_name', 'AA', 'TC',
        'RD', 'RA', 'Z', 'answers', 'TTLs', 'rejected'],
 'files': ['ts', 'fuid', 'tx_hosts', 'rx_hosts', 'conn_uids', 'source',
        'depth', 'analyzers', 'mime_type', 'filename', 'duration', 'local_orig',
        'is_orig', 'seen_bytes', 'total_bytes', 'missing_bytes',
        'overflow_bytes', 'timedout', 'parent_fuid', 'md5', 'sha1', 'sha256',
        'extracted', 'extracted_cutoff', 'extracted_size'],
 'http': ['ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h',
        'id.resp_p', 'trans_depth', 'method', 'host', 'uri', 'referrer',
        'version', 'user_agent', 'origin', 'request_body_len',
        'response_body_len', 'status_code', 'status_msg', 'info_code',
        'info_msg', 'tags', 'username', 'password', 'proxied', 'orig_fuids',
        'orig_filenames', 'orig_mime_types', 'resp_fuids', 'resp_filenames',
        'resp_mime_types'],
 'notice': ['ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h',
        'id.resp_p', 'fuid', 'file_mime_type', 'file_desc', 'proto', 'note',
        'msg', 'sub', 'src', 'dst', 'p', 'n', 'peer_descr', 'actions',
        'email_dest', 'suppress_for', 'remote_location.country_code',
        'remote_location.region', 'remote_location.city',
        'remote_location.latitude', 'remote_location.longitude'],
 'ntp': ['ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h',
        'id.resp_p', 'version', 'mode', 'stratum', 'poll', 'precision',
              'xmt_time', 'num_exts'],
 'ssl': ['ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h',
        'id.resp_p', 'version', 'cipher', 'curve', 'server_name', 'resumed',
        'last_alert', 'next_protocol', 'established', 'cert_chain_fuids',
        'client_cert_chain_fuids', 'subject', 'issuer', 'client_subject',
        'client_issuer', 'validation_status'],
 'stats': ['ts', 'peer', 'mem', 'pkts_proc', 'bytes_recv',
        'pkts_dropped', 'pkts_link', 'pkt_lag', 'events_proc', 'events_queued',
        'active_tcp_conns', 'active_udp_conns', 'active_icmp_conns',
        'tcp_conns', 'udp_conns', 'icmp_conns', 'timers', 'active_timers',
        'files', 'active_files', 'dns_requests', 'active_dns_requests',
        'reassem_tcp_size', 'reassem_file_size', 'reassem_frag_size',
        'reassem_unknown_size'],
 'weird': ['ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h',
        'id.resp_p', 'name', 'addl', 'notice', 'peer', 'source'],
 'x509': ['ts', 'id', 'certificate.version', 'certificate.serial',
        'certificate.subject', 'certificate.issuer',
        'certificate.not_valid_before', 'certificate.not_valid_after',
        'certificate.key_alg', 'certificate.sig_alg', 'certificate.key_type',
        'certificate.key_length', 'certificate.exponent', 'certificate.curve',
        'san.dns', 'san.uri', 'san.email', 'san.ip', 'basic_constraints.ca',
        'basic_constraints.path_len']
        }

In [3]:
# Load the conn log file, the first 8 rows contain file metadata
# so we skip those rows on the initial load

df = pd.read_csv("async_infection_zeek_logs/conn.log", sep="\t", skiprows=8, names=zeek_cols["conn"], low_memory=False)

In [4]:
## Checking for desired schema
df.shape, display(HTML(df.head().to_html(index=False) ))

ts,uid,orig_h,orig_p,resp_h,resp_p,proto,service,duration,orig_bytes,resp_bytes,conn_state,local_orig,local_resp,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,tunnel_parents
1714219472.127111,CD2Swb3CglD8mdmAX,192.168.100.152,50110.0,204.79.197.239,443.0,tcp,-,-,-,-,RSTRH,-,-,0.0,^r,0.0,0.0,1.0,40.0,-
1714219472.978589,C3YUyT31BXg7DV30fj,192.168.100.152,50080.0,52.123.251.169,443.0,tcp,-,-,-,-,RSTRH,-,-,0.0,^r,0.0,0.0,1.0,40.0,-
1714219473.206099,CnVQMw2rQ7TU9dw92l,192.168.100.152,50102.0,204.79.197.219,443.0,tcp,-,-,-,-,RSTRH,-,-,0.0,^r,0.0,0.0,1.0,40.0,-
1714219475.003535,Co8goqBndOQb4OaDb,192.168.100.152,50096.0,204.79.197.203,443.0,tcp,-,-,-,-,RSTRH,-,-,0.0,^r,0.0,0.0,1.0,40.0,-
1714219475.469351,Ci0Z78LEBLMRD2Bvb,192.168.100.152,50091.0,204.79.197.237,443.0,tcp,-,-,-,-,RSTRH,-,-,0.0,^r,0.0,0.0,1.0,40.0,-


((35949, 21), None)

In [5]:
## Zeek logs end with a close signature, which does not contain
# useful data, we need to get rid of it

df.tail(2)

Unnamed: 0,ts,uid,orig_h,orig_p,resp_h,resp_p,proto,service,duration,orig_bytes,...,conn_state,local_orig,local_resp,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,tunnel_parents
35947,1714219639.151060,CM8pCB4HbWHhT7qlKc,192.168.100.136,49808.0,52.226.139.180,443.0,tcp,-,86160.385716,6001.0,...,OTH,-,-,0.0,DaTdATT,472.0,25243.0,420.0,26700.0,-
35948,#close,2024-05-08-19-16-08,,,,,,,,,...,,,,,,,,,,


In [6]:
# Drop the last row using index based on slicing

df = df.iloc[:-1]

In [7]:
# Confirm the last row of the dataframe is clean data

df.tail(2)

Unnamed: 0,ts,uid,orig_h,orig_p,resp_h,resp_p,proto,service,duration,orig_bytes,...,conn_state,local_orig,local_resp,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,tunnel_parents
35946,1714219603.255488,C8bzgl4ynMAiTuzUOi,192.168.100.152,49695.0,52.226.139.185,443.0,tcp,-,86243.96207,5649,...,OTH,-,-,0.0,DTadATT,471.0,24853.0,417.0,26223.0,-
35947,1714219639.15106,CM8pCB4HbWHhT7qlKc,192.168.100.136,49808.0,52.226.139.180,443.0,tcp,-,86160.385716,6001,...,OTH,-,-,0.0,DaTdATT,472.0,25243.0,420.0,26700.0,-


In [8]:
# Not necesarily needed but I like to see human read timestamps when working
# We'll convert the timestamps to datetime objects
# insert the objects into the first column of the dataframe
# and give the column a name of Time 

converted_timestamps = [datetime.fromtimestamp(ts) for ts in df.ts.astype(float)]
df.insert(0, "Time", converted_timestamps)
display(HTML( df.head(2).to_html(index=False) ))

Time,ts,uid,orig_h,orig_p,resp_h,resp_p,proto,service,duration,orig_bytes,resp_bytes,conn_state,local_orig,local_resp,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,tunnel_parents
2024-04-27 07:04:32.127111,1714219472.127111,CD2Swb3CglD8mdmAX,192.168.100.152,50110.0,204.79.197.239,443.0,tcp,-,-,-,-,RSTRH,-,-,0.0,^r,0.0,0.0,1.0,40.0,-
2024-04-27 07:04:32.978589,1714219472.978589,C3YUyT31BXg7DV30fj,192.168.100.152,50080.0,52.123.251.169,443.0,tcp,-,-,-,-,RSTRH,-,-,0.0,^r,0.0,0.0,1.0,40.0,-


#
We have our zeek logs loaded and properly setup for analysis. Since we want to focus our analysis on communication between individual src and dest IPs we can perform some aggregation using groupby to create a dataframe of unique src, dest, dest port combinations, then using these unique values perform some summarization operations 
#

In [9]:
# We need a method to calculate the connection intervals
# between hosts

def make_time_interval(time_col):
    "Take a list of time columns, sort and return the intervals"
    time_col.sort(reverse=True)
    intervals = []
    for i in range(0, len(time_col)-1):
        if i < len(time_col):
            change = time_col[i] - time_col[i+1]
            seconds_change = change.total_seconds()
            intervals.append(abs(seconds_change))
    return intervals

In [10]:
# Grabbing unique source and dest pairs
reduced = df[["orig_h", "resp_h", "resp_p"]].value_counts().reset_index(name="pair_counts").query("pair_counts > 1")

In [11]:
# number of unique pairs
len(df), len(reduced), display(HTML(reduced.head(2).to_html(index=False) ))

orig_h,resp_h,resp_p,pair_counts
192.168.100.136,172.208.51.75,7707.0,13281
192.168.100.136,192.168.100.150,7680.0,994


(35948, 359, None)

In [12]:
# Perform log summarization for the unique src/dest pairs 
# Aggregate log statistics, create a dictionary for
# each pair and create a new dataframe after processing


meta_list = []
for pair in reduced.itertuples():
    tmp_df = df[(df.orig_h == pair.orig_h) & (df.resp_h == pair.resp_h)]
    times = tmp_df["Time"].tolist()
    intervals = make_time_interval(times)
    count = len(intervals)
    sum_bytes_in = tmp_df.resp_ip_bytes.sum()
    sum_bytes_out = tmp_df.orig_ip_bytes.sum()
    
    meta = {
        "src" : pair.orig_h,
        "dest" : pair.resp_h,
        "dest_port": pair.resp_p,
        "interval_average" : np.mean(intervals),
        "interval_std" : np.std(intervals),
        "count" : len(intervals) + 1,
        "bytes_out" : sum_bytes_out,
        "bytes_in" : sum_bytes_in
    }
    meta_list.append(meta)

new = pd.DataFrame(meta_list)

In [13]:
# Additional aggregate operations to transform bytes to MB 
# and calculate per session byte averages from in and out 
# perspective

new["bytes_diff"] = (new.bytes_out - new.bytes_in) / 1024 / 1024
new["out_bytes_per_session"] = new.bytes_out / new["count"]
new["in_bytes_per_session"] = new.bytes_in / new["count"]

## Quick analysis based on the most frequently occuring connections

In [14]:

# Sort the dataframe looking at the most src/dest pairs with the most connections
display(HTML(new.sort_values(by="count", ascending=False).head().to_html(index=False)))

src,dest,dest_port,interval_average,interval_std,count,bytes_out,bytes_in,bytes_diff,out_bytes_per_session,in_bytes_per_session
192.168.100.136,172.208.51.75,7707.0,6.50068,0.868396,13281,19789913.0,17996565.0,1.71027,1490.092086,1355.060989
192.168.100.136,192.168.100.2,137.0,50.376894,45.577729,1715,754165.0,126635.0,0.598459,439.746356,73.83965
192.168.100.136,192.168.100.2,53.0,50.376894,45.577729,1715,754165.0,126635.0,0.598459,439.746356,73.83965
192.168.100.136,192.168.100.2,3.0,50.376894,45.577729,1715,754165.0,126635.0,0.598459,439.746356,73.83965
192.168.100.136,192.168.100.150,7680.0,74.571572,171.628866,1151,334413.0,238878.0,0.091109,290.541268,207.539531


In [15]:
new["count"].skew(), new["count"].kurtosis(), 

(np.float64(15.759546965548177), np.float64(277.2234106091422))

# 

We can see that the top src/dest pair has 13.2k sightings, well over the next most frequent connection. This by all accounts stands out as an outlier within this dataset. In addition to th extreme high frequency count, looking at the observed characteristics for this pair we see an unusual destination port (7777), a pretty short average interval (6.5). These would all lead me to dig into this set of connections a little more, maybe now is time to perform IP reputation look ups, see what the interwebs say about TCP port 7777. If available, I would pivot to a tool with endpoint visibility to see if I can find the responsible process that spawned the connection and so on.

Of course we know this connection is in fact the C2 beacon. We even see the summarization methodology worked well because as the C2 configuration information described the beacon is set to run once every 6.5 seconds +/- 1.5 seconds of jitter and our summarization technique above calculates the average interval of 6.5 with a standard deviation of 0.87. 

#

## Bringing in the functions created during my time learning univariate anomaly detection 
The functions below can be applied to datasets to find potential anomalies in datasets based on a single feature from the data

In [16]:
def check_stat(val:float,
               midpoint:float,
               distance:int,
               n:int):
    """
    Check if a given value is within a given range of a 
    midpoint value and a number of increments. If the value is within 
    this range return a percentage else return 1.0 indicating the value 
    is an statistical outlier
    """
    if (abs(val - midpoint) < (n * distance)):
        return abs(val-midpoint) / (n * distance)
    return 1.0

def check_sd(val:float,
             mean:float,
             sd:float,
             min_num_sd:int):
     """
     Check if a given value is a specified number of 
     standard deviations away from the mean
     """
     return check_stat(val, mean, sd, min_num_sd)

def check_mad(val:float,
              median:float,
              mad:float,
              min_num_mad:int):
    """
    Check if a given value is with the range of
    the median absolute value and a specific length or distance
    If the value is within the range return a percentage, else
    return 1.o indicating it is an outlier
    """
    return check_stat(val, median, mad, min_num_mad)

def check_iqr(val:float,
              median:float,
              p25:float,
              p75:float,
              iqr:float,
              min_iqr_diff:float):
    """
    Check if on which side of the median a value exists
    If below the median checks if the value is min_iqr_diff times below the p25 IQR
    if above checks if the value min_iqr_diff times above the p75.
    if the value passes those checks return 1.0 to suggest the value
    is an outlier
    """
    if val < median:
        if val > p25:
             return 0.0
        elif (p25 - val) < (min_iqr_diff * iqr):
             return abs(p25 - val) / (min_iqr_diff * iqr)
        else:
            return 1.0
    else:
        if val < p75:
            return 0.0
        elif (val - p75) < (min_iqr_diff * iqr):
            return abs(val - p75) / (min_iqr_diff * iqr)
        else:
            return 1.0
        

def run_tests(dataframe):
    """
    Pandas dataframe containing univariate data to perform 
    anomaly detection against
    """
    mean = dataframe.value.mean()
    sd = dataframe.value.std(0)
    p25 = np.quantile(dataframe.value, 0.25)
    p75 = np.quantile(dataframe.value, 0.75)
    iqr = p75 - p25
    median = dataframe.value.median()
    mad = robust.mad(dataframe.value)
    calculations = {
        "mean": mean, "sd": sd, "p25": p25,
        "p75": p75, "iqr": iqr, "median": median,
        "mad":mad
    }
    dataframe["sds"] = [check_sd(val, mean, sd, 3.0) for val in dataframe.value]
    dataframe["mads"] = [check_mad(val, median, mad, 3.0) for val in dataframe.value]
    dataframe["iqrs"] = [check_iqr(val, median, p25, p75, iqr, 1.5) for val in dataframe.value]
    
    return (dataframe, calculations)
    
def score_results(
        dataframe,
        weights
):
    """
    Take a dataframe and dictionary of weights
    """
    return dataframe.assign(anomaly_score=(
        dataframe["sds"] * weights["sds"] + 
        dataframe["iqrs"] * weights["iqrs"] +
        dataframe["mads"] * weights["mads"]
    ))

def determine_outliers(
        dataframe,
        sensitivity_score,
        max_fractional_anomalies
):
    sensitivity_score = (100 -  sensitivity_score) / 100.0
    max_fractional_anomaly_score = np.quantile(dataframe.anomaly_score,
                                           1.0 - max_fractional_anomalies)
    if max_fractional_anomaly_score > sensitivity_score and max_fractional_anomalies < 1.0:
        sensitivity_score = max_fractional_anomaly_score
        
    return dataframe.assign(
        is_anomaly=(dataframe.anomaly_score > sensitivity_score)
        )
    

def detect_univariate_statistical(
        dataframe,
        sensitivity_score,
        max_fractional_anomalies
):
    weights = {
        "sds": 0.25,
        "iqrs": 0.35,
        "mads": 0.45
    }
    value_counts = len(dataframe.value)
    if value_counts < 3:
        return (dataframe.assign(is_anomaly=False, anomaly_score=0.0), weights, "Must have minimum of 3 items for anomaly detection")
    elif (max_fractional_anomalies <= 0.0 or max_fractional_anomalies > 1.0):
        return (dataframe.assign(is_anomaly=False, anomaly_score=0.0), weights, "Must have valid max fraction of anomalies, 0 < x <= 1.0")
    elif (sensitivity_score <= 0 or sensitivity_score > 100):
        return (dataframe.assign(is_anomaly=False, anomaly_score=0.0), weights, "Must have valid sensitivity score, 0 < x <= 100.0")
    else:
        df_test, calculations = run_tests(dataframe)
        df_scored = score_results(df_test, weights)
        df_out = determine_outliers(df_scored, sensitivity_score, max_fractional_anomalies)
        return  (df_out, weights, {"message" : "Ensemble of [mean +/- 3*SD, median +/- 3*MAD, median +/- 1.5*IQR],",
                                "calculations": calculations}) 

## Perform univariate anomaly detection from the dataset based on the number of sessions per src/dest pair

In [17]:
# pass the new dataframe to the  detect_univariate_statistical function, the function expects a column named "value"
# to perform its tests on so we'll rename the "count" prior
# to passing the dataframe. Additinally parameters to the function 
# include a sensitivitiy score which we are passing 80 which means
# we want the function to return values that have an 80% confidence level
# lastly minimum fraction anomaly value, which tells the function
# that we expect atleast that fraction of the entire dataset to 
# be an outlier we are passing an initial value of 0.05

res_df, _, calculations = detect_univariate_statistical(new.rename(columns={"count":"value"}), 80, .05)
calculations

{'message': 'Ensemble of [mean +/- 3*SD, median +/- 3*MAD, median +/- 1.5*IQR],',
 'calculations': {'mean': np.float64(153.68802228412255),
  'sd': np.float64(740.8499065008234),
  'p25': np.float64(2.0),
  'p75': np.float64(109.0),
  'iqr': np.float64(107.0),
  'median': np.float64(5.0),
  'mad': np.float64(4.447806655516806)}}

## Review the results from the above function, filter for only the records that were found as anomaly


In [18]:
# Show function output
res_df[res_df.is_anomaly == True].shape, display(HTML(res_df[res_df.is_anomaly == True].to_html(index=False) ))

src,dest,dest_port,interval_average,interval_std,value,bytes_out,bytes_in,bytes_diff,out_bytes_per_session,in_bytes_per_session,sds,mads,iqrs,anomaly_score,is_anomaly
192.168.100.136,172.208.51.75,7707.0,6.50068,0.868396,13281,19789913.0,17996565.0,1.71027,1490.092086,1355.060989,1.0,1.0,1.0,1.05,True
192.168.100.136,192.168.100.150,7680.0,74.571572,171.628866,1151,334413.0,238878.0,0.091109,290.541268,207.539531,0.448724,1.0,1.0,0.912181,True
192.168.100.152,192.168.100.150,7680.0,87.350425,198.150119,981,289377.0,222126.0,0.064136,294.981651,226.428135,0.372236,1.0,1.0,0.893059,True
192.168.100.136,192.168.100.2,53.0,50.376894,45.577729,1715,754165.0,126635.0,0.598459,439.746356,73.83965,0.702487,1.0,1.0,0.975622,True
192.168.100.136,192.168.100.2,137.0,50.376894,45.577729,1715,754165.0,126635.0,0.598459,439.746356,73.83965,0.702487,1.0,1.0,0.975622,True
192.168.100.150,192.168.100.2,53.0,94.935585,135.8954,911,86853.0,118550.0,-0.030229,95.33809,130.131723,0.34074,1.0,1.0,0.885185,True
192.168.100.152,192.168.100.2,53.0,95.458832,137.361951,905,89231.0,120438.0,-0.029761,98.59779,133.080663,0.338041,1.0,1.0,0.88451,True
192.168.100.151,192.168.100.2,53.0,97.536194,141.390069,884,85758.0,115894.0,-0.02874,97.011312,131.10181,0.328592,1.0,1.0,0.882148,True
192.168.100.150,192.168.100.2,3.0,94.935585,135.8954,911,86853.0,118550.0,-0.030229,95.33809,130.131723,0.34074,1.0,1.0,0.885185,True
192.168.100.152,192.168.100.2,3.0,95.458832,137.361951,905,89231.0,120438.0,-0.029761,98.59779,133.080663,0.338041,1.0,1.0,0.88451,True


((16, 16), None)

## What do the results say

Interesting observation from the results. The function correctly identified the outbound connection to 172.208.51.75 as 
a potential anomaly as well as several other src/dest pairs. But just looking at the meta data for the pairs it looks like the confirmed malicious pair stands out from other pairs from the perspective of other features as well; the interval average is significant shorter and the bytes calculations are higher than most if not all.

I got the idea of filtering the first round of anomalies through the detecton_univariate_statistical function, passing the results from the prior test to the function but focus on a different feature. Of course this is technically no longer univariate, but we're going with the flow here.

In [19]:
## Create a new dataframe from the first anomaly check for all pairs that were found as potential anomalies
# Since the function is looking for a column name of value we'll rename the value column back to the original 
# name of count
first_anomalies = res_df[res_df.is_anomaly == True].rename(columns={"value": "count"})


In [20]:
## Rerun the detect_univariate_statistical function with the new
# First since we're create multiple dataframes I'm going to make a new dataframe to pass to the function with a 
# more identifiable name

bytes_out_df = first_anomalies.rename(columns={"bytes_out":"value"})

res_df, _, calulations = detect_univariate_statistical(
    bytes_out_df,
    80, .05)
calulations
display(HTML(res_df[res_df.is_anomaly == True].to_html(index=False) ))

src,dest,dest_port,interval_average,interval_std,count,value,bytes_in,bytes_diff,out_bytes_per_session,in_bytes_per_session,sds,mads,iqrs,anomaly_score,is_anomaly
192.168.100.136,172.208.51.75,7707.0,6.50068,0.868396,13281,19789913.0,17996565.0,1.71027,1490.092086,1355.060989,1.0,1.0,1.0,1.05,True


## And then there was one

After the first round of filtering we are left with only the known C2 beacon. Lets repeat again with another feature

In [21]:
# Another pass using a different feature
# this time, the bytes_in metric


bytes_in_df = first_anomalies.rename(columns={"bytes_in":"value"})

res_df, _, calulations = detect_univariate_statistical(
    bytes_in_df,
    80, .05)
calulations
display(HTML(res_df[res_df.is_anomaly == True].to_html(index=False) ))

src,dest,dest_port,interval_average,interval_std,count,bytes_out,value,bytes_diff,out_bytes_per_session,in_bytes_per_session,sds,mads,iqrs,anomaly_score,is_anomaly
192.168.100.136,172.208.51.75,7707.0,6.50068,0.868396,13281,19789913.0,17996565.0,1.71027,1490.092086,1355.060989,1.0,1.0,1.0,1.05,True


## Pretty cool to cull the results in such a way that only the C2 beacon remains based on those features. 

It would be pretty trivial to see what the results would be for all of the aggregated features at once, we can loop it

In [22]:
# First we'll throw the features of interest in a list

feature_names = ['interval_average','interval_std','count','bytes_out','bytes_in','bytes_diff','out_bytes_per_session','in_bytes_per_session']

In [23]:
##
# Just structure a loop block to setup the univariate AD function for each feature, outside of  the loop we'll
# grab the initial size information and create a dictionary to store individual results as we may want to do 
# additional comparison checking pending the results of the individual tests. 
# We also print the results of each test

all_findings = {}


file_meta = {
    "Original Size" : f"{len(df)}",
    "Aggregated Size" : f"{len(new)}",
    "First Reduce Size" : f"{len(first_anomalies)}"
}

print(file_meta,"\n")
for feature in feature_names:
    
    res_df, _, calulations = detect_univariate_statistical(first_anomalies.rename(columns={feature:"value"})
                                                           ,80, .05)
    findings_df = res_df[res_df.is_anomaly == True].loc[::]
    results_len = len(findings_df)
    info = {'Feature': f"{feature}",
             'Number_Anomalous': f"{results_len}"}
    
    all_findings[feature] = findings_df
    print(info)

{'Original Size': '35948', 'Aggregated Size': '359', 'First Reduce Size': '16'} 

{'Feature': 'interval_average', 'Number_Anomalous': '1'}
{'Feature': 'interval_std', 'Number_Anomalous': '1'}
{'Feature': 'count', 'Number_Anomalous': '1'}
{'Feature': 'bytes_out', 'Number_Anomalous': '1'}
{'Feature': 'bytes_in', 'Number_Anomalous': '1'}
{'Feature': 'bytes_diff', 'Number_Anomalous': '1'}
{'Feature': 'out_bytes_per_session', 'Number_Anomalous': '1'}
{'Feature': 'in_bytes_per_session', 'Number_Anomalous': '1'}


# Interpreting the final output

The individual results don't illuminate any new records of interest, but the reduction numbers are encouraging. As analysts who are tasked with finding the 4 inch strand of hay that is frayed on one end in a warehouse of hay, meaningful data reduction is a significant battle. We can apply manual efforts rooted in domain and environment knowledge, that'll always be needed. But there have been countless times when I have been brought into an investigation where the environment is unknown, there is no documentation and domain knowledge is nill. In those instances having a proven technique to establish a starting point from leads is huge. 

Even from the initial reduction we now have a dataset that we can apply meaningful enrichment to at least paint some context around the network connections.


#### This is works well for this particular data sample, I wonder if it works with other log samples. Luckily the good people at [active countermeasure](https://www.activecountermeasures.com/) are doing God's work and providing learners with LARGE traffic samples that contain malware communication traffic, so let's try to apply one such [sample](https://acm-motd.s3.amazonaws.com/xenorat_zeek_logs.zip) surrounding xenorat network traffic.

[**Challenge 2 - Xenorat**](https://www.activecountermeasures.com/malware-of-the-day-xenorat/) 

- C2 Server: 172.208.51.75
- Beacon Timing: none
- Jitter: none



In [24]:
# The files have been saved individually going to do some globbing and pandas work to consolidate them
import glob

In [25]:
# Getting paths to all of the conn files
conn_files = glob.glob("xeno*/conn.*.log*")
conn_files

['xenorat_zeek_logs/conn.08_00_00-09_00_00.log',
 'xenorat_zeek_logs/conn.19_00_00-20_00_00.log',
 'xenorat_zeek_logs/conn.10_00_00-11_00_00.log',
 'xenorat_zeek_logs/conn.14_00_00-15_00_00.log',
 'xenorat_zeek_logs/conn.17_00_00-18_00_00.log',
 'xenorat_zeek_logs/conn.15_00_00-16_00_00.log',
 'xenorat_zeek_logs/conn.11_00_00-12_00_00.log',
 'xenorat_zeek_logs/conn.12_00_00-13_00_00.log',
 'xenorat_zeek_logs/conn.16_00_00-17_00_00.log',
 'xenorat_zeek_logs/conn.03_00_00-04_00_00.log',
 'xenorat_zeek_logs/conn.01_00_00-02_00_00.log',
 'xenorat_zeek_logs/conn.05_00_00-06_00_00.log',
 'xenorat_zeek_logs/conn.07_00_00-08_00_00.log',
 'xenorat_zeek_logs/conn.18_00_00-19_00_00.log',
 'xenorat_zeek_logs/conn.04_00_00-05_00_00.log',
 'xenorat_zeek_logs/conn.22_00_00-23_00_00.log',
 'xenorat_zeek_logs/conn.00_00_00-01_00_00.log',
 'xenorat_zeek_logs/conn.13_00_00-14_00_00.log',
 'xenorat_zeek_logs/conn.21_00_00-22_00_00.log',
 'xenorat_zeek_logs/conn.23_00_00-00_00_00.log',
 'xenorat_zeek_logs/

In [26]:
# make a loop to read make each file a dataframe and store is a list
dataframes = []
for file in conn_files:
    df = pd.read_csv(file, sep="\t", skiprows=8, names=zeek_cols["conn"], low_memory=False)
    # drop last row
    df = df.iloc[:-1]
    dataframes.append(df)

In [27]:
# Merge or concat all dataframes into one
df = pd.concat(dataframes)

In [28]:
# Check out file dataframe size and preview
df.shape, display(HTML( df.sample(5).to_html(index=False) ))

ts,uid,orig_h,orig_p,resp_h,resp_p,proto,service,duration,orig_bytes,resp_bytes,conn_state,local_orig,local_resp,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,tunnel_parents
1716328590.010444,CLJUG54YcbajQ03TBh,fe80::6a6f:2ae7:422e:b6d3,5353.0,ff02::fb,5353.0,udp,dns,0.000158,90,0,S0,F,F,0.0,D,2.0,186.0,0.0,0.0,-
1716353683.322302,CtOxNz3ORqqZGmxCRg,192.168.2.19,35203.0,192.168.2.1,53.0,udp,dns,0.000455,92,46,SF,T,T,0.0,Dd,2.0,148.0,1.0,74.0,-
1716339799.016182,CoFRCu271ZoOVZedK9,192.168.2.19,58181.0,192.168.2.1,53.0,udp,dns,0.008357,116,314,SF,T,T,0.0,Dd,2.0,172.0,1.0,342.0,-
1716337592.143284,Cd0Lpp29akWmFnBeb5,192.168.2.19,33460.0,1.1.1.1,53.0,udp,dns,0.005007,116,250,SF,T,F,0.0,Dd,2.0,172.0,1.0,278.0,-
1716345387.78951,CqlKRi3h4b11GWYRi1,192.168.2.84,51306.0,23.208.15.98,443.0,tcp,ssl,60.179695,2755,5796,RSTO,T,F,0.0,ShADadR,13.0,3287.0,12.0,6288.0,-


((44010, 21), None)

### Just copying from the code cells above and repeating the same battery of tests

In [29]:
converted_timestamps = [datetime.fromtimestamp(ts) for ts in df.ts.astype(float)]
df.insert(0, "Time", converted_timestamps)
reduced = df[["orig_h", "resp_h", "resp_p"]].value_counts().reset_index(name="pair_counts").query("pair_counts > 1")

In [30]:
meta_list = []
for pair in reduced.itertuples():
    tmp_df = df[(df.orig_h == pair.orig_h) & (df.resp_h == pair.resp_h)]
    times = tmp_df["Time"].tolist()
    intervals = make_time_interval(times)
    count = len(intervals)
    sum_bytes_in = tmp_df.resp_ip_bytes.sum()
    sum_bytes_out = tmp_df.orig_ip_bytes.sum()
    
    meta = {
        "src" : pair.orig_h,
        "dest" : pair.resp_h,
        "dest_port": pair.resp_p,
        "interval_average" : np.mean(intervals),
        "interval_std" : np.std(intervals),
        "count" : len(intervals) + 1,
        "bytes_out" : sum_bytes_out,
        "bytes_in" : sum_bytes_in
    }
    meta_list.append(meta)

new = pd.DataFrame(meta_list)

new["bytes_diff"] = (new.bytes_out - new.bytes_in) / 1024 /1024
new["out_bytes_per_session"] = new.bytes_out / new["count"]
new["in_bytes_per_session"] = new.bytes_in / new["count"]

###
**Spoiler:** when using the same max fraction anomaly value as the first sample. The "outlier funnel" failed to produce the desired results i.e the C2 traffic missed the cut. I played around with the input values and increasing the value to .10 produced meaningful results. Increasing the value is acceptable, in the end we'll have more data marked as potential outliers but still be in a better spot than not pushing the data through the funnel.
###

In [31]:
# Getting skewness and kurtosis scores for curiousity sake.
# I'm thinking the reason we had to increase the max_number_of_anomalies variable
# of the test has to do with the fact this sample has more skewness and kurtosis
# which I believe means a lot of the data exists in the tail of the distibution
# and is prone to more outliers and extreme conditions. Could be way off base
# but wanted to capture the notes to come back to later

new["count"].skew(), new["count"].kurtosis()

(np.float64(21.287616059512075), np.float64(528.6638182201758))

In [32]:
res_df, _, calculations = detect_univariate_statistical(new.rename(columns={"count":"value"}), 90, .10)
display(HTML(res_df[res_df.is_anomaly == True].sample(5).to_html(index=False) ))

src,dest,dest_port,interval_average,interval_std,value,bytes_out,bytes_in,bytes_diff,out_bytes_per_session,in_bytes_per_session,sds,mads,iqrs,anomaly_score,is_anomaly
192.168.2.89,52.113.194.132,443.0,4910.523579,4613.514658,16,21357.0,124598.0,-0.098458,1334.8125,7787.375,0.031746,1.0,1.0,0.807937,True
192.168.2.77,52.137.102.105,443.0,5595.313504,4094.807482,15,26082.0,49424.0,-0.022261,1738.8,3294.933333,0.032479,1.0,1.0,0.80812,True
192.168.2.65,192.168.2.255,137.0,713.87213,483.426616,120,46636.0,0.0,0.044476,388.633333,0.0,0.0444,1.0,1.0,0.8111,True
192.168.2.14,192.168.2.79,7680.0,104.06012,1987.751005,831,269616.0,207012.0,0.059704,324.447653,249.111913,0.56498,1.0,1.0,0.941245,True
192.168.2.82,192.168.2.1,49152.0,86.56836,134.26966,994,70694.0,183884.0,-0.107946,71.120724,184.993964,0.684326,1.0,1.0,0.971081,True


In [33]:
first_anomalies = res_df[res_df.is_anomaly == True].rename(columns={"value": "count"})
feature_names = ['interval_average','interval_std','count','bytes_out','bytes_in','bytes_diff','out_bytes_per_session','in_bytes_per_session']
all_findings = {}


file_meta = {
    "Original Size" : f"{len(df)}",
    "Aggregated Size" : f"{len(new)}",
    "First Reduce Size" : f"{len(first_anomalies)}"
}

print(file_meta,"\n")
for feature in feature_names:
    
    res_df, _, calulations = detect_univariate_statistical(first_anomalies.rename(columns={feature:"value"})
                                                           ,60, .05)
    findings_df = res_df[res_df.is_anomaly == True].loc[::]
    results_len = len(findings_df)
    info = {'Feature': f"{feature}",
             'Number_Anomalous': f"{results_len}"}
    
    all_findings[feature] = findings_df
    print(info)

{'Original Size': '44010', 'Aggregated Size': '853', 'First Reduce Size': '84'} 

{'Feature': 'interval_average', 'Number_Anomalous': '5'}
{'Feature': 'interval_std', 'Number_Anomalous': '5'}
{'Feature': 'count', 'Number_Anomalous': '5'}
{'Feature': 'bytes_out', 'Number_Anomalous': '5'}
{'Feature': 'bytes_in', 'Number_Anomalous': '5'}
{'Feature': 'bytes_diff', 'Number_Anomalous': '5'}
{'Feature': 'out_bytes_per_session', 'Number_Anomalous': '5'}
{'Feature': 'in_bytes_per_session', 'Number_Anomalous': '5'}


## Since we have multiple results lets bring them back together and look at as a whole

In [34]:
# Pulling the dataframes out of the loop results, we'll have to
# revert the column names back to the original feature name
# which we can easily do within the list comprehension
# Then we'll use pd.concat() to merge all of the dataframes
# Some src/dest pairs may have been found to be outliers from multiple tests
# so we'll drop columns added during tests as well as drop
# duplicate rows


normalized_df_findings = [
    v.rename(columns={"value":k}) for k, v in all_findings.items()
]

all_combined = pd.concat(normalized_df_findings)
all_combined.drop(columns=["sds", "mads", "iqrs", "anomaly_score"], inplace=True)
all_combined.drop_duplicates(inplace=True)
display(HTML(all_combined.to_html(index=False)))

src,dest,dest_port,interval_average,interval_std,count,bytes_out,bytes_in,bytes_diff,out_bytes_per_session,in_bytes_per_session,is_anomaly
192.168.2.82,151.101.138.172,80.0,5782.400954,4062.185907,14,62892.0,1236375.0,-1.119121,4492.285714,88312.5,True
0.0.0.0,255.255.255.255,67.0,6930.646651,5816.460391,12,8696.0,0.0,0.008293,724.666667,0.0,True
192.168.2.15,239.255.255.250,1900.0,6318.08914,19014.643362,12,12828.0,0.0,0.012234,1069.0,0.0,True
192.168.2.82,52.113.195.132,443.0,6032.743888,16493.810552,12,19650.0,83327.0,-0.060727,1637.5,6943.917,True
192.168.2.1,192.168.2.79,57935.0,6167.458337,16258.724895,12,6480.0,0.0,0.00618,540.0,0.0,True
192.168.2.13,239.255.255.250,1900.0,5325.09202,16827.149606,12,63339.0,0.0,0.060405,5278.25,0.0,True
192.168.2.13,239.255.255.250,3702.0,5325.09202,16827.149606,12,63339.0,0.0,0.060405,5278.25,0.0,True
192.168.2.19,1.1.1.1,53.0,7.316422,7.424754,11808,2030976.0,4132800.0,-2.004456,172.0,350.0,True
192.168.2.19,192.168.2.1,53.0,22.644662,49.560266,3812,595904.0,554997.0,0.039012,156.32319,145.5921,True
192.168.2.1,224.0.0.251,5353.0,30.002055,0.042252,2880,302400.0,0.0,0.288391,105.0,0.0,True


#
## While it doesn't reduce the data drastically we can apply additional filtering after the processing to drop the RFC1918, broadcast and multicast destination ip addresses as we don't expect *normal* C2 traffic to traverse to these IP spaces. 

##### **OBLIGATORY NOTE:** I'm aknowledging that by filtering out private IP space I could miss meaningful internal traffic that could be indicative of lateral movement, data staging/exfiltration, or even recon. Its important to keep a context based approach to the analysis process based on what we know now, what we think and what we don't know. With that in mind I know a host from the traffic sample is compromised with a somewhat commodity variant of malware, I know that after the initial compromise the infected host typically sends information to a C2 server
#

In [35]:
display(HTML(all_combined[~all_combined.dest.str.contains(
    r"\b(^(127|10|255|239|224)\.\d{1,3}|^192\.168|^172\.(?:1[6-9]|2[0-9]|3[0-1]))\.\d{1,3}\.\d{1,3}\b"
)].to_html(index=False)))

  display(HTML(all_combined[~all_combined.dest.str.contains(


src,dest,dest_port,interval_average,interval_std,count,bytes_out,bytes_in,bytes_diff,out_bytes_per_session,in_bytes_per_session,is_anomaly
192.168.2.82,151.101.138.172,80.0,5782.400954,4062.185907,14,62892.0,1236375.0,-1.119121,4492.285714,88312.5,True
192.168.2.82,52.113.195.132,443.0,6032.743888,16493.810552,12,19650.0,83327.0,-0.060727,1637.5,6943.917,True
192.168.2.19,1.1.1.1,53.0,7.316422,7.424754,11808,2030976.0,4132800.0,-2.004456,172.0,350.0,True
192.168.2.77,172.208.51.75,4444.0,71.471786,629.141605,227,1420856.0,2043880.0,-0.594162,6259.277533,9003.877,True
192.168.2.19,149.112.122.10,443.0,502.592023,342.867823,172,1381676.0,443953.0,0.894282,8033.0,2581.122,True
192.168.2.77,172.208.51.75,0.0,71.471786,629.141605,227,1420856.0,2043880.0,-0.594162,6259.277533,9003.877,True
192.168.2.85,151.139.47.180,80.0,1.332421,1.740996,17,177998.0,25992811.0,-24.618924,10470.470588,1528989.0,True
192.168.2.77,151.139.51.188,80.0,2.694079,5.552809,17,94655.0,18805856.0,-17.844392,5567.941176,1106227.0,True
192.168.2.82,151.139.51.188,80.0,1.85229,2.808766,12,868483.0,164078476.0,-155.649179,72373.583333,13673210.0,True
192.168.2.77,52.137.102.105,443.0,5595.313504,4094.807482,15,26082.0,49424.0,-0.022261,1738.8,3294.933,True


## Final thoughts

For the prior sample we have reduced the data down from ~44k log events to 10 combinations that are possibly malicious. After reducing the data down just doing a visual grep we can see possible src/dest combinations that we would prioritize based on the characteristics compared to the other outliers. The beaconing traffic is the "biggest" outlier in the group of outliers.  In this final output we see a group of connections between 192.168.2.77 and 172.208.51.75 over port 4444 (and port 0 for icmp), which standout in this grouping mainly for the non-standard port use, but from other statistical calculations that are not as obvious to the naked eye. This, is in fact the connection pair we are looking for, the XenoRat C2.

Below I combined the logs from both challenges, I ended up with ~79k connection logs, 1211 session pairs and in the end the process spit out 10 pairs that stood out more than others, thats a little bit better than 90% data reduction. Of course reduction would be meaningless if the things we are looking for were lost during the reduction process, but with this final test both sets of C2 beaconing were identified as outliers.

To threat hunt is to find the outliers, this is one of many possible methods to aid in making the outliers more identifiable in the sea of data. This is not a perfect method, by any means, but its promising and a simple enough process to easily incorporate into pertinent situations. As seen during the second sample the sensitivity score and max fractional anomaly setting is variable and will impact the range of outliers found. It will take some trial and error to determine what those settings look like for different datasets. 

### Encore - combine the samples from both challenges

Just out of more curiosity I combined the Zeek looks from both challenges and performed the outlier checks and *surprise* both src/dest pairs from the compromsed hosts to their respective C2 servers were idenified in the final result. 

In [36]:
conn_files = glob.glob("xeno*/conn.*.log*")
conn_files += ["async_infection_zeek_logs/conn.log"]

dataframes = []
for file in conn_files:
    df = pd.read_csv(file, sep="\t", skiprows=8, names=zeek_cols["conn"], low_memory=False)
    # drop last row
    df = df.iloc[:-1]
    dataframes.append(df)
df = pd.concat(dataframes)

converted_timestamps = [datetime.fromtimestamp(ts) for ts in df.ts.astype(float)]
df.insert(0, "Time", converted_timestamps)
reduced = df[["orig_h", "resp_h", "resp_p"]].value_counts().reset_index(name="pair_counts").query("pair_counts > 1")

meta_list = []
for pair in reduced.itertuples():
    tmp_df = df[(df.orig_h == pair.orig_h) & (df.resp_h == pair.resp_h)]
    times = tmp_df["Time"].tolist()
    intervals = make_time_interval(times)
    count = len(intervals)
    sum_bytes_in = tmp_df.resp_ip_bytes.sum()
    sum_bytes_out = tmp_df.orig_ip_bytes.sum()
    
    meta = {
        "src" : pair.orig_h,
        "dest" : pair.resp_h,
        "dest_port": pair.resp_p,
        "interval_average" : np.mean(intervals),
        "interval_std" : np.std(intervals),
        "count" : len(intervals) + 1,
        "bytes_out" : sum_bytes_out,
        "bytes_in" : sum_bytes_in
    }
    meta_list.append(meta)

new = pd.DataFrame(meta_list)

new["bytes_diff"] = (new.bytes_out - new.bytes_in) / 1024 /1024
new["out_bytes_per_session"] = new.bytes_out / new["count"]
new["in_bytes_per_session"] = new.bytes_in / new["count"]

res_df, _, calculations = detect_univariate_statistical(new.rename(columns={"count":"value"}), 90, .10)
first_anomalies = res_df[res_df.is_anomaly == True].rename(columns={"value": "count"})

feature_names = ['interval_average','interval_std','count','bytes_out','bytes_in','bytes_diff','out_bytes_per_session','in_bytes_per_session']
all_findings = {}


file_meta = {
    "Original Size" : f"{len(df)}",
    "Aggregated Size" : f"{len(new)}",
    "First Reduce Size" : f"{len(first_anomalies)}"
}

print(file_meta,"\n")
for feature in feature_names:
    
    res_df, _, calulations = detect_univariate_statistical(first_anomalies.rename(columns={feature:"value"})
                                                           ,60, .05)
    findings_df = res_df[res_df.is_anomaly == True].loc[::]
    results_len = len(findings_df)
    info = {'Feature': f"{feature}",
             'Number_Anomalous': f"{results_len}"}
    
    all_findings[feature] = findings_df

normalized_df_findings = [
    v.rename(columns={"value":k}) for k, v in all_findings.items()
]

all_combined = pd.concat(normalized_df_findings)
all_combined.drop(columns=["sds", "mads", "iqrs", "anomaly_score"], inplace=True)
all_combined.drop_duplicates(inplace=True)
display(HTML(all_combined[~all_combined.dest.str.contains(
    r"\b(^(127|10|255|239|224)\.\d{1,3}|^192\.168|^172\.(?:1[6-9]|2[0-9]|3[0-1]))\.\d{1,3}\.\d{1,3}\b"
)].to_html(index=False)))



{'Original Size': '79958', 'Aggregated Size': '1211', 'First Reduce Size': '118'} 



  display(HTML(all_combined[~all_combined.dest.str.contains(


src,dest,dest_port,interval_average,interval_std,count,bytes_out,bytes_in,bytes_diff,out_bytes_per_session,in_bytes_per_session,is_anomaly
192.168.2.19,149.112.122.10,443.0,502.592023,342.867823,172,1381676.0,443953.0,0.894282,8033.0,2581.122093,True
fe80::d4:61b7:f0bf:f504,ff02::fb,5353.0,190.622819,940.239998,454,198787.0,0.0,0.189578,437.856828,0.0,True
192.168.100.136,172.208.51.75,7707.0,6.50068,0.868396,13281,19789913.0,17996565.0,1.71027,1490.092086,1355.060989,True
192.168.2.19,1.1.1.1,53.0,7.316422,7.424754,11808,2030976.0,4132800.0,-2.004456,172.0,350.0,True
192.168.2.77,172.208.51.75,4444.0,71.471786,629.141605,227,1420856.0,2043880.0,-0.594162,6259.277533,9003.876652,True
192.168.2.77,172.208.51.75,0.0,71.471786,629.141605,227,1420856.0,2043880.0,-0.594162,6259.277533,9003.876652,True
192.168.100.150,20.54.24.231,443.0,289.773374,378.907335,298,541451.0,1086336.0,-0.519643,1816.949664,3645.422819,True
192.168.100.152,20.54.24.231,443.0,291.139733,383.195294,296,540723.0,1080074.0,-0.514365,1826.766892,3648.898649,True
192.168.100.151,20.54.24.231,443.0,300.097482,398.506387,286,517777.0,1042743.0,-0.500647,1810.409091,3645.954545,True
192.168.100.136,20.54.24.231,443.0,362.063867,417.032763,239,430524.0,865137.0,-0.414479,1801.355649,3619.820084,True
