# Access log visualization with parallel coordinates
Only the default Apache **access.log** format is supported. The following columns are available for use in the visualization:

* remote\_host
* remote\_logname
* remote\_user
* time\_received
* time\_received\_datetimeobj
* time\_received\_isoformat
* time\_received\_tz\_datetimeobj
* time\_received\_tz\_isoformat
* time\_received\_utc\_datetimeobj
* time\_received\_utc\_isoformat
* request\_first\_line
* request\_method
* request\_url
* request\_http\_ver
* request\_url\_scheme
* request\_url\_netloc
* request\_url\_path
* request\_url\_query
* request\_url\_fragment
* request\_url\_username
* request\_url\_password
* request\_url\_hostname
* request\_url\_port
* request\_url\_query\_dict
* request\_url\_query\_list
* request\_url\_query\_simple\_dict
* status
* response\_bytes\_clf
* request\_header\_referer
* request\_header\_user\_agent
* request\_header\_user\_agent\_\_browser\_\_family
* request\_header\_user\_agent\_\_browser\_\_version\_string
* request\_header\_user\_agent\_\_os\_\_family
* request\_header\_user\_agent\_\_os\_\_version\_string
* request\_header\_user\_agent\_\_is\_mobile

In [None]:
import os

# Resolve the access log location from the ACCESS_LOG_PATH environment variable.
# In case the tool is run with 'cincan', the path may not exist as given; the
# same path is then looked up under /home/appuser/ and written back to the
# environment so later readers of ACCESS_LOG_PATH see the resolved value.
try:
    access_log_path = os.environ['ACCESS_LOG_PATH']
except KeyError:
    raise KeyError("ACCESS_LOG_PATH environment variable must be set to the access log file") from None

if not os.path.isfile(access_log_path):
    access_log_path = "/home/appuser/" + access_log_path
    os.environ['ACCESS_LOG_PATH'] = access_log_path

input_path = access_log_path
print(input_path)

# Parsed-log columns carried into the visualization.
columns = ["remote_host", "request_header_user_agent__browser__family", "request_header_user_agent__browser__version_string",
           "request_header_user_agent__os__family", "request_method", "response_bytes_clf", "status",
           "request_url_path", "request_header_user_agent"]

# `columns` may also be supplied as the string form of a Python list
# (presumably when injected as a notebook parameter); convert it back to a list.
if isinstance(columns, str):
    import ast
    columns = ast.literal_eval(columns)

In [None]:
# Load the whole access log into memory as a list of lines
# (each entry keeps its trailing line ending).
with open(input_path, "r") as log_file:
    log_data = list(log_file)

In [None]:
import pandas as pd
import apache_log_parser

from tqdm import tqdm_notebook as tqdm

# Parser for the default Apache access.log layout (the only format this
# notebook supports).
line_parser = apache_log_parser.make_parser("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"")

# Collect one dict per log line and build the DataFrame once at the end.
# Growing a frame row by row re-copies it on every iteration, and
# DataFrame.append was removed in pandas 2.0.
parsed_rows = []

for log_line in tqdm(log_data, desc="Parsing log lines"):
    parsed_line = line_parser(log_line)
    # Store every parsed value as its string form so all columns hold text.
    parsed_rows.append({key: str(value) for key, value in parsed_line.items()})

# One row per log line; an empty log yields an empty frame.
df = pd.DataFrame(parsed_rows)

In [None]:
# Short display names for the verbose user-agent columns.
rename_columns = {
    "request_header_user_agent__browser__family": "browser",
    "request_header_user_agent__browser__version_string": "browser_version",
    "request_header_user_agent__os__family": "os",
}


def _to_epoch_seconds(value):
    """Convert one ISO-formatted time string to a POSIX timestamp."""
    return pd.to_datetime(value).timestamp()


# The ISO time column is selected only so the numeric "timestamp" column can
# be derived from it; it is dropped again right afterwards.
columns.append("time_received_isoformat")
df = df[columns].rename(columns=rename_columns)

df["timestamp"] = df["time_received_isoformat"].apply(_to_epoch_seconds)
df = df.drop(columns=["time_received_isoformat"])

# Give every column an integer-coded "<name>_cat" companion column.
# The column list is snapshotted first because the loop adds new columns.
for col in list(df.columns):
    category_codes = df[col].astype("category").cat.codes
    df[col + "_cat"] = category_codes

In [None]:
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

# Initialise Plotly's notebook integration before any figure is drawn.
# NOTE(review): per the Plotly offline docs, connected=True loads plotly.js
# from a CDN instead of embedding it, so rendering needs internet access —
# confirm this is acceptable where the notebook runs.
init_notebook_mode(connected=True)

In [None]:
# Columns whose name contains any of these markers never become an axis:
# the "_cat" code columns themselves, the request path and the raw user agent.
skipped_markers = ("_cat", "request_url_path", "request_header_user_agent")

dims = []

for column_name in df.columns:
    if any(marker in column_name for marker in skipped_markers):
        continue

    if "timestamp" not in column_name:
        # Non-timestamp axes plot the integer category codes of the column.
        dims.append(dict(label = column_name, values = df[column_name + "_cat"]))
        continue

    # The timestamp axis plots the raw values over their observed range.
    series = df[column_name]
    dims.append(dict(range = [series.min(), series.max()],
                     label = column_name, values = series))

In [None]:
# Colour every line by its timestamp, with the (reversed) colour scale
# spanning the observed timestamp range and a colour bar shown.
timestamps = df["timestamp"]
line_style = {
    "color": timestamps,
    "showscale": True,
    "reversescale": True,
    "cmin": timestamps.min(),
    "cmax": timestamps.max(),
}

# Single parallel-coordinates trace over the prepared dimensions.
data = [go.Parcoords(line = line_style, dimensions = dims)]

iplot(data)