## Import Libraries

In [131]:
from parse import compile
from parse import with_pattern
import datetime as dt
import pandas as pd
import openpyxl
import json

## Global parameters

In [151]:
# Text log file that create_log_list reads; a relative path, so it is resolved
# against the notebook's working directory.
TEXT_LOG_FILE_PATH = r'WCG100140020.txt'
# Destination of the Excel export written by data_frame_to_excel.
# NOTE(review): this is an absolute, user-specific Windows path; the export cell
# will presumably fail on any other machine unless this is edited first.
SAVE_XLSX_PATH = r'C:\Users\nomi8\OneDrive\Documents\Cours_BME\Agile\excel_parsed_log\parsed_log.xlsx'

## create_log_list
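- description: function which creates a **Python list** from the original **text file** containing the logs. Each list element is one complete log entry (the timestamped line plus any continuation lines that follow it).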
- input: text file (.txt)
- output: list of string

In [133]:
def create_log_list(log_file, date_prefix="2014/Oct/24"):
    """Split a text log file into one string per log entry.

    A new entry starts on every line that begins with ``date_prefix``; the
    lines that follow, up to the next such line, are continuation lines and
    are appended to the same entry (newlines are kept).

    Parameters
    ----------
    log_file : str
        Path of the text log file to read.
    date_prefix : str, optional
        Text that marks the first line of an entry. Defaults to the date of
        the capture this notebook was written for.

    Returns
    -------
    list of str
        The log entries in file order. Lines found before the first
        ``date_prefix`` line are discarded.
    """
    log_list = []
    current = None  # entry being accumulated; None until the first dated line

    # The context manager guarantees the file handle is released.
    with open(log_file) as logs:
        for line in logs:
            if line.startswith(date_prefix):
                if current is not None:
                    log_list.append(current)
                current = line
            elif current is not None:
                current += line

    # Flush the final entry: no later dated line exists to trigger the append.
    if current is not None:
        log_list.append(current)
    return log_list

# Read the log file named by TEXT_LOG_FILE_PATH and split it into one string per entry.
log_list = create_log_list(TEXT_LOG_FILE_PATH)

## portevent_filter
- description: function which filters a **log list** to extract only the logs of the type **PORTEVENT** (example: 2014/Oct/24 18:34:28.666455 mtc PORTEVENT WCG10014_0020.ttcn:53(testcase:WCG100140020) Port dnsInternalPort[0] was started.)
- input: list of string
- output: list of string

In [134]:
def portevent_filter(log_list, output_path='porteventlogs.txt'):
    """Keep only the log entries that contain ``PORTEVENT``.

    Every kept entry has its newline characters removed, so a multi-line
    entry becomes a single line. The kept entries are also written to
    ``output_path``, one entry per line.

    Parameters
    ----------
    log_list : list of str
        Log entries, e.g. as returned by ``create_log_list``.
    output_path : str, optional
        File that receives a copy of the kept entries; it is overwritten.

    Returns
    -------
    list of str
        The single-line PORTEVENT entries, in input order.
    """
    portevent_log_list = []
    # The context manager closes (and therefore flushes) the dump file.
    with open(output_path, 'w') as file:
        for log in log_list:
            if "PORTEVENT" in log:
                log_line = log.replace('\n', '')
                portevent_log_list.append(log_line)
                # Newlines were stripped above, so add one back as a record
                # separator; otherwise the whole dump is one unreadable line.
                file.write(log_line + '\n')
    return portevent_log_list

# Keep the PORTEVENT entries; as a side effect this call also (re)writes 'porteventlogs.txt'.
portevent_log_list  = portevent_filter(log_list)


## sending_message_filter
- description: function which filters a **log list** to extract only the logs in which a message is sent (entries containing the text "Sent on")
- input: list of string
- output: list of string

In [135]:

def sending_message_filter(log_list):
    """Return the entries of ``log_list`` that contain the text ``"Sent on"``.

    The entries are returned unchanged and in their original order.
    """
    return [entry for entry in log_list if "Sent on" in entry]

# Entries containing "Sent on"; these feed create_log_sending_message_dataframe below.
sending_log_list = sending_message_filter(log_list)

## Creation of log parsing functions:
- port_opening : parse the port opening log lines
- port_stopping : parse the port stopping log lines
- port_connexion_establishment : parse log lines which are used to connect two users
- port_connexion_waiting : parse waiting connexion log lines 
- port_connexion_acceptance : parse connexion acceptance log lines 
- port_mapping : parse port mapping log lines 
- message_sending : parse log lines where messages are sent
- message_enqueuing : parse message enqueuing log lines 
- message_reception : parse message reception log lines 
- message_extraction : parse message extraction log lines 

In [136]:
# Pre-compiled `parse` patterns, one per kind of log line. Every pattern starts
# with the same four space-separated fields (date, component name, event type,
# and a source-location field named `unknown_part`), followed by the literal
# text that identifies the kind of line.
# NOTE(review): `{date:27}` is presumably sized to the 27-character timestamp
# (e.g. "2014/Oct/24 18:34:28.666455"), and `{message_content:0}` presumably
# lets the content span the rest of the entry; confirm both against the
# `parse` format-specification docs.

# "Port <portname> was started."
port_opening = compile("{date:27} {user_name} {eventtype} {unknown_part} Port {portname} was started.")
# "Port <portname> was stopped."
port_stopping = compile("{date:27} {user_name} {eventtype} {unknown_part} Port {portname} was stopped.")
# "Port <receiver port> is waiting for connection from <sender port> on <type> pathname <path>."
port_connexion_waiting = compile("{date:27} {message_receiver} {eventtype} {unknown_part} Port {message_receiver_portname} is waiting for connection from {message_sender_portname} on {communication_type} pathname {pathname}.")
# "Port <sender port> has established the connection with <receiver port> using transport type <type>."
port_connexion_establishment = compile("{date:27} {message_sender} {eventtype} {unknown_part} Port {message_sender_portname} has established the connection with {message_receiver_portname} using transport type {communication_type}.")
# "Port <receiver port> has accepted the connection from <sender port>."
port_connexion_acceptance = compile("{date:27} {message_receiver} {eventtype} {unknown_part} Port {message_receiver_portname} has accepted the connection from {message_sender_portname}.")
# "Port <sender port> was mapped to <receiver port>."
port_mapping = compile("{date:27} {message_sender} {eventtype} {unknown_part} Port {message_sender_portname} was mapped to {message_receiver_portname}.")
# "Sent on <sender port> to <receiver> <type> : <content>"; the only pattern used further down in this notebook.
message_sending = compile("{date:27} {message_sender} {eventtype} {unknown_part} Sent on {message_sender_portname} to {message_receiver} {message_type} : {message_content:0}")
# "Message enqueued on <receiver port> from <sender> <type> : <content> id <n>"
message_enqueuing = compile("{date:27} {message_receiver} {eventtype} {unknown_part} Message enqueued on {message_receiver_portname} from {message_sender} {message_type} : {message_content:0} id {message_id_number}")
# "Receive operation on port <receiver port> succeeded, message from <sender>: <type> : <content> id <n>"
message_reception = compile("{date:27} {message_receiver} {eventtype} {unknown_part} Receive operation on port {message_receiver_portname} succeeded, message from {message_sender}: {message_type} : {message_content:0} id {message_id_number}")
# "Message with id <n> was extracted from the queue of <receiver port>."
message_extraction = compile("{date:27} {message_receiver} {eventtype} {unknown_part} Message with id {message_id_number} was extracted from the queue of {message_receiver_portname}.")


## create_log_sending_message_dataframe
- description: function which creates a pandas DataFrame containing the **date**, the message **sender** and **receiver**, the **eventtype** (which is always PORTEVENT), the **message sender port name**, the **message type** and the **content of the message**, using a Python list of logs in which messages are sent (provided by the **sending_message_filter** function)
- input: list of string
- output: Dataframe 

In [137]:
def create_log_sending_message_dataframe(sending_log_list):
    """Parse "Sent on ..." log entries into a DataFrame, one row per entry.

    Each entry is matched against the module-level ``message_sending``
    pattern and its named fields become the columns of the result.

    Parameters
    ----------
    sending_log_list : list of str
        Log entries, e.g. as returned by ``sending_message_filter``.

    Returns
    -------
    pandas.DataFrame
        Columns: date, message_sender, eventtype, unknown_part,
        message_sender_portname, message_receiver, message_type,
        message_content. Entries that do not match the pattern are skipped.
    """
    parsed_sending_log_dico = {'date': [], 'message_sender': [], 'eventtype': [], 'unknown_part': [], 'message_sender_portname': [],
                            'message_receiver': [], 'message_type': [], 'message_content': []}
    for sending_log in sending_log_list:
        match = message_sending.parse(sending_log)
        # parse() returns None when the entry does not fit the pattern; the
        # upstream filter only checks for the substring "Sent on", so such
        # entries can reach this point and must not crash the whole run.
        if match is None:
            continue
        for field, value in match.named.items():
            parsed_sending_log_dico[field].append(value)
    return pd.DataFrame(parsed_sending_log_dico)

# One row per sent message; `df` is read again below by parse_messages and final_dataframe.
df = create_log_sending_message_dataframe(sending_log_list)

## log_format_to_json
- description: transforms a message content extracted from the logs into a JSON-formatted string, so that it can be loaded directly into a Python dictionary
- input: string
- output: string

In [138]:
def log_format_to_json(string: str) -> str:
    """Rewrite a logged message body (``key := value`` / ``{ ... }``) as JSON text.

    The conversion is purely textual and runs in three passes:

    1. Line by line, every token that precedes ``:=`` is wrapped in double
       quotes (it is a key), and every value that is neither ``{`` nor
       already quoted is wrapped in double quotes as well, keeping a trailing
       comma outside the quotes.
    2. Over the whole text, an opening brace is turned into ``[`` when the
       next non-empty token contains ``{`` or is a quoted string not followed
       by ``:=`` (i.e. the brace holds a sequence rather than key/value
       pairs). The matching closing brace is turned into ``]``, tracked
       through a depth counter and a stack of converted depths.
    3. Every ``=`` character is removed, which turns ``:=`` into ``:``.

    Parameters
    ----------
    string : str
        Message content as captured from a log entry.

    Returns
    -------
    str
        Text intended to be loadable with ``json.loads``. This is not
        guaranteed: the caller ``parse_messages`` collects the messages for
        which loading fails.

    Notes
    -----
    Malformed input can raise ``IndexError`` (for example a ``:=`` that is
    the last token of a line, or an opening brace with fewer than three
    non-empty tokens after it).
    """

    # Pass 1: split the text into lines, and each line into space-separated tokens.
    lines = string.split('\n')
    edited_message = ''
    for line in lines: 
        decompose_line = line.split(' ')
        edited_line = ''

        # Look for ':=' and quote the key before it and the value after it.
        for i in range(len(decompose_line)):

            if decompose_line[i] != '':

                # The token just before ':=' is the key: wrap it in double quotes.
                if decompose_line[i] == ':=':
                    decompose_line[i-1] = '"' + decompose_line[i-1] + '"'

                    # Quote every value that is not the start of a nested structure.
                    if decompose_line[i+1] != '{':
                        # Value already starts with a quote: leave the line as is.
                        if '"' == decompose_line[i+1][0]:
                            break
                        else:
                            # Single-token value (it is the last token of the line):
                            # quote it, keeping a trailing comma outside the quotes.
                            if i + 1 == len(decompose_line) - 1:
                                if ',' == decompose_line[i+1][-1]:
                                    decompose_line[i+1] = '"' + decompose_line[i+1].replace(',','') + '",'
                                else:
                                    decompose_line[i+1] = '"' + decompose_line[i+1] + '"'
                            # Multi-token value: open the quote on its first token and
                            # close it on the last token of the line.
                            # NOTE: .replace(',', '') removes every comma found in that
                            # token, not only the trailing one.
                            else:
                                if ',' == decompose_line[-1][-1]:
                                    decompose_line[i+1] = '"' + decompose_line[i+1]
                                    decompose_line[-1] =  decompose_line[-1].replace(',','') + '",'
                                    break
                                else:
                                    decompose_line[i+1] = '"' + decompose_line[i+1]
                                    decompose_line[-1] =  decompose_line[-1] + '"'
                                    break

        # Rebuild the line. Empty tokens (produced by runs of spaces) become one
        # space each and ':=' is padded with spaces; all other tokens are joined
        # with no separator, so single spaces between ordinary tokens are lost.
        for e in decompose_line:
            if e == '':
                edited_line += ' '
            if e == ':=':
                edited_line += ' ' + e + ' '
            else:
                edited_line += e
        edited_message += edited_line + '\n'
    
    # Pass 2: re-tokenise the whole text on spaces and turn the braces that
    # hold a sequence (rather than key/value pairs) into square brackets.
    edited_message = edited_message.split(' ')
    # `open` is the current brace depth (it shadows the builtin inside this
    # function only); `ind` is a stack of the depths at which '{' became '['.
    open = 0
    ind = []
    for i in range(len(edited_message)):

        # Opening brace, unless the previous token mentions 'body'.
        # NOTE: for i == 0, edited_message[i-1] is the LAST token (negative index).
        if '{' in edited_message[i] and 'body' not in edited_message[i-1]:
            # NOTE(review): this branch counts '}' although it handles '{';
            # possibly intended for tokens such as '{}' -- confirm.
            if edited_message[i].count('}') > 1:
                for y in range(edited_message[i].count('}')):
                    open += 1
            else:
                open += 1
                # f = non-empty tokens from the current one onwards, so f[0] is
                # the current token, f[1] the next and f[2] the one after that.
                f = list(filter(('').__ne__, edited_message[i:]))
                if len(f) > 0:
                    # Next token opens another structure: this brace is a list.
                    if '{' in f[1]:
                        edited_message[i] = edited_message[i].replace('{', '[',-1)
                        ind.append(open)
                    # Next token is a quoted string that is not a key (not
                    # followed by ':='): this brace is a list of strings.
                    if '"' == f[1][0] and f[2] != ':=':
                        edited_message[i] = edited_message[i].replace('{', '[',-1)
                        ind.append(open)

        # Closing brace(s): turn '}' into ']' when the current depth matches
        # the depth on top of the `ind` stack.
        if '}' in edited_message[i]:
            if edited_message[i].count('}') > 1:
                # Several '}' in one token: handle them one at a time; only the
                # y-th one is rewritten when its depth matches.
                for y in range(edited_message[i].count('}')):
                    if len(ind) != 0 and open == ind[-1]:
                        s = ""
                        dico_i_split = edited_message[i].split('}')
                        for l in range(len(dico_i_split) - 1):
                            if l == y:
                                s += dico_i_split[l] + ']'
                            else:
                                s += dico_i_split[l] + '}'
                        s += dico_i_split[-1]
                        edited_message[i] = s
                        ind.pop(-1)
                    open -= 1
            else:
                if len(ind) != 0 and open == ind[-1]:
                    edited_message[i] = edited_message[i].replace('}',']',1)
                    ind.pop(-1)
                open -= 1

    # Pass 3: rebuild the text. Empty tokens become single spaces, then every
    # token is concatenated with no separator.
    for i in range(len(edited_message)):
        if edited_message[i] == '':
            edited_message[i] = ' '
    
    json_string = ""
    for i in range(len(edited_message)):
        json_string += edited_message[i]
    # Turns ':=' into ':'. NOTE: this removes EVERY '=' in the text, including
    # any that appear inside values.
    json_string = json_string.replace('=', '')
    
    return json_string

## json_string_to_dict
- description: transform a json string into a json dictionary
- input: string
- output: dictionary

In [139]:
def json_string_to_dict(string):
    """Decode JSON text with ``json.loads`` and return the resulting object."""
    return json.loads(string)

## message_to_dict
- description: transform the message content string extracted from the logs into python dictionary to exploit the data in an easier way
- input: string
- output: dictionary

In [140]:
def message_to_dict(message):
    """Convert a logged message body to a Python object.

    The text is first rewritten as JSON by ``log_format_to_json`` and then
    decoded by ``json_string_to_dict``.
    """
    json_text = log_format_to_json(message)
    return json_string_to_dict(json_text)

## parse_messages
- description: creates a list of Python dictionaries from a list of message contents extracted from the logs
- input: list of string
- output: list of (dictionary, index) tuples (parsed_message_list), list of strings (failed_message_to_dict: the messages that could not be turned into dictionaries; there are only a few)

In [141]:
def parse_messages(message_list):
    """Convert every message body to a Python object, collecting the failures.

    Parameters
    ----------
    message_list : iterable of str
        Message contents as captured from the log entries.

    Returns
    -------
    parsed_message_list : list of tuple
        ``(parsed_object, position)`` pairs, where ``position`` is the
        enumeration index of the message in ``message_list``.
    failed_message_to_dict : list of str
        One string per message that could not be converted: the JSON-like
        text when only the JSON decoding failed, or the raw message when the
        textual rewrite itself failed.
    """
    parsed_message_list = []
    failed_message_to_dict = []
    for i, m in enumerate(message_list):
        # Step 1: textual rewrite. It can raise on malformed input; recording
        # the raw message here avoids re-running the rewrite in the error path,
        # where it would raise again with nothing left to catch it.
        try:
            json_string = log_format_to_json(m)
        except Exception:
            failed_message_to_dict.append(m)
            continue

        # Step 2: JSON decoding. `except Exception` keeps the best-effort
        # behaviour without trapping KeyboardInterrupt or SystemExit.
        try:
            parsed_message_list.append((json_string_to_dict(json_string), i))
        except Exception:
            failed_message_to_dict.append(json_string)
    return parsed_message_list, failed_message_to_dict

# Convert the message_content column of `df`; the stored positions are used by final_dataframe to look rows up in `df`.
parsed_message_list, failed_message_to_dict = parse_messages(df.message_content)

## get_method
- description: extract the method from a message json dictionary 
- input: json dictionary
- output: string

In [142]:
def get_method(message):
    """Return the method stored in a parsed message, or ``""`` when there is none.

    The method is looked up in the first of these locations whose top-level
    key is present:

    * ``message['aspHttp'][0]['httpMessage']['method']``
    * ``message['request']['method']``
    * ``message['aspsSip'][0]['aspRequest']['request']['requestLine']['method']``

    Parameters
    ----------
    message : dict
        Parsed message, as produced by ``message_to_dict``.

    Returns
    -------
    str
        The method, or ``""`` when the message holds no method at one of the
        locations above.
    """
    # Cheap pre-check: skip messages that do not mention 'method' at all.
    if 'method' not in str(message):
        return ""
    try:
        if 'aspHttp' in message:
            return message['aspHttp'][0]['httpMessage']['method']
        if 'request' in message:
            return message['request']['method']
        if 'aspsSip' in message:
            sip_entry = message['aspsSip'][0]
            if 'aspRequest' in sip_entry:
                return sip_entry['aspRequest']['request']['requestLine']['method']
    except (KeyError, IndexError, TypeError):
        # The text 'method' occurs somewhere in the message, but not at the
        # expected location: treat it as "no method" rather than crashing.
        return ""
    return ""

## message_method_list
- description: extracts the list of the methods present in the logs
- input: list of json dictionaries
- output: list of string

## get_description
- description: extract the description from a message  
- input: json dictionary
- output: string

In [143]:
def get_description(message):
    """Return ``message['internalMessage']['description']``, or ``""`` if absent.

    Parameters
    ----------
    message : dict
        Parsed message, as produced by ``message_to_dict``.

    Returns
    -------
    str
        The description, or ``""`` when the message has no
        ``internalMessage`` entry carrying a ``description``.
    """
    # Cheap pre-check: skip messages that do not mention 'description' at all.
    if 'description' not in str(message):
        return ""
    try:
        if 'internalMessage' in message:
            return message['internalMessage']['description']
    except (KeyError, IndexError, TypeError):
        # 'description' occurs elsewhere in the message, not under
        # 'internalMessage': treat it as "no description" rather than crashing.
        return ""
    return ""

In [144]:
def date_format(date_time_str):
    """Parse a log timestamp such as ``'2014/Oct/24 18:34:28.700272'``.

    Returns the corresponding ``datetime.datetime``. The month is matched
    with ``%b``, i.e. the abbreviated month name of the current locale.
    """
    timestamp_layout = '%Y/%b/%d %H:%M:%S.%f'
    parsed = dt.datetime.strptime(date_time_str, timestamp_layout)
    return parsed

## final_dataframe
- description: creates a dataframe of the messages and their characteristics
- input: list of (json dictionary, index) tuples, and the dataframe of sent messages
- output: pandas dataframe

In [145]:
def final_dataframe(parsed_message_list,df):
    """Build the summary DataFrame of the successfully parsed messages.

    Parameters
    ----------
    parsed_message_list : list of tuple
        ``(parsed_message, position)`` pairs as returned by ``parse_messages``;
        ``position`` is used as a label to look the row up in ``df``.
    df : pandas.DataFrame
        Frame of sent messages with ``date``, ``message_sender``,
        ``message_receiver`` and ``message_type`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns: date (parsed with ``date_format``), sender, receiver,
        message_type (the second dot-separated component of the logged type),
        method, description and message (the parsed object itself).
    """
    messages = [parsed for parsed, _ in parsed_message_list]
    positions = [position for _, position in parsed_message_list]

    # Build the table column by column; each list has one item per parsed message.
    columns = {
        'date': [date_format(df.date[position]) for position in positions],
        'sender': [df.message_sender[position] for position in positions],
        'receiver': [df.message_receiver[position] for position in positions],
        'message_type': [df.message_type[position].split('.')[1] for position in positions],
        'method': [get_method(parsed) for parsed in messages],
        'description': [get_description(parsed) for parsed in messages],
        'message': messages,
    }
    return pd.DataFrame(columns)

# `final_df` is the frame that the filtering helpers further down read as a global.
final_df = final_dataframe(parsed_message_list,df)
# Preview the first ten rows.
final_df.head(10)

Unnamed: 0,date,sender,receiver,message_type,method,description,message
0,2014-10-24 18:34:28.684126,849,mtc,internalPortMessageWithAspsSip,,COMPONENT_UP_AND_RUNNING,{'internalMessage': {'description': 'COMPONENT...
1,2014-10-24 18:34:28.695096,mtc,849,internalPortMessageWithAspsSip,,COMMAND_START_LISTENER,{'internalMessage': {'description': 'COMMAND_S...
2,2014-10-24 18:34:28.696199,849,mtc,internalPortMessageWithAspsSip,,COMPONENT_DONE_WITH_SUCCESS,{'internalMessage': {'description': 'COMPONENT...
3,2014-10-24 18:34:28.700272,850,mtc,internalPortMessageWithMsrpMessages,,COMPONENT_UP_AND_RUNNING,{'internalMessage': {'description': 'COMPONENT...
4,2014-10-24 18:34:28.711152,mtc,850,internalPortMessageWithMsrpMessages,,COMMAND_START_LISTENER,{'internalMessage': {'description': 'COMMAND_S...
5,2014-10-24 18:34:28.711886,850,mtc,internalPortMessageWithMsrpMessages,,COMPONENT_DONE_WITH_SUCCESS,{'internalMessage': {'description': 'COMPONENT...
6,2014-10-24 18:34:28.724993,851,system,Listen,,,"{'local_hostname': '142.133.150.169', 'portnum..."
7,2014-10-24 18:34:28.725357,851,mtc,internalPortMessageWithAspHttp,,COMPONENT_UP_AND_RUNNING,{'internalMessage': {'description': 'COMPONENT...
8,2014-10-24 18:34:28.729562,852,system,Listen,,,"{'local_hostname': '142.133.150.169', 'portnum..."
9,2014-10-24 18:34:28.729867,852,mtc,internalPortMessageWithAspHttp,,COMPONENT_UP_AND_RUNNING,{'internalMessage': {'description': 'COMPONENT...


In [146]:
def data_frame_to_excel(file_name, df):
    """Write ``df`` to ``file_name`` through ``df.to_excel``.

    Returns whatever ``to_excel`` returns.
    """
    export_result = df.to_excel(file_name)
    return export_result

# Export the summary table to the workbook at SAVE_XLSX_PATH.
data_frame_to_excel(SAVE_XLSX_PATH, final_df)

In [147]:
def dataframe_messages_by_method(method_list, df=None):
    """Select the rows whose ``method`` equals one of ``method_list``.

    Parameters
    ----------
    method_list : list of str
        Methods to keep, e.g. ``['POST', 'GET']``.
    df : pandas.DataFrame, optional
        Frame to filter; it must have a ``method`` column. Defaults to the
        notebook-level ``final_df``.

    Returns
    -------
    pandas.DataFrame
        The matching rows grouped in the order of ``method_list`` (all rows
        of the first method, then all rows of the second, ...), keeping their
        original index labels. An empty ``method_list`` gives an empty frame.
    """
    source = final_df if df is None else df
    # Seed with an empty frame so an empty method_list still returns an empty
    # DataFrame (pd.concat raises on an empty list), then concatenate once
    # instead of re-copying the accumulated frame on every iteration.
    selections = [pd.DataFrame()]
    selections.extend(source[source.method == method] for method in method_list)
    return pd.concat(selections)

# Example: rows of final_df whose method is POST, followed by those whose method is GET.
method_df = dataframe_messages_by_method(['POST', 'GET'])
method_df.head(10)
    

Unnamed: 0,date,sender,receiver,message_type,method,description,message
10,2014-10-24 18:34:28.743314,mtc,852,internalPortMessageWithAspHttp,POST,COMMAND_HTTP_CLIENT_SEND_GET_ANY_MESSAGE,{'internalMessage': {'description': 'COMMAND_H...
12,2014-10-24 18:34:28.745526,852,system,HTTPMessage,POST,,"{'request': {'client_id': '9', 'method': 'POST..."
14,2014-10-24 18:34:28.781806,mtc,852,internalPortMessageWithAspHttp,POST,COMMAND_HTTP_CLIENT_SEND_GET_ANY_MESSAGE,{'internalMessage': {'description': 'COMMAND_H...
16,2014-10-24 18:34:28.783853,852,system,HTTPMessage,POST,,"{'request': {'client_id': '10', 'method': 'POS..."
20,2014-10-24 18:34:28.815184,mtc,853,internalPortMessageWithAspHttp,POST,COMMAND_HTTP_CLIENT_SEND_GET_ANY_MESSAGE,{'internalMessage': {'description': 'COMMAND_H...
22,2014-10-24 18:34:28.816740,853,system,HTTPMessage,POST,,"{'request': {'client_id': '9', 'method': 'POST..."
36,2014-10-24 18:34:29.027217,mtc,854,internalPortMessageWithAspHttp,POST,COMMAND_HTTP_CLIENT_SEND_GET_ANY_MESSAGE,{'internalMessage': {'description': 'COMMAND_H...
38,2014-10-24 18:34:29.029219,854,system,HTTPMessage,POST,,"{'request': {'client_id': '9', 'method': 'POST..."
50,2014-10-24 18:34:29.249645,mtc,853,internalPortMessageWithAspHttp,POST,COMMAND_HTTP_CLIENT_SEND_GET_ANY_MESSAGE,{'internalMessage': {'description': 'COMMAND_H...
52,2014-10-24 18:34:29.251118,853,system,HTTPMessage,POST,,"{'request': {'client_id': '10', 'method': 'POS..."


In [148]:
def dataframe_messages_by_sender(sender_list, df=None):
    """Select the rows whose ``sender`` equals one of ``sender_list``.

    Parameters
    ----------
    sender_list : list of str
        Senders to keep, e.g. ``['855', 'mtc']``.
    df : pandas.DataFrame, optional
        Frame to filter; it must have a ``sender`` column. Defaults to the
        notebook-level ``final_df``.

    Returns
    -------
    pandas.DataFrame
        The matching rows grouped in the order of ``sender_list``, keeping
        their original index labels. An empty ``sender_list`` gives an empty
        frame.
    """
    source = final_df if df is None else df
    # Seed with an empty frame so an empty sender_list still returns an empty
    # DataFrame (pd.concat raises on an empty list), then concatenate once
    # instead of re-copying the accumulated frame on every iteration.
    selections = [pd.DataFrame()]
    selections.extend(source[source.sender == sender] for sender in sender_list)
    return pd.concat(selections)

# Example: rows of final_df sent by '855', followed by those sent by 'mtc'.
sender_df = dataframe_messages_by_sender(['855', 'mtc'])
sender_df.head(10)

Unnamed: 0,date,sender,receiver,message_type,method,description,message
66,2014-10-24 18:34:29.617120,855,system,Listen,,,"{'local_hostname': '142.133.150.169', 'portnum..."
67,2014-10-24 18:34:29.617446,855,mtc,internalPortMessageWithAspHttp,,COMPONENT_UP_AND_RUNNING,{'internalMessage': {'description': 'COMPONENT...
69,2014-10-24 18:34:29.630479,855,system,Connect,,,"{'hostname': '142.133.142.3', 'portnumber': '8..."
70,2014-10-24 18:34:29.631387,855,system,HTTPMessage,GET,,"{'request': {'client_id': '9', 'method': 'GET'..."
71,2014-10-24 18:34:29.654233,855,mtc,internalPortMessageWithAspHttp,,COMPONENT_DONE_WITH_SUCCESS,{'internalMessage': {'description': 'COMPONENT...
130,2014-10-24 18:34:32.514680,855,system,Connect,,,"{'hostname': '142.133.142.3', 'portnumber': '8..."
131,2014-10-24 18:34:32.515718,855,system,HTTPMessage,GET,,"{'request': {'client_id': '10', 'method': 'GET..."
132,2014-10-24 18:34:32.529661,855,mtc,internalPortMessageWithAspHttp,,COMPONENT_DONE_WITH_SUCCESS,{'internalMessage': {'description': 'COMPONENT...
149,2014-10-24 18:34:33.651476,855,system,Connect,,,"{'hostname': '142.133.142.3', 'portnumber': '8..."
150,2014-10-24 18:34:33.652646,855,system,HTTPMessage,GET,,"{'request': {'client_id': '11', 'method': 'GET..."


In [149]:
def dataframe_messages_by_date(date, df=None):
    """Select the rows whose ``date`` is exactly equal to ``date``.

    Parameters
    ----------
    date : datetime.datetime
        Timestamp to match, e.g. as returned by ``date_format``. The
        comparison is an exact equality, microseconds included.
    df : pandas.DataFrame, optional
        Frame to filter; it must have a ``date`` column. Defaults to the
        notebook-level ``final_df``.

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels.
    """
    source = final_df if df is None else df
    return source[source.date == date]

# Example: look up the message(s) logged at one exact timestamp.
date = date_format('2014/Oct/24 18:34:28.700272')
date_df = dataframe_messages_by_date(date)
date_df.head()


Unnamed: 0,date,sender,receiver,message_type,method,description,message
3,2014-10-24 18:34:28.700272,850,mtc,internalPortMessageWithMsrpMessages,,COMPONENT_UP_AND_RUNNING,{'internalMessage': {'description': 'COMPONENT...


In [150]:
def dataframe_messages_by_message_type(message_type_list, df=None):
    """Select the rows whose ``message_type`` equals one of ``message_type_list``.

    Parameters
    ----------
    message_type_list : list of str
        Message types to keep, e.g. ``['HTTPMessage']``.
    df : pandas.DataFrame, optional
        Frame to filter; it must have a ``message_type`` column. Defaults to
        the notebook-level ``final_df``.

    Returns
    -------
    pandas.DataFrame
        The matching rows grouped in the order of ``message_type_list``,
        keeping their original index labels. An empty ``message_type_list``
        gives an empty frame.
    """
    source = final_df if df is None else df
    # Seed with an empty frame so an empty list still returns an empty
    # DataFrame (pd.concat raises on an empty list), then concatenate once
    # instead of re-copying the accumulated frame on every iteration.
    selections = [pd.DataFrame()]
    selections.extend(source[source.message_type == message_type] for message_type in message_type_list)
    return pd.concat(selections)

# Example: rows of final_df whose message_type is 'HTTPMessage'.
message_type_df = dataframe_messages_by_message_type(['HTTPMessage'])
message_type_df.head()

Unnamed: 0,date,sender,receiver,message_type,method,description,message
12,2014-10-24 18:34:28.745526,852,system,HTTPMessage,POST,,"{'request': {'client_id': '9', 'method': 'POST..."
16,2014-10-24 18:34:28.783853,852,system,HTTPMessage,POST,,"{'request': {'client_id': '10', 'method': 'POS..."
22,2014-10-24 18:34:28.816740,853,system,HTTPMessage,POST,,"{'request': {'client_id': '9', 'method': 'POST..."
38,2014-10-24 18:34:29.029219,854,system,HTTPMessage,POST,,"{'request': {'client_id': '9', 'method': 'POST..."
52,2014-10-24 18:34:29.251118,853,system,HTTPMessage,POST,,"{'request': {'client_id': '10', 'method': 'POS..."
