In [None]:
import pandas as pd
import datetime as dt
import re
import os
import json

In [None]:
"""
CentraStage Agent Log Parser
----------------------------

Parses and analyzes log files from CentraStage/Datto RMM agents. Focuses on extracting DNS activity
and error-related messages to support monitoring, forensic investigations, or anomaly detection.

Main Capabilities:
------------------
- Recursively searches the specified log directories for files whose names contain `log`
- Parses structured log lines using pipe-delimited format
- Extracts and expands JSON-like content from log messages
- Filters for DNS calls to known Concord/CentraStage endpoints
- Extracts errors and exception messages from all logs
- Outputs:
    - Parsed DNS resolution entries (`df_dns_info`)
    - Parsed error entries (`df_errors_info`)
    - Value counts for log activity rates by message type

Use Cases:
----------
- Analyzing agent communication with RMM control servers
- Diagnosing error behavior across logs
- Calculating log traffic volume over time
- Investigating potential downtime, failures, or network changes

Limitations:
------------
- Assumes a consistent pipe-delimited log format
- DNS and error filters are hardcoded (could be extended)
- Some methods lack defensive error checks on malformed logs

Author: Gabe McWilliams
"""


In [None]:
class ParseLogs:
    """Collect DNS-related and error-related lines from agent log files.

    On construction every directory in ``logs_dirs`` is walked, every file
    whose name contains ``'log'`` is scanned, and the matching lines are kept
    in memory.  The ``df_dns_info`` and ``df_errors_info`` properties turn
    those lines into DataFrames on demand.

    Args:
        logs_dirs: Directories to search recursively.  A single path (string
            or ``os.PathLike``) is accepted and treated as a one-element list.
        dns_destinations_list: Host names to look for in log lines.  When
            ``None`` the built-in list of CentraStage endpoints is used.

    Raises:
        OSError: If a discovered log file cannot be opened.
    """

    # Column names for the first five pipe-delimited fields of a log line.
    _LINE_FIELDS = ('agent_ver', 'timestamp', 'message_lvl', 'action', 'module_info')

    # Matches the `"key": "value"` pairs embedded in the module_info field.
    _KEY_VALUE_PATTERN = re.compile(r'\"([^"]+)\":\s*\"([^"]+)\"')

    # Destinations searched for when the caller does not supply a list.
    _DEFAULT_DNS_DESTINATIONS = (
        'concord-monitoring.centrastage.net',
        'update-concord.centrastage.net',
        'concord-agent-comms.centrastage.net',
        'concord-agent.centrastage.net',
        'concord-agent-notifications.centrastage.net',
        '01concordcc.centrastage.net',
        'concord-frontend-api.centrastage.net',
        'concord-realtime.centrastage.net',
        'concord.centrastage.net',
        'concordcc.centrastage.net',
        'concordws.centrastage.net',
        'update-concord-proxy.centrastage.net',
    )

    def __init__(self,
                 logs_dirs: list,
                 dns_destinations_list=None
                 ) -> None:
        # A bare path would otherwise be iterated character by character.
        if isinstance(logs_dirs, (str, os.PathLike)):
            logs_dirs = [logs_dirs]

        if dns_destinations_list is None:
            dns_destinations_list = list(self._DEFAULT_DNS_DESTINATIONS)

        self.__logs_dirs = logs_dirs
        self.__dns_destinations_list = dns_destinations_list

        # Discover the files first; both parsers below read this list.
        self.__log_files_list = self.__logs_list__()

        self.__logs_dns_info = self.__parse_logs_dns_info__()
        self.__logs_errors_info = self.__parse_logs_errors_info__()

    @staticmethod
    def __split_log_line__(line: str) -> dict:
        """Split one pipe-delimited log line into its five named fields.

        The line terminator is removed first.  Fields beyond the fifth are
        ignored, and lines with fewer than five fields are padded with empty
        strings so malformed entries do not raise ``IndexError``.

        Args:
            line: Raw log line.

        Returns:
            Dict with the keys ``agent_ver``, ``timestamp``, ``message_lvl``,
            ``action`` and ``module_info``.
        """
        fields = line.rstrip('\r\n').split('|')
        fields += [''] * (len(ParseLogs._LINE_FIELDS) - len(fields))
        return dict(zip(ParseLogs._LINE_FIELDS, fields))

    @property
    def logs_dirs(self) -> list:
        """Directories that were searched for log files."""
        return self.__logs_dirs

    @property
    def dns_destinations(self) -> list:
        """Host names used to select DNS-related log lines."""
        return self.__dns_destinations_list

    @property
    def log_files(self) -> list:
        """Paths of the log files discovered under ``logs_dirs``."""
        return self.__log_files_list

    def __logs_list__(self) -> list:
        """Return the path of every file under ``logs_dirs`` whose name contains 'log'."""
        log_files_list = []

        for log_loc in self.__logs_dirs:
            for root, _dirs, files in os.walk(log_loc):
                for file in files:
                    if 'log' in file:
                        log_files_list.append(os.path.join(root, file))

        return log_files_list

    def _collect_matching_lines(self, is_match) -> tuple:
        """Scan every discovered log file and keep the lines ``is_match`` accepts.

        Args:
            is_match: Callable taking a raw line and returning a truthy value
                when the line should be kept.

        Returns:
            ``(records, lines_read)`` where ``records`` is a list of dicts with
            the keys ``file``, ``index`` (zero-based line number within the
            file) and ``line`` (raw text), and ``lines_read`` is the total
            number of lines examined.
        """
        records = []
        lines_read = 0

        for path in self.__log_files_list:
            # errors='replace' keeps one undecodable byte from aborting the scan.
            with open(path, 'r', errors='replace') as handle:
                for index, line in enumerate(handle):
                    lines_read += 1
                    if is_match(line):
                        # A fresh dict per hit: reusing one dict would make
                        # every record alias the last matching line.
                        records.append({'file': path, 'index': index, 'line': line})

        return records, lines_read

    def _build_frame(self, records: list) -> pd.DataFrame:
        """Expand raw line records into a DataFrame.

        Each row carries the record's ``file``/``index``/``line``, the five
        split fields, and one extra column per ``"key": "value"`` pair found
        in ``module_info``.  An empty input yields an empty frame that still
        has the fixed columns.
        """
        if not records:
            return pd.DataFrame(columns=['file', 'index', 'line', *self._LINE_FIELDS])

        rows = []
        for record in records:
            row = dict(record)
            row.update(self.__split_log_line__(record['line']))
            row.update(self._KEY_VALUE_PATTERN.findall(row['module_info']))
            rows.append(row)

        return pd.DataFrame(rows)

    @property
    def df_dns_info(self):
        """DataFrame of the log lines that mention a configured DNS destination."""
        return self._build_frame(self.__logs_dns_info)

    def __parse_logs_dns_info__(self) -> list:
        """Collect the lines containing any configured DNS destination.

        A line is recorded once even when it matches several destinations
        (some destinations are substrings of others).  Prints the number of
        files and lines examined.

        Returns:
            List of ``{'file', 'index', 'line'}`` dicts.
        """
        destinations = self.__dns_destinations_list
        dns_info_list, log_lines_parsed = self._collect_matching_lines(
            lambda line: any(dns_dest in line for dns_dest in destinations))

        print(f'Number of log files parsed: {len(self.__log_files_list)}')
        print(f'Number of log lines parse: {log_lines_parsed}')

        return dns_info_list

    @property
    def df_errors_info(self) -> pd.DataFrame:
        """DataFrame of the log lines that contain 'ERROR' or 'exception'."""
        return self._build_frame(self.__logs_errors_info)

    def __parse_logs_errors_info__(self) -> list:
        """Collect the lines containing 'ERROR' or 'exception' (case-sensitive).

        Returns:
            List of ``{'file', 'index', 'line'}`` dicts.
        """
        errors_info_list, _lines_read = self._collect_matching_lines(
            lambda line: 'ERROR' in line or 'exception' in line)

        return errors_info_list

In [None]:
# Directories handed to ParseLogs.  Example Windows locations kept from the
# original notebook (write them as raw strings or with forward slashes so the
# backslashes are not treated as escape sequences):
#     r"C:\ProgramData\CentraStage\AEMAgent\DataLog"
#     r"C:\Program Files (x86)\CentraStage"
logs_dirs = ["D:/example"]

In [None]:
# Build the parser; the constructor walks logs_dirs and scans the log files it finds.
parser = ParseLogs(logs_dirs=logs_dirs)

In [None]:
# DataFrame of the log lines that mention one of the parser's DNS destinations.
df_dns_info = parser.df_dns_info

In [None]:
# Display the DNS DataFrame as the cell output.
df_dns_info

In [None]:
# DataFrame of the log lines that contain 'ERROR' or 'exception'.
df_errors_log_info = parser.df_errors_info

In [None]:
# Count error lines per "Type" value.  The Type column is created from
# `"Type": "..."` pairs found in module_info, so this raises AttributeError
# when no error line carried such a pair.
df_errors_log_info.Type.value_counts()

In [None]:
# Earliest and latest timestamp in the dataset.
# NOTE(review): df_file_log_info is not defined anywhere in this notebook;
# presumably df_dns_info or df_errors_log_info was intended — confirm.
# NOTE(review): .max()/.min() run on the raw timestamp column before
# conversion; if that column holds strings the comparison is lexicographic —
# confirm the timestamp format sorts correctly as text.
# NOTE(review): `max` and `min` shadow the Python builtins for the rest of the
# session; the following cells read these names, so rename them together.
max = pd.to_datetime(df_file_log_info["timestamp"].max())
min = pd.to_datetime(df_file_log_info["timestamp"].min())

print(f"The first time entry was: {min}\nThe last time entry as: {max}")

In [None]:
# Length of the observed window (the `max`/`min` timestamps bound in the
# previous cell), rounded to whole hours and to whole minutes.  Subtracting
# two timestamps already yields a Timedelta, so no extra wrapping is needed.
hours_duration = round((max - min).total_seconds() / 3600)
print(f"The dataset time frame was {hours_duration} hours")
minutes_duration = round((max - min).total_seconds() / 60)
print(f"The dataset time frame was {minutes_duration} minutes")

In [None]:
# One row per distinct module_info value with its number of occurrences.
# NOTE(review): df_file_log_info is not defined anywhere in this notebook —
# confirm which DataFrame was intended.
df_centrastage_value_counts = df_file_log_info["module_info"].value_counts().reset_index()

In [None]:
# Average occurrences of each distinct message per hour and per minute of the
# observed window.  Plain column division replaces the per-element lambda: it
# is vectorised, and when a rounded duration is 0 (a window shorter than half
# the unit) it yields inf instead of raising ZeroDivisionError.
# NOTE(review): the 'count' column name relies on the pandas >= 2.0 naming of
# value_counts().reset_index() — confirm the pandas version in use.
df_centrastage_value_counts['counts_per_hour'] = (
    df_centrastage_value_counts['count'] / hours_duration)
df_centrastage_value_counts['counts_per_minute'] = (
    df_centrastage_value_counts['count'] / minutes_duration)

In [None]:
# Column-wise totals of the value-count table.
# NOTE(review): the frame also holds the text column module_info; summing it
# presumably concatenates the strings — restrict to the numeric columns if
# only the totals are wanted.
df_centrastage_value_counts.sum()

In [None]:
# Display the value-count table, including the per-hour and per-minute rates.
df_centrastage_value_counts