## SCOM Calculation from Logs

In [1]:
import json

# Trace data to analyse. Swap the path to run one of the other inputs:
#   "test_data/scenario1.json", "test_data/scenario2.json", "test_data/scenario3.json"
with open("test_data/insert.json") as file:
    # The context manager closes the handle even if JSON parsing raises.
    result = json.load(file)

result

{'data': [{'traceID': '81b542bfc159ae0337abfc3c2d64d0b7',
   'spans': [{'traceID': '81b542bfc159ae0337abfc3c2d64d0b7',
     'spanID': 'c7649170f4eba7b5',
     'operationName': '/employees/insert',
     'references': [],
     'startTime': 1716794714966901,
     'duration': 66096,
     'tags': [{'key': 'error', 'type': 'bool', 'value': True},
      {'key': 'http.flavor', 'type': 'string', 'value': '1.1'},
      {'key': 'http.host', 'type': 'string', 'value': 'localhost:5001'},
      {'key': 'http.method', 'type': 'string', 'value': 'POST'},
      {'key': 'http.route', 'type': 'string', 'value': '/employees/insert'},
      {'key': 'http.scheme', 'type': 'string', 'value': 'http'},
      {'key': 'http.server_name', 'type': 'string', 'value': '0.0.0.0'},
      {'key': 'http.status_code', 'type': 'int64', 'value': 500},
      {'key': 'http.target', 'type': 'string', 'value': '/employees/insert'},
      {'key': 'http.user_agent',
       'type': 'string',
       'value': 'PostmanRuntime/7.37.3

In [2]:
import re 

# First table-introducing keyword followed by a bare identifier.
# The leading \b stops keywords embedded in identifiers (e.g. columns named
# "valid_from" or "last_update") from being mistaken for SQL keywords.
_TABLE_NAME_PATTERN = re.compile(
    r'\b(?:FROM|JOIN|INSERT\s+INTO|UPDATE|DELETE\s+FROM)\s+([a-zA-Z_][a-zA-Z0-9_]*)',
    re.IGNORECASE,
)


def extract_table_name(sql):
    """Return the first table name referenced by a SQL statement.

    The statement is scanned left to right for FROM, JOIN, INSERT INTO,
    UPDATE or DELETE FROM (case-insensitive); the identifier following the
    first such keyword is returned.

    Args:
        sql: SQL statement text.

    Returns:
        The identifier as a string, or None when no keyword/identifier pair
        is found or when ``sql`` is not a string.

    Note:
        Only plain identifiers (letters, digits, underscore) are recognised;
        for a dotted name such as ``schema.table`` the part before the dot is
        returned, and quoted identifiers are not matched.
    """
    if not isinstance(sql, str):
        return None

    match = _TABLE_NAME_PATTERN.search(sql)
    return match.group(1) if match else None

In [3]:
class Log:
    """One span of the trace data together with the spans it references.

    Attributes are stored exactly as passed to the constructor:
      span_id -- identifier of the span.
      spans   -- list of referenced span dicts; read via their "operationName" key.
      tags    -- list of tag dicts; read via their "key" and "value" entries.
    """

    def __init__(self, span_id, spans, tags):
        self.span_id = span_id
        self.spans = spans
        self.tags = tags

    def __repr__(self):
        return f"Log(span_id={self.span_id}, spans={self.spans}, tags={self.tags})"

    def to_json(self):
        """Return the log as a JSON string indented by two spaces."""
        return json.dumps({
            'spanId': self.span_id,
            'spans': self.spans,
            'tags': self.tags
        }, indent=2)

    def get_operation_name(self):
        """Return the segment after the first "/" of the first referenced span's operation name.

        For example "/employees/insert" yields "employees".

        Returns:
            The segment as a string, or None when there are no referenced
            spans or the operation name contains no "/".
        """
        if not self.spans:
            return None

        # "/employees/insert".split("/") -> ["", "employees", "insert"]
        segments = self.spans[0]["operationName"].split("/")
        if len(segments) < 2:
            # No "/" in the name: there is no path segment to report.
            return None
        return segments[1]

    def get_db_statement(self):
        """Return the values of all "db.statement" tags, or None if there are none."""
        statements = [tag["value"] for tag in self.tags if tag["key"] == "db.statement"]
        return statements or None

    def get_table_name(self):
        """Return the table named by the first "db.statement" tag, or None.

        None is returned when the span has no "db.statement" tag or when
        extract_table_name() finds no table in the statement.
        """
        statements = self.get_db_statement()
        if statements is None:
            return None
        return extract_table_name(statements[0])

In [5]:
# Flatten every span of every trace into a Log record. Each record keeps the
# span's id, the span objects embedded in its references, and its tags.
logs = [
    Log(
        span_id=span["spanID"],
        spans=[reference["span"] for reference in span["references"]],
        tags=span["tags"],
    )
    for trace in result["data"]
    for span in trace["spans"]
]

# Show, per span: id, derived operation name, DB statements, derived table name.
for entry in logs:
    summary = (
        entry.span_id,
        entry.get_operation_name(),
        entry.get_db_statement(),
        entry.get_table_name(),
    )
    print(*summary, sep="\n")

c7649170f4eba7b5
None
None
None
a4eea99d6c296179
employees
['INSERT INTO employees (name, position, start_date) VALUES (%s, %s, %s)']
employees
34bc2bae63d2ca1e
employees
['SELECT * FROM customers']
customers
c488e474bb7cfb21
None
None
None
0c157f54c708ca44
employees
['INSERT INTO employees (name, position, start_date) VALUES (%s, %s, %s)']
employees
ce46a1e08300c29e
None
None
None


In [7]:
# Map each endpoint (operation name) to the distinct tables its spans touch,
# in first-seen order.
grouped_logs = {}

for log in logs:
    operation_name = log.get_operation_name()
    if operation_name is None:
        # No operation name could be derived, so the span cannot be
        # attributed to an endpoint.
        continue

    tables = grouped_logs.setdefault(operation_name, [])

    table_name = log.get_table_name()
    # Skip spans without a recognisable table: recording None would add a
    # pseudo-table that endpoints appear to share in the SCOM computation.
    if table_name is None or table_name in tables:
        continue

    tables.append(table_name)

for operation, group in grouped_logs.items():
    print(f"Operation: {operation}")
    for table in group:
        print(f"  {table}")

Operation: employees
  employees
  customers


In [10]:
def calculateConnectionIntensity(i, j):
    """Return the share of the smaller collection that both collections have in common.

    Both arguments are reduced to sets of distinct elements. The result is
    ``len(common) / min(len(distinct_i), len(distinct_j))``, or the integer 0
    when the two have nothing in common.
    """
    first, second = set(i), set(j)
    shared = first & second

    if not shared:
        return 0

    smaller_size = min(len(first), len(second))
    return len(shared) / smaller_size

def scom(apis, number_of_tables):
    """Compute the SCOM value for a set of endpoints from the tables they use.

    For every unordered pair of endpoints the connection intensity (see
    calculateConnectionIntensity) is multiplied by the pair's weight, i.e. the
    number of distinct tables used by either endpoint divided by
    ``number_of_tables``. The weighted values are summed and divided by the
    number of pairs, ``n * (n - 1) / 2``.

    Args:
        apis: mapping of endpoint name to an iterable of table names.
        number_of_tables: divisor for the pair weight; must be non-zero when
            at least two endpoints are given.

    Returns:
        The SCOM value as a float, or the string "Too few endpoints" when
        fewer than two endpoints are supplied.

    Side effects:
        Prints ``number_of_tables`` to stdout.
    """
    # Local import so this definition does not depend on another cell's imports.
    from itertools import combinations

    print(f"Tabellenanzahl: {number_of_tables}")

    n_of_apis = len(apis)
    if n_of_apis <= 1:
        return "Too few endpoints"

    total_weighted_connections = 0

    # combinations() yields each unordered pair of endpoints exactly once, so
    # no bookkeeping of processed pairs (and no sortable keys) is needed.
    for api1, api2 in combinations(apis, 2):
        tables1, tables2 = apis[api1], apis[api2]
        connection_intensity = calculateConnectionIntensity(tables1, tables2)
        n_involved_tables = len(set(tables1).union(tables2))
        weight = n_involved_tables / number_of_tables
        total_weighted_connections += connection_intensity * weight

    return total_weighted_connections / (n_of_apis * (n_of_apis - 1) / 2)

In [11]:
# Divisor for the pair weights in scom(); presumably the total number of
# tables in the analysed schema -- TODO confirm.
NUMBER_OF_TABLES = 4

scom_value = scom(grouped_logs, NUMBER_OF_TABLES)
print(f"SCOM for {grouped_logs}: {scom_value}")

Tabellenanzahl: 4
SCOM for {'employees': ['employees', 'customers']}: Too few endpoints


In [38]:
# Insert, update, delete, ...? 

In [39]:
# Get number of involved endpoints
#n_apis = sum(1 for operation, group in grouped_logs.items() if operation is not None)
#print(f"Number of involved endpoints: {n_apis}")