## SCOM Calculation from Logs

In [1]:
import json
from itertools import combinations

# Load the exported trace dump. The context manager closes the file even when
# json.load raises, and the explicit encoding avoids the platform default.
with open("test_data/teastore.json", encoding="utf-8") as file:
    result = json.load(file)

result

{'data': [{'traceID': '3ee079f95d3dae629243ab7e6ca60419',
   'spans': [{'traceID': '3ee079f95d3dae629243ab7e6ca60419',
     'spanID': '717afc7e0c4f850e',
     'operationName': '/tools.descartes.teastore.registry/rest/*',
     'references': [{'refType': 'CHILD_OF',
       'traceID': '3ee079f95d3dae629243ab7e6ca60419',
       'spanID': '3b0c3f6e7ac4096e',
       'span': {'traceID': '3ee079f95d3dae629243ab7e6ca60419',
        'spanID': '3b0c3f6e7ac4096e',
        'operationName': 'HTTP GET',
        'references': [],
        'startTime': 1717164379570504,
        'duration': 4342,
        'tags': [{'key': 'http.flavor', 'type': 'string', 'value': '1.1'},
         {'key': 'http.method', 'type': 'string', 'value': 'GET'},
         {'key': 'http.status_code', 'type': 'int64', 'value': 200},
         {'key': 'http.url',
          'type': 'string',
          'value': 'http://registry:8080/tools.descartes.teastore.registry/rest/services/tools.descartes.teastore.persistence/'},
         {'key': 

### Extract table names

In [2]:
import re

# One capture group per keyword. The optional quote characters sit OUTSIDE the
# groups so that `employees`, "employees" and employees all yield the same
# bare name. The DELETE FROM branch overlaps with the FROM branch; it is kept
# so the group layout (five groups, one per keyword) stays unchanged.
table_name_pattern = re.compile(
    r"""
    \bFROM\s+[`'"]?([a-zA-Z_][\w$]*)[`'"]?           |  # SELECT ... FROM t
    \bJOIN\s+[`'"]?([a-zA-Z_][\w$]*)[`'"]?           |  # ... JOIN t
    \bINTO\s+[`'"]?([a-zA-Z_][\w$]*)[`'"]?           |  # INSERT INTO t
    \bUPDATE\s+[`'"]?([a-zA-Z_][\w$]*)[`'"]?         |  # UPDATE t
    \bDELETE\s+FROM\s+[`'"]?([a-zA-Z_][\w$]*)[`'"]?     # DELETE FROM t
    """,
    re.IGNORECASE | re.VERBOSE,
)


def extract_table_names(sql):
    """Return the table names referenced in a SQL statement, in order of appearance.

    A name is the identifier that directly follows FROM, JOIN, INTO, UPDATE or
    DELETE FROM (case-insensitive). Surrounding backticks or quotes are not
    part of the returned name.

    Limitations of the regex: only the first identifier after each keyword is
    captured, so "FROM a, b" yields only "a" and "FROM schema.tbl" yields
    "schema".

    Args:
        sql: SQL statement as a string.

    Returns:
        List of table names; empty when nothing matches. Duplicates are kept.
    """
    # findall yields one 5-tuple per match with exactly one non-empty slot,
    # e.g. [('employees', '', '', '', ''), ('', 'customers', '', '', '')].
    # Flatten the tuples and drop the empty slots.
    return [
        name
        for groups in table_name_pattern.findall(sql)
        for name in groups
        if name
    ]

# Smoke test: one sample statement per supported keyword.
sql_statements = [
    "SELECT name, email FROM employees;",
    "SELECT * FROM employees e JOIN customers c ON e.id = c.employee_id;",
    "INSERT INTO orders (customer_id, product_id, order_date) VALUES (1, 2, '2024-05-25');",
    "UPDATE employees SET position = 'Manager' WHERE id = 1;",
    "DELETE FROM customers WHERE id = 1;",
]

for statement in sql_statements:
    found_tables = extract_table_names(statement)
    # The trailing "\n" leaves a blank line between the entries.
    print(f"SQL: {statement}", f"Tables: {found_tables}\n", sep="\n")


SQL: SELECT name, email FROM employees;
Tables: ['employees']

SQL: SELECT * FROM employees e JOIN customers c ON e.id = c.employee_id;
Tables: ['employees', 'customers']

SQL: INSERT INTO orders (customer_id, product_id, order_date) VALUES (1, 2, '2024-05-25');
Tables: ['orders']

SQL: UPDATE employees SET position = 'Manager' WHERE id = 1;
Tables: ['employees']

SQL: DELETE FROM customers WHERE id = 1;
Tables: ['customers']



### Parse JSON Input

In [92]:
class Log:
    """A span's tags paired with the tags of one span it references.

    Both tag collections are iterated as dicts that carry a "key" and a
    "value" entry.
    """

    def __init__(self, span_id, reference_tags, tags):
        """Store the span id, the referenced span's tags and the span's own tags."""
        self.span_id = span_id
        self.reference_tags = reference_tags
        self.tags = tags

    def __repr__(self):
        return f"Log(span_id={self.span_id}, reference_tags={self.reference_tags}, tags={self.tags})"

    def to_json(self):
        """Serialise the log as a JSON string indented by two spaces."""
        return json.dumps({
            'spanId': self.span_id,
            'reference_tags': self.reference_tags,
            'tags': self.tags
        }, indent=2)

    def get_endpoint_name(self):
        """Return the value of the first "http.target" reference tag, or None."""
        return next(
            (tag["value"] for tag in self.reference_tags if tag["key"] == "http.target"),
            None,
        )

    def get_db_statement(self):
        """Return the values of all "db.statement" tags, or None when there are none."""
        statements = [tag["value"] for tag in self.tags if tag["key"] == "db.statement"]
        return statements if statements else None

    def get_table_names(self):
        """Return the table names used by this span's SQL statements.

        Every "db.statement" tag is inspected (not only the first one) and the
        names are concatenated in tag order; duplicates are kept.

        Returns:
            List of table names, or None when the span has no "db.statement" tag.
        """
        statements = self.get_db_statement()
        if statements is None:
            return None

        return [
            name
            for statement in statements
            for name in extract_table_names(statement)
        ]

In [93]:
# Build one Log per (span, reference) pair. Only references that embed the
# referenced span under "span" are usable, because the endpoint name is read
# from that span's tags.
logs = []
for data in result["data"]:
    for span in data["spans"]:
        for reference in span["references"]:
            if "span" in reference:
                logs.append(
                    Log(
                        span_id=span["spanID"],
                        reference_tags=reference["span"]["tags"],
                        tags=span["tags"],
                    )
                )

# Show endpoint and tables for every log that carries a db.statement tag.
for log in logs:
    table_names = log.get_table_names()  # evaluate once, not once per use
    if table_names is not None:
        print(log.get_endpoint_name())
        print(table_names)


/tools.descartes.teastore.persistence/rest/categories
['PERSISTENCECATEGORY']
/tools.descartes.teastore.persistence/rest/orders
['PERSISTENCEORDER']
/tools.descartes.teastore.persistence/rest/orderitems
['PERSISTENCEORDERITEM']
/tools.descartes.teastore.persistence/rest/orderitems
['PERSISTENCEORDERITEM']
/tools.descartes.teastore.persistence/rest/categories
['PERSISTENCECATEGORY']


In [95]:
# Map each endpoint name to the distinct tables it touches, in first-seen order.
grouped_logs = {}

for log in logs:
    endpoint_name = log.get_endpoint_name()
    if endpoint_name is None:
        continue

    # Register the endpoint even when it touches no table: it still counts as
    # an endpoint when the SCOM value is averaged over all endpoint pairs.
    endpoint_tables = grouped_logs.setdefault(endpoint_name, [])

    for name in log.get_table_names() or []:
        if name not in endpoint_tables:
            endpoint_tables.append(name)

for operation, tables in grouped_logs.items():
    print(f"Operation: {operation}")
    for table in tables:
        print(f"  {table}")

print(grouped_logs)

Operation: /tools.descartes.teastore.webui/cartAction
Operation: /tools.descartes.teastore.persistence/rest/categories
  PERSISTENCECATEGORY
Operation: /tools.descartes.teastore.persistence/rest/orders
  PERSISTENCEORDER
Operation: /tools.descartes.teastore.persistence/rest/orderitems
  PERSISTENCEORDERITEM
{'/tools.descartes.teastore.webui/cartAction': [], '/tools.descartes.teastore.persistence/rest/categories': ['PERSISTENCECATEGORY'], '/tools.descartes.teastore.persistence/rest/orders': ['PERSISTENCEORDER'], '/tools.descartes.teastore.persistence/rest/orderitems': ['PERSISTENCEORDERITEM']}


### Calculate SCOM

In [97]:
def calculateConnectionIntensity(i, j):
    """Return how strongly two collections of table names overlap.

    The result is the number of distinct shared names divided by the number of
    distinct names in the smaller collection.

    Args:
        i: Table names of the first endpoint.
        j: Table names of the second endpoint.

    Returns:
        0 when nothing is shared (including when either side is empty),
        otherwise a float in (0, 1].
    """
    tables_i = set(i)
    tables_j = set(j)
    shared = tables_i & tables_j

    # An empty intersection also covers empty inputs, so the division below
    # never sees a zero denominator.
    if not shared:
        return 0

    smaller_size = min(len(tables_i), len(tables_j))
    return len(shared) / smaller_size

def scom(apis, number_of_tables=None):
    """Compute the SCOM value for a mapping of endpoints to table names.

    For every unordered pair of endpoints the connection intensity is
    multiplied by a weight, and the sum is divided by the number of pairs
    n * (n - 1) / 2.

    Args:
        apis: Dict mapping an endpoint name to a list of table names.
        number_of_tables: Optional total number of tables. When omitted, the
            weight of a pair is the raw number of distinct tables the two
            endpoints touch together. When given, the weight is that count
            divided by number_of_tables.

    Returns:
        The SCOM value as a float, or the string "Too few endpoints" when
        fewer than two endpoints are given.

    Raises:
        ValueError: If number_of_tables is given but not positive.
    """
    n_of_apis = len(apis)
    if n_of_apis <= 1:
        return "Too few endpoints"
    if number_of_tables is not None and number_of_tables <= 0:
        raise ValueError("number_of_tables must be a positive number")

    total_weighted_connections = 0

    # combinations() yields every unordered pair exactly once, in key order,
    # so no bookkeeping of already processed pairs is needed.
    for api1, api2 in combinations(apis, 2):
        tables1, tables2 = apis[api1], apis[api2]
        connection_intensity = calculateConnectionIntensity(tables1, tables2)
        n_involved_tables = len(set(tables1) | set(tables2))

        if number_of_tables is None:
            weight = n_involved_tables
        else:
            weight = n_involved_tables / number_of_tables

        total_weighted_connections += connection_intensity * weight

    return total_weighted_connections / (n_of_apis * (n_of_apis - 1) / 2)

In [98]:
# Compute the score first so the print line only formats.
scom_value = scom(grouped_logs)
print(f"SCOM for {grouped_logs}: {scom_value}")

SCOM for {'/tools.descartes.teastore.webui/cartAction': [], '/tools.descartes.teastore.persistence/rest/categories': ['PERSISTENCECATEGORY'], '/tools.descartes.teastore.persistence/rest/orders': ['PERSISTENCEORDER'], '/tools.descartes.teastore.persistence/rest/orderitems': ['PERSISTENCEORDERITEM']}: 0.0
