# OpenShift Log Analysis

This notebook analyzes the logs of an OpenShift cluster. It queries the cluster's logs and runs simple checks: the number of log entries, the number of services, the number of JSON messages, JSON schema frequency, the number of replicas, common patterns in JSON and non-JSON messages, and more.

In [None]:
from utils import *

# Query settings handed to fetch_loki_logs. All values are passed as strings.
# NOTE(review): start/end look like Unix epoch seconds - confirm against utils.
query = '{cluster_log_level="app-logs", openshift_cluster="moc/smaug"} |= "error"'
start = "1652736197"
end = "1652746829"
limit = "5000"

# Tenant header value and bearer token; the token is intentionally left blank
# here and must be filled in before running the cell.
x_scope_org_id = "cluster-app-logs"
token = ""

data = fetch_loki_logs(token, x_scope_org_id, query, start, end, limit)
print(f"{len(data)} entries found")

# One summary line per returned stream: its labels and how many values it holds.
for stream in data:
    labels = stream['stream']
    namespace = labels.get('k8s_namespace_name', 'unknown')
    log_level = labels['cluster_log_level']
    cluster = labels['openshift_cluster']
    lnx = len(stream['values'])
    print(f"Log level: {log_level}\tCluster: {cluster}\tEntries: {lnx}\tNamespace: {namespace}")

In [None]:
# Imported explicitly: the cell previously relied on `from utils import *`
# happening to re-export `json`.
import json

# Per-key statistics over all JSON-object messages:
#   key -> {'type': type name of the first value seen,
#           'count': number of messages containing the key,
#           'example': the first value seen}
json_schema_frequency = {}
# Messages that are not JSON objects, grouped by namespace.
non_json_info = {}
# Parsed JSON-object messages, grouped by namespace.
json_logs = {}
# Every 'level' value found in a JSON-object message (duplicates kept).
levels = []

json_count = 0
non_json_count = 0

for stream in data:
    namespace = stream['stream'].get('k8s_namespace_name', 'unknown')

    for val_list in stream['values']:
        # The second element of each entry is a JSON envelope whose 'message'
        # field carries the actual log line. A malformed envelope still raises,
        # as before.
        value = json.loads(val_list[1])
        message = value['message']

        # Only the parse itself is guarded. ValueError covers JSONDecodeError;
        # TypeError covers a 'message' that is not str/bytes.
        try:
            json_message = json.loads(message)
        except (ValueError, TypeError):
            json_message = None

        # Valid JSON that is not an object (number, string, list, null) has no
        # keys to analyse, so it is counted as non-JSON. The old bare `except`
        # reached the same result only via an accidental AttributeError.
        if not isinstance(json_message, dict):
            non_json_info.setdefault(namespace, []).append(message)
            non_json_count += 1
            continue

        for key, field in json_message.items():
            entry = json_schema_frequency.get(key)
            if entry is None:
                # First sighting: record the type and an example from the
                # already-parsed value instead of re-parsing the message.
                json_schema_frequency[key] = {
                    'type': type(field).__name__,
                    'count': 1,
                    'example': field,
                }
            else:
                entry['count'] += 1

        json_count += 1
        json_logs.setdefault(namespace, []).append(json_message)

        if 'level' in json_message:
            levels.append(json_message['level'])

# json_count counts messages, not keys, so the label says so.
print(f"{non_json_count} non-json messages, {json_count} json messages")

In [None]:
# Dump the collected schema, one comma-separated row per key:
# key,type,count,example
for key, stats in json_schema_frequency.items():
    row = f"{key},{stats['type']},{stats['count']},{stats['example']}"
    print(row)

In [None]:
# Preview the parsed JSON messages: for every namespace print its name,
# then at most the first 10 messages, each followed by blank lines.
for key, entries in json_logs.items():
    print(f"{key}")
    preview = entries[:10]
    for log in preview:
        print(f"{log}\n\n")

In [None]:
import random

# The (at most) 10 namespaces holding the most non-JSON messages. Selecting by
# key rather than by list length keeps the result at 10 even when several
# namespaces have the same size; ties go to the namespace seen first, because
# sorted() is stable.
largest = set(
    sorted(non_json_info, key=lambda ns: len(non_json_info[ns]), reverse=True)[:10]
)

# Print in the dictionary's own order, as before.
for key in non_json_info:
    if key not in largest:
        continue
    print(key)
    messages = non_json_info[key]
    # random.sample draws up to 10 messages without reordering the stored
    # list, so re-running the cell no longer mutates non_json_info.
    picked = random.sample(messages, min(10, len(messages)))
    # str() keeps the join from failing if a stored message is not a string.
    print('\n'.join(str(m) for m in picked))