In [1]:
'''Python 測驗 task 3 參考解答 (基礎版)
'''

import json

def analyze_gcp_logs(filename: str, service: str = None) -> None:
    '''analyze the log file and aggregate the servities of services
    filename - log file name in JSON
    service  - specific service name to be filtered
    return   - None

    thoughts - 1. 設計 log 資料以 dict 儲存，才能達到動態增加 service_name 與 severity 的彈性
                  ex: services = {'pauca': {'INFO': 1}, 'wt': {'NOTICE': 1}, ...}
               2. 為求效率，for 迴圈掃描 log 只能進行一次，同時只處理參數指定的 service_name,
                  若未指定 service_name 則所有 service_name 皆保留。
               3. 輸出時才依據各 service_name 對應到的 serverity 名稱與數量計算須顯示的最大長度，
                  順道依據名稱排序，達成輸出可讀性。
               4. 防呆並強化 function，避免輸入錯誤導致 function 執行報錯，包含開檔失敗的錯誤攔截
                  以及 log 格式缺失過濾。
                  a. 開檔錯誤以 try-except 攔截，被攔截即結束整個 function 但加入提示輸出
                  b. log 格式以正面濾除方式處理，故須善用 dict.get(, {}) 而非 dict subscription，
                     ex: services['wt']。格式缺失的 log 即忽略。
    '''
    try:
        with open(filename, encoding='utf-8') as f:
            logs = json.load(f)
    except:
        print('bad log file')
        return
    
    services = {}  # to keep severity info of services, ex: {'pauca': {'INFO': 1}, 'wt': {'NOTICE': 1}, ...}
    
    # count severities of services
    for log in logs:    
        service_name = log.get('resource', {}).get('labels', {}).get('service_name')  # get default {} may ease the error handling
        severity = log.get('severity')
    
        # fill in service severity info if both service_name and severity exist, and service_name matches the assigned one
        if service_name and severity and (not service or service == service_name):
            if service_name in services:
                services[service_name][severity] = services[service_name].get(severity, 0) + 1
            else:
                services[service_name] = {severity: 1}
        
    # output
    if services:
        severities = sorted({severity for service_name in services for severity in services[service_name]})  # sorted severities
        max_service_len = max(len(service_name) for service_name in services)  # max service_name len
        max_count_len = max(len(str(count)) for service_name in services for count in services[service_name].values())  # max count len
        
        for service_name in services:
            values = [f'{severity}: {services[service_name].get(severity, 0):{max_count_len}}' for severity in severities]
            print(f'{service_name:{max_service_len}} {'/ '.join(values)}')
    else:
        print('no log be filtered')

In [2]:
help(analyze_gcp_logs)

Help on function analyze_gcp_logs in module __main__:

analyze_gcp_logs(filename: str, service: str = None) -> None
    analyze the log file and aggregate the servities of services
    filename - log file name in JSON
    service  - specific service name to be filtered
    return   - None

    thoughts - 1. 設計 log 資料以 dict 儲存，才能達到動態增加 service_name 與 severity 的彈性
                  ex: services = {'pauca': {'INFO': 1}, 'wt': {'NOTICE': 1}, ...}
               2. 為求效率，for 迴圈掃描 log 只能進行一次，同時只處理參數指定的 service_name,
                  若未指定 service_name 則所有 service_name 皆保留。
               3. 輸出時才依據各 service_name 對應到的 serverity 名稱與數量計算須顯示的最大長度，
                  順道依據名稱排序，達成輸出可讀性。
               4. 防呆並強化 function，避免輸入錯誤導致 function 執行報錯，包含開檔失敗的錯誤攔截
                  以及 log 格式缺失過濾。
                  a. 開檔錯誤以 try-except 攔截，被攔截即結束整個 function 但加入提示輸出
                  b. log 格式以正面濾除方式處理，故須善用 dict.get(, {}) 而非 dict subscription，
                     ex: services['wt']。格式缺失的 log 即忽略。



In [3]:
analyze_gcp_logs('serviceslogs.json')



In [4]:
analyze_gcp_logs('serviceslogs.json', 'pauca')



In [5]:
analyze_gcp_logs('aaa.json')

bad log file


In [6]:
analyze_gcp_logs('serviceslogs.json', 'test')

no log be filtered


In [7]:
'''Python 測驗 task 3 參考解答 (進階版)
基於基礎版微調為允許過濾用的 service 可以為多組
'''

import json

def analyze_gcp_logs_adv(filename: str, *service: str) -> None:
    '''analyze the log file and aggregate the servities of services
    filename - log file name in JSON
    service  - specific (multiple) service names to be filtered
    return   - None

    thoughts - 1. 設計 log 資料以 dict 儲存，才能達到動態增加 service_name 與 severity 的彈性
                  ex: services = {'pauca': {'INFO': 1}, 'wt': {'NOTICE': 1}, ...}
               2. 為求效率，for 迴圈掃描 log 只能進行一次，同時只處理參數指定的 service_name,
                  若未指定 service_name 則所有 service_name 皆保留。
               3. 輸出時才依據各 service_name 對應到的 serverity 名稱與數量計算須顯示的最大長度，
                  順道依據名稱排序，達成輸出可讀性。
               4. 防呆並強化 function，避免輸入錯誤導致 function 執行報錯，包含開檔失敗的錯誤攔截
                  以及 log 格式缺失過濾。
                  a. 開檔錯誤以 try-except 攔截，被攔截即結束整個 function 但加入提示輸出
                  b. log 格式以正面濾除方式處理，故須善用 dict.get(, {}) 而非 dict subscription，
                     ex: services['wt']。格式缺失的 log 即忽略。
    '''
    try:
        with open(filename, encoding='utf-8') as f:
            logs = json.load(f)
    except:
        print('bad log file')
        return
    
    services = {}  # to keep severity info of services, ex: {'pauca': {'INFO': 1}, 'wt': {'NOTICE': 1}, ...}
    
    # count severity of services
    for log in logs:    
        service_name = log.get('resource', {}).get('labels', {}).get('service_name')  # get default {} may ease the error handling
        severity = log.get('severity')
    
        # fill in service severity info if both service_name and severity exist, and service_name matches the assigned one
        if service_name and severity and (not service or service_name in service):
            if service_name in services:
                services[service_name][severity] = services[service_name].get(severity, 0) + 1
            else:
                services[service_name] = {severity: 1}
        
    # output
    if services:
        severities = sorted({severity for service_name in services for severity in services[service_name]})  # sorted severities
        max_service_len = max(len(service_name) for service_name in services)  # max service_name len
        max_count_len = max(len(str(count)) for service_name in services for count in services[service_name].values())  # max count len
        
        for service_name in services:
            values = [f'{severity}: {services[service_name].get(severity, 0):{max_count_len}}' for severity in severities]
            print(f'{service_name:{max_service_len}} {'/ '.join(values)}')
    else:
        print('no log be filtered')

In [8]:
analyze_gcp_logs_adv('serviceslogs.json')



In [9]:
analyze_gcp_logs_adv('serviceslogs.json', 'pauca', 'wt')

