In [1]:
import gzip
import json
import datetime
import requests

### 日志内容概览

In [2]:
# Read the whole gzip-compressed log as ASCII text. The context manager closes
# the file handle as soon as the content has been read.
with gzip.open('DevOps_interview_data_set.gz', 'rb') as logFile:
    rawText = logFile.read().decode('ascii')

# A newline followed by a tab is removed first, so such a line is joined onto
# the preceding one; the text is then split into one string per entry.
lines = rawText.replace('\n\t', '').splitlines()

In [3]:
# Show how many entries the loaded log was split into.
len(lines)

2038

In [4]:
# Display the first entry to see what a raw log line looks like.
lines[0]

'May 13 00:01:58 BBAOMACBOOKAIR2 com.apple.xpc.launchd[1] (com.apple.mdworker.bundles[12513]): Could not find uid associated with service: 0: Undefined error: 0 501'

In [5]:
# Display the final entry, kept as a one-element list (empty if there are no lines).
lines[-1:]

['May 13 23:58:26 BBAOMACBOOKAIR2 com.apple.xpc.launchd[1] (com.apple.mdworker.bundles[56381]): Service exited with abnormal code: 78']

### 格式化日志

In [6]:
class Log:
    """Plain attribute container for one parsed log entry.

    The parser below sets, in this order: time, hour, description,
    deviceName, processName, processId.
    """
    pass


# Marker of a repeat-summary line, e.g.
# "May 13 00:22:49 --- last message repeated 1 time ---".
# Matching the prefix only means any repeat count is recognised, not just "1 time".
REPEAT_MARKER = '--- last message repeated'

# The timestamps carry no year. strptime would otherwise default to 1900 and
# reject "Feb 29"; any leap year works here because only the hour is kept.
PLACEHOLDER_LEAP_YEAR = '2000'


def parseLogLine(line):
    """Parse one raw log line into a Log, or return None for a repeat-summary line.

    Expected layout:
    May 13 00:19:59 BBAOMACBOOKAIR2 com.apple.xpc.launchd[1] (com.apple.mdworker.bundles[12556]): Service exited with abnormal code: 78

    Raises:
        ValueError: if the timestamp or the bracketed process id cannot be parsed.
    """
    # Repeat-summary lines have no device/process part; they are skipped.
    # NOTE(review): skipped repeats are therefore not added to any occurrence
    # count downstream -- confirm that this is the intended treatment.
    if REPEAT_MARKER in line:
        return None

    log = Log()

    # "May 13 00:19:59" -- the timestamp is the first 15 characters.
    log.time = line[0:15]
    log.hour = datetime.datetime.strptime(
        PLACEHOLDER_LEAP_YEAR + ' ' + log.time, '%Y %b %d %H:%M:%S').hour

    # Everything after the timestamp and the space that follows it.
    logWithoutTime = line[16:]

    # The first colon separates the "device process[pid] (...)" header from the message.
    index = logWithoutTime.find(':')
    log.description = logWithoutTime[index+1:].strip(' ')

    # BBAOMACBOOKAIR2 com.apple.xpc.launchd[1] (com.apple.mdworker.bundles[12556])
    header = logWithoutTime[0:index]

    # The device name is the first space-delimited token of the header.
    deviceIndex = header.find(' ')
    log.deviceName = header[0:deviceIndex]

    # The process name runs up to the first '[', and the pid sits inside the
    # first "[...]" pair.
    left = header.find('[')
    log.processName = header[deviceIndex:left].strip(' ')
    right = header.find(']')
    log.processId = int(header[left+1:right])

    return log


# Rebuilt from scratch on every run, so re-executing this cell never duplicates entries.
logs = [log for log in map(parseLogLine, lines) if log is not None]

In [7]:
# Pretty-print the first ten parsed entries as JSON to check the parser output.
previewEntries = [vars(entry) for entry in logs[:10]]
jsonStr = json.dumps(previewEntries, indent=4)
print(jsonStr)

[
    {
        "time": "May 13 00:01:58",
        "hour": 0,
        "description": "Could not find uid associated with service: 0: Undefined error: 0 501",
        "deviceName": "BBAOMACBOOKAIR2",
        "processName": "com.apple.xpc.launchd",
        "processId": 1
    },
    {
        "time": "May 13 00:01:58",
        "hour": 0,
        "description": "Service exited with abnormal code: 78",
        "deviceName": "BBAOMACBOOKAIR2",
        "processName": "com.apple.xpc.launchd",
        "processId": 1
    },
    {
        "time": "May 13 00:02:12",
        "hour": 0,
        "description": "Failed to bootstrap path: path = /usr/libexec/mdmclient, error = 108: Invalid path",
        "deviceName": "BBAOMACBOOKAIR2",
        "processName": "com.apple.xpc.launchd",
        "processId": 1
    },
    {
        "time": "May 13 00:04:20",
        "hour": 0,
        "description": "ASL Sender Statistics",
        "deviceName": "BBAOMACBOOKAIR2",
        "processName": "syslogd",
        "

### 分析日志

In [8]:
# Map each hour of the day (0-23) to its label "HH00-HH00", where the second
# HH is the following hour, e.g. 0 -> '0000-0100' and 23 -> '2300-2400'.
timeWindows = {
    hour: '{:02d}00-{:02d}00'.format(hour, hour + 1)
    for hour in range(24)
}

timeWindows

{0: '0000-0100',
 1: '0100-0200',
 2: '0200-0300',
 3: '0300-0400',
 4: '0400-0500',
 5: '0500-0600',
 6: '0600-0700',
 7: '0700-0800',
 8: '0800-0900',
 9: '0900-1000',
 10: '1000-1100',
 11: '1100-1200',
 12: '1200-1300',
 13: '1300-1400',
 14: '1400-1500',
 15: '1500-1600',
 16: '1600-1700',
 17: '1700-1800',
 18: '1800-1900',
 19: '1900-2000',
 20: '2000-2100',
 21: '2100-2200',
 22: '2200-2300',
 23: '2300-2400'}

In [9]:
class Result:
    """Plain attribute container for one aggregated row.

    The loop below sets, in this order: timeWindow, deviceName, processId,
    processName, description, numberOfOccurrence.
    """
    pass

# Aggregated rows, in order of first appearance in `logs`.
results = []

# Lookup from grouping key to its Result. It is deliberately not named `dict`,
# which would shadow the built-in type for every later cell in the kernel.
resultByKey = {}
for log in logs:
    timeWindow = timeWindows[log.hour]
    # A tuple key keeps the fields separate, so a '|' (or any other character)
    # inside a field can never make two different groups collide.
    key = (timeWindow, log.deviceName, log.processId, log.processName, log.description)
    result = resultByKey.get(key)
    if result is None:
        # First time this combination is seen: create its row with a zero count.
        result = Result()
        result.timeWindow = timeWindow
        result.deviceName = log.deviceName
        result.processId = log.processId
        result.processName = log.processName
        result.description = log.description
        result.numberOfOccurrence = 0
        resultByKey[key] = result
        results.append(result)
    result.numberOfOccurrence += 1


In [10]:
# Pretty-print the first ten aggregated rows as JSON for a quick visual check.
previewRows = [vars(row) for row in results[:10]]
jsonStr = json.dumps(previewRows, indent=4)
print(jsonStr)

[
    {
        "timeWindow": "0000-0100",
        "deviceName": "BBAOMACBOOKAIR2",
        "processId": 1,
        "processName": "com.apple.xpc.launchd",
        "description": "Could not find uid associated with service: 0: Undefined error: 0 501",
        "numberOfOccurrence": 17
    },
    {
        "timeWindow": "0000-0100",
        "deviceName": "BBAOMACBOOKAIR2",
        "processId": 1,
        "processName": "com.apple.xpc.launchd",
        "description": "Service exited with abnormal code: 78",
        "numberOfOccurrence": 17
    },
    {
        "timeWindow": "0000-0100",
        "deviceName": "BBAOMACBOOKAIR2",
        "processId": 1,
        "processName": "com.apple.xpc.launchd",
        "description": "Failed to bootstrap path: path = /usr/libexec/mdmclient, error = 108: Invalid path",
        "numberOfOccurrence": 1
    },
    {
        "timeWindow": "0000-0100",
        "deviceName": "BBAOMACBOOKAIR2",
        "processId": 113,
        "processName": "syslogd",
      

### 上传分析结果

In [11]:
headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}

# Serialize every aggregated row for the upload. `jsonStr` only holds the
# ten-row preview printed earlier, so posting it would send a truncated result set.
payload = json.dumps([result.__dict__ for result in results], indent=4)

# Without a timeout, requests waits indefinitely if the server never answers.
res = requests.post('http://localhost', headers = headers, data = payload, timeout = 30)
res

<Response [200]>