In [18]:
import pandas as pd
import numpy as np
import os

In [None]:
from datetime import datetime
import pytz
import time

tz = pytz.timezone('Asia/Shanghai')
def ts_to_date(timestamp):
    return datetime.fromtimestamp(timestamp//1000, tz).strftime('%Y-%m-%d')

def time_to_ts(ctime):
    try:
        timeArray = time.strptime(ctime, '%Y-%m-%d %H:%M:%S.%f')
    except:
        timeArray = time.strptime(ctime, '%Y-%m-%d %H:%M:%S')
    return int(time.mktime(timeArray))*1000

In [None]:
from multiprocessing import Process
import json
        
# log
class LogAnalysis(Process):
    def __init__(self, cases, log_dir, pid, load , anomaly_templates, dataset='gaia'):
        super().__init__()
        self.id = pid
        self.cases = cases.iloc[pid*load: (pid+1)*load]
        self.dataset = dataset
        self.data_dir = log_dir
        self.res = dict(zip(list(self.cases.index), [set() for _ in range(len(self.cases))]))
        self.time_used = 0
        self.anomaly_templates = anomaly_templates
        self.services = [service.split('.')[0] for service in os.listdir(
            self.data_dir) if service.split('.')[-1] == 'csv']
        
    def read_by_service(self, service):
        data = pd.read_csv(os.path.join(self.data_dir, f'{service}.csv'))
        if data['timestamp'].values[0] < 1e12:
            data['timestamp'] *= 1000
        print(data)
        return data
    
    def analysis(self):
        for service in self.services:
            log_data = self.read_by_service(service)
            start_time = time.time()
            for case_id, case in self.cases.iterrows():
                start_ts = time_to_ts(case['start'])
                end_ts = time_to_ts(case['end'])
                sub_data = log_data[(log_data['timestamp']>=start_ts)&(log_data['timestamp']<end_ts)]
#                 print(len(sub_data))
                for _, row in sub_data.iterrows():
                    if row['template_id'] in self.anomaly_templates:
                        self.res[case_id].add(service)
            end_time = time.time()
            self.time_used += (end_time - start_time)
    
    def save_res(self, savepath):
        for key in self.res:
            self.res[key] = list(self.res[key])
        with open(savepath, 'w') as f:
            json.dump(self.res, f)
        print(f'{self.id} Time used: ', self.time_used)
        print('Save successfully!')
    
    def run(self):
        self.analysis()
        self.save_res(f'{self.dataset}/log/{self.dataset}_log_{self.id}.json')
        with open(f'{self.dataset}/log/time_used_{self.id}', 'w') as f:
            f.write(f'{self.time_used}') 

In [None]:
# 配置
log_path = '/home/u2120210568/jupyterfiles/xiasibo/GAIA/data/logs'
label_path = '/home/u2120210568/multi_rca/case/gaia_resplit.csv' # 故障case的路径
demo_labels = pd.read_csv(label_path, index_col=0)
demo_labels = demo_labels[demo_labels['data_type']=='test']
processes = []
# 获取异常模板id
anomaly_templates = []
for _, row in templates.iterrows():
    if row['EventTemplate'] is not np.nan and 'ERROR' in row['EventTemplate']:
        anomaly_templates.append(row['EventId'])
for pid in range(20):
    processes.append(LogAnalysis(demo_labels, log_path, pid, 47, anomaly_templates))
    processes[-1].start()
    print(f'process {pid} starts...')
for pid in range(20):
    processes[pid].join()
    print(f'process {pid} ends.')

process 0 starts...
process 1 starts...
process 2 starts...
process 3 starts...
process 4 starts...
process 5 starts...
process 6 starts...
process 7 starts...
process 8 starts...
process 9 starts...
process 10 starts...
process 11 starts...
process 12 starts...
process 13 starts...
process 14 starts...
process 15 starts...
process 16 starts...
process 17 starts...
process 18 starts...
process 19 starts...
0 Time used:  31.25292706489563
Save successfully!
19 Time used:  38.30712270736694
Save successfully!
process 0 ends.
2 Time used:  33.56461548805237
Save successfully!
13 Time used:  37.67681074142456
Save successfully!
1 Time used:  41.45724177360535
Save successfully!
process 1 ends.
process 2 ends.
16 Time used:  45.684730529785156
Save successfully!
12 Time used:  44.28771781921387
Save successfully!
3 Time used:  44.11557483673096
Save successfully!
7 Time used:  45.50486993789673
Save successfully!
process 3 ends.
17 Time used:  49.17467164993286
Save successfully!
15 Time us

In [None]:
# 21aiops 日志提取
log_path = '/home/u2120210568/jupyterfiles/xiasibo/GAIA/2021-aiops-lab/log/log_event/logs_data'
label_path = '/home/u2120210568/multi_rca/case/21aiops_resplit.csv' # 故障case的路径
demo_labels = pd.read_csv(label_path, index_col=0)
processes = []
# 获取异常模板id
templates = pd.read_csv('/home/u2120210568/jupyterfiles/xiasibo/GAIA/2021-aiops-lab/log/event.csv')
# templates
anomaly_templates = []
for _, row in templates.iterrows():
    if row['EventTemplate'] is not np.nan and ('error' in row['EventTemplate'].lower() or 'full gc' in 
                                              row['EventTemplate'].lower()):
        anomaly_templates.append(row['EventId'])
        
for pid in range(10):
    processes.append(LogAnalysis(demo_labels[demo_labels['data_type']=='test'], 
                                 log_path, pid, 8, anomaly_templates, '21aiops'))
    processes[-1].start()
    print(f'process {pid} starts...')
for pid in range(10):
    processes[pid].join()
    print(f'process {pid} ends.')

process 0 starts...
process 1 starts...
process 2 starts...
process 3 starts...
process 4 starts...
process 5 starts...
process 6 starts...
process 7 starts...
process 8 starts...
process 9 starts...
6 Time used:  3.7700648307800293
Save successfully!
3 Time used:  3.8404808044433594
Save successfully!
1 Time used:  3.9609546661376953
Save successfully!
2 Time used:  4.259798049926758
Save successfully!
9 Time used:  4.153200149536133
Save successfully!
8 Time used:  4.273096561431885
Save successfully!
7 Time used:  4.515991449356079
Save successfully!
4 Time used:  4.68925142288208
Save successfully!
5 Time used:  4.904574632644653
Save successfully!
0 Time used:  5.290997266769409
Save successfully!
process 0 ends.
process 1 ends.
process 2 ends.
process 3 ends.
process 4 ends.
process 5 ends.
process 6 ends.
process 7 ends.
process 8 ends.
process 9 ends.


In [None]:
anomaly_templates

['401c9a52',
 '766aa0b9',
 'a7a05dc2',
 'c7292f56',
 '3a0ce66a',
 'c64fd57a',
 '67714984',
 '9b4042ac',
 'f8850338',
 '14e0243a',
 'bc136ae9',
 'e9280385',
 'c621672d',
 '7d387233',
 'f9c50996',
 '74e5810b',
 'e0b38a21',
 'ee8dd416',
 '440738bd',
 '307579ca',
 '8bd25b3f',
 'f50e1ae1',
 '68b99a12',
 'b3cb8198',
 '41021fe9',
 '7a2d7715',
 '1b19c2eb',
 '91265f0e',
 'ef76367a']

In [None]:
# 22aiops日志模板提取
# 处理日志
log_data = '/Users/fengxiaoyu/Desktop/PDiagnose/template_log_filebeat-testbed-log-service.csv'
df = pd.read_csv(log_data)

# 合并 template_id 和 value 列
df['EventId_EventTemplate'] = df['template_id'].astype(str) + '_' + df['value']

# 计算每个 template_id 的出现次数
template_counts = df['EventId_EventTemplate'].value_counts().reset_index()

# 重命名列名
template_counts.columns = ['EventId_EventTemplate', 'Occurrences']

# 拆分 EventId_EventTemplate 列为 EventId 和 EventTemplate 列
template_counts[['EventId', 'EventTemplate']] = template_counts['EventId_EventTemplate'].str.split('_', n=1, expand=True)

# 重新排列列的顺序并删除不需要的列
template_counts = template_counts[['EventId', 'EventTemplate', 'Occurrences']]

In [None]:
template_counts

Unnamed: 0,EventId,EventTemplate,Occurrences
0,0x29d8e8,"severity: info, message: conversion request su...",2863426
1,0x29d8e8,"severity: info, message: received conversion r...",2863419
2,0x79a34c,"severity: debug, message: request complete",1292178
3,0x79a34c,"severity: debug, message: request started",1292172
4,0x19743d,[40m[32minfo[39m[22m[49m: Microsoft.AspNet...,1239758
...,...,...,...
528722,0x18a1e2,ddItemAsync called with userId=7b7dbd36-9846-4...,1
528723,0x6dc4b1,EVERE: Exception while executing runnable io.g...,1
528724,0x6dc4b1,EVERE: Exception while executing runnable io.g...,1
528725,0x6dc4b1,EVERE: Exception while executing runnable io.g...,1


In [20]:
import pandas as pd
import os

# 将日志按照cmdb_id分组

def save_by_cmdb_id(file_path, output_dir):
    # 读取 CSV 文件
    data = pd.read_csv(file_path)
    
    # 检查输出目录是否存在，如果不存在，则创建
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # 按照 'cmdb_id' 列分组
    grouped = data.groupby('cmdb_id')

    # 遍历每个分组
    for name, group in grouped:
        print(group)
        # 将分组数据保存到对应的文件中
        group.to_csv(os.path.join(output_dir, f"{name}.csv"), index=False)

# 使用函数
file_path = '/Users/fengxiaoyu/Desktop/PDiagnose/template_log_filebeat-testbed-log-service.csv'  # CSV文件的路径
output_dir = '/Users/fengxiaoyu/Desktop/PDiagnose/22aiops/log'  # 输出目录的路径
save_by_cmdb_id(file_path, output_dir)


                        log_id   timestamp      cmdb_id  \
16        iZ6Bt38B8vQa58bZrQW0  1651507200  adservice-0   
17        ip6Bt38B8vQa58bZrQW0  1651507200  adservice-0   
18        jJ6Bt38B8vQa58bZrQW0  1651507200  adservice-0   
19        m56Bt38B8vQa58bZrQXG  1651507200  adservice-0   
20        mp6Bt38B8vQa58bZrQXG  1651507200  adservice-0   
...                        ...         ...          ...   
11702535  3I7OwX8B8vQa58bZWM4g  1651766397  adservice-0   
11702536  3Y7OwX8B8vQa58bZWM4g  1651766397  adservice-0   
11702552  zo7OwX8B8vQa58bZWM4g  1651766397  adservice-0   
11702553  2o7OwX8B8vQa58bZWM4g  1651766397  adservice-0   
11702554  y47OwX8B8vQa58bZWM4g  1651766397  adservice-0   

                                   log_name  \
16        log_adservice-service_application   
17        log_adservice-service_application   
18        log_adservice-service_application   
19        log_adservice-service_application   
20        log_adservice-service_application   
...      

In [21]:
import os
import pandas as pd
import time
import json

from datetime import datetime
import pytz
import time

tz = pytz.timezone('Asia/Shanghai')
def ts_to_date(timestamp):
    return datetime.fromtimestamp(timestamp//1000, tz).strftime('%Y-%m-%d')

def time_to_ts(ctime):
    try:
        # 先尝试解析包含小时和分钟的格式
        timeArray = time.strptime(ctime, '%Y/%m/%d %H:%M')
    except ValueError:
        try:
            # 如果失败，尝试只有日期的格式
            timeArray = time.strptime(ctime, '%Y/%m/%d')
        except ValueError:
            # 如果仍然失败，记录错误或返回一个默认值
            print(f"Failed to parse date: {ctime}")
            return None
    return int(time.mktime(timeArray))*1000

class LogAnalysis:
    def __init__(self, cases, log_dir, pid, load , anomaly_templates, dataset='gaia'):
        self.id = pid
        self.cases = cases.iloc[pid*load: (pid+1)*load]
        self.dataset = dataset
        self.data_dir = log_dir
        self.res = dict(zip(list(self.cases.index), [set() for _ in range(len(self.cases))]))
        self.time_used = 0
        self.anomaly_templates = anomaly_templates
        self.services = [service.split('.')[0] for service in os.listdir(
            self.data_dir) if service.split('.')[-1] == 'csv']
        
    def read_by_service(self, service):
        data = pd.read_csv(os.path.join(self.data_dir, f'{service}.csv'))
        if data['timestamp'].values[0] < 1e12:
            data['timestamp'] *= 1000
        return data
    
    def analysis(self):
        for service in self.services:
            log_data = self.read_by_service(service)
            start_time = time.time()
            print(self.cases)
            for case_id, case in self.cases.iterrows():
                start_ts = time_to_ts(case['start'])
                end_ts = time_to_ts(case['end'])
                sub_data = log_data[(log_data['timestamp']>=start_ts)&(log_data['timestamp']<end_ts)]
                for _, row in sub_data.iterrows():
                    if row['template_id'] in self.anomaly_templates:
                        self.res[case_id].add(service)
            end_time = time.time()
            self.time_used += (end_time - start_time)
    
    def save_res(self, savepath):
        for key in self.res:
            self.res[key] = list(self.res[key])
        with open(savepath, 'w') as f:
            json.dump(self.res, f)
        print(f'{self.id} Time used: ', self.time_used)
        print('Save successfully!')


import pandas as pd
import numpy as np

# 加载数据
log_path = '/Users/fengxiaoyu/Desktop/PDiagnose/22aiops/log'
label_path = '/Users/fengxiaoyu/Desktop/PDiagnose/22AIOps_run_table.csv'
demo_labels = pd.read_csv(label_path, index_col=0)
processes = []
# 获取异常模板id
# templates
anomaly_templates = []
for _, row in template_counts.iterrows():
    if row['EventTemplate'] is not np.nan and ('error' in row['EventTemplate'].lower()):
        anomaly_templates.append(row['EventId'])
        
# 只创建一个实例
pid = 0
load = len(demo_labels)
analyzer = LogAnalysis(demo_labels[(demo_labels['type'] == 'test') & (demo_labels['level'] == 'pod')], log_path, pid, load, anomaly_templates, '22aiops')
analyzer.analysis()
analyzer.save_res(f'{analyzer.dataset}/log/{analyzer.dataset}_log_{analyzer.id}.json')


      timestamp level                   cmdb_id failure_type           start  \
80   1651366271   pod         checkoutservice-0   k8s容器cpu负载   2022/5/1 8:51   
135  1651379795   pod   productcatalogservice-2    k8s容器进程中止  2022/5/1 12:36   
121  1651382580   pod                frontend-0   k8s容器写io负载  2022/5/1 13:23   
84   1651398576   pod                frontend-0   k8s容器cpu负载  2022/5/1 17:49   
86   1651407633   pod         shippingservice-0   k8s容器cpu负载  2022/5/1 20:20   
64   1651564006   pod         shippingservice-1    k8s容器内存负载  2022/5/3 15:46   
126  1651692556   pod   productcatalogservice-0   k8s容器写io负载   2022/5/5 3:29   
111  1651716458   pod   recommendationservice-2   k8s容器读io负载  2022/5/5 10:07   
129  1651920562   pod               adservice-0   k8s容器写io负载  2022/5/7 18:49   
95   1652032391   pod         shippingservice-1   k8s容器cpu负载   2022/5/9 1:53   
132  1652060333   pod  recommendationservice2-0   k8s容器写io负载   2022/5/9 9:39   
120  1652089514   pod               adse

In [22]:
# platform日志模板提取
# 处理日志
log_data = '/Users/fengxiaoyu/Desktop/PDiagnose/平台数据集/log/platform_template.csv'
df = pd.read_csv(log_data)

# 合并 template_id 和 value 列
df['EventId_EventTemplate'] = df['EventId'].astype(str) + '_' + df['message']

# 计算每个 template_id 的出现次数
template_counts = df['EventId_EventTemplate'].value_counts().reset_index()

# 重命名列名
template_counts.columns = ['EventId_EventTemplate', 'Occurrences']

# 拆分 EventId_EventTemplate 列为 EventId 和 EventTemplate 列
template_counts[['EventId', 'EventTemplate']] = template_counts['EventId_EventTemplate'].str.split('_', n=1, expand=True)

# 重新排列列的顺序并删除不需要的列
template_counts = template_counts[['EventId', 'EventTemplate', 'Occurrences']]

In [28]:
template_counts

Unnamed: 0,EventId,EventTemplate,Occurrences
0,0x6efd58,"severity:info,message:conversion request succe...",4471004
1,0x6c210a,"severity:debug,message:request started",2291326
2,0x6c210a,"severity:debug,message:request complete",2290965
3,0x781d38,"severity:info,message:Getting supported curren...",1698561
4,0x7c1a12,"severity:warning,message:failed to retrieve ads",1155332
...,...,...,...
335628,0x388a4c,main.(*frontendServer).addToCartHandler(0xc000...,1
335629,0x388a4c,"net/http.HandlerFunc.ServeHTTP(0xc00017dc00?, ...",1
335630,0x388a4c,github.com/gorilla/mux.(*Router).ServeHTTP(0xc...,1
335631,0x388a4c,"main.(*logHandler).ServeHTTP(0xc000464060, {0x...",1


In [24]:
import pandas as pd
import os

# 将日志按照cmdb_id分组

def save_by_cmdb_id(file_path, output_dir):
    # 读取 CSV 文件
    data = pd.read_csv(file_path)
    
    # 检查输出目录是否存在，如果不存在，则创建
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # 按照 'cmdb_id' 列分组
    grouped = data.groupby('cmdb_id')

    # 遍历每个分组
    for name, group in grouped:
        print(group)
        # 将分组数据保存到对应的文件中
        group.to_csv(os.path.join(output_dir, f"{name}.csv"), index=False)

# 使用函数
file_path = '/Users/fengxiaoyu/Desktop/PDiagnose/平台数据集/log/platform_template.csv'  # CSV文件的路径
output_dir = '/Users/fengxiaoyu/Desktop/PDiagnose/平台数据集/log'  # 输出目录的路径
save_by_cmdb_id(file_path, output_dir)


          Unnamed: 0.2  Unnamed: 0                log_id   timestamp  \
25                  25          25  2Rx9X44BgqWVbWTIyH8k  1710950409   
26                  26          26  4xx9X44BgqWVbWTIyX8g  1710950409   
27                  27          27  7hx9X44BgqWVbWTIyX8g  1710950419   
28                  28          28  -xx9X44BgqWVbWTIyX8g  1710950419   
29                  29          29  _Bx9X44BgqWVbWTIyX8g  1710950429   
...                ...         ...                   ...         ...   
20751035       6711159     2467013  1h53aY4BcF1whnmqNV4D  1711166399   
20751058       6711182     2467036  Dx53aY4BcF1whnmqOmGp  1711166397   
20751059       6711183     2467037  EB53aY4BcF1whnmqOmGp  1711166396   
20751060       6711184     2467038  ER53aY4BcF1whnmqOmGp  1711166397   
20751061       6711185     2467039  Eh53aY4BcF1whnmqOmGp  1711166396   

                              date      cmdb_id  \
25        2024-03-20T16:00:09.966Z  cartservice   
26        2024-03-20T16:00:09.966

In [44]:
import os
import pandas as pd
import time
import json

from datetime import datetime
import pytz
import time

tz = pytz.timezone('Asia/Shanghai')
def ts_to_date(timestamp):
    return datetime.fromtimestamp(timestamp//1000, tz).strftime('%Y-%m-%d')

def time_to_ts(ctime):
    try:
        # 先尝试解析包含小时和分钟的格式
        timeArray = time.strptime(ctime, '%Y/%m/%d %H:%M')
    except ValueError:
        try:
            # 如果失败，尝试只有日期的格式
            timeArray = time.strptime(ctime, '%Y/%m/%d')
        except ValueError:
            # 如果仍然失败，记录错误或返回一个默认值
            print(f"Failed to parse date: {ctime}")
            return None
    return int(time.mktime(timeArray))*1000

class LogAnalysis:
    def __init__(self, cases, log_dir, pid, load , anomaly_templates, dataset='gaia'):
        self.id = pid
        self.cases = cases.iloc[pid*load: (pid+1)*load]
        self.dataset = dataset
        self.data_dir = log_dir
        self.res = dict(zip(list(self.cases.index), [set() for _ in range(len(self.cases))]))
        self.time_used = 0
        self.anomaly_templates = anomaly_templates
        self.services = [service.split('.')[0] for service in os.listdir(
            self.data_dir) if service.split('.')[-1] == 'csv']
        
    def read_by_service(self, service):
        data = pd.read_csv(os.path.join(self.data_dir, f'{service}.csv'))
        if data['timestamp'].values[0] < 1e12:
            data['timestamp'] *= 1000
        return data
    
    def analysis(self):
        for service in self.services:
            log_data = self.read_by_service(service)
            start_time = time.time()
            # print(self.cases)
            for case_id, case in self.cases.iterrows():
                start_ts = int(case['st_time'])*1000
                end_ts = int(case['ed_time'])*1000
                sub_data = log_data[(log_data['timestamp']>=start_ts)&(log_data['timestamp']<end_ts)]
                for _, row in sub_data.iterrows():
                    if row['EventId'] in self.anomaly_templates:
                        self.res[case_id].add(service)
            end_time = time.time()
            self.time_used += (end_time - start_time)
    
    def save_res(self, savepath):
        for key in self.res:
            self.res[key] = list(self.res[key])
        with open(savepath, 'w') as f:
            json.dump(self.res, f)
        print(f'{self.id} Time used: ', self.time_used)
        print('Save successfully!')


import pandas as pd
import numpy as np

# 加载数据
log_path = '/Users/fengxiaoyu/Desktop/PDiagnose/平台数据集/log'
label_path = '/Users/fengxiaoyu/Desktop/PDiagnose/run_table.csv'
demo_labels = pd.read_csv(label_path, index_col=0)
processes = []
# 获取异常模板id
# templates
anomaly_templates = []
for _, row in template_counts.iterrows():
    if ('level:error' in row['EventTemplate'].lower()) or ('error code' in row['EventTemplate'].lower()):
        anomaly_templates.append(row['EventId'])
        
# 只创建一个实例
pid = 0
load = len(demo_labels[(demo_labels['data_type'] == 'test')])
analyzer = LogAnalysis(demo_labels[(demo_labels['data_type'] == 'test') ], log_path, pid, load, anomaly_templates, 'platform')
analyzer.analysis()
analyzer.save_res(f'{analyzer.dataset}/log/{analyzer.dataset}_log_{analyzer.id}.json')


0 Time used:  3.890899181365967
Save successfully!


In [45]:
load

13