In [1]:
import re
import zlib
import base64
import datetime
import numpy as np
import pandas as pd
import json
import configparser
from utils import *
from sys import platform
from elasticsearch import Elasticsearch

In [2]:
# Project settings; sections are looked up later as cf['ENV_' + env] and
# cf['ELASTICSEARCH']. ConfigParser.read() skips a missing file without raising.
cf = configparser.ConfigParser()
cf.read('config/config.cfg')

# Environment section to use: 'PRODUCT' only when the platform string contains
# 'linux' and does not contain 'win'; every other host falls back to 'DEVELOP'.
env = 'PRODUCT' if ('win' not in platform and 'linux' in platform) else 'DEVELOP'
    
class EsCtrl(object):
    """Small convenience wrapper around an ``Elasticsearch`` client.

    The client is created from the module-level ``cf`` configuration and the
    ``env`` name; it is stored on ``self.es_ctrl`` and used by every method.
    """

    def __init__(self):
        # Address comes from the [ENV_<env>] section, CA bundle from [ELASTICSEARCH].
        self.es_ctrl = Elasticsearch(cf['ENV_'+env]['ADDR'], ca_certs=cf['ELASTICSEARCH']['CA_CERTS'])

    def query_index_logs(self, index, scroll='1s', size=10000):
        """Return every hit stored in ``index`` by paging through the scroll API.

        Args:
            index: name of the index to read.
            scroll: how long the server keeps the scroll context alive between
                two page requests (passed to ``search`` and ``scroll``).
            size: number of hits requested per page.

        Returns:
            list of the raw hit dicts, in the order the pages were returned.
        """
        # No query is sent, so all documents are fetched. A ``query=`` argument
        # (for instance a match on the ``trace`` field) can be added to this
        # search() call to restrict the hits.
        data = self.es_ctrl.search(index=index, scroll=scroll, size=size)
        sid = data['_scroll_id']
        res = []
        try:
            # An empty page marks the end of the result set.
            while len(data['hits']['hits']) > 0:
                res.extend(data['hits']['hits'])
                data = self.es_ctrl.scroll(scroll_id=sid, scroll=scroll)
                # The server may hand out a new scroll id with each page.
                sid = data['_scroll_id']
        finally:
            # Release the server-side scroll context instead of waiting for it
            # to time out. This is best-effort cleanup: a failure here (for
            # example an already expired context) must not hide the fetched
            # data or the original error.
            try:
                self.es_ctrl.clear_scroll(scroll_id=sid)
            except Exception:
                pass
        return res

    def query_indices(self):
        """Return the alias keys containing '.analyzed_', with that marker removed."""
        return [
            key.replace('.analyzed_', '')
            for key in self.es_ctrl.indices.get_alias().keys()
            if '.analyzed_' in key
        ]

    def is_exists(self, index):
        """Return the client's answer to whether ``index`` exists."""
        return self.es_ctrl.indices.exists(index=index)

    def count_index(self, index):
        """Return the document count reported for ``index``."""
        return self.es_ctrl.count(index=index)['count']

    def store_index(self, index, data):
        """Store ``data`` in ``index`` as one compressed document.

        ``data`` is JSON-serialised, encoded with ``deflate_and_base64_encode``
        and indexed as ``{'content': <encoded text>}``. Returns the client's
        response to the index call.
        """
        data = deflate_and_base64_encode(json.dumps(data).encode('utf-8'))
        return self.es_ctrl.index(index=index, body={'content': data})

    def query_index(self, index):
        """Load the object stored by ``store_index`` from the first hit of ``index``.

        Raises:
            IndexError: when the search returns no hits.
        """
        data = self.es_ctrl.search(index=index)
        data = json.loads(decode_base64_and_inflate(data['hits']['hits'][0]['_source']['content']))
        return data

In [3]:
############################################ Data Clean ################################################
def package_kv(df):
    """Group the parsed key/value pairs of a process frame by key.

    Args:
        df: frame with the columns ``kv`` (list of ``(key, values)`` pairs per
            row), ``index`` and ``timestamp``.

    Returns:
        ``(packaged, key_types)``.
        ``packaged[key]`` is a column-major matrix built from one record per
        occurrence of the key; a record is
        ``[value1, ..., timestamp, row position as str, df['index'] value]``.
        A record shorter than the widest one for that key has all of its
        values replaced by 0 and keeps only its last three fields.
        ``key_types[key]`` is 'register', 'discrete' or 'continuous', decided
        from the first value of each record; keys that mix kinds are 'discrete'.
    """
    grouped = {}
    rows = zip(df.kv.values, df['index'].values, df.timestamp.values)
    for position, (pairs, global_index, timestamp) in enumerate(rows):
        for pair in pairs:
            values = pair[1]
            if len(values) == 0:
                continue
            record = values + [timestamp] + [str(position)] + [global_index]
            grouped.setdefault(pair[0], []).append(record)

    packaged = {}
    key_types = {}
    for name, records in grouped.items():
        widest = max(len(record) for record in records)
        kinds = set()
        aligned = []
        for record in records:
            first_value = record[0]
            if '0x' in first_value:
                kinds.add('register')
            elif first_value.isupper():
                kinds.add('discrete')
            else:
                kinds.add('continuous')

            if len(record) == widest:
                aligned.append(record)
            else:
                # Zero-fill the value slots, keep timestamp / row position / index.
                aligned.append([0] * (widest - 3) + record[-3:])
        # numpy turns the records into one string matrix before transposing.
        packaged[name] = np.array(aligned).transpose().tolist()
        key_types[name] = 'discrete' if len(kinds) > 1 else next(iter(kinds))
    return packaged, key_types


def package_inverted_index_table(table, key, data):
    """Add the words of each message to the inverted index ``table`` in place.

    Args:
        table: dict mapping a lower-cased word to ``{'x': [...], 'y': [...]}``;
            it is updated in place.
        key: value appended to ``'y'`` for every word found (the caller passes
            the y-axis number of the process).
        data: iterable of ``(index, msg)`` pairs; ``index`` is appended to ``'x'``.

    Words are obtained by replacing punctuation with spaces, collapsing runs of
    spaces and splitting on a single space, so an empty word can occur.
    """
    punctuation_to_space = str.maketrans({symbol: " " for symbol in '/*{}[]()#+-!=:,'})

    for index, msg in data:
        flattened = re.sub(" +", " ", msg.translate(punctuation_to_space))
        for token in set(flattened.split(' ')):
            # x: global index, y: y-axis number
            posting = table.setdefault(token.lower(), {'x': [], 'y': []})
            posting['x'].append(index)
            posting['y'].append(key)


def clean_data(esdata):
    """Build per-device story lines and inverted word indexes from raw hits.

    Args:
        esdata: iterable of Elasticsearch hit dicts. Hits whose ``_source`` has
            no ``msg`` field are skipped; the others must also provide
            ``device``, ``trace``, ``logtime`` and ``millisecond``.

    Returns:
        dict with two keys:
        ``'story_line'`` maps each device to a list with one entry per process
        (in order of first appearance after sorting by timestamp). An entry
        holds ``process``, ``start_time``, ``start_count``, ``end_time``,
        ``end_count``, ``msg`` (index -> 'position||timestamp||message'),
        ``kv`` and ``k_type`` (both from ``package_kv``).
        ``'inverted_index_table'`` maps each device to the table filled by
        ``package_inverted_index_table``.
    """
    def clean_msg_special_symbols(text):
        # Brackets, braces, parentheses, double quotes and '::' become spaces.
        for ch in ['{', '}', '[', ']', '(', ')', '"', '::']:
            if ch in text:
                text = text.replace(ch, " ")
        return re.sub(" +", " ", text)

    def split_process_and_msg(text):
        # Structured trace lines carry `process = <name>, ... msg = <text>`.
        if re.search(r'process \= (.*?)$', text) is not None:
            process_hits = re.findall(r'process \= (.*?),', text)
            msg_hits = re.findall(r'msg \= (.*?)$', text)
            if process_hits and msg_hits:
                return process_hits[0], msg_hits[0]
        # Anything else, including a partial line where only one of the two
        # fields can be extracted, is attributed to 'main' and parsed whole.
        return 'main', text

    def extract_kv(msg):
        # Normalise every `key = value` / `key: value` spelling to `key:value`.
        msg = msg.replace('= ',':').replace(' = ',':').replace(': ',':').replace(' : ',':').replace('=',':')

        # Glue upper-case tokens to the preceding key with ':'. The token is
        # escaped and re-inserted through a function so that regex or
        # replacement metacharacters inside it (e.g. 'C++') are taken literally.
        for elm in re.split('[: ]', msg):
            if elm.isupper():
                msg = re.sub('[: ]' + re.escape(elm), lambda _match, token=elm: ':' + token, msg)

        # Terminate every value with ',' (the appended ' $' closes the last one).
        msg = re.sub('(:(?!-).*?[ $])', r'\1,', (msg + ' $'))

        # Values starting with a letter are kept as one string; anything else
        # is reduced to its numeric / hex fragments.
        return [(k.strip(), [v.strip()] if (v+' ')[0].isalpha() else re.findall('[0-9.|x]+', v)) for k, v in re.findall('([A-Za-z0-9_.]+?)[ ]?[:=][ ]?(.*?)[,$]', msg)]

    rows = []
    for item in esdata:
        source = item['_source']
        if 'msg' not in source:
            continue
        process, msg = split_process_and_msg(clean_msg_special_symbols(source['msg']))
        kv = extract_kv(msg)
        rows.append([source['device'], source['trace'], process, source['logtime'][:-1] + '.' + str(source['millisecond']) + 'Z', source['msg'], kv])

    story = pd.DataFrame(rows, columns=['device', 'trace', 'process', 'timestamp', 'msg', 'kv']).sort_values('timestamp', ascending=True).reset_index(drop=True)
    story_line = {}
    inverted_index_table = {}
    # dict.fromkeys keeps first-appearance order, so the output order is deterministic.
    for dev in dict.fromkeys(story.device.values):
        data = story.loc[(story['device'] == dev), :].reset_index(drop=True)
        sub_inverted_index_table = {}
        for i, process_name in enumerate(dict.fromkeys(data.process.values)):
            # reset_index() keeps the per-device row number in an 'index' column.
            process = data.loc[(data['process'] == process_name), :].reset_index()
            process['index'] = process['index'].astype(str)
            last = process.shape[0] - 1
            package_inverted_index_table(sub_inverted_index_table, i, zip(process['index'].values, process.msg.values))
            # Key: per-device row number; value starts with the row position
            # inside this process frame.
            msg = dict(zip(process['index'].values, [str(a) + '||' + b + '||' + c for a, b, c in
                                                     zip(process.index.values, process.timestamp.values,
                                                         process.msg.values)]))
            kv, k_type = package_kv(process)
            story_line.setdefault(dev, []).append({
                'process': process_name,
                'start_time': process['timestamp'][0],
                'start_count': process['index'][0],
                'end_time': process['timestamp'][last],
                'end_count': process['index'][last],
                'msg': msg,
                'kv': kv,
                'k_type': k_type,
            })
        inverted_index_table[dev] = sub_inverted_index_table
    return {'story_line': story_line, 'inverted_index_table': inverted_index_table}


def apply_filter_by_keywords(df):
    """Return True when ``df['msg']`` contains neither ':' nor '='.

    The message is turned into a set of its elements, so for a string this is
    a per-character check.
    """
    separators = {':', '='}
    return set(df['msg']).isdisjoint(separators)


def apply_filter_digit(df):
    """Return ``df['msg']`` with every run of digits removed."""
    # Raw string: '\d' inside a normal string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    return re.sub(r'\d+', '', df['msg'])


def apply_keyword_highlight(df, keywords, color_highlight):
    """Pick the highlight colour for a row whose message matches a keyword.

    Matching is case-insensitive. A row matches when one of its
    space-separated words equals a keyword, or when 'abn:' is among the
    keywords and occurs anywhere in the message.

    Returns ``color_highlight`` on a match, otherwise ``df['status']``.
    """
    wanted = [keyword.lower() for keyword in keywords]
    if not wanted:
        # Without keywords the message is never inspected.
        return df['status']

    text = df['msg'].lower()
    if not set(text.split(' ')).isdisjoint(wanted):
        return color_highlight
    if 'abn:' in wanted and 'abn:' in text:
        return color_highlight
    return df['status']


def cal_time_difference(start, end):
    """Return ``end - start`` as a ``timedelta``; both are 'HH:MM:SS' strings."""
    clock_format = "%H:%M:%S"
    finished = datetime.datetime.strptime(end, clock_format)
    began = datetime.datetime.strptime(start, clock_format)
    return finished - began


############################################ XML Compression and Decompression ################################################
def decode_base64_and_inflate(b64string):
    """Base64-decode ``b64string`` and inflate the result; returns bytes."""
    # wbits=-15 tells zlib the input is a raw deflate stream without header or trailer.
    return zlib.decompress(base64.b64decode(b64string), wbits=-15)


def deflate_and_base64_encode(string_val):
    """Compress the bytes ``string_val`` and return them base64-encoded as str."""
    zlib_stream = zlib.compress(string_val)
    # Drop the first 2 and the last 4 bytes of the zlib output, keeping the part between them.
    encoded = base64.b64encode(zlib_stream[2:-4])
    return str(encoded, "utf-8")

In [221]:
# Name of the Elasticsearch index to analyse.
index = 'visby_6626_dcgm_xiaobo_glt_sukamulya_cbn_cm_bxp_2051_telog'
# Creating the controller builds the Elasticsearch client from the loaded config.
es_ctrl = EsCtrl()
# Fetch every hit of the index, then turn the hits into the
# {'story_line': ..., 'inverted_index_table': ...} structure used by the cells below.
story = es_ctrl.query_index_logs(index)
data = clean_data(story)

In [19]:
import zipfile
import io

# Read `teread.log` from a zip archive that is itself stored inside the outer
# dcgm zip, and split it into one list of lines per device.

# Raw string so the backslashes of the Windows path are taken literally.
path = r'D:\FirstScene\GLT_SUKAMULYA_CBN_CM_220715_064452_WIB_MSRBS-GL_CXP9024418-15_R24M11_dcgm.zip'
path_logfiles = 'GLT_SUKAMULYA_CBN_CM_logfiles.zip'
teread = 'teread.log'
logs = {}           # device name -> list of log lines
table =[]           # NOTE(review): never filled in this cell
dev = ''            # device whose log is currently being collected
table_flag = True   # True until the first `te log read` command line is seen
log_flag = False    # True while the current lines belong to `dev`'s log
with zipfile.ZipFile(path, 'r') as outer:
    with outer.open(path_logfiles, 'r') as nest:
        # The nested archive must be seekable, so load it into memory first.
        logfiles = io.BytesIO(nest.read())
        with zipfile.ZipFile(logfiles) as nested_zip:
            with nested_zip.open(teread, 'r') as log:
                lines = log.readlines()
                for i, line in enumerate(lines):
                    line = line.decode("utf-8")
                    is_te_log_read = ('coli>' in line) and ('te log read' in line)
                    if table_flag and is_te_log_read:
                        # NOTE(review): the original `if` had no body, so the cell
                        # did not parse. Assumed intent: everything before the first
                        # `te log read` command is a preamble to skip, and that first
                        # command line itself starts the first device log. Confirm.
                        table_flag = False
                    if table_flag:
                        continue
                    # A line starting with '=' ends the current device log.
                    if len(line) > 0 and line[0] == '=':
                        log_flag = False
                    if log_flag:
                        # Strip only the line terminator, so a final line without
                        # a newline keeps its last character.
                        logs[dev].append(line.rstrip('\n'))
                    if is_te_log_read:
                        # The device name is the second space-separated field of
                        # the command line.
                        dev = line.split(' ')[1]
                        logs[dev] = []
                        log_flag = True

In [20]:
# Display the raw log lines collected for device BXP_2051.
logs['BXP_2051']

['BXP_2051: [Trace log from 2 restarts before]',
 'BXP_2051: [2022-07-09 15:53:37.007706415] (+?.?????????) radio6626 com_ericsson_trithread:INFO: { cpu_id = 3 }, { process = "txlProcBranchE", fileAndLine = "dpdController.cc:2112", msg = "Power measurement, Pma:-14.35[-41.54 -9.50] dB, DpdPma:-18.25[-19.05 -17.65] dB, Pmb:-14.35, TorPmb:-14.42[-49.35 -9.50] dB, avgTxPma:-13.62 dB, pmDpdIrqStat:0x00008000, pmScaleFactor: 65K" }',
 'BXP_2051: [2022-07-09 15:53:37.011182005] (+0.003475590) radio6626 com_ericsson_trithread:INFO: { cpu_id = 3 }, { process = "txlProcBranchE", fileAndLine = "dpdController.cc:2118", msg = "txAtt:1871, txAttPeak:0, dpGainLoopEnable:true, dpGainCtrlType:DSA_AD_TXFE, torTemperature:820 (0.1C), torGainBackoff:0 (0.01dB), torGainLin:3.49945(0.01dB), torStepBit:8, cc1Ctrl0=0x00000100 , avgIMpa0:1530 [mAmp]" }',
 'BXP_2051: [2022-07-09 15:53:37.011202127] (+0.000020122) radio6626 com_ericsson_trithread:INFO: { cpu_id = 3 }, { process = "txlProcBranchE", fileAndLine =

In [22]:
# Dump the collected BXP_2050 log lines, newline-separated, into the file 'test'.
with open('test', 'w') as fp:
    fp.write("\n".join(logs['BXP_2050']))

In [216]:
# List the processes found for device BXP_2051 together with their position
# in the story line (the position is what the lookups below index with).
for i,elm in enumerate(data['story_line']['BXP_2051']):
    print(i,elm['process'])

0 txlProcBranchE 
1 tmoSchedulerTx_12tx 
2 txlProcBranchF 
3 txlProcBranchG 
4 txlProcBranchA 
5 dpPollProc 
6 txlProcBranchH 
7 AntModVswrCtrl 
8 rtsCtrl_requestProcessor 
9 txlProcBranchB 
10 txlProcBranchI 
11 txlProcBranchL 
12 TxBranchCtrlH 
13 txlProcBranchK 
14 txlProcBranchJ 
15 trDcProc 
16 PaSrvCHJK 
17 ledProc 
18 TxTimingPhaseCtrlH 
19 TxBranchCtrlJ 
20 txlProcBranchC 
21 TxBranchCtrlK 
22 TxLoSrvJ 
23 TxCoordinationSrvCHJK 
24 TxLoSrvK 
25 radioTrDcServer 
26 TxBranchCtrlC 
27 TxTimingPhaseCtrlK 
28 - 
29 bcProc 
30 TxTimingPhaseCtrlC 
31 RxBranchCtrlE 
32 faultManagerProc 
33 RxBranchCtrlF 
34 EquipCtrl 
35 TxLoSrvH 
36 TxLoSrvC 
37 eLogEngineProc 
38 txlProcBranchD 
39 cmd_proc 
40 AntennaModule 
41 TxBranchCtrlG 
42 TxCoordinationSrvDFG 
43 TxTimingPhaseCtrlG 
44 TxBranchCtrlD 
45 TxBranchCtrlF 
46 TxLoSrvG 
47 TxLoSrvF 
48 TxLoSrvD 
49 TxTimingPhaseCtrlD 
50 RxBranchCtrlG 
51 RxBranchCtrlH 
52 cfhe 
53 PaSrvDFG 
54 TxTimingPhaseCtrlF 
55 TorLoSrvD 
56 TorLoSrvG 
57 TxB

In [219]:
# Type assigned to the key 'txAtt' in the first process entry of BXP_2051.
data['story_line']['BXP_2051'][0]['k_type']['txAtt']

'continuous'

In [218]:
# Packaged value matrix of 'txAtt' for the same process entry.
# NOTE(review): this displays the complete lists; slice before displaying if
# only a sample is needed.
data['story_line']['BXP_2051'][0]['kv']['txAtt']

[['1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1871',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',
  '1802',


In [193]:
def clean_msg_special_symbols(text):
    """Replace brackets, braces, parentheses, double quotes and '::' with
    spaces, then collapse runs of spaces into one."""
    for ch in ['{', '}', '[', ']', '(', ')', '"', '::']:
        if ch in text:
            text = text.replace(ch, " ")
    return re.sub(" +", " ", text)
    
# Sample messages for trying out the key/value extraction; enable one at a time.
# msg = 'GaN Boost mode, set to boost mode Pma:-15.88[-41.54 -9.50] dB, DpdPma:-20.09[-20.58 -19.18] dB, Pmb:-15.88, TorPmb:-15.92[-50.88 -9.50] dB, avgTxPma:-18.14 dB, pmDpdIrqStat:0x00000000, pmScaleFactor: 65K'
msg = 'txAtt:145, txAttPeak:0, dpGainLoopEnable:true, dpGainCtrlType:VVA_QPB93, torTemperature:820 (0.1C), torGainBackoff:0 (0.01dB), torGainLin:3.82825(0.01dB), torStepBit:9, cc1Ctrl1=0x00000100 , avgIMpa0:690 [mAmp]'
# msg = '[TXL_GAIN] Pma:-inf[-41.54 -9.50] dB, DpdPma:-inf[-inf -inf] dB, Pmb:-inf, TorPmb:-inf[-inf -9.50] dB, avgTxPma:-inf dB, pmDpdIrqStat:0x00000000, pmScaleFactor: 65K'
# msg = 'New event= EVENT_DEACTIVATE carrierId= 196908 fbsId= 1 fbsState= DISABLED cycleRequired= YES 110'
# msg = 'Event CARRIER_DEACTIVATE for carrierId:778'
# msg = 'Set event RX_SETUP_EVENT to time: 250[ms], from 0x13000e3'
# msg = 'New event= EVENT_SETUP carrierId= 771 fbsId= 1 fbsState= SETUP cycleRequired= NO 0'
# msg = '0-insertion for fbsId=2 event=EVENT_RELEASE '
msg = clean_msg_special_symbols(msg)
# Normalise every `key = value` / `key: value` spelling to `key:value`.
msg = msg.replace('= ',':').replace(' = ',':').replace(': ',':').replace(' : ',':').replace('=',':')

# Glue upper-case tokens to the preceding key with ':'. The token is escaped and
# re-inserted through a function so metacharacters inside it are taken literally.
for elm in re.split('[: ]',msg):
    if elm.isupper():
        msg = re.sub('[: ]' + re.escape(elm), lambda _match, token=elm: ':' + token, msg)

# Terminate every value with ',' (the appended ' $' closes the last one).
msg = re.sub('(:(?!-).*?[ $])', r'\1,', (msg + ' $'))
# msg
# `[:1]` instead of `[0]`: an empty value yields [] rather than raising IndexError.
kv = [(k.strip(), [v.strip()] if v.strip()[:1].isalpha() else re.findall('[0-9.|x]+', v)) for k, v in re.findall('([A-Za-z0-9_.]+?)[ ]?[:=][ ]?(.*?)[,$]', msg)]
kv

[('txAtt', ['145']),
 ('txAttPeak', ['0']),
 ('dpGainLoopEnable', ['true']),
 ('dpGainCtrlType', ['VVA_QPB93']),
 ('torTemperature', ['820', '0.1']),
 ('torGainBackoff', ['0']),
 ('torGainLin', ['3.82825']),
 ('torStepBit', ['9']),
 ('cc1Ctrl1', ['0x00000100']),
 ('avgIMpa0', ['690'])]

In [200]:
# Render a hex register value as a 32-character, zero-padded bit string.
bin(int('0x00000100', 16))[2:].zfill(32)

'00000000000000000000000100000000'

In [197]:
# The 'b' (binary) format code is not defined for str, so passing the hex
# string directly raises the ValueError shown below. Convert it first, e.g.
# format(int('0x00000100', 16), '0>42b').
format('0x00000100', '0>42b')

ValueError: Unknown format code 'b' for object of type 'str'

In [204]:
# Check str.isupper() on a token that contains one lower-case letter; the
# parsing code uses isupper() to recognise upper-case tokens.
'VVA_QPb93'.isupper()

False