In [None]:
import re
import zlib
import base64
import datetime
import numpy as np
import pandas as pd
import json
import configparser
from utils import *
from sys import platform
from elasticsearch import Elasticsearch

# import torch
# from tslearn.metrics import dtw, dtw_path
# from tslearn.metrics import lcss, lcss_path
# from transformers import BertTokenizer, BertModel
# from sklearn.preprocessing import LabelEncoder
# from sklearn.preprocessing import OneHotEncoder

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# tokenizer = BertTokenizer.from_pretrained('google/bert_uncased_L-2_H-128_A-2')
# model = BertModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
# model = model.to(device)
# device

In [None]:
# Load the notebook settings; the parsed sections are read by the cells below.
cf = configparser.ConfigParser()
cf.read('config/config.cfg')

# Any platform string containing 'win' (this also covers 'darwin' and 'cygwin')
# selects DEVELOP; only a Linux platform string selects PRODUCT.
_runs_on_linux = 'linux' in platform and 'win' not in platform
env = 'PRODUCT' if _runs_on_linux else 'DEVELOP'
    
class EsCtrl(object):
    """Thin convenience wrapper around an ``Elasticsearch`` client.

    The server address and CA certificate path are read from the module-level
    ``cf`` configuration for the active ``env``.
    """

    def __init__(self):
        self.es_ctrl = Elasticsearch(cf['ENV_'+env]['ADDR'], ca_certs=cf['ELASTICSEARCH']['CA_CERTS'])

    def query_index_logs(self, index):
        """Return every hit of ``index`` by paging through it with the scroll API.

        Args:
            index: name of the index to read.

        Returns:
            A list with the raw hit dictionaries of all pages, in the order
            they were returned.
        """
        # query = {
        #     "match": {
        #         "trace": "com_ericsson_trithread:INFO"
        #     }
        # }
        #data = self.es_ctrl.search(index=index, query=query, scroll='1s', size=10000)
        data = self.es_ctrl.search(index=index, scroll='1s', size=10000)
        sid = data['_scroll_id']
        res = []
        try:
            while data['hits']['hits']:
                # Collect the current page before asking for the next one.
                res.extend(data['hits']['hits'])
                data = self.es_ctrl.scroll(scroll_id=sid, scroll='1s')
                # The server may hand back a new scroll id with every page.
                sid = data['_scroll_id']
        finally:
            # Release the server-side scroll context instead of leaving it to
            # time out.
            try:
                self.es_ctrl.clear_scroll(scroll_id=sid)
            except Exception:
                # Best-effort cleanup only: the context expires on its own and
                # a failure here must not discard the hits already collected.
                pass
        return res

    def query_indices(self):
        """Return the alias keys containing ``'.analyzed_'``, with that marker removed."""
        return [
            key.replace('.analyzed_', '')
            for key in self.es_ctrl.indices.get_alias().keys()
            if '.analyzed_' in key
        ]

    def is_exists(self, index):
        """Return the client's answer to whether ``index`` exists."""
        return self.es_ctrl.indices.exists(index=index)

    def count_index(self, index):
        """Return the ``count`` field reported for ``index``."""
        return self.es_ctrl.count(index=index)['count']

    def store_index(self, index, data):
        """Serialise ``data`` to JSON, compress/encode it and index it under ``content``."""
        data = deflate_and_base64_encode(json.dumps(data).encode('utf-8'))
        return self.es_ctrl.index(index=index, body={'content': data})

    def query_index(self, index):
        """Decode and return the ``content`` of the first hit of ``index``.

        Raises:
            IndexError: if the search returns no hit at all.
        """
        hits = self.es_ctrl.search(index=index)['hits']['hits']
        if not hits:
            raise IndexError('index %r contains no stored document' % (index,))
        return json.loads(decode_base64_and_inflate(hits[0]['_source']['content']))

In [None]:
import numpy
import matplotlib.pyplot as plt

from tslearn.generators import random_walks
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn import metrics


# numpy.random.seed(0)
# n_ts, sz, d = 2, 100, 1
# dataset = random_walks(n_ts=n_ts, sz=sz, d=d, random_state=5)

kv_a = [int(item) for item in story_a['kv']['txlProcBranchH']['txAtt(c)'][0]]
kv_b = [int(item) for item in story_b['kv']['txlProcBranchH']['txAtt(c)'][0]]
# Left-pad the second series with zeros up to the length of the first one
# (nothing is added when kv_b is already at least as long).
kv_b = [0] * (len(kv_a) - len(kv_b)) + kv_b

dataset = [kv_a, kv_b]
scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
dataset_scaled = scaler.fit_transform(dataset)

series_a = dataset_scaled[0, :, 0]
series_b = dataset_scaled[1, :, 0]

# The alignments get their own names: binding them to `lcss_path` / `dtw_path`
# would shadow the tslearn functions that other cells import under those names.
lcss_matches, sim_lcss = metrics.lcss_path(series_a, series_b, eps=1.5)
dtw_matches, sim_dtw = metrics.dtw_path(series_a, series_b)


def _plot_matching(fig_num, matches, title):
    """Plot both scaled series and one orange segment per matched index pair."""
    plt.figure(fig_num, figsize=(8, 8))
    plt.plot(series_a, "b-", label='First time series')
    plt.plot(series_b, "g-", label='Second time series')
    for positions in matches:
        plt.plot([positions[0], positions[1]],
                 [series_a[positions[0]], series_b[positions[1]]], color='orange')
    plt.legend()
    plt.title(title)


_plot_matching(1, lcss_matches, "Time series matching with LCSS")
_plot_matching(2, dtw_matches, "Time series matching with DTW")

plt.tight_layout()
plt.show()

In [None]:
from extract import *

# Build a FileExtract for one telog capture.
# NOTE(review): FileExtract is presumably provided by `from extract import *`
# in this cell; what the constructor does with `path` is not visible here.
path = 'exiosuu_LTE_TALAGAKOCAK_GH_BXP_2053_telog'
fe = FileExtract(path)

In [None]:
from tslearn.metrics import dtw, dtw_path
from tslearn.metrics import lcss, lcss_path
import numpy
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
from extract import *

def cal_lcss_path_and_score(s_y1, s_y2):
    """Run tslearn's ``lcss_path`` on two series with its default settings.

    Returns:
        The ``(path, score)`` pair produced by ``lcss_path``.
    """
    matched_pairs, similarity = lcss_path(s_y1, s_y2)
    return (matched_pairs, similarity)

def cal_dtw_path_and_score(s_y1, s_y2):
    """Run tslearn's ``dtw_path`` on two series with its default settings.

    Returns:
        The ``(path, score)`` pair produced by ``dtw_path``.
    """
    warping_path, distance = dtw_path(s_y1, s_y2)
    return (warping_path, distance)

def _load_story(file_name):
    """Read one gzip-compressed JSON story from the configured log store.

    ``file_name`` is appended verbatim to the ``LOG_STORE_PATH`` setting of the
    active environment, so that setting must end with a path separator.
    """
    # Imported here because no visible cell imports gzip explicitly.
    import gzip
    with open(cf['ENV_'+env]['LOG_STORE_PATH'] + file_name, "rb") as story_file:
        return json.loads(gzip.decompress(story_file.read()))


story_a = _load_story('GLT_SUKAMULYA_CBN_CM_BXP_2051_telog.log_BXP_2051_radio6626_2022_10_10')
story_b = _load_story('exiosuu_LTE_TALAGAKOCAK_GH_2052.log_BXP_2052_radio6626_2022_10_10')

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from bisect import bisect

def NormalizeData(data):
    """Min-max scale ``data`` so that its minimum maps to 0 and its maximum to 1.

    Args:
        data: non-empty numeric sequence or array.

    Returns:
        The rescaled values. Input whose values are all equal has no range to
        scale by, so an all-zero float array of the same shape is returned
        instead of the NaNs a 0/0 division would produce.
    """
    low = np.min(data)
    span = np.max(data) - low
    if span == 0:
        return np.zeros(np.shape(data), dtype=float)
    return (data - low) / span

def del_list_inplace(l, id_to_del):
    """Delete the elements of ``l`` at the positions in ``id_to_del``, in place.

    Positions are de-duplicated and removed from the highest to the lowest so
    that earlier deletions do not shift the positions still to be removed.
    """
    for i in sorted(set(id_to_del), reverse=True):
        del l[i]

keyword = 'Pma(c)'
kv_a_global_indices = [int(item) for item in story_a['kv']['txlProcBranchH'][keyword][-1]]
kv_b_global_indices = [int(item) for item in story_b['kv']['txlProcBranchH'][keyword][-1]]

kv_a = [[i, kv_a_global_indices[i], float(item)] for i, item in enumerate(story_a['kv']['txlProcBranchH'][keyword][0])]
kv_b = [[i, kv_b_global_indices[i], float(item)] for i, item in enumerate(story_b['kv']['txlProcBranchH'][keyword][0])]

kv_a = pd.DataFrame(kv_a, columns=['x', 'global_index','value'])
kv_a['category'] = 'story_a'
kv_a['loop'] = 'origin'

kv_b = pd.DataFrame(kv_b, columns=['x', 'global_index','value'])
kv_b['category'] = 'story_b'
kv_b['loop'] = 'origin'

# Collect every piece and concatenate once at the end: DataFrame.append no
# longer exists in pandas 2.x and re-copies the whole frame on each call.
frames = [kv_a, kv_b]

for loop in range(2):
    # Match the two normalised series, record the matched points, then drop
    # them so the next pass works on what is left.
    path, score = lcss_path(NormalizeData(list(kv_a.value.values)), NormalizeData(list(kv_b.value.values)), eps=0.1)
    o_a = [[i, kv_a['global_index'][item[0]], kv_a['value'][item[0]]] for i, item in enumerate(path)]
    o_b = [[i, kv_b['global_index'][item[1]], kv_b['value'][item[1]]] for i, item in enumerate(path)]
    tmp_a = pd.DataFrame(o_a, columns=['x', 'global_index','value'])
    tmp_a['category'] = 'story_a'
    tmp_a['loop'] = 'loop'+str(loop)
    tmp_b = pd.DataFrame(o_b, columns=['x', 'global_index','value'])
    tmp_b['category'] = 'story_b'
    tmp_b['loop'] = 'loop'+str(loop)
    frames.extend([tmp_a, tmp_b])

    kv_a = kv_a.drop(kv_a.index[ [item[0] for item in path] ]).reset_index(drop=True)
    kv_b = kv_b.drop(kv_b.index[ [item[1] for item in path] ]).reset_index(drop=True)

final = pd.concat(frames, ignore_index=True)

sns.relplot(
    data=final, kind="line",
    x="x", y="value", col="category", hue="loop")

In [None]:
# Plot only the matched points of each pass, leaving out the 'origin' series.
tmp = final[final['loop'] != 'origin'].reset_index(drop=True)
sns.relplot(data=tmp, x="x", y="value", hue="loop", col="category", kind="line")

In [None]:
# Same data as the first plot, positioned by global_index instead of x.
sns.relplot(data=final, x="global_index", y="value", hue="loop", col="category", kind="line")

In [None]:
process = 'txlProcBranchH'


def _error_global_indices(story):
    """Return the integer 'x' entries of the story's error table whose process equals ``process``."""
    errors = story['inverted_index_table']['error']
    return [int(errors['x'][row]) for row, name in enumerate(errors['process']) if name == process]


highlight_story = {}
story_a_error_global_indices = _error_global_indices(story_a)
story_b_error_global_indices = _error_global_indices(story_b)

highlight_story = {'story_a': story_a_error_global_indices, 'story_b': story_b_error_global_indices}

In [None]:
# For every (loop, story) pair, locate each highlighted global index within
# that pair's global_index values. bisect requires those values to be sorted.
res = []
# unique() keeps first-appearance order, so the result order is reproducible
# (iterating a set of strings is not).
for loop in final['loop'].unique():
    for story in final['category'].unique():
        gi = final.loc[(final['loop'] == loop)&(final['category'] == story), 'global_index'].values
        print(loop, story, len(gi))
        positions = [bisect(gi, index) for index in highlight_story[story]]
        # dict.fromkeys drops duplicates while keeping first-occurrence order.
        res.append({'loop':loop, 'category':story, 'position': list(dict.fromkeys(positions))})
res

In [None]:
# Population standard deviation of the story_a values recorded for 'loop1'.
story_a_loop1 = (final['loop'] == 'loop1') & (final['category'] == 'story_a')
np.std(final.loc[story_a_loop1, 'value'].values)

In [None]:
# Population standard deviation of the story_a values recorded for 'loop0'.
story_a_loop0 = (final['loop'] == 'loop0') & (final['category'] == 'story_a')
np.std(final.loc[story_a_loop0, 'value'].values)

In [None]:
# Where would `a` be inserted (to the right of equal entries) in the sorted list `b`?
a = 132
b = [0, 10, 30, 60, 100, 150, 210, 280, 340, 480, 530]
insertion_point = bisect(b, a)
print(insertion_point)

In [None]:
# 1. value range  2. similar line-segment shape  3. highlight falls within the interval  4. large variance

In [None]:
import re
import time

str1 = 'BXP_2: [2022-11-18 12:51:26.550407110] (+0.004950450) radio6626 com_ericsson_trithread:INFO: { cpu_id = 2 }, { process = "txlProcBranchI", fileAndLine = "dpdController.cc:1886", msg = "Gain started. Pma:-21.79[-41.54 -9.50] dB, DpdPma:-25.83[-26.49 -25.09] dB, Pmb:-21.79, TorPmb:-21.92[-56.79 -9.50] dB, avgTxPma:-inf dB, pmDpdIrqStat:0x00000000, pmScaleFactor: 65K" }\n'
# Raw strings keep the regex backslashes without invalid-escape warnings.
regex = r"\[%{TIMESTAMP:time}\] %{DROP:tmp}txAtt:%{INT:txAtt}, %{DROP:tmp}avgIMpa0:%{INT:avgIMpa0} "
# regex = "%{STRING:device}: \[%{TIMESTAMP:time}\] "
v_regex = regex
for i, r in enumerate(re.findall(r'%\{.*?\}', regex)):
    # Replace one occurrence per placeholder: a placeholder that appears more
    # than once (here %{DROP:tmp}) must map to a separate capture group, and
    # its backreference in v_regex must carry that group's own number.
    regex = regex.replace(r, '(.*?)', 1)
    v_regex = v_regex.replace(r, '<font color="color:#FFFFFF">'+"\\"+str(i+1)+'</font>', 1)

re.findall(regex, str1)

In [None]:
import uuid
import json #mcbPwr trxTxPwr

# Load the saved theme definition (JSON text) from disk.
path = 'save_log/Visby_Telog_All_Branch_txAtt_torpmb_avgImpa.txt'
with open(path, 'r') as theme_file:
    theme = json.load(theme_file)

In [None]:
import json
import uuid
branch = list('ABCDEFGHIJKL')
config = {'search': [], 'chart': [], 'statistic': []}

# One search definition per branch letter; only the letter varies between them.
for index, letter in enumerate(branch[:12]):
    item = {
        'namespace': '/' + uuid.uuid4().hex,
        'alias': 'Branch' + letter,
        'desc': 'Branch ' + letter + ' common use KeyValue and Mark',
        'exp_search': '(txlProcBranch{0}|TxBranchCtrl{0}).*(txAtt|linearization fault|external fault)'.format(letter),
        'exp_extract': [
            "{}[{timestamp:ti}] {}txAtt:{txAtt:d}, {}torTemperature:{torTemperature:d} {}avgIMpa0:{avgIMpa0:d} {}",
            "{}[{timestamp:ti}] {}",
        ],
        'exp_mark': [{"alias": "LF", "exp": "linearization fault", "color": "#f00000"}],
        'is_case_sensitive': True,
        'forward_rows': 0,
        'backward_rows': 0,
    }
    config['search'].append(item)

# Serialise the whole configuration and write it out as a single JSON document.
json_object = json.dumps(config)
with open("E:\\projects\\ericsson_flow\\new_files\\config.txt", "w") as outfile:
    outfile.write(json_object)

In [None]:
import uuid
branch = ['A','B','C','D','E','F','G','H','I','J','K','L']
config = {'search':[], 'keyValueTree':[]}
# Raw strings: the patterns contain regex escapes (\[ \] \( \) \.) that are
# invalid escape sequences in ordinary string literals.
time_prefix = r"\[%{TIMESTAMP:time}\] "
drop_prefix = time_prefix + "%{DROP:tmp}"
for index in range(12):
    n = str(index)
    item = {}
    item['uid'] = str(uuid.uuid4()).replace('-','')
    item['desc'] = 'PA measured values for driver name ' + 'Branch' + str(branch[index])
    item['search'] = 'PA & measured & driver'
    # One pattern per measured quantity of this branch number, plus a final
    # pattern that only captures the timestamp.
    item['regexs'] = [
        drop_prefix + "PaVddSv:" + n + "; value: %{INT:PaVddSv" + n + "}; ",
        drop_prefix + "DpaVddSv:" + n + "; value: %{INT:DpaVddSv" + n + "}; ",
        drop_prefix + "IMpaSv:" + n + r"\.0; value: %{INT:IMpaSv" + n + "0}; ",
        drop_prefix + "IMpaSv:" + n + r"\.1; value: %{INT:IMpaSv" + n + "1}; ",
        drop_prefix + "IDpaSv:" + n + r"\.0; value: %{INT:IDpaSv" + n + "0}; ",
        drop_prefix + "IDpaSv:" + n + r"\.1; value: %{INT:IDpaSv" + n + "1}; ",
        time_prefix + r"\(%{DROP:tmp}\) ",
    ]
    item['highlights'] = []
    config['search'].append(item)

In [None]:
# Extract the values following "txPma :" and "txDpdPma :" from str1 (DOTALL lets '.' span newlines).
# NOTE(review): the final lazy group `(.*?)` has nothing after it, so it always captures ''.
re.findall('txPma +: (.*?),(.*?)txDpdPma +: (.*?),(.*?)', str1, flags=re.DOTALL)

In [None]:
from utils import *
from text_analysis import TextAnalysisModel

search_model = {
    "namespace": '',
    'alias': 'test',
    "desc": 'test search',
    "exp_search": '(txlProcBranchJ|TxBranchCtrlJ).*(txAtt|linearization fault|external fault)',
    "exp_extract": [
                    '{}[{timestamp:ti}] {}txAtt:{txAtt:d}, {}torTemperature:{torTemperature:d} {}avgIMpa0:{avgIMpa0:d} {}',
                    '{}[{timestamp:ti}] {}'
                    ],
    "exp_mark": [{'exp':'linearization fault', 'alias':'LF'}, {'exp':'external fault', 'alias':'EF'}],
}

statistic_model = {
    "namespace": '',
    'alias': 'statistic',
    "desc": 'test statistic',
    "exp": 'print(test.txAtt)',
}

insight_model = {
    "namespace": '/test',
    'alias': 'test',
    "exp_search": '(txlProcBranchJ|TxBranchCtrlJ).*(txAtt|linearization fault)',
    "exp_extract": '{}[{timestamp:ti}]{}, msg = {msg}',
    "exp_mark": {'exp':'linearization fault', 'alias':'LF'},
}

path = 'D:\\projects\\ericsson_flow\\new_files\\E55G948878_LE_SARONGGE_BXP_6_telog.log'
test_text_analysis_model = TextAnalysisModel()
file_container_model = test_text_analysis_model.file_container_model

# Register the log file under a fresh namespace below the file container.
text_file_model_namespace = file_container_model.namespace + '/'+ str(uuid.uuid4())
file_container_model.on_new_file('', text_file_model_namespace, path)

text_file_model = file_container_model.text_file_models[text_file_model_namespace]
# `model` was never defined in this cell; the search definition above is the
# only dict here that matches how it is used (a 'namespace' key, passed to the
# search function/atom unit tests).
# NOTE(review): confirm search_model is the intended input.
model = search_model
model['namespace'] = text_file_model.text_file_function_model.search_function_model.namespace + '/'+ str(uuid.uuid4())
search_function_model = text_file_model.text_file_function_model.search_function_model.unit_test(model)
search_atom_model = search_function_model.search_atom_models[model['namespace']].unit_test(model)

In [None]:
from utils import *
from text_analysis import InsightAtomModel

# Definition copied onto the InsightAtomModel below.
insight_model = {
    "namespace": '/test',
    'alias': 'test',
    "exp_search": '(txlProcBranchJ|TxBranchCtrlJ).*(txAtt|linearization fault)',
    "exp_extract": '{}[{timestamp:ti}]{}, msg = {msg}',
    "exp_mark": {'exp': 'linearization fault', 'alias': 'LF', 'color': '#f00000'},
}

# Read the whole log and wrap its lines in an object exposing a `lines` attribute.
path = 'D:\\projects\\ericsson_flow\\new_files\\E55G948878_LE_SARONGGE_BXP_6_telog.log'
with open(path, 'r') as log_file:
    lines = log_file.readlines()
text_file_model = json_to_object({'lines': lines})

# Create the atom, overwrite its attributes with the definition and attach the text.
insightAtomModel = InsightAtomModel('/test')
insightAtomModel.__dict__.update(insight_model)
insightAtomModel.text_file_model = text_file_model

In [None]:
insightAtomModel.insight()

In [None]:
insightAtomModel.outlier

In [None]:
type('111').__name__

In [None]:
# Print every entry of res_key_value whose recorded type is 'str', with its value.
for key, entry in insightAtomModel.res_key_value.__dict__.items():
    if entry.type != 'str':
        continue
    print(key, entry.value)

In [None]:
import re
from parse import parse

str1 = 'BXP_3: [2022-11-23 22:04:44.572174570] (+0.000333470) radio6626 com_ericsson_trithread:INFO: { cpu_id = 1 }, { process = "txlProcBranchB", fileAndLine = "dpdController.cc:1887", msg = "txAtt:289, txAttPeak:0, dpGainLoopEnable:true, dpGainCtrlType:DSA_AD_TXFE, torTemperature:495 (0.1C), torGainBackoff:0 (0.01dB), torGainLin:3.49543(0.01dB), torStepBit:6, cc0Ctrl1=0x00000118 , avgIMpa0:1980 [mAmp]" }'
str2 = 'BXP_3: [2022-12-10 15:55:26.739019220] (+0.000025340) radio6626 com_ericsson_trithread:INFO: { cpu_id = 1 }, { process = "TxBranchCtrlB", fileAndLine = "txChangeCycleHelper.cc:264", msg = "Txl branch J restart due to txL linearization fault!" }'
# str1 = "BXP_2: [221120 164014] 27: PA measured values for driver name: DpaVddSv:7; value: 26992; branch Id: 7"
# exp = "\[%{TIMESTAMP:time}\] \(%{STRING:cost}\) "
# exp = "\[%{TIMESTAMP:time}\] %{DROP:tmp}Pma:%{FLOAT:Pma0}\[%{DROP:tmp1}DpdPma:%{FLOAT:DpdPma0}\[%{DROP:tmp2}Pmb:%{FLOAT:Pmb}, TorPmb:%{FLOAT:TorPmb0}\[%{FLOAT:TorPmb1} %{FLOAT:TorPmb2}\] "
exp_search = '(txlProcBranchB|TxBranchCtrlB).*(txAtt|linearization fault)'
exp_extract = '{}[{timestamp:ti}] {}txAtt:{txAtt:d}, {}torTemperature:{torTemperature:d} {}avgIMpa0:{avgIMpa0:d} {}'
r_search = re.findall(exp_search, str1)
r_extract = parse(exp_extract, str1)
print(r_search)
# parse() returns None when the text does not fit the format, so guard the
# attribute access instead of failing with AttributeError.
print(r_extract.named if r_extract is not None else None)

In [1]:
from tslearn.metrics import dtw_path, lcss_path
import numpy as np

def cal_lcss_path_and_score(s_y1, s_y2):
    """Run tslearn's ``lcss_path`` on two series with its default settings.

    NOTE(review): an identical function is defined in an earlier cell of this
    notebook; this definition replaces it when the cell runs.

    Returns:
        The ``(path, score)`` pair produced by ``lcss_path``.
    """
    matched_pairs, similarity = lcss_path(s_y1, s_y2)
    return (matched_pairs, similarity)

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [4]:
# Sanity check: DTW alignment of a 16-sample 0/1 series against a 4-sample one.
a = [0,1,1,1,1,1,1,1,0,0,1,1,0,0,1,1]
b = [1,1,0,0]
dtw_path(a, b)

([(0, 0),
  (1, 0),
  (2, 0),
  (3, 0),
  (4, 0),
  (5, 0),
  (6, 0),
  (7, 0),
  (8, 0),
  (9, 0),
  (10, 0),
  (11, 1),
  (12, 2),
  (13, 2),
  (14, 2),
  (15, 3)],
 2.23606797749979)

In [None]:
lcss_path?

In [None]:
# Population standard deviation of a series alternating between 0 and 100.
a= [0,100,0,100,0,100]
np.std(a)

In [None]:
np.std(b)

In [None]:
class A():
    """Small experiment class with two instance attributes and a print helper."""

    def __init__(self):
        self.a = 1
        self.b = 2

    # The former ``def __dict__(self): return`` was removed: defining
    # ``__dict__`` as a method hides the instance dictionary, so
    # ``obj.__dict__`` returned a bound method (whose call gave None) instead
    # of the attribute mapping.

    def test(self, num):
        """Print ``num``."""
        print(num)

class B(A):
    """Subclass of A that adds the attributes ``c`` and ``d`` after A's initialisation."""
    def __init__(self):
        super().__init__()
        self.c = 3
        self.d = {'ins': ''}
        
class PubSub():
    """Registry of namespaced rooms plus subscribers waiting for a room to be booked."""

    def __init__(self):
        self.room = {}
        self.wait = {}

    def reference(self, namespace):
        """Return the instance stored for ``namespace`` (KeyError if it was never booked)."""
        entry = self.room[namespace]
        return entry['ins']

    def book(self, namespace, ins, func):
        """Create or replace the room for ``namespace`` and adopt its waiting subscribers.

        The new room starts with an empty subscriber list; anything queued in
        ``self.wait`` for this namespace is moved into it and the queue entry
        is removed.
        """
        entry = {'ins': ins, 'subscriber': [], 'action': func}
        self.room[namespace] = entry
        if namespace not in self.wait:
            return
        for pending in self.wait[namespace]:
            entry['subscriber'].append(pending)
        del self.wait[namespace]
        
# a = A()
# Create two B instances and one PubSub; `e` is the bound `reference` method
# of `d` (it is not called here).
b = B()
c = B()
d = PubSub()
e = d.reference

In [None]:
import re
from parse import parse

def key_value_replace(word):
    """``re.sub`` callback: print the whole match and its first two groups, then delete the match.

    Args:
        word: match object with at least two groups.

    Returns:
        The empty string, so the matched text is removed.
    """
    whole, first, second = word.group(0, 1, 2)
    print(whole, first, second)
    return ''
    
def clean_special_symbols(text, symbol):
    """Replace the listed special symbols in ``text`` by ``symbol`` and collapse repeats.

    Args:
        text: string to clean.
        symbol: replacement string. It is treated literally, so regex
            metacharacters such as ``'.'`` or ``'+'`` are safe to pass.

    Returns:
        The cleaned string, with every run of consecutive ``symbol``
        occurrences reduced to a single one. With an empty ``symbol`` the
        listed characters are simply removed.
    """
    for ch in ['::', '/','*','{','}','[',']','(',')','#','+','-','!',';',',','"','\'','>','<','@','`','$','^','&','|','\n']:
        text = text.replace(ch, symbol)
    if not symbol:
        return text
    # Escape the symbol for the pattern and use a callable replacement so that
    # neither regex metacharacters nor backslashes in it are interpreted.
    return re.sub('(?:' + re.escape(symbol) + ')+', lambda _match: symbol, text)

msg = 'BXP_3: [2022-11-23 22:04:44.572174570] (+0.000333470) radio6626 com_ericsson_trithread:INFO: { cpu_id = 1 }, { process = "txlProcBranchB", fileAndLine = "dpdController.cc:1887", msg = "txAtt:289, txAttPeak:0, dpGainLoopEnable:true, dpGainCtrlType:DSA_AD_TXFE, torTemperature:495 (0.1C), torGainBackoff:0 (0.01dB), torGainLin:3.49543(0.01dB), torStepBit:6, cc0Ctrl1=0x00000118 , avgIMpa0:1980 [mAmp]" }'
# msg = 'BXP_5: [2022-12-10 02:23:19.912787758] (+0.000603078) radio6626 com_ericsson_trithread:INFO: { cpu_id = 2 }, { process = "txlProcBranchH", fileAndLine = "dpdController.cc:1886", msg = "Wait for data. Pma:-17.49[-41.54 -9.50] dB, DpdPma:-21.52[-22.19 -20.79] dB, Pmb:-17.49, TorPmb:-27.69[-52.49 -9.50] dB, avgTxPma:-inf dB, pmDpdIrqStat:0x00000000, pmScaleFactor: 4K" }'
# Keep `msg` bound to the parse result: a later cell reads `msg.named`, which
# fails if `msg` is overwritten with the cleaned text.
msg = parse('{}[{timestamp:ti}]{}, msg = {msg}', msg)
cleaned_msg = clean_special_symbols(msg.named['msg'], ' ')

# for k, v in re.findall('([A-Za-z0-9_.]+?)[ ]?[:=][ ]?(.*?) ', cleaned_msg):
#     print(k, v)

re.sub('([A-Za-z0-9_.]+?)[ ]?[:=][ ]?(.*?) ', key_value_replace, cleaned_msg)

In [None]:
msg.named

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline
from scipy.ndimage import gaussian_filter1d

# NOTE(review): `data` is not defined in any visible cell of this notebook;
# presumably an object with `x` and `y` columns left over from an earlier
# session. Confirm before relying on Restart & Run All.
r = 66  # number of leading samples kept
x = np.array(list(data.x.values)[0:r])
y = np.array(list(data.y.values)[0:r])

In [None]:
y

In [None]:
def reject_outliers(data, m = 2.):
    """Drop the entries of ``data`` lying far from the median.

    The distance of each entry to the median is divided by the median of those
    distances; entries whose ratio is below ``m`` are kept.

    Args:
        data: numeric array supporting boolean-mask indexing.
        m: rejection threshold on the scaled distance.

    Returns:
        The retained entries, with the same number of dimensions as ``data``.
    """
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    # With a zero median deviation every ratio is treated as 0. It has to be
    # an array of zeros: a scalar would turn `s < m` into a single boolean and
    # `data[True]` would add an axis instead of selecting entries.
    s = d / mdev if mdev else np.zeros(np.shape(d))
    return data[s<m]

# The function requires the data to filter; apply it to the `y` samples.
reject_outliers(y)

In [None]:
def reject_outliers(data, m=2):
    """Keep the entries of ``data`` strictly closer to the mean than ``m`` standard deviations.

    NOTE(review): this redefines ``reject_outliers`` from the previous cell
    with a different (mean/std based) rule.
    """
    center = np.mean(data)
    limit = m * np.std(data)
    return data[abs(data - center) < limit]

reject_outliers(y)

In [None]:
# Degree-4 spline with s=0 (no smoothing), evaluated on 1000 evenly spaced points.
y_spl = UnivariateSpline(x, y, s=0, k=4)
x_range = np.linspace(x[0], x[-1], 1000)
y_fit = y_spl(x_range)

# Samples as red dots and the fitted curve, both on a logarithmic y axis.
plt.semilogy(x, y, 'ro', label = 'data')
plt.semilogy(x_range, y_fit)