In [1]:
import numpy as np
import math
import re
import pandas as pd
from bs4 import BeautifulSoup
import random

from google.colab import drive

In [2]:
!pip install bert-for-tf2
!pip install sentencepiece

Collecting bert-for-tf2
[?25l  Downloading https://files.pythonhosted.org/packages/a5/a1/acb891630749c56901e770a34d6bac8a509a367dd74a05daf7306952e910/bert-for-tf2-0.14.9.tar.gz (41kB)
[K     |████████                        | 10kB 12.5MB/s eta 0:00:01[K     |████████████████                | 20kB 13.2MB/s eta 0:00:01[K     |███████████████████████▉        | 30kB 9.6MB/s eta 0:00:01[K     |███████████████████████████████▉| 40kB 8.8MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 4.7MB/s 
[?25hCollecting py-params>=0.9.6
  Downloading https://files.pythonhosted.org/packages/75/2c/2256f28ef35946682ce703e69de914773c3f62048f4de6966d4e2dc1930a/py-params-0.10.1.tar.gz
Collecting params-flow>=0.8.0
  Downloading https://files.pythonhosted.org/packages/a9/95/ff49f5ebd501f142a6f0aaf42bcfd1c192dc54909d1d9eb84ab031d46056/params-flow-0.8.2.tar.gz
Building wheels for collected packages: bert-for-tf2, py-params, params-flow
  Building wheel for bert-for-tf2 (setup.py) ... [

In [3]:
try:
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf

import tensorflow_hub as hub

from tensorflow.keras import layers
import bert

In [4]:
drive.mount("/content/drive")

Mounted at /content/drive


In [5]:
cols = ["Error", "Category"]
data = pd.read_csv(
    "/content/drive/My Drive/Colab Notebooks/error/eng_error_with_cat.csv",
    header=None,
    names=cols,
    engine="python",
    encoding="latin1"
)

In [6]:
def clean_tweet(tweet):
    tweet = BeautifulSoup(tweet, "lxml").get_text()
    # Removing the @
    tweet = re.sub(r"@[A-Za-z0-9]+", ' ', tweet)
    # Removing the URL links
    tweet = re.sub(r"https?://[A-Za-z0-9./]+", ' ', tweet)
    # Keeping only letters
    tweet = re.sub(r"[^a-zA-Z.!?']", ' ', tweet)
    # Removing additional whitespaces
    tweet = re.sub(r" +", ' ', tweet)
    return tweet

In [8]:
print(data['Error'][0])
print(clean_tweet(data['Error'][0]))

docker: Error response from daemon: driver failed programming external connectivity on endpoint elastic_williams (d41be30d46cae4865b3fb513c280fca4dd080aa3252c8e0984dad8c96894605d): Error starting userland proxy: listen tcp 0.0.0.0:8000: bind: address already in use.
docker Error response from daemon driver failed programming external connectivity on endpoint elastic williams d be d cae b fb c fca dd aa c e dad c d Error starting userland proxy listen tcp . . . bind address already in use.


In [None]:
data_clean = [clean_tweet(tweet) for tweet in data.Error]

In [None]:
data_labels = data.Category.values

In [None]:
FullTokenizer = bert.bert_tokenization.FullTokenizer
bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
                            trainable=False)
vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = FullTokenizer(vocab_file, do_lower_case)

In [None]:
def encode_sentence(sent):
    return tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sent))

In [None]:
data_inputs = [encode_sentence(sentence) for sentence in data_clean]

In [None]:
data_with_len = [[sent, data_labels[i], len(sent)]
                 for i, sent in enumerate(data_inputs)]
random.shuffle(data_with_len)
data_with_len.sort(key=lambda x: x[2])
sorted_all = [(sent_lab[0], sent_lab[1])
              for sent_lab in data_with_len if sent_lab[2] > 7]

In [None]:
all_dataset = tf.data.Dataset.from_generator(lambda: sorted_all,
                                             output_types=(tf.int32, tf.int32))

In [None]:
BATCH_SIZE = 32
all_batched = all_dataset.padded_batch(BATCH_SIZE, padded_shapes=((None, ), ()))

In [None]:
NB_BATCHES = math.ceil(len(sorted_all) / BATCH_SIZE)
NB_BATCHES_TEST = NB_BATCHES // 10
all_batched.shuffle(NB_BATCHES)
test_dataset = all_batched.take(NB_BATCHES_TEST)
train_dataset = all_batched.skip(NB_BATCHES_TEST)

In [None]:
class DCNN(tf.keras.Model):
    
    def __init__(self,
                 vocab_size,
                 emb_dim=128,
                 nb_filters=50,
                 FFN_units=512,
                 nb_classes=2,
                 dropout_rate=0.1,
                 training=False,
                 name="dcnn"):
        super(DCNN, self).__init__(name=name)
        
        self.embedding = layers.Embedding(vocab_size,
                                          emb_dim)
        self.bigram = layers.Conv1D(filters=nb_filters,
                                    kernel_size=2,
                                    padding="valid",
                                    activation="relu")
        self.trigram = layers.Conv1D(filters=nb_filters,
                                     kernel_size=3,
                                     padding="valid",
                                     activation="relu")
        self.fourgram = layers.Conv1D(filters=nb_filters,
                                      kernel_size=4,
                                      padding="valid",
                                      activation="relu")
        self.pool = layers.GlobalMaxPool1D()
        
        self.dense_1 = layers.Dense(units=FFN_units, activation="relu")
        self.dropout = layers.Dropout(rate=dropout_rate)
        if nb_classes == 2:
            self.last_dense = layers.Dense(units=1,
                                           activation="sigmoid")
        else:
            self.last_dense = layers.Dense(units=nb_classes,
                                           activation="softmax")
    
    def call(self, inputs, training):
        x = self.embedding(inputs)
        x_1 = self.bigram(x) # batch_size, nb_filters, seq_len-1)
        x_1 = self.pool(x_1) # (batch_size, nb_filters)
        x_2 = self.trigram(x) # batch_size, nb_filters, seq_len-2)
        x_2 = self.pool(x_2) # (batch_size, nb_filters)
        x_3 = self.fourgram(x) # batch_size, nb_filters, seq_len-3)
        x_3 = self.pool(x_3) # (batch_size, nb_filters)
        
        merged = tf.concat([x_1, x_2, x_3], axis=-1) # (batch_size, 3 * nb_filters)
        merged = self.dense_1(merged)
        merged = self.dropout(merged, training)
        output = self.last_dense(merged)
        
        return output

In [None]:
VOCAB_SIZE = len(tokenizer.vocab)
EMB_DIM = 200
NB_FILTERS = 100
FFN_UNITS = 256
NB_CLASSES = 4

DROPOUT_RATE = 0.2

NB_EPOCHS = 5

In [None]:
Dcnn = DCNN(vocab_size=VOCAB_SIZE,
            emb_dim=EMB_DIM,
            nb_filters=NB_FILTERS,
            FFN_units=FFN_UNITS,
            nb_classes=NB_CLASSES,
            dropout_rate=DROPOUT_RATE)

In [None]:
if NB_CLASSES == 2:
    Dcnn.compile(loss="binary_crossentropy",
                 optimizer="adam",
                 metrics=["accuracy"])
else:
    Dcnn.compile(loss="sparse_categorical_crossentropy",
                 optimizer="adam",
                 metrics=["sparse_categorical_accuracy"])

In [None]:
checkpoint_path = "./drive/My Drive/Colab Notebooks/error/new_ckp"

ckpt = tf.train.Checkpoint(Dcnn=Dcnn)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=1)

if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Latest checkpoint restored!!")

In [None]:
class MyCustomCallback(tf.keras.callbacks.Callback):

    def on_epoch_end(self, epoch, logs=None):
        ckpt_manager.save()
        print("Checkpoint saved at {}.".format(checkpoint_path))

In [None]:
Dcnn.fit(train_dataset,
         epochs=NB_EPOCHS,
         callbacks=[MyCustomCallback()])

Epoch 1/5
Checkpoint saved at ./drive/My Drive/Colab Notebooks/error/new_ckp.
Epoch 2/5
Checkpoint saved at ./drive/My Drive/Colab Notebooks/error/new_ckp.
Epoch 3/5
Checkpoint saved at ./drive/My Drive/Colab Notebooks/error/new_ckp.
Epoch 4/5
Checkpoint saved at ./drive/My Drive/Colab Notebooks/error/new_ckp.
Epoch 5/5
Checkpoint saved at ./drive/My Drive/Colab Notebooks/error/new_ckp.


<tensorflow.python.keras.callbacks.History at 0x7f2656881b70>

In [None]:
export_path = './drive/My Drive/Colab Notebooks/error/new_ckp/save_error_cnn.pb'
Dcnn.save(export_path, save_format='tf')

INFO:tensorflow:Assets written to: ./drive/My Drive/Colab Notebooks/error/new_ckp/save_error_cnn.pb/assets


INFO:tensorflow:Assets written to: ./drive/My Drive/Colab Notebooks/error/new_ckp/save_error_cnn.pb/assets


In [None]:
export_path = 'new_ckp/1'
weights_path = './drive/My Drive/Colab Notebooks/error/new_ckp/save_error_cnn.h5'
load_model = Dcnn.load_weights(weights_path)

In [None]:
from tensorflow import keras
model = keras.models.load_model(weights_path)

ValueError: ignored

In [None]:
def get_prediction(sentence):
    tokens = encode_sentence(sentence)
    #print(tokenizer.convert_ids_to_tokens(tokens))
    inputs = tf.expand_dims(tokens, 0)

    output = Dcnn(inputs, training=False)
    #sentiment = math.floor(output*2)
    sentiment = np.argmax(output)

    print('0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure')
    if sentiment == 0:
        print("Ouput of the model: {}\nPredicted Port Error.".format(
            output))
    elif sentiment == 1:
        print("Ouput of the model: {}\nPredicted Pod Error.".format(
            output))
    elif sentiment == 2:
        print("Ouput of the model: {}\nPredicted No Network Access Error.".format(
            output))
    elif sentiment == 3:
        print("Ouput of the model: {}\nPredicted Application Failure Error.".format(
            output))
    if np.max(output) < 0.5:
      print('but unstable')

In [None]:
def clean_input(question):
  words = question.split()
  tmp = ""
  for word in words:
    if word.isalpha():
      print(word)
      tmp = tmp + word + " "
  print(tmp)
  return tmp

In [None]:
tmp = clean_tweet('갑자기 kubectl 명령어가 안먹네요. 명령어 not found 납니다.')
en = encode_sentence(tmp)
print(tmp.split())
print(len(en))
print(en)

['kubectl', '.', 'not', 'found', '.']
8
[13970, 4783, 6593, 2140, 1012, 2025, 2179, 1012]


In [None]:
question = "안녕하세여 제 이름은 이진수 입니다 도대체 이런 에러가 왜 나오는 걸까요. listen tcp 0.0.0.0:8085: bind: address already in use. 이런 에러가 나오는데 도와주세요."
clean_question = clean_tweet(question)
#print(clean_question)
print('with cleansing: ')
get_prediction(clean_question)
print("")
print('without cleansing: ')
get_prediction(question)

with cleansing: 
0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.80445087 0.04733028 0.12795943 0.02025938]]
Predicted Port Error.

without cleansing: 
0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.8145586  0.06794474 0.08773543 0.02976128]]
Predicted Port Error.


In [None]:
question = "안녕하세여 제 이름은 이진수 입니다 도대체 이런 에러가 왜 나오는 걸까요. [ERROR] Failed to execute goal org.springframework.boot:spring-boot-maven-plugin:2.1.1.RELEASE:run (default-cli) \
on project boot-camp-delivery: An exception occurred while running. null: InvocationTargetException: Connector configured to listen on port 8082 failed to start -> [Help 1] \
이런 에러가 나오는데 도와주세요."
clean_question = clean_tweet(question)
#print(clean_question)
print('with cleansing: ')
get_prediction(clean_question)
print("")
print('without cleansing: ')
get_prediction(question)

with cleansing: 
0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[9.9965465e-01 2.2034482e-04 9.6163254e-05 2.8851946e-05]]
Predicted Port Error.

without cleansing: 
0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[9.9389267e-01 4.0482557e-03 1.5529150e-03 5.0620188e-04]]
Predicted Port Error.


In [None]:
get_prediction("alkjblijij lkjlaidj alkbkjiekl dij listen tcp 0.0.0.0:8085: bind: address already in use. lkdiajlkfsjie ")

0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.82105136 0.07370572 0.07236295 0.03287995]]
Predicted Port Error.


In [None]:
get_prediction("askdii kigi kknaksndi lwlkjoifj lk,xmlkmoifl PullImage 'jimmy3553:0.10' from image service failed: rpc error: code = Unknown desc lkjsijk,xjoielk ")

0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.02292391 0.9579741  0.005605   0.01349702]]
Predicted Pod Error.


In [None]:
get_prediction("NameError: name 'jimmy366377372' is not defined")

0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[1.8408115e-03 3.5046900e-03 6.4135384e-04 9.9401313e-01]]
Predicted Application Failure Error.


In [None]:
get_prediction("Error getting metrics from dnsmasq ")

0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.01210862 0.01220064 0.9672984  0.00839228]]
Predicted No Network Access Error.


In [None]:
get_prediction("Error while fetching metadata with correlation id 22 : {shop=LEADER_NOT_AVAILABLE} (org.apache.kafka.clients.NetworkClient)") # 이건 학습 x된 에러

0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.42923784 0.24500684 0.19951493 0.12624045]]
Predicted Port Error.
but unstable


In [None]:
get_prediction("cars apples bananamuchim hello my name is Error getting metrics from dnsmasq: read udp 127.0.0.1:53159->127.0.0.1:53: read: connection refused \
is here for us")

0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.0184464  0.00704717 0.962921   0.01158539]]
Predicted No Network Access Error.


In [None]:
q = input()
get_prediction(q)

hello my naem is jinsu lee i have question about you
0: Port Error, 1: Pod Error, 2: No Network Access, 3: Application Failure
Ouput of the model: [[0.41026315 0.15011324 0.16062963 0.27899393]]
Predicted Port Error.
but unstable
