Predict the score (pass/fail) for a Jannah.io Boot Deployment Using Log Files

In [5]:
import collections
import pathlib
import random

import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import utils
from tensorflow.keras.layers import TextVectorization

import tensorflow_datasets as tfds
import tensorflow_text as tf_text

In [None]:
#papermill_description=Determine_Directory_Paths_For_Sorting_Log_Files
# NOTE(review): `provisioner` is not defined in the cells shown here; presumably it is
# injected as a notebook parameter before this cell runs -- TODO confirm.
Jannah_Config = provisioner["inventory"]["group_vars"]["all"]["Jannah"]
MOLECULE_EPHEMERAL_DIRECTORY = provisioner["env"]["MOLECULE_EPHEMERAL_DIRECTORY"]

# Root directory for the raw logs, with one sub-directory per dataset split.
Logs_input = f"{MOLECULE_EPHEMERAL_DIRECTORY}/logs"
Logs_train_input = f"{Logs_input}/train"
Logs_test_input = f"{Logs_input}/test"

# One directory per (split, outcome) pair.
Logs_train_input_pass_deployment = f"{Logs_train_input}/pass"
Logs_train_input_fail_deployment = f"{Logs_train_input}/fail"
Logs_test_input_pass_deployment = f"{Logs_test_input}/pass"
Logs_test_input_fail_deployment = f"{Logs_test_input}/fail"

# Lookup table: sorted_deployment_log_paths[split][outcome] -> directory path string.
sorted_deployment_log_paths = {
    "train": {
        "pass": Logs_train_input_pass_deployment,
        "fail": Logs_train_input_fail_deployment,
    },
    "test": {
        "pass": Logs_test_input_pass_deployment,
        "fail": Logs_test_input_fail_deployment,
    },
}

In [None]:
#papermill_description=Create_Directories_For_Sorting_Log_Files
# Create the {train,test}/{pass,fail} directory tree with pathlib rather than `!mkdir -vp`:
# unquoted shell interpolation breaks on paths containing spaces or shell metacharacters,
# and a failing shell command would not stop the notebook, whereas Path.mkdir raises.
for _log_dir in (
    Logs_train_input_pass_deployment,
    Logs_train_input_fail_deployment,
    Logs_test_input_pass_deployment,
    Logs_test_input_fail_deployment,
):
    # parents=True / exist_ok=True mirror `mkdir -p`: create missing ancestors and
    # tolerate directories that already exist.
    pathlib.Path(_log_dir).mkdir(parents=True, exist_ok=True)
    print(f"directory ready: {_log_dir}")

In [None]:
#papermill_description=Copy_Log_Files_For_Processing
# Shell escape: copy every *.log file from ~/jannah-operator into the Logs_input directory.
# -p preserves file attributes; IPython substitutes $Logs_input with the Python variable's value.
# NOTE(review): the source directory is hard-coded relative to the home directory -- confirm
# it matches the environment the notebook is executed in.
!cp -rp ~/jannah-operator/*.log $Logs_input/

In [None]:
#papermill_description=Glob_Log_Files
# Path.glob returns a one-shot generator; once a later cell iterates it, a re-run of that
# cell would silently see no files. Materialise it as a sorted list so it can be iterated
# repeatedly and the order is deterministic.
log_files = sorted(pathlib.Path(Logs_input).glob("*.log"))

In [None]:
#TODO: To increase the difficulty of the classification problem, remove occurrences of the phrases "All assertions passed" and "Assertion failed" from the buffer
#      and write the buffer back to the file.

In [None]:
#papermill_description=Determine_Pass_or_Fail_Status_For_Each_Deployment_File
# A deployment log counts as "pass" only when the success marker is present and the
# failure marker is absent; every other log is filed under "fail".
_FILE_BUF = ""
pass_deployments = []
failed_deployments = []
for _file in sorted(log_files):
    with _file.open() as fd:
        _FILE_BUF = fd.read()
    has_success_marker = "All assertions passed" in _FILE_BUF
    has_failure_marker = "Assertion failed" in _FILE_BUF
    if has_success_marker and not has_failure_marker:
        target_bucket = pass_deployments
    else:
        target_bucket = failed_deployments
    target_bucket.append(_file)

In [None]:
#papermill_description=Print_Pass_Deployment_Counts
# Display how many log files were classified as passing deployments.
len(pass_deployments)

In [None]:
#papermill_description=Print_Failed_Deployment_Counts
# Display how many log files were classified as failed deployments.
len(failed_deployments)

In [None]:
#papermill_description=Sort_Pass_Deployments_Into_Train_or_Test_Directories
# Move each passing log into the train or test "pass" directory with an 80/20 weighting,
# renaming it to .txt (text_dataset_from_directory only loads .txt files).
# NOTE(review): no random seed is set in the visible cells, so the split differs between runs.
for _file in pass_deployments:
    train_or_test_group = random.choices(population=['train', 'test'], weights=[80, 20])[0]
    # with_suffix swaps only the final extension; str.replace(".log", ".txt") would also
    # rewrite a ".log" that appears earlier in the file name.
    _new_file_name = _file.with_suffix(".txt").name
    _new_file = f"{sorted_deployment_log_paths[train_or_test_group]['pass']}/{_new_file_name}"
    _file.rename(_new_file)

In [None]:
#papermill_description=Sort_Fail_Deployments_Into_Train_or_Test_Directories
# Move each failed log into the train or test "fail" directory with an 80/20 weighting,
# renaming it to .txt (text_dataset_from_directory only loads .txt files).
# NOTE(review): no random seed is set in the visible cells, so the split differs between runs.
for _file in failed_deployments:
    train_or_test_group = random.choices(population=['train', 'test'], weights=[80, 20])[0]
    # with_suffix swaps only the final extension; str.replace(".log", ".txt") would also
    # rewrite a ".log" that appears earlier in the file name.
    _new_file_name = _file.with_suffix(".txt").name
    _new_file = f"{sorted_deployment_log_paths[train_or_test_group]['fail']}/{_new_file_name}"
    _file.rename(_new_file)

In [None]:
#papermill_description=Text_Dataset_From_Directory
# Settings for loading the training data; batch_size and seed are plain module-level names.
train_dir = Logs_train_input
batch_size = 32
seed = 80

# Load the "training" subset of train_dir; validation_split=0.2 reserves the remainder,
# and the fixed seed keeps the split stable across calls that use the same seed.
train_split_options = {
    "batch_size": batch_size,
    "validation_split": 0.2,
    "subset": "training",
    "seed": seed,
}
raw_train_ds = utils.text_dataset_from_directory(train_dir, **train_split_options)

In [None]:
#papermill_description=Print_a_Sample_Line
# Print up to 10 (text, label) examples from the first training batch.
for text_batch, label_batch in raw_train_ds.take(1):
    # Convert the tensors once instead of on every loop iteration.
    sample_texts = text_batch.numpy()
    sample_labels = label_batch.numpy()
    # A batch may contain fewer than 10 items when the dataset is small; clamp the range
    # so indexing cannot raise IndexError.
    for i in range(min(10, len(sample_texts))):
        print("Log Line: ", sample_texts[i])
        print("Label:", sample_labels[i])

In [None]:
#papermill_description=Print_the_Labels
# Show which integer label index maps to which class name.
for label_index, class_name in enumerate(raw_train_ds.class_names):
    print(f"Label {label_index} corresponds to {class_name}")

In [None]:
#papermill_description=Create_a_validation_set.
# Load the "validation" subset of train_dir, using the same validation_split and seed
# values as the training dataset call.
raw_val_ds = utils.text_dataset_from_directory(
    train_dir, batch_size=batch_size, validation_split=0.2, subset="validation", seed=seed
)

In [None]:
#papermill_description=Create_a_test_set
# Load the whole test directory (no validation split) with the shared batch size.
test_dir = Logs_test_input
raw_test_ds = utils.text_dataset_from_directory(test_dir, batch_size=batch_size)

In [None]:
#papermill_description=Configure_the_datasets_for_performance
# Train and validation datasets are cached and prefetched; the test dataset is only prefetched.
AUTOTUNE = tf.data.AUTOTUNE
raw_train_ds = raw_train_ds.cache().prefetch(buffer_size=AUTOTUNE)
raw_val_ds = raw_val_ds.cache().prefetch(buffer_size=AUTOTUNE)
raw_test_ds = raw_test_ds.prefetch(buffer_size=AUTOTUNE)