#  Incident Root Cause Analysis for DB ORA Errors

## Install required libraries

In [1]:
import sys
pip install pandas tensorflow scikit-learn --user

Note: you may need to restart the kernel to use updated packages.


## Importing required libraries

In [29]:
# Importing required libraries
import pandas as pd
import os
import tensorflow as tf
import time

## Loading the Dataset

In [3]:
# Load the dataset into a Pandas Dataframe
incidents = pd.read_csv("rca.csv")

# Exploring the loaded data
print(incidents.dtypes)
incidents.head()

Row No          int64
CPU_Load        int64
Memory_Load     int64
Latency         int64
ORA00600        int64
ORA07445        int64
ORA03113        int64
ORA12154        int64
RCA            object
dtype: object


Unnamed: 0,Row No,CPU_Load,Memory_Load,Latency,ORA00600,ORA07445,ORA03113,ORA12154,RCA
0,1,0,0,0,0,1,0,1,Low Memory
1,2,0,0,0,0,0,0,1,Low Memory
2,3,0,1,1,0,0,1,1,Low Memory
3,4,0,1,0,1,1,0,1,Low Memory
4,5,1,1,0,1,0,1,0,Network Latency


## Converting the Data

In [4]:
from sklearn import preprocessing

label_encoder = preprocessing.LabelEncoder()
incidents['RCA'] = label_encoder.fit_transform(
                                incidents['RCA'])

# Convert Pandas DataFrame to a numpy vector
np_symptom = incidents.to_numpy().astype(float)

# Extract the feature variables (X)
X_train = np_symptom[:,1:8]

# Extract the target variable (Y), convert to one-hot-encoding
Y_train=np_symptom[:,8]
Y_train = tf.keras.utils.to_categorical(Y_train,3)

print("Shape of feature variables :", X_train.shape)
print("Shape of target variable :",Y_train.shape)

Shape of feature variables : (1000, 7)
Shape of target variable : (1000, 3)


## Building the Deep Learning Model with Keras

### Model 1

In [43]:
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.regularizers import l2
from sklearn.metrics import confusion_matrix

# Setup Training Parameters
EPOCHS=20
BATCH_SIZE=100
VERBOSE=1
OUTPUT_CLASSES=len(label_encoder.classes_)
N_HIDDEN=4
VALIDATION_SPLIT=0.2

#Create a Keras sequential model
model = tf.keras.models.Sequential()
#Add a Dense Layer
model.add(keras.layers.Dense(N_HIDDEN,
                             input_shape=(7,),
                              name='Dense-Layer-1',
                              activation='relu'))

# Add a second dense layer
model.add(keras.layers.Dense(N_HIDDEN,
                              name='Dense-Layer-2',
                              activation='relu'))

# Add a softmax layer for categorial prediction
model.add(keras.layers.Dense(OUTPUT_CLASSES,
                             name='Final',
                             activation='softmax'))

# Compile the model, using Adam optimizer
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Build the model
model.fit(X_train,
          Y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)

model.summary()

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Dense-Layer-1 (Dense)        (None, 4)                 32        
_________________________________________________________________
Dense-Layer-2 (Dense)        (None, 4)                 20        
_________________________________________________________________
Final (Dense)                (None, 3)                 15        
Total params: 67
Trainable params: 67
Non-trainable params: 0
_________________________________________________________________


## Performing Root Cause Analysis

In [7]:
# Predincting First Incident
CPU_Load=0
Memory_Load=1
Latency=1
ORA00600=0
ORA07445=1
ORA03113=0
ORA12154=0

prediction=model.predict_classes(
    [[CPU_Load,Memory_Load,Latency,
      ORA00600,ORA07445,ORA03113,ORA12154]])

print(label_encoder.inverse_transform(prediction))

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
['Network Latency']


In [44]:
# Predincting Second Incident
CPU_Load=1
Memory_Load=1
Latency=1
ORA00600=0
#ORA07445=1
#ORA03113=0
ORA12154=1

prediction=model.predict_classes(
    [[CPU_Load,Memory_Load,Latency,
      ORA00600,ORA07445,ORA03113,ORA12154]])

confusion_matrix(label_encoder.inverse_transform(prediction), prediction)
print(label_encoder.inverse_transform(prediction))

TypeError: '<' not supported between instances of 'int' and 'str'

In [9]:
# Predicting Several Incidents
print(label_encoder.inverse_transform(
        model.predict_classes([[1,0,0,0,1,1,0],
                                [0,1,0,1,0,0,0],
                                [1,1,0,1,1,0,1],
                                [0,0,0,0,0,1,0],
                                [1,0,1,0,1,1,1]])))

['Low Memory' 'Low Memory' 'Low Memory' 'Database Bug' 'Low Memory']


### Model 2

In [10]:
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.regularizers import l2

# Setup Training Parameters
EPOCHS=10
BATCH_SIZE=50
VERBOSE=1
OUTPUT_CLASSES=len(label_encoder.classes_)
N_HIDDEN=15
VALIDATION_SPLIT=0.2

#Create a Keras sequential model
model2 = tf.keras.models.Sequential()
#Add a Dense Layer
model2.add(keras.layers.Dense(N_HIDDEN,
                             input_shape=(7,),
                              name='Dense-Layer-1',
                              activation='selu'))

# Add a second dense layer
model2.add(keras.layers.Dense(N_HIDDEN,
                              name='Dense-Layer-2',
                              activation='selu'))

# Add a thirds dense layer
model2.add(keras.layers.Dense(N_HIDDEN,
                              name='Dense-Layer-3',
                              activation='selu'))

# Add a softmax layer for categorial prediction
model2.add(keras.layers.Dense(OUTPUT_CLASSES,
                             name='Final',
                             activation='softmax'))

# Compile the model, using Adam optimizer
model2.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Build the model
model2.fit(X_train,
          Y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)

model2.summary()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Dense-Layer-1 (Dense)        (None, 15)                120       
_________________________________________________________________
Dense-Layer-2 (Dense)        (None, 15)                240       
_________________________________________________________________
Dense-Layer-3 (Dense)        (None, 15)                240       
_________________________________________________________________
Final (Dense)                (None, 3)                 48        
Total params: 648
Trainable params: 648
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Predincting First Incident
CPU_Load=0
Memory_Load=1
Latency=1
ORA00600=0
ORA07445=1
ORA03113=0
ORA12154=0

prediction=model2.predict_classes(
    [[CPU_Load,Memory_Load,Latency,
      ORA00600,ORA07445,ORA03113,ORA12154]])

print(label_encoder.inverse_transform(prediction))

['Network Latency']


In [12]:
# Predincting Second Incident
CPU_Load=1
Memory_Load=1
Latency=1
ORA00600=0
ORA07445=1
ORA03113=0
ORA12154=1

prediction=model2.predict_classes(
    [[CPU_Load,Memory_Load,Latency,
      ORA00600,ORA07445,ORA03113,ORA12154]])

print(label_encoder.inverse_transform(prediction))

['Low Memory']


In [13]:
# Predicting Several Incidents
print(label_encoder.inverse_transform(
        model2.predict_classes([[1,0,0,0,1,1,0],
                                [0,1,0,1,0,0,0],
                                [1,1,0,1,1,0,1],
                                [0,0,0,0,0,1,0],
                                [1,0,1,0,1,1,1]])))

['Database Bug' 'Low Memory' 'Low Memory' 'Database Bug' 'Database Bug']


### Model 3

In [14]:
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.regularizers import l2

# Setup Training Parameters
EPOCHS=5
BATCH_SIZE=25
VERBOSE=1
OUTPUT_CLASSES=len(label_encoder.classes_)
N_HIDDEN=128
VALIDATION_SPLIT=0.2

#Create a Keras sequential model
model3 = tf.keras.models.Sequential()
#Add a Dense Layer
model3.add(keras.layers.Dense(N_HIDDEN,
                             input_shape=(7,),
                              name='Dense-Layer-1',
                              activation='relu'))

# Add a second dense layer
model3.add(keras.layers.Dense(N_HIDDEN,
                              name='Dense-Layer-2',
                              activation='relu'))

# Add a thirds dense layer
model3.add(keras.layers.Dense(N_HIDDEN,
                              name='Dense-Layer-3',
                              activation='relu'))

# Add a softmax layer for categorial prediction
model3.add(keras.layers.Dense(OUTPUT_CLASSES,
                             name='Final',
                             activation='softmax'))

# Compile the model, using Adam optimizer
model3.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Build the model
model3.fit(X_train,
          Y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)

model3.summary()

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Dense-Layer-1 (Dense)        (None, 128)               1024      
_________________________________________________________________
Dense-Layer-2 (Dense)        (None, 128)               16512     
_________________________________________________________________
Dense-Layer-3 (Dense)        (None, 128)               16512     
_________________________________________________________________
Final (Dense)                (None, 3)                 387       
Total params: 34,435
Trainable params: 34,435
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Predincting First Incident
CPU_Load=0
Memory_Load=1
Latency=1
ORA00600=0
ORA07445=1
ORA03113=0
ORA12154=0

prediction=model3.predict_classes(
    [[CPU_Load,Memory_Load,Latency,
      ORA00600,ORA07445,ORA03113,ORA12154]])

print(label_encoder.inverse_transform(prediction))

['Network Latency']


In [16]:
# Predincting Second Incident
CPU_Load=1
Memory_Load=1
Latency=1
ORA00600=0
ORA07445=1
ORA03113=0
ORA12154=1

prediction=model3.predict_classes(
    [[CPU_Load,Memory_Load,Latency,
      ORA00600,ORA07445,ORA03113,ORA12154]])

print(label_encoder.inverse_transform(prediction))

['Low Memory']


In [17]:
# Predicting Several Incidents
print(label_encoder.inverse_transform(
        model3.predict_classes([[1,0,0,0,1,1,0],
                                [0,1,0,1,0,0,0],
                                [1,1,0,1,1,0,1],
                                [0,0,0,0,0,1,0],
                                [1,0,1,0,1,1,1]])))

['Database Bug' 'Low Memory' 'Low Memory' 'Database Bug' 'Database Bug']
