<a href="https://colab.research.google.com/github/alexmacharia/deep_learning_projects/blob/main/Incident_Root_Cause_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
# Import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [3]:
# Load the data

df = pd.read_csv('/content/gdrive/MyDrive/deep_learning/data/root_cause_analysis.csv')

df.head()

Unnamed: 0,ID,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,MEMORY_LEAK
1,2,0,0,0,0,0,0,1,MEMORY_LEAK
2,3,0,1,1,0,0,1,1,MEMORY_LEAK
3,4,0,1,0,1,1,0,1,MEMORY_LEAK
4,5,1,1,0,1,0,1,0,NETWORK_DELAY


In [4]:
# Convert label into numerical form
label_encoder = LabelEncoder()
df['ROOT_CAUSE'] = label_encoder.fit_transform(df['ROOT_CAUSE'])


df_array = df.to_numpy().astype(float)
X = df_array[:,1:8]

Y = df_array[:,8]



In [5]:
# Convert Y to one-hot encoding
Y = to_categorical(Y, 3)

# Split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1)

print(X_train.shape)
print(X_test.shape)

print(y_train.shape)
print(y_test.shape)

(900, 7)
(100, 7)
(900, 3)
(100, 3)


In [6]:
# Create keras sequential model
EPOCHS = 20
BATCH_SIZE = 64
VERBOSE = 1
OUTPUT_CLASSES = len(label_encoder.classes_)
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2

model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(7,), name='Dense_Layer_1', activation='relu'))
model.add(Dense(N_HIDDEN, name='Dense_Layer_2', activation='relu'))
model.add(Dense(OUTPUT_CLASSES, name='Final_Layer', activation='softmax'))

model.compile(loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense_Layer_1 (Dense)       (None, 128)               1024      
                                                                 
 Dense_Layer_2 (Dense)       (None, 128)               16512     
                                                                 
 Final_Layer (Dense)         (None, 3)                 387       
                                                                 
Total params: 17,923
Trainable params: 17,923
Non-trainable params: 0
_________________________________________________________________


In [7]:
model.fit(X_train,
          y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fbf5c52e910>

In [8]:
model.evaluate(X_test, y_test)



[0.2858756184577942, 0.8899999856948853]

In [9]:
# Predict root cause
CPU_LOAD=1
MEMORY_LOAD=0
DELAY=0
ERROR_1000=0
ERROR_1001=1
ERROR_1002=1
ERROR_1003=0

prediction=np.argmax(model.predict(
    [[CPU_LOAD,MEMORY_LOAD,DELAY,
      ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003]]), axis=1 )

print(label_encoder.inverse_transform(prediction))

['DATABASE_ISSUE']


In [10]:
# Predict as a batch
print(label_encoder.inverse_transform(np.argmax(
    model.predict([[1,0,0,0,1,1,0],
                                [0,1,1,1,0,0,0],
                                [1,1,0,1,1,0,1],
                                [0,0,0,0,0,1,0],
                                [1,0,1,0,1,1,1]]), axis=1
)))

['DATABASE_ISSUE' 'NETWORK_DELAY' 'MEMORY_LEAK' 'DATABASE_ISSUE'
 'DATABASE_ISSUE']


In [19]:
a = label_encoder.inverse_transform([0])

In [20]:
a

array([0])

In [21]:
df.head()

Unnamed: 0,ID,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,1
1,2,0,0,0,0,0,0,1,1
2,3,0,1,1,0,0,1,1,1
3,4,0,1,0,1,1,0,1,1
4,5,1,1,0,1,0,1,0,2
