## Import relevant libraries

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

## Load the data

In [30]:
# Location of the incident dataset (Colab upload path).
DATA_PATH = '/content/root_cause_analysis.csv'

raw_data = pd.read_csv(DATA_PATH)
print(raw_data.head())
# DataFrame.info() writes its report straight to stdout and returns None,
# so wrapping it in print() would emit an extra "None" line.
raw_data.info()
print(raw_data['ROOT_CAUSE'].unique())

# Separate independent and dependent variables.
# The first column (ID) is skipped and the last column (ROOT_CAUSE) is the
# label, so the features are every column in between.
features = raw_data.iloc[:, 1:-1].values
target = raw_data.iloc[:, -1].values

# Show the shape plus a short preview instead of dumping the whole arrays.
print("feature")
print(features.shape)
print(features[:5])
print("Target")
print(target.shape)
print(target[:5])


   ID  CPU_LOAD  MEMORY_LEAK_LOAD  DELAY  ERROR_1000  ERROR_1001  ERROR_1002  \
0   1         0                 0      0           0           1           0   
1   2         0                 0      0           0           0           0   
2   3         0                 1      1           0           0           1   
3   4         0                 1      0           1           1           0   
4   5         1                 1      0           1           0           1   

   ERROR_1003     ROOT_CAUSE  
0           1    MEMORY_LEAK  
1           1    MEMORY_LEAK  
2           1    MEMORY_LEAK  
3           1    MEMORY_LEAK  
4           0  NETWORK_DELAY  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   ID                1000 non-null   int64 
 1   CPU_LOAD          1000 non-null   int64 
 2   MEMORY_LEAK_LOAD  1000 non-null   int64

## Data Preprocessing

In [31]:
# `features` is already a NumPy array (it was taken with `.values` when the
# data was loaded); this step only casts its entries to 64-bit floats.
features = features.astype(np.float64)


array(['MEMORY_LEAK', 'MEMORY_LEAK', 'MEMORY_LEAK', 'MEMORY_LEAK',
       'NETWORK_DELAY', 'NETWORK_DELAY', 'NETWORK_DELAY',
       'DATABASE_ISSUE', 'MEMORY_LEAK', 'NETWORK_DELAY', 'MEMORY_LEAK',
       'DATABASE_ISSUE', 'MEMORY_LEAK', 'DATABASE_ISSUE', 'NETWORK_DELAY',
       'MEMORY_LEAK', 'MEMORY_LEAK', 'MEMORY_LEAK', 'NETWORK_DELAY',
       'MEMORY_LEAK', 'NETWORK_DELAY', 'MEMORY_LEAK', 'MEMORY_LEAK',
       'MEMORY_LEAK', 'NETWORK_DELAY', 'MEMORY_LEAK', 'MEMORY_LEAK',
       'NETWORK_DELAY', 'NETWORK_DELAY', 'MEMORY_LEAK', 'DATABASE_ISSUE',
       'MEMORY_LEAK', 'MEMORY_LEAK', 'NETWORK_DELAY', 'MEMORY_LEAK',
       'DATABASE_ISSUE', 'MEMORY_LEAK', 'DATABASE_ISSUE',
       'DATABASE_ISSUE', 'NETWORK_DELAY', 'NETWORK_DELAY',
       'DATABASE_ISSUE', 'MEMORY_LEAK', 'DATABASE_ISSUE', 'NETWORK_DELAY',
       'NETWORK_DELAY', 'NETWORK_DELAY', 'DATABASE_ISSUE',
       'NETWORK_DELAY', 'DATABASE_ISSUE', 'MEMORY_LEAK', 'DATABASE_ISSUE',
       'DATABASE_ISSUE', 'DATABASE_ISSUE', 'NETWORK_

In [33]:
from sklearn.preprocessing import LabelEncoder

# Map the string root-cause labels to integer class ids.
# The labels are read from raw_data rather than from `target` so this cell can
# be re-run safely: `target` is overwritten below with the one-hot matrix, and
# passing that 2-D matrix back into LabelEncoder would raise.
encoder = LabelEncoder()
target_ids = encoder.fit_transform(raw_data.iloc[:, -1].values)

# One-hot encode the class ids. The number of classes comes from the fitted
# encoder instead of a hard-coded 3, so it follows the labels in the data.
target = tf.keras.utils.to_categorical(target_ids, num_classes=len(encoder.classes_))

In [34]:
# Display the one-hot label matrix returned by to_categorical.
target

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       ...,
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]], dtype=float32)

## Splitting into training and test set

In [37]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows as a test set; the fixed random_state keeps the
# split identical from run to run.
split = train_test_split(
    features,
    target,
    test_size=0.10,
    random_state=42,
)
x_train, x_test, y_train, y_test = split

print(x_train.shape)
print(y_train)

(900, 7)
[[0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 ...
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]]


## Define the model

In [43]:
# Number of distinct labels the fitted encoder has learned.
output_classes = encoder.classes_.shape[0]
print(output_classes)

3


In [48]:
# Layer sizes are derived from the data so the network follows the dataset
# instead of relying on hard-coded dimensions.
input_size = x_train.shape[1]          # number of feature columns
output_nodes = len(encoder.classes_)   # one output unit per label

# Two hidden ReLU layers followed by a softmax layer over the labels.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, input_shape=(input_size,), name='Dense_layer1', activation='relu'),
    tf.keras.layers.Dense(128, name='Dense_layer2', activation='relu'),
    tf.keras.layers.Dense(output_nodes, name='final_layer', activation='softmax'),
])

# Compile the model.
# 'rmsprop' is the optimizer Keras uses when none is given; it is spelled out
# here so the training setup is visible. categorical_crossentropy matches the
# one-hot encoded targets.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense_layer1 (Dense)        (None, 128)               1024      
                                                                 
 Dense_layer2 (Dense)        (None, 128)               16512     
                                                                 
 final_layer (Dense)         (None, 3)                 387       
                                                                 
Total params: 17,923
Trainable params: 17,923
Non-trainable params: 0
_________________________________________________________________


## Train the model

In [49]:
# Train the network and keep the returned History object so the per-epoch
# loss/accuracy values can be inspected after training.
history = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=20,
    verbose=1,
    validation_split=0.2,  # hold out 20% of the training rows for validation
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f92959bced0>