In [1]:
import sys
import os
import json
import pandas as pd
import numpy as np
import optparse

from keras.callbacks import TensorBoard
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout, SimpleRNN, Activation
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from collections import OrderedDict
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
#b) We will read the code in slightly differently than before: 
dataframe = pd.read_csv(r'C:\Users\u353822\Downloads\dev-access.csv', engine='python', quotechar='|', header=None)

In [3]:
#c) We then need to convert to a numpy.ndarray type: 
dataset = dataframe.values

In [9]:
X

array(['{"method":"post","query":{},"path":"/login","statusCode":401,"requestPayload":{"username":"Carl2","password":"bo"}}',
       '{"method":"post","query":{},"path":"/login","statusCode":401,"requestPayload":{"username":"pafzah","password":"worldburn432"}}',
       '{"method":"post","query":{},"path":"/login","statusCode":401,"requestPayload":{"username":"Panos1","password":"najrijkom"}}',
       ...,
       '{"method":"post","query":{},"path":"/checkout","statusCode":400,"requestPayload":{"creditCard":"<script src=\\"http://attacker/malicious\\u00e2\\u20ac\\u2018script.js\\"></script>"}}',
       '{"method":"post","query":{},"path":"/checkout","statusCode":400,"requestPayload":{"creditCard":"<meta http-equiv=\\"refresh\\">"}}',
       '{"method":"post","query":{},"path":"/checkout","statusCode":400,"requestPayload":{"creditCard":"<meta http-equiv=\\"refresh\\">"}}'],
      dtype=object)

In [4]:
#d) Check the shape of the data set - it should be (26773, 2). Spend some time looking at the data. 
dataset.shape

(26773, 2)

In [5]:
#e) Store all rows and the 0th index as the feature data:
X = dataset[:,0]

In [6]:
#f) Store all rows and index 1 as the target variable 
Y = dataset[:,1]

In [7]:
#g) In the next step, we will clean up the predictors. This includes removing features that are not valuable, such as timestamp and source. 
for index, item in enumerate(X):
    # Quick hack to space out json elements
    reqJson = json.loads(item, object_pairs_hook=OrderedDict)
    del reqJson['timestamp']
    del reqJson['headers']
    del reqJson['source']
    del reqJson['route']
    del reqJson['responsePayload']
    X[index] = json.dumps(reqJson, separators=(',', ':'))

In [8]:
#h) We next will tokenize our data, which just means vectorizing our text. Given the data we will tokenize every character (thus char_level = True)
tokenizer = Tokenizer(filters='\t\n', char_level=True)
tokenizer.fit_on_texts(X)

In [9]:
# we will need this later
num_words = len(tokenizer.word_index)+1
X = tokenizer.texts_to_sequences(X)

In [10]:
#i) Need to pad our data as each observation has a different length
max_log_length = 1024
X_processed = sequence.pad_sequences(X, maxlen=max_log_length)

In [11]:
#j) Create your train set to be 75% of the data and your test set to be 25%
X_train, X_test, y_train, y_test = train_test_split(X_processed,Y,test_size=.25, random_state=0)

In [12]:
#2. Model 1 - RNN: The first model will be a pretty minimal RNN with only an embedding layer, LSTM layer, and Dense layer. The next model we will add a few more layers. 
#a) Start by creating an instance of a Sequential model
model = Sequential()

In [13]:
#b) From there, add an Embedding laye
model.add(Embedding(input_dim=num_words, output_dim =32, input_length= max_log_length))

Instructions for updating:
Colocations handled automatically by placer.


In [14]:
#c) Add a simple RNN layer
model.add(SimpleRNN(32, activation ='relu'))

In [15]:
#d) Add a Dense Layer:
model.add(Dense(1, activation='sigmoid'))

In [16]:
#e) Compile using the .compile())
model.compile(loss = 'binary_crossentropy', optimizer= 'adam' ,metrics=['acc'])

In [17]:
#f) Print the model summary
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 1024, 32)          2016      
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 32)                2080      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 4,129
Trainable params: 4,129
Non-trainable params: 0
_________________________________________________________________
None


In [18]:
#g) Use the .fit() method to fit the model on the train data. Use a validation split of 0.25, epochs=3 and batch size = 128.
model.fit(X_train, y_train, epochs=3, batch_size=128, validation_split = .25)

Instructions for updating:
Use tf.cast instead.
Train on 15059 samples, validate on 5020 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x13606f98>

In [19]:
#h)  Use the .evaluate() method to get the loss value & the accuracy value on the test data. Use a batch size of 128 again.
print(model.metrics_names)
print(model.evaluate(X_test, y_test, batch_size=128))

['loss', 'acc']
[0.6597054686908191, 0.6192112339230341]


In [20]:
#3) Model 2 - RNN + Dropout Layers + New Activation Function:
model = Sequential()
model.add(Embedding(input_dim=num_words, output_dim =32, input_length= max_log_length))
model.add(LSTM(64, recurrent_dropout=.5))
model.add(Dropout(.5))
model.add(Dense(1, activation='sigmoid'))

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [21]:
#b) Compile model using the .compile() method:
model.compile(loss = 'binary_crossentropy', optimizer= 'adam' ,metrics=['acc'])

In [22]:
#c) Print summary 
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 1024, 32)          2016      
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                24832     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
Total params: 26,913
Trainable params: 26,913
Non-trainable params: 0
_________________________________________________________________
None


In [23]:
#d) Use the .fit() method to fit the model on the train data. Use a validation split of 0.25, epochs=3 and batch size = 128.
model.fit(X_train, y_train, epochs=3, batch_size=128, validation_split = .25)

Train on 15059 samples, validate on 5020 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x2d9035c0>

In [24]:
#e) Use the .evaluate() method to get the loss value & the accuracy value on the test data. Use a batch size of 128 again.
print(model.metrics_names)
print(model.evaluate(X_test, y_test, batch_size=128))

['loss', 'acc']
[0.17525158247771033, 0.954287421589365]


In [25]:
#4) Recurrent Neural Net Model 3: Build Your Own
#a) RNN Requirements: 
#- Use 5 or more layers
#- Add a layer that was not utilized in Model 1 or Model 2 (Note: This could be a new Dense layer or an additional LSTM)

model = Sequential()
model.add(Embedding(input_dim=num_words, output_dim =64, input_length= max_log_length))
model.add(LSTM(4, recurrent_dropout=.5, return_sequences=True))
model.add(Dropout(.5))
model.add(LSTM(4))
model.add(Dense(1, activation='relu'))

In [26]:
#b) Compiler Requirements: 
#- Try a new optimizer for the compile step
#- Keep accuracy as a metric (feel free to add more metrics if desired)
model.compile(loss = 'binary_crossentropy', optimizer= 'adadelta' ,metrics=['acc'])

In [27]:
#c) Print the model summary
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 1024, 64)          4032      
_________________________________________________________________
lstm_2 (LSTM)                (None, 1024, 4)           1104      
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024, 4)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 4)                 144       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 5         
Total params: 5,285
Trainable params: 5,285
Non-trainable params: 0
_________________________________________________________________
None


In [28]:
#d) Use the .fit() method to fit the model on the train data. Use a validation split of 0.25, epochs=3 and batch size = 128.
model.fit(X_train, y_train, epochs=3, batch_size=128, validation_split = .25)

Train on 15059 samples, validate on 5020 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x5a505390>

In [29]:
#e) Use the .evaluate() method to get the loss value & the accuracy value on the test data. Use a batch size of 128 again.
model.evaluate(X_test,y_test,  batch_size=3)



[0.30824287524155974, 0.8764565342757322]

Conceptual Questions: 

5) Explain the difference between the relu activation function and the sigmoid activation function.
The sigmoid function squashes real numbers to a range between 0 and 1. The RelU activation function is zero below 0 and linar with a slope of 1 for x values larger than 0.

6) Describe what one epoch actually is (epoch was a parameter used in the .fit() method).
One epoch means that the model is trained with the entire data set once. If you train the model with 2 epochs, the model will be trained with the training data twice. 

7) Explain how dropout works (you can look at the keras code and/or documentation) for (a) training, and (b) test data sets.
Dropout is where nodes within the neural network are randomly dropped in order to acheive less  paramaters for the overall model.

8) Explain why problems such as this homework assignment are better modeled with RNNs than CNNs. What type of problem will CNNs outperform RNNs on?
CNNs are better for this type of problem because the data is static. If the data was time sensitive, an RNN would be a better alternative. CNNs outperform RNNs for visual image recognition, and they also do better on static data.

9) Explain what RNN problem is solved using LSTM and briefly describe how.
LSTMs are used for time senitive data. LSTM use nodes connected to one another in a cyclical fashion. The bottom layer is able to store history because they are connected to themselves in a recurrent fashion. 