In [2]:
import tensorflow as tf

# Report the installed TensorFlow version, then the name of the GPU device
# TensorFlow reports (the recorded output below shows an empty name).
print(tf.__version__)
gpu_device_name = tf.test.gpu_device_name()
print("GPU name {}".format(gpu_device_name))

2.0.0
GPU name 


# Sequence modelling 

## Coding tutorials
 #### [1.  The IMDb dataset](#coding_tutorial_1)
 #### [2. Padding and masking sequence data](#coding_tutorial_2)
 #### [3. The Embedding layer](#coding_tutorial_3)
 #### [4. The Embedding Projector](#coding_tutorial_4)
 #### [5. Recurrent neural network layers](#coding_tutorial_5)
 #### [6. Stacked RNNs and the Bidirectional wrapper](#coding_tutorial_6)

***
<a id="coding_tutorial_1"></a>
## The IMDb Dataset

#### Load the IMDB review sentiment dataset

In [3]:
# Import imdb

import tensorflow.keras.datasets.imdb as imdb

In [4]:
# Download and assign the data set using load_data()
# The result is unpacked into two (inputs, labels) pairs: train first, test second.

(x_train, y_train) ,(x_test, y_test) = imdb.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


#### Inspect the dataset

In [6]:
# Inspect the type of the data


print(type(x_train))

(25000,) (25000,)
<class 'numpy.ndarray'>


In [7]:
# Inspect the shape of the data

print(x_train.shape,y_train.shape,x_test.shape)

(25000,) (25000,) (25000,)


In [8]:
# Display the first dataset element input
# Notice encoding
x_train[0]


[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 22665,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 21631,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 19193,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 10311,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 31050,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 12118,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5

In [9]:
# Display the first dataset element output
y_train[0]


1

#### Load dataset with different options

In [None]:
# Load the dataset with defaults
imdb.load_data(path="imdb.npz",index_from=3) # the top word is encoded as 4: raw index 1 + index_from 3

# NOTE(review): `path` is presumably resolved relative to the Keras cache
# directory (~/.keras/datasets/) — confirm against the Keras docs.

In [None]:
# Limit the vocabulary to the top 500 words using num_words
imdb.load_data(num_words = 500)


In [None]:
# Ignore the top 10 most frequent words using skip_top
imdb.load_data(skip_top=10)


In [None]:
# Limit the sequence lengths to 500 using maxlen
# NOTE(review): per the Keras docs, maxlen filters out reviews that exceed the
# limit rather than truncating them — confirm for the TF version in use.

imdb.load_data(maxlen=500) # reviews longer than the limit are dropped from the result

In [None]:
# Choose the token that indicates the start of a sequence.
# NOTE(review): the exercise text asks for '1', but this call passes 3 — confirm which is intended.
imdb.load_data(start_char=3) # with start_char=3, token 3 is placed at the start of every review


#### Explore the dataset word index

In [12]:
# Load the imdb word index using get_word_index()

imdb_word_index = imdb.get_word_index()

In [13]:
# View the word index as a dictionary,
# accounting for index_from.

index_from = 3
# Build the shifted mapping from the raw index returned by get_word_index()
# instead of from `imdb_word_index` itself, so that re-running this cell
# cannot add the offset a second time.
imdb_word_index = {
    word: rank + index_from
    for word, rank in imdb.get_word_index().items()
}
imdb_word_index

{'fawn': 34704,
 'tsukino': 52009,
 'nunnery': 52010,
 'sonja': 16819,
 'vani': 63954,
 'woods': 1411,
 'spiders': 16118,
 'hanging': 2348,
 'woody': 2292,
 'trawling': 52011,
 "hold's": 52012,
 'comically': 11310,
 'localized': 40833,
 'disobeying': 30571,
 "'royale": 52013,
 "harpo's": 40834,
 'canet': 52014,
 'aileen': 19316,
 'acurately': 52015,
 "diplomat's": 52016,
 'rickman': 25245,
 'arranged': 6749,
 'rumbustious': 52017,
 'familiarness': 52018,
 "spider'": 52019,
 'hahahah': 68807,
 "wood'": 52020,
 'transvestism': 40836,
 "hangin'": 34705,
 'bringing': 2341,
 'seamier': 40837,
 'wooded': 34706,
 'bravora': 52021,
 'grueling': 16820,
 'wooden': 1639,
 'wednesday': 16821,
 "'prix": 52022,
 'altagracia': 34707,
 'circuitry': 52023,
 'crotch': 11588,
 'busybody': 57769,
 "tart'n'tangy": 52024,
 'burgade': 14132,
 'thrace': 52026,
 "tom's": 11041,
 'snuggles': 52028,
 'francesco': 29117,
 'complainers': 52030,
 'templarios': 52128,
 '272': 40838,
 '273': 52031,
 'zaniacs': 52133,

In [17]:
# Invert the word index (token -> word) and decode the first training review,
# keeping only tokens greater than index_from.
Inverted_dict_index = dict(
    (token, word) for word, token in imdb_word_index.items()
)
[Inverted_dict_index[token] for token in x_train[0] if index_from < token]


['this',
 'film',
 'was',
 'just',
 'brilliant',
 'casting',
 'location',
 'scenery',
 'story',
 'direction',
 "everyone's",
 'really',
 'suited',
 'the',
 'part',
 'they',
 'played',
 'and',
 'you',
 'could',
 'just',
 'imagine',
 'being',
 'there',
 'robert',
 "redford's",
 'is',
 'an',
 'amazing',
 'actor',
 'and',
 'now',
 'the',
 'same',
 'being',
 'director',
 "norman's",
 'father',
 'came',
 'from',
 'the',
 'same',
 'scottish',
 'island',
 'as',
 'myself',
 'so',
 'i',
 'loved',
 'the',
 'fact',
 'there',
 'was',
 'a',
 'real',
 'connection',
 'with',
 'this',
 'film',
 'the',
 'witty',
 'remarks',
 'throughout',
 'the',
 'film',
 'were',
 'great',
 'it',
 'was',
 'just',
 'brilliant',
 'so',
 'much',
 'that',
 'i',
 'bought',
 'the',
 'film',
 'as',
 'soon',
 'as',
 'it',
 'was',
 'released',
 'for',
 'retail',
 'and',
 'would',
 'recommend',
 'it',
 'to',
 'everyone',
 'to',
 'watch',
 'and',
 'the',
 'fly',
 'fishing',
 'was',
 'amazing',
 'really',
 'cried',
 'at',
 'the',


In [18]:
# View an input sentence: decode the training review at position 7,
# keeping only tokens greater than index_from.

[Inverted_dict_index[token] for token in x_train[7] if index_from < token]


['the',
 'hamiltons',
 'tells',
 'the',
 'story',
 'of',
 'the',
 'four',
 'hamilton',
 'siblings',
 'teenager',
 'francis',
 'cory',
 'knauf',
 'twins',
 'wendell',
 'joseph',
 'mckelheer',
 'darlene',
 'mackenzie',
 'firgens',
 'the',
 'eldest',
 'david',
 'samuel',
 'who',
 'is',
 'now',
 'the',
 'surrogate',
 'parent',
 'in',
 'charge',
 'the',
 "hamilton's",
 'move',
 'house',
 'a',
 'lot',
 'franics',
 'is',
 'unsure',
 'why',
 'is',
 'unhappy',
 'with',
 'the',
 'way',
 'things',
 'are',
 'the',
 'fact',
 'that',
 'his',
 "brother's",
 'sister',
 'kidnap',
 'imprison',
 'murder',
 'people',
 'in',
 'the',
 'basement',
 "doesn't",
 'help',
 'relax',
 'or',
 'calm',
 "francis'",
 'nerves',
 'either',
 'francis',
 "know's",
 'something',
 'just',
 "isn't",
 'right',
 'when',
 'he',
 'eventually',
 'finds',
 'out',
 'the',
 'truth',
 'things',
 'will',
 'never',
 'be',
 'the',
 'same',
 'again',
 'br',
 'br',
 'co',
 'written',
 'co',
 'produced',
 'directed',
 'by',
 'mitchell',
 '

In [19]:
# Get the sentiment value

y_train[0] , y_train[7]

(1, 0)

---
<a id="coding_tutorial_2"></a>
## Padding and Masking Sequence Data

In [20]:
# Load the imdb data set

import tensorflow.keras.datasets.imdb as imdb
(x_train,y_train) , (x_test,y_test) = imdb.load_data()

#### Preprocess the data with padding

In [21]:
# Inspect the input data shape

x_train.shape

(25000,)

In [22]:
# Pad the inputs to the maximum length using maxlen

padded_x_train = tf.keras.preprocessing.sequence.pad_sequences(
    x_train,
    maxlen=300,        # target length (the recorded shape below is (25000, 300, 1) after expand_dims)
    padding="post",    # padding goes after the real tokens
    truncating="pre",  # over-long sequences are cut from the front
)


In [26]:
# Inspect the first padded sequence next to its original, unpadded form
# (this displays the values; padded_x_train.shape would show the shape itself)

padded_x_train[0] , x_train[0]

(array([    1,    14,    22,    16,    43,   530,   973,  1622,  1385,
           65,   458,  4468,    66,  3941,     4,   173,    36,   256,
            5,    25,   100,    43,   838,   112,    50,   670, 22665,
            9,    35,   480,   284,     5,   150,     4,   172,   112,
          167, 21631,   336,   385,    39,     4,   172,  4536,  1111,
           17,   546,    38,    13,   447,     4,   192,    50,    16,
            6,   147,  2025,    19,    14,    22,     4,  1920,  4613,
          469,     4,    22,    71,    87,    12,    16,    43,   530,
           38,    76,    15,    13,  1247,     4,    22,    17,   515,
           17,    12,    16,   626,    18, 19193,     5,    62,   386,
           12,     8,   316,     8,   106,     5,     4,  2223,  5244,
           16,   480,    66,  3785,    33,     4,   130,    12,    16,
           38,   619,     5,    25,   124,    51,    36,   135,    48,
           25,  1415,    33,     6,    22,    12,   215,    28,    77,
      

#### Create a Masking layer

In [27]:
# Import numpy 

import numpy as np

In [28]:
# Masking expects to see (batch, sequence, features)
# Create a dummy feature dimension using expand_dims
# Guard on ndim so that re-running this cell does not stack another trailing
# axis onto an array that has already been expanded.
if padded_x_train.ndim == 2:
    padded_x_train = np.expand_dims(padded_x_train, axis=-1)
padded_x_train

array([[[   1],
        [  14],
        [  22],
        ...,
        [   0],
        [   0],
        [   0]],

       [[   1],
        [ 194],
        [1153],
        ...,
        [   0],
        [   0],
        [   0]],

       [[   1],
        [  14],
        [  47],
        ...,
        [   0],
        [   0],
        [   0]],

       ...,

       [[   1],
        [  11],
        [   6],
        ...,
        [   0],
        [   0],
        [   0]],

       [[   1],
        [1446],
        [7079],
        ...,
        [   0],
        [   0],
        [   0]],

       [[   1],
        [  17],
        [   6],
        ...,
        [   0],
        [   0],
        [   0]]], dtype=int32)

In [29]:
padded_x_train.shape

(25000, 300, 1)

In [31]:
# Create a Masking layer 

# Convert the padded NumPy array into a float32 tensor to feed to the layer.
tf_x_train = tf.convert_to_tensor(padded_x_train, dtype = tf.float32)
# mask_value=0.0 targets the zero entries that the padded array shows at the
# end of each sequence.
masking_layer = tf.keras.layers.Masking(mask_value = 0.0)


In [32]:
# Pass tf_x_train to it
masked_x_train = masking_layer(tf_x_train)


In [33]:
# Look at the dataset

tf_x_train[0]

<tf.Tensor: id=13, shape=(300, 1), dtype=float32, numpy=
array([[1.0000e+00],
       [1.4000e+01],
       [2.2000e+01],
       [1.6000e+01],
       [4.3000e+01],
       [5.3000e+02],
       [9.7300e+02],
       [1.6220e+03],
       [1.3850e+03],
       [6.5000e+01],
       [4.5800e+02],
       [4.4680e+03],
       [6.6000e+01],
       [3.9410e+03],
       [4.0000e+00],
       [1.7300e+02],
       [3.6000e+01],
       [2.5600e+02],
       [5.0000e+00],
       [2.5000e+01],
       [1.0000e+02],
       [4.3000e+01],
       [8.3800e+02],
       [1.1200e+02],
       [5.0000e+01],
       [6.7000e+02],
       [2.2665e+04],
       [9.0000e+00],
       [3.5000e+01],
       [4.8000e+02],
       [2.8400e+02],
       [5.0000e+00],
       [1.5000e+02],
       [4.0000e+00],
       [1.7200e+02],
       [1.1200e+02],
       [1.6700e+02],
       [2.1631e+04],
       [3.3600e+02],
       [3.8500e+02],
       [3.9000e+01],
       [4.0000e+00],
       [1.7200e+02],
       [4.5360e+03],
       [1.1110e+03]

In [34]:
masked_x_train[0]

<tf.Tensor: id=17, shape=(300, 1), dtype=float32, numpy=
array([[1.0000e+00],
       [1.4000e+01],
       [2.2000e+01],
       [1.6000e+01],
       [4.3000e+01],
       [5.3000e+02],
       [9.7300e+02],
       [1.6220e+03],
       [1.3850e+03],
       [6.5000e+01],
       [4.5800e+02],
       [4.4680e+03],
       [6.6000e+01],
       [3.9410e+03],
       [4.0000e+00],
       [1.7300e+02],
       [3.6000e+01],
       [2.5600e+02],
       [5.0000e+00],
       [2.5000e+01],
       [1.0000e+02],
       [4.3000e+01],
       [8.3800e+02],
       [1.1200e+02],
       [5.0000e+01],
       [6.7000e+02],
       [2.2665e+04],
       [9.0000e+00],
       [3.5000e+01],
       [4.8000e+02],
       [2.8400e+02],
       [5.0000e+00],
       [1.5000e+02],
       [4.0000e+00],
       [1.7200e+02],
       [1.1200e+02],
       [1.6700e+02],
       [2.1631e+04],
       [3.3600e+02],
       [3.8500e+02],
       [3.9000e+01],
       [4.0000e+00],
       [1.7200e+02],
       [4.5360e+03],
       [1.1110e+03]

In [39]:
# Look at the ._keras_mask for the dataset

masked_x_train._keras_mask

<tf.Tensor: id=9, shape=(25000, 300), dtype=bool, numpy=
array([[ True,  True,  True, ..., False, False, False],
       [ True,  True,  True, ..., False, False, False],
       [ True,  True,  True, ..., False, False, False],
       ...,
       [ True,  True,  True, ..., False, False, False],
       [ True,  True,  True, ..., False, False, False],
       [ True,  True,  True, ..., False, False, False]])>

***
<a id="coding_tutorial_3"></a>
## The Embedding layer

#### Create and apply an `Embedding` layer

In [None]:
# Create an embedding layer using layers.Embedding
# Specify input_dim, output_dim, input_length



In [None]:
# Inspect an Embedding layer output for a fixed input
# Expects an input of shape (batch, sequence, feature)



In [None]:
# Inspect the Embedding layer weights using get_weights()



In [None]:
# Get the embedding for the 14th index



#### Create and apply an `Embedding` layer that uses `mask_zero=True`

In [None]:
# Create a layer that uses the mask_zero kwarg



In [None]:
# Apply this layer to the sequence and see the _keras_mask property



---
<a id="coding_tutorial_4"></a>
## The Embedding Projector

#### Load and preprocess the IMDb data

In [None]:
# A function to load and preprocess the IMDB dataset

def get_and_pad_imdb_dataset(num_words=10000, maxlen=None, index_from=2):
    """Load the IMDB review dataset and zero-pad the inputs of each split.

    Args:
        num_words: Forwarded to ``imdb.load_data`` as ``num_words``.
        maxlen: Forwarded to ``imdb.load_data`` as ``maxlen``. It is not
            forwarded to ``pad_sequences``, which always gets ``maxlen=None``.
        index_from: Forwarded to ``imdb.load_data`` as ``index_from``.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Both ``x`` values are the
        output of ``pad_sequences`` ('pre' padding, 'pre' truncating, pad
        value 0); each split is passed to ``pad_sequences`` separately. The
        labels are returned exactly as loaded.
    """
    # Load the reviews
    load_options = dict(path='imdb.npz',
                        num_words=num_words,
                        skip_top=0,
                        maxlen=maxlen,
                        start_char=1,
                        oov_char=2,
                        index_from=index_from)
    train_split, test_split = tf.keras.datasets.imdb.load_data(**load_options)

    def _pad(sequences):
        # No explicit target length is given (maxlen=None); zeros go in front.
        return tf.keras.preprocessing.sequence.pad_sequences(sequences,
                                                             maxlen=None,
                                                             padding='pre',
                                                             truncating='pre',
                                                             value=0)

    (x_train, y_train), (x_test, y_test) = train_split, test_split
    return (_pad(x_train), y_train), (_pad(x_test), y_test)

In [None]:
# Load the dataset



In [None]:
# A function to get the dataset word index

def get_imdb_word_index(num_words=10000, index_from=2):
    """Return the IMDB word -> token mapping, shifted by ``index_from``.

    Args:
        num_words: Exclusive upper bound on the shifted token values that are
            kept. ``None`` disables the filter and keeps every word.
        index_from: Offset added to every raw index from ``get_word_index``.

    Returns:
        dict mapping each kept word to ``raw_index + index_from``.
    """
    raw_word_index = tf.keras.datasets.imdb.get_word_index(
                                        path='imdb_word_index.json')
    # load_data(num_words=...) keeps a token only while token < num_words and
    # replaces everything else with oov_char, so the bound here is strict:
    # a word whose shifted token equals num_words never occurs in the data.
    return {word: rank + index_from
            for word, rank in raw_word_index.items()
            if num_words is None or rank + index_from < num_words}

In [None]:
# Get the word index



In [None]:
# Swap the keys and values of the word index



In [None]:
# View the first dataset example sentence



#### Build an Embedding layer into a model

In [None]:
# Get the maximum token value



In [None]:
# Specify an embedding dimension



In [None]:
# Build a model using Sequential:
#     1. Embedding layer
#     2. GlobalAveragePooling1D
#     3. Dense



In [None]:
# Functional API refresher: use the Model to build the same model



In [None]:
model.summary()

#### Compile, train, and evaluate the model

In [None]:
# Compile the model with a binary cross-entropy loss



In [None]:
# Train the model using .fit(), saving its history



In [None]:
# Plot the training and validation accuracy

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

history_dict = history.history

acc      = history_dict['accuracy']
val_acc  = history_dict['val_accuracy']
loss     = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14,5))
plt.plot(epochs, acc, marker='.', label='Training acc')
plt.plot(epochs, val_acc, marker='.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(loc='lower right')
plt.ylim(0, 1);

#### The TensorFlow embedding projector

The TensorFlow Embedding Projector can be found [here](https://projector.tensorflow.org/).

In [None]:
# Retrieve the embedding layer's weights from the trained model



In [None]:
# Save the word Embeddings to tsv files
# Two files: 
#     one contains the embedding labels (meta.tsv),
#     one contains the embeddings (vecs.tsv)

import io
import os
from os import path

# Make sure the output directory exists before opening files inside it.
os.makedirs('data', exist_ok=True)

# The context managers close both files even if the loop raises part-way
# through (for example if `token` is not a valid row of `weights`).
with io.open(path.join('data', 'vecs.tsv'), 'w', encoding='utf-8') as out_v, \
     io.open(path.join('data', 'meta.tsv'), 'w', encoding='utf-8') as out_m:
    for k, (word, token) in enumerate(word_index.items()):
        # Rows are separated by a newline; no trailing newline is written.
        if k != 0:
            out_m.write('\n')
            out_v.write('\n')

        # Row k of vecs.tsv is the tab-separated embedding for the word
        # written on row k of meta.tsv.
        out_v.write('\t'.join([str(x) for x in weights[token]]))
        out_m.write(word)
# beware large collections of embeddings!

---
<a id="coding_tutorial_5"></a>
## Recurrent neural network layers

#### Initialize and pass an input to a SimpleRNN layer

In [None]:
# Create a SimpleRNN layer and test it



In [None]:
# Note that only the final cell output is returned



#### Load and transform the IMDB review sentiment dataset

In [None]:
# A function to load and preprocess the IMDB dataset

def get_and_pad_imdb_dataset(num_words=10000, maxlen=None, index_from=2):
    """Load the IMDB review dataset and zero-pad the inputs of each split.

    Args:
        num_words: Forwarded to ``imdb.load_data`` as ``num_words``.
        maxlen: Forwarded to ``imdb.load_data`` as ``maxlen``. It is not
            forwarded to ``pad_sequences``, which always gets ``maxlen=None``.
        index_from: Forwarded to ``imdb.load_data`` as ``index_from``.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Both ``x`` values are the
        output of ``pad_sequences`` ('pre' padding, 'pre' truncating, pad
        value 0); each split is passed to ``pad_sequences`` separately. The
        labels are returned exactly as loaded.
    """
    # Load the reviews
    load_options = dict(path='imdb.npz',
                        num_words=num_words,
                        skip_top=0,
                        maxlen=maxlen,
                        start_char=1,
                        oov_char=2,
                        index_from=index_from)
    train_split, test_split = tf.keras.datasets.imdb.load_data(**load_options)

    def _pad(sequences):
        # No explicit target length is given (maxlen=None); zeros go in front.
        return tf.keras.preprocessing.sequence.pad_sequences(sequences,
                                                             maxlen=None,
                                                             padding='pre',
                                                             truncating='pre',
                                                             value=0)

    (x_train, y_train), (x_test, y_test) = train_split, test_split
    return (_pad(x_train), y_train), (_pad(x_test), y_test)

In [None]:
# Load the dataset



In [None]:
# A function to get the dataset word index

def get_imdb_word_index(num_words=10000, index_from=2):
    """Return the IMDB word -> token mapping, shifted by ``index_from``.

    Args:
        num_words: Exclusive upper bound on the shifted token values that are
            kept. ``None`` disables the filter and keeps every word.
        index_from: Offset added to every raw index from ``get_word_index``.

    Returns:
        dict mapping each kept word to ``raw_index + index_from``.
    """
    raw_word_index = tf.keras.datasets.imdb.get_word_index(
                                        path='imdb_word_index.json')
    # load_data(num_words=...) keeps a token only while token < num_words and
    # replaces everything else with oov_char, so the bound here is strict:
    # a word whose shifted token equals num_words never occurs in the data.
    return {word: rank + index_from
            for word, rank in raw_word_index.items()
            if num_words is None or rank + index_from < num_words}

In [None]:
# Get the word index using get_imdb_word_index()



#### Create a recurrent neural network model

In [None]:
# Get the maximum index value



In [None]:
# Using Sequential, build the model:
# 1. Embedding.
# 2. LSTM.
# 3. Dense.



#### Compile and fit the model

In [None]:
# Compile the model with binary cross-entropy loss



In [None]:
# Fit the model and save its training history



#### Plot learning curves

In [None]:
# Plot the training and validation accuracy

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

history_dict = history.history

acc      = history_dict['accuracy']
val_acc  = history_dict['val_accuracy']
loss     = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14,5))
plt.plot(epochs, acc, marker='.', label='Training acc')
plt.plot(epochs, val_acc, marker='.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(loc='lower right')
plt.ylim(0, 1);

#### Make predictions with the model

In [None]:
# View the first test data example sentence
# (invert the word index)



In [None]:
# Get the model prediction using model.predict()



In [None]:
# Get the corresponding label



---
<a id="coding_tutorial_6"></a>
## Stacked RNNs and the Bidirectional wrapper

#### Load and transform the IMDB review sentiment dataset

In [None]:
# A function to load and preprocess the IMDB dataset

def get_and_pad_imdb_dataset(num_words=10000, maxlen=None, index_from=2):
    """Load the IMDB review dataset and zero-pad the inputs of each split.

    Args:
        num_words: Forwarded to ``imdb.load_data`` as ``num_words``.
        maxlen: Forwarded to ``imdb.load_data`` as ``maxlen``. It is not
            forwarded to ``pad_sequences``, which always gets ``maxlen=None``.
        index_from: Forwarded to ``imdb.load_data`` as ``index_from``.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Both ``x`` values are the
        output of ``pad_sequences`` ('pre' padding, 'pre' truncating, pad
        value 0); each split is passed to ``pad_sequences`` separately. The
        labels are returned exactly as loaded.
    """
    # Load the reviews
    load_options = dict(path='imdb.npz',
                        num_words=num_words,
                        skip_top=0,
                        maxlen=maxlen,
                        start_char=1,
                        oov_char=2,
                        index_from=index_from)
    train_split, test_split = tf.keras.datasets.imdb.load_data(**load_options)

    def _pad(sequences):
        # No explicit target length is given (maxlen=None); zeros go in front.
        return tf.keras.preprocessing.sequence.pad_sequences(sequences,
                                                             maxlen=None,
                                                             padding='pre',
                                                             truncating='pre',
                                                             value=0)

    (x_train, y_train), (x_test, y_test) = train_split, test_split
    return (_pad(x_train), y_train), (_pad(x_test), y_test)

In [None]:
# Load the dataset



In [None]:
# A function to get the dataset word index

def get_imdb_word_index(num_words=10000, index_from=2):
    """Return the IMDB word -> token mapping, shifted by ``index_from``.

    Args:
        num_words: Exclusive upper bound on the shifted token values that are
            kept. ``None`` disables the filter and keeps every word.
        index_from: Offset added to every raw index from ``get_word_index``.

    Returns:
        dict mapping each kept word to ``raw_index + index_from``.
    """
    raw_word_index = tf.keras.datasets.imdb.get_word_index(
                                        path='imdb_word_index.json')
    # load_data(num_words=...) keeps a token only while token < num_words and
    # replaces everything else with oov_char, so the bound here is strict:
    # a word whose shifted token equals num_words never occurs in the data.
    return {word: rank + index_from
            for word, rank in raw_word_index.items()
            if num_words is None or rank + index_from < num_words}

In [None]:
# Get the word index using get_imdb_word_index()



#### Build stacked and bidirectional recurrent models

In [None]:
# Get the maximum index value and specify an embedding dimension



In [None]:
# Using Sequential, build a stacked LSTM model via return_sequences=True



In [None]:
# Using Sequential, build a bidirectional RNN with merge_mode='sum'



In [None]:
# Create a model featuring both stacked recurrent layers and a bidirectional layer



#### Compile and fit the model

In [None]:
# Compile the model



In [None]:
# Train the model, saving its history



In [None]:
# Plot the training and validation accuracy

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

history_dict = history.history

acc      = history_dict['accuracy']
val_acc  = history_dict['val_accuracy']
loss     = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14,5))
plt.plot(epochs, acc, marker='.', label='Training acc')
plt.plot(epochs, val_acc, marker='.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(loc='lower right')
plt.ylim(0, 1);