In [42]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_problems
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.utils import registry
import keras
from keras.engine.topology import Layer
from keras.datasets import imdb
from keras.models import Sequential, Model
from keras import backend as K
# from keras.layers import Dense
# from keras.layers import LSTM
# from keras.layers import GlobalMaxPooling1D
# from keras.layers import BatchNormalization
# from keras.layers import Lambda
from keras.layers import *
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence, text
import json
import sklearn
from sklearn import preprocessing as skpp

In [2]:
data = pd.read_csv('./dataset/cleaned_lyrics.csv')

In [3]:
genres = data['genre'].unique()
data['genre_id'] = data.groupby(['genre']).ngroup()

mappings = data[['genre', 'genre_id']].drop_duplicates()
map_list = [(genre_id, genre) for genre, genre_id in mappings.values]
map_list.sort()
map_list

data_subset = data[['genre_id', 'genre', 'lyrics']]

In [6]:
numpy_data = data['lyrics'].values
max_words = 30000

# create a new Tokenizer
tokenizer = text.Tokenizer(num_words=max_words, oov_token='<UNK>')
# feed our song lyrics to the Tokenizer
tokenizer.fit_on_texts(numpy_data)

# Tokenizers come with a convenient list of words and IDs
dictionary = tokenizer.word_index

with open('dictionary.json', 'w') as dictionary_file:
    json.dump(dictionary, dictionary_file)
    
tokenizer.word_index = {e:i for e,i in tokenizer.word_index.items() if i <= max_words} # <= because tokenizer is 1 indexed
tokenizer.word_index[tokenizer.oov_token] = max_words + 1
indexed_data = tokenizer.texts_to_sequences(numpy_data)
indexed_data = np.array(indexed_data)

label_encoder = skpp.LabelEncoder()
indexed_labels = np.array(label_encoder.fit_transform(data['genre'].values))
#label_encoder.inverse_transform(np.array([10, 8])) #to get original genre text back

num_test = 30000

#shuffle data before splitting off test set
random_indexes = np.random.permutation(len(indexed_labels))
indexed_data = indexed_data[random_indexes]
indexed_labels = indexed_labels[random_indexes]

X_train = indexed_data[:-num_test]
y_train = indexed_labels[:-num_test]
X_test  = indexed_data[-num_test:]
y_test  = indexed_labels[-num_test:]

y_train = keras.utils.to_categorical(y_train)
y_test = keras.utils.to_categorical(y_test)

num_words = max_words + 2
# truncate and pad input sequences
max_review_length = 1000

X_train_padded = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test_padded = sequence.pad_sequences(X_test, maxlen=max_review_length)

In [182]:
# Implementation from https://github.com/Kyubyong/transformer/blob/master/modules.py
class PositionalEncoding(Layer):
    def __init__(self, **kwargs):
        super(PositionalEncoding,self).__init__(**kwargs)

    def build(self, input_shape):
        super(PositionalEncoding,self).build(input_shape)

    def call(self, x, mask=None):
        _, T, E = x.get_shape().as_list()
        position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [tf.shape(x)[0], 1])

        # First part of the PE function: sin and cos argument
        position_enc = np.array([
            [pos / np.power(10000, 2.*i/E) for i in range(E)]
            for pos in range(T)], dtype=np.float32)

        # Second part, apply the cosine to even columns and sin to odds.
        position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
        position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1

        # Convert to a tensor
        lookup_table = tf.convert_to_tensor(position_enc)
        outputs = tf.nn.embedding_lookup(lookup_table, position_ind)
        return tf.add(outputs, x)

    def get_output_shape_for(self, input_shape):
        return input_shape
    
    def compute_output_shape(self, input_shape):
        return input_shape

def multi_head_attention(x, num_heads=10):
    E = embedding_vector_length
    queries = Dense(E, activation='relu')(x)
    keys = Dense(E, activation='relu')(x)
    values = Dense(E, activation='relu')(x)

    # Split and concat
    concat = lambda x: tf.concat(tf.split(x, num_heads, axis=2), axis=0)
    Q_ = Lambda(concat)(queries)
    K_ = Lambda(concat)(keys)
    V_ = Lambda(concat)(values)

    # Multiplication
    matmul = lambda x: tf.matmul(x[0], tf.transpose(x[1], (0, 2, 1)))
    # permute_k = Permute((2, 1))(K_)
    # outputs = K.batch_dot(Q_, permute_k) # (h*N, T_q, T_k)
    outputs = Lambda(matmul)([Q_, K_])

    # Scale
    divide = lambda x: x / (K_.get_shape().as_list()[-1] ** 0.5)
    outputs = Lambda(divide)(outputs)

    # Softmax
    softmax = lambda x: tf.nn.softmax(x)
    outputs = Lambda(softmax)(outputs)
    # outputs = K.softmax(outputs) # (h*N, T_q, T_k)

    # Dropouts
    outputs = Dropout(0.1)(outputs)

    # Weighted sum
    matmul2 = lambda x: tf.matmul(x[0], x[1])
    outputs = Lambda(matmul2)([outputs, V_])

    # outputs = K.batch_dot(outputs, V_) # ( h*N, T_q, C/h)

    # Restore shape
    concat2 = lambda x: tf.concat(tf.split(x, num_heads, axis=0), axis=2)
    outputs = Lambda(concat2)(outputs) # (N, T_q, C)

    return Add()([outputs, x])

def feed_forward(x):
    # Inner layer
    params = {"inputs": x, "filters": 2048, "kernel_size": 1,
              "activation": tf.nn.relu, "use_bias": True}
    outputs = tf.layers.conv1d(**params)
    # Readout layer
    params = {"inputs": outputs, "filters": 512, "kernel_size": 1,
              "activation": None, "use_bias": True}
    outputs = tf.layers.conv1d(**params)

    # Residual connection
    outputs += inputs

    return outputs


In [194]:
embedding_vector_length = 100

inputs = Input(shape=(max_review_length,))

embeds = Embedding(num_words, embedding_vector_length, input_length=max_review_length)(inputs)
transformer_input = PositionalEncoding()(embeds)
for i in range(6):
    multi_head = multi_head_attention(transformer_input)
    norm = BatchNormalization()(multi_head)
    conv1 = Conv1D(400, 1, activation='relu')(norm)
    conv2 = Conv1D(100, 1)(conv1)
    res = Add()([norm, conv2])
    transformer_input = BatchNormalization()(res)
pooling = GlobalMaxPooling1D()(transformer_input)
outputs = Dense(11, activation='softmax')(pooling)
model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train_padded, y_train, nb_epoch=3, batch_size=64)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_102 (InputLayer)          (None, 1000)         0                                            
__________________________________________________________________________________________________
embedding_116 (Embedding)       (None, 1000, 100)    3000200     input_102[0][0]                  
__________________________________________________________________________________________________
positional_encoding_116 (Positi (None, 1000, 100)    0           embedding_116[0][0]              
__________________________________________________________________________________________________
dense_357 (Dense)               (None, 1000, 100)    10100       positional_encoding_116[0][0]    
__________________________________________________________________________________________________
dense_358 



Epoch 1/3


ResourceExhaustedError: OOM when allocating tensor with shape[640,1000,1000] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: lambda_574/MatMul = BatchMatMul[T=DT_FLOAT, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](lambda_571/concat, lambda_574/transpose)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: training/Adam/gradients/lambda_600/Reshape_1_grad/Reshape/_4323 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_6815_training/Adam/gradients/lambda_600/Reshape_1_grad/Reshape", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'lambda_574/MatMul', defined at:
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\asyncio\base_events.py", line 1432, in _run_once
    handle._run()
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-194-f12a92659847>", line 8, in <module>
    multi_head = multi_head_attention(transformer_input)
  File "<ipython-input-182-af0612a55eb7>", line 49, in multi_head_attention
    outputs = Lambda(matmul)([Q_, K_])
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\keras\engine\topology.py", line 619, in __call__
    output = self.call(inputs, **kwargs)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\keras\layers\core.py", line 663, in call
    return self.function(inputs, **arguments)
  File "<ipython-input-182-af0612a55eb7>", line 46, in <lambda>
    matmul = lambda x: tf.matmul(x[0], tf.transpose(x[1], (0, 2, 1)))
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tensorflow\python\ops\math_ops.py", line 2071, in matmul
    a, b, adj_x=adjoint_a, adj_y=adjoint_b, name=name)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1295, in batch_mat_mul
    "BatchMatMul", x=x, y=y, adj_x=adj_x, adj_y=adj_y, name=name)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tensorflow\python\framework\ops.py", line 3290, in create_op
    op_def=op_def)
  File "C:\Users\Sayan\AppData\Local\conda\conda\envs\cs194project\lib\site-packages\tensorflow\python\framework\ops.py", line 1654, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[640,1000,1000] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: lambda_574/MatMul = BatchMatMul[T=DT_FLOAT, adj_x=false, adj_y=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](lambda_571/concat, lambda_574/transpose)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Node: training/Adam/gradients/lambda_600/Reshape_1_grad/Reshape/_4323 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_6815_training/Adam/gradients/lambda_600/Reshape_1_grad/Reshape", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [None]:
# Final evaluation of the model
scores = model.evaluate(X_test_padded, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))