# Training CNN

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
from gensim.models import KeyedVectors
warnings.filterwarnings(action='ignore')



In [2]:
from keras import Sequential
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt

Using TensorFlow backend.


Load the pseudo documents (each stored as the indices of its pseudo words in the word embedding) and their pseudo labels

In [3]:
# Both files hold a pickled Python object wrapped in a 0-d object array, which
# is why `.item()` is needed to unwrap it. NumPy >= 1.16.3 refuses to unpickle
# unless allow_pickle=True is passed explicitly.
# SECURITY NOTE: unpickling can execute arbitrary code -- only load .npy files
# that were produced by a trusted pipeline.
training_data = np.load('./pseudo_docs.npy', allow_pickle=True).item()
labels = np.load('./pseudo_labels.npy', allow_pickle=True).item()

Prepare training data

In [4]:
label_names = ['equity', 'fixed_income', 'derivatives', 'alternatives']

# Stack the per-class arrays row-wise. Both stacks iterate label_names in the
# same order, so documents and labels are concatenated class by class.
training_x = np.vstack([training_data[name] for name in label_names])
training_y = np.vstack([labels[name] for name in label_names])

Load word embedding

In [5]:
filename = "./word vectors.kv"
# mmap='r' maps the stored vectors read-only instead of reading them eagerly.
word_vec = KeyedVectors.load(filename, mmap='r')

# `.wv` on a KeyedVectors object is a deprecated gensim 3.x alias for the
# object itself and no longer exists in gensim 4.x. Resolve it with a fallback
# so the cell works whether or not the attribute is present.
keyed_vectors = getattr(word_vec, 'wv', word_vec)

# np.array() copies the (memory-mapped) vectors into a regular in-memory array.
word_embedding = np.array(keyed_vectors.vectors)

In [6]:
# List of vocabulary words. gensim 4.x replaced the `.vocab` dict with
# `.key_to_index` (and dropped the `.wv` alias on KeyedVectors), so try the new
# attribute first and fall back to the gensim 3.x one.
keyed_vectors = getattr(word_vec, 'wv', word_vec)
try:
    vocab = list(keyed_vectors.key_to_index)  # gensim >= 4.0
except AttributeError:
    vocab = list(keyed_vectors.vocab)  # gensim 3.x

In [7]:
# Prepend a single all-zero row so that row 0 of the matrix is a zero vector
# and every original embedding row moves down by one.
# NOTE(review): presumably index 0 is reserved for padding -- confirm that the
# pseudo-doc indices were built with the same +1 offset.
embedding_dim = word_embedding.shape[1]
zero_row = np.zeros((1, embedding_dim))
word_embedding_padded = np.concatenate((zero_row, word_embedding), axis=0)

In [8]:
print('Shape of Data Tensor:', training_x.shape)
print('Shape of Label Tensor:', training_y.shape)

# Documents and labels are indexed with the same permutation below, so they
# must have the same number of rows; a mismatch would otherwise misalign or
# silently drop samples.
assert training_x.shape[0] == training_y.shape[0], \
    'training_x and training_y must have the same number of rows'

# The stacked arrays are grouped class by class, so shuffle them before
# training. A dedicated, seeded generator makes the order reproducible across
# kernel restarts (this seeds only the shuffle, not Keras weight init).
SHUFFLE_SEED = 42
indices = np.random.RandomState(SHUFFLE_SEED).permutation(training_x.shape[0])
x_train = training_x[indices]
y_train = training_y[indices]

Shape of Data Tensor: (12000, 5000)
Shape of Label Tensor: (12000, 4)


Build the CNN architecture

In [None]:
# Take the sequence length and the number of classes from the training arrays
# rather than hard-coding 5000 and 4, so the network always matches the data.
sequence_length = x_train.shape[1]
num_classes = y_train.shape[1]

# Frozen embedding layer initialised from the pre-trained word vectors
# (trainable=False keeps them fixed during training).
embedding_layer = Embedding(word_embedding_padded.shape[0],
                            word_embedding_padded.shape[1],
                            weights=[word_embedding_padded],
                            input_length=sequence_length,
                            trainable=False)

sequence_input = Input(shape=(sequence_length,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# Three Conv1D(128 filters, kernel 5) + max-pooling stages.
l_cov1 = Conv1D(128, 5, activation='relu')(embedded_sequences)
l_pool1 = MaxPooling1D(5)(l_cov1)
l_cov2 = Conv1D(128, 5, activation='relu')(l_pool1)
l_pool2 = MaxPooling1D(5)(l_cov2)
l_cov3 = Conv1D(128, 5, activation='relu')(l_pool2)
# NOTE(review): this line was originally tagged "global max pooling", but with
# 5000-token inputs l_cov3 has 195 time steps, and a window of 35 leaves 5
# steps, not 1. The architecture is kept as-is; confirm whether
# GlobalMaxPooling1D was actually intended.
l_pool3 = MaxPooling1D(35)(l_cov3)
l_flat = Flatten()(l_pool3)

# Two dense layers with dropout, then a softmax over the classes.
l_dense_1 = Dense(128, activation='relu')(l_flat)
l_dropout_1 = Dropout(0.25)(l_dense_1)
l_dense_2 = Dense(128, activation='relu')(l_dropout_1)
l_dropout_2 = Dropout(0.4)(l_dense_2)
preds = Dense(num_classes, activation='softmax')(l_dropout_2)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print("Simplified convolutional neural network")
model.summary()
# cp=ModelCheckpoint('./word_embedding_result_added_40_papers/model_cnn.hdf5',monitor='val_acc',verbose=1,save_best_only=True)

Simplified convolutional neural network
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 5000)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 5000, 200)         12304600  
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 4996, 128)         128128    
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 999, 128)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 995, 128)          82048     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 199, 128)          0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 

In [None]:
# Train on the shuffled pseudo documents; no validation data is passed, so
# the returned history only contains training loss and accuracy.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=50,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
 1300/12000 [==>...........................] - ETA: 10:21 - loss: 1.3897 - acc: 0.2423

In [None]:
# Make sure the output directory exists; otherwise open() and model.save()
# fail with a "No such file or directory" error on a fresh checkout.
os.makedirs('./model', exist_ok=True)

# Architecture only, serialised as JSON.
model_json = model.to_json()
with open('./model/model.json', 'w') as wf:
    wf.write(model_json)

# NOTE: despite the file name, model.save() stores the complete model
# (architecture, weights and optimizer state), not only the weights.
model.save('./model/model_weights.h5')

In [None]:
# from keras.utils.vis_utils import plot_model
# plot_model(model, to_file='cnn_model.png', show_shapes=True, show_layer_names=True)

Second training run

In [None]:
# history=model.fit(x_train, y_train, epochs= 10, batch_size=10#,callbacks=[cp]
#                  )

In [None]:
#model.save('./model/trained_30.h5')