In [1]:
import sklearn.preprocessing as preprocess
import numpy as np
import pandas as pd
from keras import optimizers
import keras.layers as kl
from keras import backend as K
import spacy
from keras.utils import to_categorical
from spacy.lang.en import English
pd.set_option('display.max_colwidth', 0)
pd.set_option('display.max_rows', 1000)
from tqdm import tqdm_notebook
from keras.layers import Conv1D, MaxPooling1D, Input, TimeDistributed, Dense, LSTM, RepeatVector, GlobalAveragePooling1D
from keras.models import Model
import pickle


Using TensorFlow backend.


In [2]:
# CSV lookup tables of the CDC-IBM evaluation set: headline/topic mapping and article/claim mapping.
df_hd_tp = pd.read_csv('evaluation_set/cdc_ibm/headline_topic_mapping.csv')
df_ar_cl = pd.read_csv('evaluation_set/cdc_ibm/article_claim_mapping.csv')

# Pickled lookups; both are later indexed with values from the 'article Id' column.
# NOTE(review): pickle.load executes arbitrary code -- only load files you produced yourself.
with open('evaluation_set/cdc_ibm/articles.p', 'rb') as articles_file:
    articles = pickle.load(articles_file)
with open('evaluation_set/cdc_ibm/article_vectors.p', 'rb') as vectors_file:
    article_vectors = pickle.load(vectors_file)

# Sanity check: column names of both tables and the number of entries in each pickle.
(
    df_hd_tp.columns,
    df_ar_cl.columns,
    len(articles.keys()),
    len(article_vectors.keys()),
)

(Index(['Topic', 'Title', 'article Id', 'Headline'], dtype='object'),
 Index(['Unnamed: 0', 'Topic', 'Article', 'Claim'], dtype='object'),
 522,
 522)

In [3]:
def load_spacy():
    """Create the two spaCy pipelines used by this notebook.

    Returns:
        tuple: ``(sentencizer, nlp)`` where ``sentencizer`` is a blank English
        pipeline containing only the ``sentencizer`` component and ``nlp`` is
        the loaded ``en_core_web_md`` model.
    """
    sentence_splitter = English()
    splitter_component = sentence_splitter.create_pipe('sentencizer')
    sentence_splitter.add_pipe(splitter_component)
    full_pipeline = spacy.load("en_core_web_md")
    return sentence_splitter, full_pipeline


sentencizer, nlp = load_spacy()

In [4]:
def datagen(batchsize, dataframe, max_sentences=1300, embedding_dim=300,
            max_headline_tokens=50, vocab_size=20000):
    """Endlessly yield random training batches as ``(inputs, outputs)`` dicts.

    Each sample is drawn by picking a random title from ``dataframe`` (with
    replacement) and using the first row that carries that title.

    Reads the notebook globals ``article_vectors`` (sentence vectors per
    article id), ``nlp`` (spaCy pipeline) and ``to_categorical``.

    Args:
        batchsize: number of samples per yielded batch; must be >= 1.
        dataframe: table with 'Title', 'Headline' and 'article Id' columns.
        max_sentences: rows of the zero-padded sentence matrix; articles with
            more sentence vectors are truncated instead of raising.
        embedding_dim: width of each sentence vector.
        max_headline_tokens: number of target positions per headline; longer
            headlines are truncated instead of raising IndexError.
        vocab_size: number of one-hot classes per target position.

    Yields:
        tuple: ``(inputs, outputs)`` where
            inputs['sentence_vectors'] has shape
                (batchsize, max_sentences, embedding_dim),
            inputs['headline_vector'] stacks ``nlp(headline).vector`` per sample,
            outputs['headline_token_classes'] has shape
                (batchsize, max_headline_tokens, vocab_size).

    Raises:
        ValueError: on the first ``next()`` call if ``batchsize`` < 1 (the
            original code would loop forever accumulating samples).
    """
    if batchsize < 1:
        raise ValueError('batchsize must be a positive integer')

    # Loop-invariant: the candidate titles do not change between samples.
    titles = dataframe.Title.unique()
    ar_sents, ar_head_vectors, ar_head_classes = [], [], []
    while True:
        title = np.random.choice(titles)
        rows = dataframe[dataframe.Title == title]
        hd = rows['Headline'].values[0]
        ar_id = rows['article Id'].values[0]

        # Zero-padded sentence matrix; surplus sentences are dropped.
        sents = np.zeros((max_sentences, embedding_dim))
        vectors = article_vectors[ar_id][:max_sentences]
        if len(vectors):
            sents[:len(vectors)] = vectors
        ar_sents.append(sents)

        hd_nlp = nlp(hd)
        # Positions past the end of the headline keep class 0 (padding).
        head_classes = np.zeros(max_headline_tokens, dtype='int')
        # zip() stops at the shorter side, so long headlines are truncated.
        for pos, token in zip(range(max_headline_tokens), hd_nlp):
            rank = token.rank
            # Ranks outside the class range (e.g. an out-of-vocabulary
            # sentinel) cannot be one-hot encoded; fold them into class 0.
            head_classes[pos] = rank if 0 <= rank < vocab_size else 0
        ar_head_vectors.append(hd_nlp.vector)
        ar_head_classes.append(to_categorical(num_classes=vocab_size, y=head_classes))

        if len(ar_sents) == batchsize:
            inputs = {'sentence_vectors': np.array(ar_sents),
                      'headline_vector': np.array(ar_head_vectors)}
            outputs = {'headline_token_classes': np.array(ar_head_classes)}
            yield inputs, outputs
            ar_sents, ar_head_vectors, ar_head_classes = [], [], []
            

In [5]:
# Training batch generator: 32 random articles per batch.
# NOTE(review): the shapes recorded further down report a leading dimension of 64,
# so that output appears to come from a run with a different batch size --
# restart the kernel and run all cells to refresh it.
tdg = datagen(batchsize=32, dataframe=df_hd_tp)

In [6]:
# Pull one batch to sanity-check the generator: x is the inputs dict, y the outputs dict.
x,y = next(tdg)

In [7]:
# Shapes of the three arrays in the sampled batch (leading dimension = batch size).
(
    x['sentence_vectors'].shape,
    x['headline_vector'].shape,
    y['headline_token_classes'].shape,
)

((64, 1300, 300), (64, 300), (64, 50, 20000))

In [8]:
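# NOTE: abandoned draft of a custom attention layer, kept commented out for reference only.
# It is not used by build_model and is not runnable as written: `Layer` is never imported
# and the `s,h =` line inside call() is an unfinished statement.
# class Attention_Layer(Layer):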
#     def __init__(self,ch,**kwargs):
#         self.channels=ch
# #         self.h_w=self.channels//8
#         super(Attention_Layer,self).__init__(**kwargs)

#     def build(self,input_shape):
# #         kernel_shape=(1, 3, 3) + (self.channels, self.h_w)
# #         print(input_shape)
#         self.gamma = self.add_weight(name='gamma', shape=[1], initializer='zeros', trainable=True)
#         super(Attention_Layer, self).build(input_shape)

#     def call(self, x):
# #         print('input shape:',x.shape) # None, 16,12,512
#         s,h = 
#         exp_x = K.expand_dims(x,axis=-1)
# #         print('expanded input shape:',exp_x.shape) # N, 16, 12, 512, 1
# #         filters = Attention Heads
#         conv3d = kl.Conv3D(padding='same',filters=1, kernel_size=(3,3,512), strides=(1,1,512),kernel_initializer='he_normal',activation='relu')(exp_x)
# #         print('conv3d shape:', conv3d.shape)
#         conv3d = K.squeeze(K.squeeze(conv3d, axis=-1),axis=-1)
# #         print('conv3d shape:', conv3d.shape)
#         conv3d = K.expand_dims(conv3d,axis=1) # N, 1, 16, 12
# #         print('conv3d shape:', conv3d.shape)
#         softmax_alpha = K.softmax(conv3d, axis=1) # attention map # N, 16, 12
# #         print('softmax_alpha shape:', softmax_alpha.shape)
#         softmax_alpha = K.squeeze(softmax_alpha, axis=1) 
# #         print('softmax_alpha shape:', softmax_alpha.shape)
#         exp_softmax_alpha = K.expand_dims(softmax_alpha, axis=-1) # for elementwise multiplication
# #         print('exp_softmax_alpha shape:', exp_softmax_alpha.shape)
        
#         u = kl.multiply([exp_softmax_alpha, x])
        
#         u = (self.gamma * u) + (self.gamma * u * x)
# #         print('u shape:', u.shape)
#         return u
# #         return [u, softmax_alpha, self.gamma]

#     def compute_output_shape(self, input_shape): 
#         return input_shape
# #         return [input_shape,(None,16,12),(None,1)]
    
#     def get_config(self):
#         return super(Attention_Layer,self).get_config()

In [9]:
def build_model(max_sentences=1300, embedding_dim=300, headline_len=50, vocab_size=20000):
    """Build the article-to-headline network (uncompiled).

    Architecture: two Conv1D layers over the sentence-vector sequence, global
    average pooling to a single 32-d vector, that vector repeated
    ``headline_len`` times, an LSTM over the repeats, and a per-position
    softmax over ``vocab_size`` classes.

    The model has exactly one input, named 'sentence_vectors'. The previous
    version also created a 'headline_vector' Input that was never connected to
    the graph; it has been dropped, so nothing in the model consumes a
    'headline_vector' array.

    Args:
        max_sentences: length of the (padded) sentence sequence.
        embedding_dim: width of each sentence vector.
        headline_len: number of output positions.
        vocab_size: number of classes predicted at each output position.

    Returns:
        keras Model mapping (None, max_sentences, embedding_dim) to
        (None, headline_len, vocab_size); the output layer is named
        'headline_token_classes'.
    """
    inp_sentence_vectors = Input(shape=(max_sentences, embedding_dim), name='sentence_vectors')
    conv1 = Conv1D(filters=16, kernel_size=3, strides=1, activation='relu',
                   padding='same')(inp_sentence_vectors)
    # Default 'valid' padding here, so the sequence shrinks by kernel_size - 1.
    conv2 = Conv1D(filters=32, kernel_size=3, strides=1, activation='relu')(conv1)
    # Collapse the whole sequence into one vector, then feed a copy of it to
    # every output position of the LSTM.
    gap = GlobalAveragePooling1D()(conv2)
    repeat = RepeatVector(headline_len)(gap)
    lstm = LSTM(256, return_sequences=True)(repeat)
    token_probs = TimeDistributed(Dense(vocab_size, activation='softmax'),
                                  name='headline_token_classes')(lstm)
    return Model(inputs=inp_sentence_vectors, outputs=token_probs)
# Instantiate the network and compile it as a per-position multi-class classifier
# (one-hot targets, hence categorical cross-entropy).
model = build_model()
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(),
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sentence_vectors (InputLayer (None, 1300, 300)         0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1300, 16)          14416     
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 1298, 32)          1568      
_________________________________________________________________
global_average_pooling1d_1 ( (None, 32)                0         
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 50, 32)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 50, 256)           295936    
_________________________________________________________________
headline_token_classes (Time (None, 50, 20000)         5140000   
Total para

In [None]:
# One generator batch per epoch, so 100 epochs amount to 100 gradient steps.
hist = model.fit_generator(
    generator=tdg,
    epochs=100,
    steps_per_epoch=1,
)

Epoch 1/100
