# Imports

In [1]:
import nltk
import pandas as pd
import numpy as np
import time
import tensorflow as tf
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec
from nltk.corpus import stopwords
from nltk import word_tokenize, pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Input,GRU,LSTM,Dense,Conv2D,AveragePooling1D,TimeDistributed,Flatten,MaxPooling2D,MaxPooling1D,Convolution1D,Reshape,Dropout,Embedding,Permute,Lambda,Multiply
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.models import Model 
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
print(tf.__version__)

2.4.0


# Data preparation

### Utilize glove to be the initial word representation

In [3]:
input_file = r'glove.6B.50d.txt'
output_file = r'gensim_glove.6B.50d.txt'
glove2word2vec(input_file, output_file)

(400000, 50)

### Glove model

In [4]:
model = KeyedVectors.load_word2vec_format(output_file, binary=False)

### Read the data

In [5]:
f = pd.read_csv("data_place_of_birth.csv")

In [6]:
f = f[:1000]

In [7]:
f[f['label']==1]

Unnamed: 0,sentence,wikidata,label
6,"lin was born in houguan , which is around pres...",place of birth debrzno,1
16,born in the village of ibogun-olaogun to a far...,place of birth prague,1
18,paul hintze was born in 1864 in the little tow...,place of birth prague,1
28,chen yang is a chinese tv and radio personalit...,place of birth daegu,1
30,montgomery was born at irvine in ayrshire in s...,place of birth allendale,1
...,...,...,...
975,"gauthier grumier born 29 may 1984, in nevers i...",place of birth san severino marche,1
976,"stephen heller was born in pest now budapest, ...",place of birth suzhou,1
983,"fukuda was born on february 4, 1932 in tokyo t...",place of birth hexham,1
988,"matteo salvini was born in milan in 1973, the ...",place of birth funabashi,1


In [8]:
plain = f["sentence"].tolist()
wikid = f["wikidata"].tolist()

### drop the stopwords for sentences and wikidata

In [9]:
nltk.download("stopwords")
EngStopWords = set(stopwords.words("english"))

drop_stop = []
for p in range(0,len(plain)):
    j = []
    lower = plain[p].lower()
    for word in lower.split():
        if word in EngStopWords:
            pass
        else:
            j.append(word)
    
    d = j[0]
    for i in range(1,len(j)):
        d = d + " " + j[i]
    drop_stop.append(d)
    
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [10]:
wikidata = []
for p in range(0,len(wikid)):
    j = []
    lower = wikid[p].lower()
    for word in lower.split():
        if word in EngStopWords:
            pass
        else:
            j.append(word)
    
    d = j[0]
    for i in range(1,len(j)):
        d = d + " " + j[i]
    
    wikidata.append(d)

### Do the stemming

In [11]:
def get_wordnet_pos(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return None
    
plain=[]

for i in range(0,len(drop_stop)):
    tokens = word_tokenize(drop_stop[i])  
    tagged_sent = nltk.pos_tag(tokens)    
    wnl = WordNetLemmatizer()
    lemmas_sent = []
    for tag in tagged_sent:
        wordnet_pos = get_wordnet_pos(tag[1]) or wordnet.NOUN
        lemmas_sent.append(wnl.lemmatize(tag[0], pos=wordnet_pos)) 
    delimiter = ' '
    ff = delimiter.join(lemmas_sent)
    plain.append(ff)

### Let wikidata and sentence to have their GloVe word representation

In [12]:
wl = len(wikidata)
sl = len(plain)
plain_e = []
wikidata_e = []

for i in range(len(wikidata)):
    a = wikidata[i].split()
    w = []
    for j in range(len(a)):
        try:
            w.append(model[a[j]].tolist())
        except:
            w.append([0]*50)
    if len(w) > wl:
        w = w[0:wl]
    else:
        for k in range(wl-len(w)):
            w.append([0]*50)
    wikidata_e.append(w)
    
for i in range(len(plain)):
    a = plain[i].split()
    w = []
    for j in range(len(a)):
        try:
            w.append(model[a[j]].tolist())
        except:
            w.append([0]*50)
    if len(w) > sl:
        w = w[0:sl]
    else:
        for k in range(sl-len(w)):
            w.append([0]*50)
    plain_e.append(w)

# Models

In [13]:
from tensorflow.keras import backend as k
from tensorflow.keras.layers import Layer
tf.compat.v1.disable_eager_execution()

### NSMN

In [14]:
class nsmnattention(Layer):

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(nsmnattention, self).__init__(**kwargs)

    def build(self, input_shape):
        
        self.kernelW = self.add_weight(name='Wall', 
                                      shape=(10, 10),
                                      initializer='uniform',
                                      trainable=False)
        self.kernelWs = self.add_weight(name='Ws', 
                                      shape=(wl,wl),
                                      initializer='uniform',
                                      trainable=False)
        self.kernelWc = self.add_weight(name='Wc', 
                                      shape=(sl,sl),
                                      initializer='uniform',
                                      trainable=False)
        self.kernelas = self.add_weight(name='Was', 
                                      shape=(10,1),
                                      initializer='uniform',
                                      trainable=False)
        self.kernelac = self.add_weight(name='Wac', 
                                      shape=(10,1),
                                      initializer='uniform',
                                      trainable=False)
        super(nsmnattention, self).build(input_shape)  


    def call(self, x):
        
        U = Permute((2,1))(x[0])
        V = Permute((2,1))(x[1])
        print("U.shape",U.shape)
        print("V.shape",V.shape)
        
        E = k.batch_dot(Permute((2,1))(U),V)
        
        print("E.shape",E.shape)
        
        U1 = k.batch_dot(V,Permute((2,1))((E)))     
        
        V1 = k.batch_dot(U,E)

        U = Permute((2,1))(U)
        U1 = Permute((2,1))(U1)
        V = Permute((2,1))(V)
        V1 = Permute((2,1))(V1)
        S = Permute((2,1))((tf.keras.layers.concatenate([U,U1,(U-U1),Multiply()([U,U1])])))
        T = Permute((2,1))((tf.keras.layers.concatenate([V,V1,(V-V1),Multiply()([V,V1])])))
        print("S.shape",S.shape)
        print("T.shape",T.shape)
                        
        P = LSTM(10,return_sequences=True)(S)
        Q = LSTM(10,return_sequences=True)(T)
        print("P.shape",P.shape)
        print("Q.shape",Q.shape)
                  
        p = MaxPooling1D((40))(P)
        q = MaxPooling1D((40))(Q)
        
        print("p.shape",p.shape)
        print("q.shape",q.shape)
        
        m = tf.keras.layers.concatenate([p,q,(p-q),Multiply()([p,q])])
        print("m.shape",m.shape)
        print('')
        
        return m

    def compute_output_shape(self, input_shape):
        return (None, 40)

### co-attention

In [15]:
class coattention(Layer):

    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(coattention, self).__init__(**kwargs)

    def build(self, input_shape):
        
        
        self.kernelW = self.add_weight(name='Wall', 
                                      shape=(10, 10),
                                      initializer='uniform',
                                      trainable=True)
        self.kernelWs = self.add_weight(name='Ws', 
                                      shape=(wl,wl),
                                      initializer='uniform',
                                      trainable=True)
        self.kernelWc = self.add_weight(name='Wc', 
                                      shape=(sl,sl),
                                      initializer='uniform',
                                      trainable=True)
        self.kernelas = self.add_weight(name='Was', 
                                      shape=(10,1),
                                      initializer='uniform',
                                      trainable=True)
        self.kernelac = self.add_weight(name='Wac', 
                                      shape=(10,1),
                                      initializer='uniform',
                                      trainable=True)
        super(coattention, self).build(input_shape)  


    def call(self, x):
        C = x[0]
       
        print("C.shape",C.shape)
        RNN=Permute((2,1))(x[1])
        
        f = k.dot(C,self.kernelW)
        print("f.shape",f.shape)
        F = k.tanh(k.batch_dot(f,RNN))
        print("F.shape",F.shape)
        
        s = k.dot(RNN,self.kernelWs)
        print("s.shape",s.shape)
        c = k.dot(Permute((2,1))(C),self.kernelWc)
        print("c.shape",c.shape)
       
        Hs = k.tanh(s+k.batch_dot(c,F))
        print("Hs.shape",Hs.shape)
        Hc = k.tanh(c+k.batch_dot(s,Permute((2,1))(F)))
        print("Hc.shape",Hc.shape)
        
        
        As = k.softmax(k.dot(Permute((2,1))(Hs),self.kernelas))
        print("As.shape",As.shape)
        Ac = k.softmax(k.dot(Permute((2,1))(Hc),self.kernelac))
        print("Ac.shape",Ac.shape)
        
        As = Permute((2,1))(As)
        print("As.shape",As.shape)
        Ac = Permute((2,1))(Ac)
        print("Ac.shape",Ac.shape)
        
        sfinal = k.batch_dot(As,Permute((2,1))(RNN))
        print("sfinal.shape",sfinal.shape)
        
        cfinal = k.batch_dot(Ac,C)
        print("cfinal.shape",cfinal.shape)
        print('')
        
        return tf.keras.layers.concatenate([sfinal,cfinal])

    def compute_output_shape(self, input_shape):
        return (None, 20)

# Train the model

In [16]:
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

In [17]:
y = f["label"].tolist()

X_train, X_test, y_train, y_test = train_test_split(plain_e, y , test_size=0.2, random_state=1000)
X_train_1, X_test_1, y_train, y_test = train_test_split(wikidata_e, y , test_size=0.2, random_state=1000)

y_train = to_categorical(y_train,2)
y_train = y_train.astype('int')
y_train = y_train.reshape(-1, 1, 2)
y_test = to_categorical(y_test,2)
y_test = y_test.astype('int')
y_test = y_test.reshape(-1, 1, 2)

wl = len(wikidata)
sl = len(plain)

In [18]:
winput = Input(shape=(sl,50))
wembed = LSTM(10,return_sequences=True)(winput)

winput_1 = Input(shape=(wl,50))
wembed_1 = LSTM(10,return_sequences=True)(winput_1)



In [19]:
co = nsmnattention(40)([wembed, wembed_1])
co = Dense(2)(co)
coc = coattention(20)([wembed, wembed_1])
coc = Dense(2)(coc)
c = tf.keras.layers.concatenate([co, coc])
output = Dense(2)(c)
output = Dense(2, activation="softmax")(output)

model = Model([winput, winput_1], [output])
model.summary()

RMSprop = tf.keras.optimizers.Adam(lr=0.01)
model.compile(optimizer=RMSprop, loss="categorical_crossentropy", metrics=[tf.keras.metrics.Accuracy()], experimental_run_tf_function=False)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=2)

history = model.fit([np.array(X_train), np.array(X_train_1)], [np.array(y_train)],
                  epochs=20, validation_split=0.2, callbacks=[early_stopping], batch_size=64)

scores = model.evaluate([np.array(X_test), np.array(X_test_1)], np.array(y_test), verbose=0)
pre = model.predict([np.array(X_test), np.array(X_test_1)])
print(scores)

U.shape (None, 10, 1000)
V.shape (None, 10, 1000)
E.shape (None, 1000, 1000)
S.shape (None, 40, 1000)
T.shape (None, 40, 1000)
P.shape (None, 40, 10)
Q.shape (None, 40, 10)
p.shape (None, 1, 10)
q.shape (None, 1, 10)
m.shape (None, 1, 40)

C.shape (None, 1000, 10)
f.shape (None, 1000, 10)
F.shape (None, 1000, 1000)
s.shape (None, 10, 1000)
c.shape (None, 10, 1000)
Hs.shape (None, 10, 1000)
Hc.shape (None, 10, 1000)
As.shape (None, 1000, 1)
Ac.shape (None, 1000, 1)
As.shape (None, 1, 1000)
Ac.shape (None, 1, 1000)
sfinal.shape (None, 1, 10)
cfinal.shape (None, 1, 10)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1000, 50)]   0                                            
__________________________________________________________________________________________________
input_2 (InputL



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 00012: early stopping
[0.6449865627288819, 0.0]


# Evaluation metrics

In [20]:
from sklearn.metrics import accuracy_score, average_precision_score,precision_score,f1_score,recall_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

y_pre = []

for i in range(len(pre)):
    k = pre[i]
    w = np.where(k == np.max(k))[0][0].tolist()
    y_pre.append(w)

    
y = f["label"].tolist()

X_train, X_test, y_train, y_test = train_test_split(plain_e, y, test_size=0.2, random_state=1000)
X_train_1, X_test_1, y_train, y_test = train_test_split(wikidata_e, y, test_size=0.2, random_state=1000)

print(confusion_matrix(y_test, y_pre))

print('Weighted precision', precision_score(y_test, y_pre, labels=[1], average='macro'))
print('Weighted recall', recall_score(y_test, y_pre, labels=[1], average='macro'))
print('Weighted f1-score', f1_score(y_test, y_pre, labels=[1], average='macro'))

[[134   0]
 [ 66   0]]
Weighted precision 0.0
Weighted recall 0.0
Weighted f1-score 0.0


  _warn_prf(average, modifier, msg_start, len(result))


# Calculate the precision@50

In [21]:
a = set(np.argsort(np.array(y_pre)).tolist()[len(y_test) - 50:len(y_test)])
a = list(a)
p = []

for i in range(50):
    g = a[i]
    p.append(y_test[g])
    
pre50 = np.sum(p) / 50

print(pre50)

0.28


# Draft Area Below