In [1]:
import sys

sys.path.append('../')

import bz2
import csv, logging, pickle5 as pickle, pandas as pd, re, random
from dewiki.parser import Parser

from wikitextprocessor import Wtp
import seaborn as sns

import requests
import numpy as np
from numpy.linalg import norm
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score

from dbpedia_spotlight import annotate_text
from neomodel import config, db

config.DATABASE_URL = 'neo4j://neo4j:l5IKrx07DGYdclK@151.106.35.64:7687'


import requests

In [2]:
import matplotlib.pyplot as plt
import os
import re
import shutil
import string
import tensorflow as tf
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score

from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from tensorflow.keras import losses
import tensorflow_transform as tft


from stellargraph.data import BiasedRandomWalk
from stellargraph import StellarGraph, IndexedArray
from gensim.models import Word2Vec

In [3]:
def get_labels(data):
    labels = []
    for d in data:
        labels.append(d['label']) if d['label'] not in labels else None

    return labels

def get_x_and_y(data):
    x, y = [], []
    for d in data:
        for dd in d['data']:
            tmp = dd['text'].replace('\n', '').replace('_', '')  # clean
            x.append({'label': d['label'], 'dbpedia_uri' : dd['dbpedia_uri'], 'text':tmp,'graph':dd['graph']}) if len(tmp) > 0 else None
            y.append(d['label']) if len(tmp) > 0 else None

    return x, y

def get_label_index(label):
    return [index for index, _label in enumerate(labels) if label == _label][0]


In [4]:
def get_graph(dbpedia_uri):
    query = """
MATCH(e:Entity{dbpedia_uri: $dbpedia_uri})-[rel]->(e2:Entity) 
MATCH(e2)-[rel2]->(e3:Entity)
return  distinct e.name,type(rel),e2.name,type(rel2),e3.name
"""
    query = """
MATCH(e:Entity{dbpedia_uri: $dbpedia_uri})-[rel]->(e2:Entity) 
return  distinct e.name,type(rel),e2.name
"""
    results, meta = db.cypher_query(query,{'dbpedia_uri': dbpedia_uri})
    
    return results

In [5]:
def get_graph_data(data):
    for data_ in data:
        print(f"Getting graphs for {data_['label']}")
        for d in data_['data']:
            # print(f"Getting graph for {d['name']}")
            # results = annotate_text(d['text'])
            graph_results = get_graph(d['dbpedia_uri'])
            # for annotation in results:
            #     graph_results.extend(get_graph(annotation['uri']))

            nodes, edges, edge_types, subjects = [], [],[], []
            for row in graph_results:
                if len(row) < 3: continue
                nodes.append(row[0]) if row[0] and row[0] not in nodes else None
                nodes.append(row[2]) if row[2] and row[2] not in nodes else None
                # nodes.append(row[4]) if row[4] and row[4] not in nodes else None


                if row[0] and row[2] and not any(e['source'] == row[0] and e['target'] == row[2] for e in edges):
                    edges.append({'source': row[0], 'target': row[2]})
                    edge_types.append(row[1])

                # if row[2] and row[4] and not any(e['source'] == row[2] and e['target'] == row[4] for e in edges):
                #     edges.append({'source': row[2], 'target': row[4]})
                #     edge_types.append(row[3])

            d['graph'] = {'nodes': nodes, 'edges': edges,'edge_types': edge_types}

    return data

In [6]:
def get_stellar_graph(graph):
    edges = pd.DataFrame({
        'source': [e['source'] for e in graph['edges']],
        'target': [e['target'] for e in graph['edges']],
        'type': graph['edge_types']
    })

    return StellarGraph(IndexedArray(index=graph['nodes']), edges, edge_type_column="type")


In [7]:
def get_embedding(G):
    walk_length = 10
    rw = BiasedRandomWalk(G)
    walks = rw.run(
        nodes=G.nodes(),  # root nodes
        length=walk_length,  # maximum length of a random walk
        n=10,  # number of random walks per root node
        p=0.5,  # Defines (unormalised) probability, 1/p, of returning to source node
        q=2.0,  # Defines (unormalised) probability, 1/q, for moving away from source node
        weighted=False,  # for weighted random walks
        seed=42,  # random seed fixed for reproducibility
    )

    model = Word2Vec(
        walks,  vector_size=100, window=5, min_count=0, sg=1, workers=1
    )

    return model.wv.vectors


In [8]:

def get_graph_embeddings_train():
    graph_embeddings_train = []
    for i, d in enumerate(x_train):
        print('%.2f%%' % ((i * 100) / len(x_train))) if i % 20 == 0 else None
        if len(d['graph']['nodes']) > 0:
            graph_embeddings_train.append(get_embedding(get_stellar_graph(d['graph'])))
        else:
            graph_embeddings_train.append(np.empty((0, 100)))

    with open('graph_embeddings_train.pkl', 'wb') as outp:
            pickle.dump(graph_embeddings_train, outp, pickle.HIGHEST_PROTOCOL)

    return graph_embeddings_train

def get_graph_embeddings_test():
    graph_embeddings_test = []
    for i,d in enumerate(x_test):
        print('%.2f%%' % ((i*100) / len(x_test))) if i % 100 == 0 else None
        if len(d['graph']['nodes']) > 0:
            graph_embeddings_test.append(get_embedding(get_stellar_graph(d['graph'])))
        else:
            graph_embeddings_test.append(np.empty((0,100)))

    with open('graph_embeddings_test.pkl', 'wb') as outp:
            pickle.dump(graph_embeddings_test, outp, pickle.HIGHEST_PROTOCOL)

    return graph_embeddings_test

def vectorize_text():
    max_features = 10000
    sequence_length = 100

    vectorize_layer = layers.TextVectorization(
        max_tokens=max_features,
        output_mode='int',
        output_sequence_length=sequence_length)

    vectorize_layer.adapt(x_train_texts)

    text_vectors_train = []
    for i, text in enumerate(x_train_texts):
        print('%.2f%%' % ((i * 100) / len(x_train_texts))) if i % 50 == 0 else None
        text_vectors_train.append(vectorize_layer(text))

    text_vectors_test = []
    for i, text in enumerate(x_test_texts):
        print('%.2f%%' % ((i * 100) / len(x_test_texts))) if i % 50 == 0 else None
        text_vectors_test.append(vectorize_layer(text))

    with open('text_vectors_train.pkl', 'wb') as outp:
            pickle.dump(text_vectors_train, outp, pickle.HIGHEST_PROTOCOL)

    with open('text_vectors_test.pkl', 'wb') as outp:
            pickle.dump(text_vectors_test, outp, pickle.HIGHEST_PROTOCOL)

    return text_vectors_train, text_vectors_test

def get_full_graph_feature_vectors(graph_embeddings,x):
    node_feature_index_map = {}
    index = 0
    full_graph_vector_length = 0
    for i1, xx in enumerate(x):
        for i2, node in enumerate(xx['graph']['nodes']):
            if not node in node_feature_index_map.keys():
                full_graph_vector_length += 1
                node_feature_index_map[node] = index
                index += 1

    graph_embeddings_full = []
    for i1, embedding in enumerate(graph_embeddings):
        print('%.2f%% - %.2f' % ((i1 * 100) / len(graph_embeddings),
                                 (
                                             len(graph_embeddings_full) * full_graph_vector_length * 4) / 1000000)) if i1 % 10 == 0 else None
        vectors = []
        for i2, node_embedding in enumerate(embedding):
            node = x[i1]['graph']['nodes'][i2]
            vector_index = node_feature_index_map[node]
            vectors.append((vector_index, node_embedding))

        sorted_vectors = sorted(vectors, key=lambda v: v[0])

        new_embedding = []
        for i2 in range(0, full_graph_vector_length):
            if i2 in [v[0] for v in sorted_vectors]:
                for val in [v[1] for v in sorted_vectors if v[0] == i2][0]:
                    new_embedding.append(val)
            else:
                new_embedding.extend([0 for i3 in range(0, graph_embedding_size)])
        graph_embeddings_full.append(np.array(new_embedding,dtype='float32'))

    return graph_embeddings_full


In [9]:
def compute_dense(vectors, vector_size=100):
    dense_graph_vectors = []
    
    for vector in vectors:
        num, new_vector, i1, sumOf = int(len(vector) / 100), [], 0, 0
        for i2, val in enumerate(vector):
            sumOf += val
            if i2 % num == 0:
                new_vector.append(sumOf / num)
                i1 = i2
        new_vector.append(sum(vector[i1:]) / (len(vector) - i1))
        dense_graph_vectors.append(new_vector)
    
    return dense_graph_vectors


### Load Data

In [45]:
with open(r'classification_data_with_graphs.pkl', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

labels = get_labels(data)
x, y = get_x_and_y(data)  # starte

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
x_train_texts, x_test_texts = [xx['text'] for xx in x_train],  [yy['text'] for yy in x_test]
y_train_int = [get_label_index(label) for label in y_train]
# y_train_int = np.array(y_train_int, dtype='float64')


In [20]:
indexes = []
for i1,x in enumerate(x_train):
    for i2,node in enumerate(x['graph']['nodes']):
        indexes.append((i1,i2)) if node == 'London' else None
indexes

[(194, 3), (215, 4), (356, 3), (691, 2), (975, 3), (1067, 8), (1094, 7), (1108, 4), (1293, 3), (1536, 8), (1591, 3), (1937, 4), (2210, 8), (2249, 5), (2484, 4), (2748, 3), (3423, 3), (3701, 3), (4096, 3), (4403, 7), (4466, 2)]

In [None]:
# graph_embeddings_train = get_graph_embeddings_train()
# with open(r'graph_embeddings_train.pkl', 'rb') as pickle_file:
#     graph_embeddings_train = pickle.load(pickle_file)


In [22]:
with open(r'full_graph_feature_vector_train.pkl', 'rb') as pickle_file:
    graph_vectors_train = pickle.load(pickle_file)

In [None]:
# graph_embeddings_test = get_graph_embeddings_test()
# full_graph_feature_vector_test = get_full_graph_feature_vectors(graph_embeddings_test,x_test)


In [24]:
with open(r'full_graph_feature_vector_test.pkl', 'rb') as pickle_file:
    graph_vectors_test = pickle.load(pickle_file)

In [25]:
# text_vectors_train,text_vectors_test = vectorize_text()
with open(r'text_vectors_train.pkl', 'rb') as pickle_file:
    text_vectors_train = pickle.load(pickle_file)
with open(r'text_vectors_test.pkl', 'rb') as pickle_file:
    text_vectors_test = pickle.load(pickle_file)

In [26]:
[g for g in graph_vectors_train[0] if g != 0]

[0.9279607, 0.0761339, 0.6826504, 0.06821077, 0.5494435, 0.9300635, -0.8792705, -0.6854978, -0.47228694]

In [28]:
graph_embedding_size = 1
emb = [get_embedding(get_stellar_graph(x_train[0]['graph']))]
vectors = get_full_graph_feature_vectors(emb, x_train)
# [v for v in vectors[0] if v != 0]

0.00% - 0.00


### Data Cleaning and Concatenation

In [29]:
# Clean data
indexes = [i for i,d in enumerate(text_vectors_train) if len(d) == 0]
text_vectors_train_cleaned = [d for i,d in enumerate(text_vectors_train) if i not in indexes]
y_train_int_cleaned = [d for i,d in enumerate(y_train_int) if i not in indexes]
graph_vectors_train_cleaned = [d for i,d in enumerate(graph_vectors_train) if i not in indexes]

indexes = [i for i,d in enumerate(text_vectors_test) if len(d) == 0]
text_vectors_test_cleaned = [d for i,d in enumerate(text_vectors_test) if i not in indexes]
y_test_cleaned = [d for i,d in enumerate(y_test) if i not in indexes]
graph_vectors_test_cleaned = [d for i,d in enumerate(graph_vectors_test) if i not in indexes]


In [30]:
[g for g in graph_vectors_train[2] if g != 0]

[-1.1950477, -1.0818676, -1.2879734, -1.2654288, -1.2473239, -1.252564, -1.0313338, -1.3109224, -1.1200327, -1.2203186, -1.1087509, -1.1498816, -1.1955401, -0.9916061, -1.41463, -1.2499546, -1.1939098, -1.1418176, -1.2801851, -1.3134094, -1.2746111, -1.1993598, -1.159023, -1.3023642, -1.1537172, -1.3462013, -1.1690928, -1.0903003, -1.3188549, -1.2187461, -1.0963854, -1.1281712, -1.2315053, -1.1119274, -1.2049334, -1.2389414, -1.0587769, -1.274655, -1.2407218, -1.1706921, -1.2449642, -1.1370555, -1.2245362, -1.1958774, -1.2333192, -1.3160295, -1.1847967, -1.374695, -1.274093, -1.1012592, -1.3403703, -1.1909267, -1.0684329, -1.1135222, -1.2163643, -1.1608855, -1.1382778, -1.2479419, -1.1553657, -1.2225012, -1.1219006]

In [93]:
dense_graph_vectors_train = compute_dense(graph_vectors_train)
dense_graph_vectors_test = compute_dense(graph_vectors_test)
            

In [32]:
dense_graph_vectors_train = [np.array(v,dtype='float32') for v in dense_graph_vectors_train]
dense_graph_vectors_test = [np.array(v,dtype='float32') for v in dense_graph_vectors_test]

In [77]:
indexes = [i for i,d in enumerate(x_train) if d['label'] == 'Culture']
indexes

[86, 225, 265, 316, 452, 474, 548, 834, 843, 910, 1003, 1004, 1081, 1190, 1253, 1285, 1348, 1537, 1565, 1765, 1825, 1920, 1930, 1939, 2009, 2058, 2095, 2120, 2191, 2221, 2275, 2284, 2339, 2468, 2606, 2701, 2745, 2870, 2876, 3047, 3051, 3076, 3085, 3162, 3205, 3257, 3275, 3453, 3472, 3512, 3546, 3561, 3563, 3583, 3586, 3601, 3606, 3667, 3687, 3718, 3758, 3790, 3794, 3859, 3878, 3908, 4020, 4042, 4058, 4176, 4337, 4470]

In [35]:
import random
indexes = [random.randint(0,len(x_train)) for x in range(0,100)]
# indexes = [i for i in range(0,len(x_train) - 100)]


In [38]:
sums, c, avg = 0, 0, 0
for i1 in range(0,len(indexes)):
    A = dense_graph_vectors_train[indexes[i1]]
    for i2 in range(i1,len(indexes)):
        B = dense_graph_vectors_train[indexes[i2]]
        if norm(A) == 0 or norm(B) == 0: continue
        cosine = np.dot(A,B)/(norm(A)*norm(B))
        sums += cosine
        c += 1

sums / c

0.11978959224107659

In [104]:
A = dense_graph_vectors_train[3908]
B = dense_graph_vectors_train[4450]
np.dot(A,B)/(norm(A)*norm(B))

0.8297882246213256

In [105]:
A = graph_vectors_train[3908]
B = graph_vectors_train[4450]
np.dot(A,B)/(norm(A)*norm(B))

0.0

In [91]:
x_train[4470]



[0.0, 0.28365384615384615, 2.5865384615384617, 2.5865384615384617, 2.5865384615384617, 2.5865384615384617, 2.5865384615384617, 2.5865384615384617, 2.5865384615384617, 11.283653846153847, 11.283653846153847, 11.283653846153847, 11.283653846153847, 11.283653846153847, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846153846, 24.96153846

In [99]:
cosine = np.dot(A,B)/(norm(A)*norm(B))
print("Cosine Similarity:", cosine)

Cosine Similarity: 0.78641605


In [73]:
# Apply t-SNE transformation on node embeddings
tsne = TSNE(n_components=2, random_state=42)
node_embeddings_2d = tsne.fit_transform(dense_graph_vectors_train)

# draw the points
alpha = 0.7

plt.figure(figsize=(10, 8))
plt.scatter(
    node_embeddings_2d[:, 0],
    node_embeddings_2d[:, 1],
    # c=node_targets.cat.codes,
    cmap="jet",
    alpha=0.7,
)
plt.show()

In [63]:
x_train_np = np.array(text_vectors_train_cleaned, dtype='float64')
y_train_np = np.array(y_train_int_cleaned, dtype='float64')
x_test_np = np.array(text_vectors_test_cleaned, dtype='float64')
y_test_np = np.array(y_test_cleaned, dtype='str')

In [None]:
# rs = tf.reshape(train_graph_tensors[1],shape=(train_graph_tensors[1].shape[0]*train_graph_tensors[1].shape[1],))
# casted = tf.cast(text_vectors_train_cleaned[0], tf.float32)
# tf.concat([rs,casted], axis=0)

In [None]:
# graph_tensor = tf.convert_to_tensor(full_graph_feature_vector_train[0])
# text_tensor = text_vectors_train_cleaned[0]
# tf.concat([graph_tensor,tf.cast(text_tensor, tf.float32)], axis=0)

In [56]:
kg_tensors_train = []
for graph_embedding, text_tensor in zip(dense_graph_vectors_train, text_vectors_train_cleaned):
    graph_tensor = tf.convert_to_tensor(graph_embedding)
    # reshaped_graph_tensor = tf.reshape(graph_tensor,shape=(graph_tensor.shape[0]*graph_tensor.shape[1],))
    kg_tensors_train.append(tf.concat([graph_tensor,tf.cast(text_tensor, tf.float32)], axis=0))

In [57]:
kg_tensors_test = []
for graph_embedding, text_tensor in zip(dense_graph_vectors_test, text_vectors_test_cleaned):
    graph_tensor = tf.convert_to_tensor(graph_embedding)
    # reshaped_graph_tensor = tf.reshape(graph_tensor,shape=(graph_tensor.shape[0]*graph_tensor.shape[1],))
    kg_tensors_test.append(tf.concat([graph_tensor,tf.cast(text_tensor, tf.float32)], axis=0))

In [58]:
x_train_kg_np = np.array(kg_tensors_train, dtype='float64')
x_test_kg_np = np.array(kg_tensors_test, dtype='float64')


In [60]:
len(x_train_kg_np[0])

202

### Model Training

In [61]:
MAX_TOKENS_NUM = 5000  # Maximum vocab size.
MAX_SEQUENCE_LEN = 40  # Sequence length to pad the outputs to.
EMBEDDING_DIMS = 100

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(MAX_TOKENS_NUM + 1, EMBEDDING_DIMS))
# model.add(layers.Dropout(0.1))
model.add(layers.GlobalAveragePooling1D())
# model.add(layers.Dropout(0.1))
model.add(layers.Dense(len(labels)))
model.summary()
model.compile(loss=losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer='adam',
              metrics=tf.metrics.SparseCategoricalAccuracy())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 100)         500100    
_________________________________________________________________
global_average_pooling1d (Gl (None, 100)               0         
_________________________________________________________________
dense (Dense)                (None, 78)                7878      
Total params: 507,978
Trainable params: 507,978
Non-trainable params: 0
_________________________________________________________________


In [24]:
history = model.fit(x_train_np,y_train_np, epochs=10, verbose=2)

Epoch 1/10
141/141 - 1s - loss: 4.3169 - sparse_categorical_accuracy: 0.0964
Epoch 2/10
141/141 - 0s - loss: 4.1606 - sparse_categorical_accuracy: 0.1311
Epoch 3/10
141/141 - 0s - loss: 3.9844 - sparse_categorical_accuracy: 0.1790
Epoch 4/10
141/141 - 0s - loss: 3.7823 - sparse_categorical_accuracy: 0.2952
Epoch 5/10
141/141 - 0s - loss: 3.5407 - sparse_categorical_accuracy: 0.3818
Epoch 6/10
141/141 - 0s - loss: 3.2848 - sparse_categorical_accuracy: 0.4325
Epoch 7/10
141/141 - 0s - loss: 3.0279 - sparse_categorical_accuracy: 0.5069
Epoch 8/10
141/141 - 0s - loss: 2.7782 - sparse_categorical_accuracy: 0.5610
Epoch 9/10
141/141 - 0s - loss: 2.5447 - sparse_categorical_accuracy: 0.5931
Epoch 10/10
141/141 - 0s - loss: 2.3280 - sparse_categorical_accuracy: 0.6327


In [64]:
history = model.fit(x_train_kg_np,y_train_np, epochs=10, verbose=2)

Epoch 1/10
141/141 - 1s - loss: 4.3191 - sparse_categorical_accuracy: 0.0657
Epoch 2/10
141/141 - 0s - loss: 4.1658 - sparse_categorical_accuracy: 0.0984
Epoch 3/10
141/141 - 0s - loss: 3.9842 - sparse_categorical_accuracy: 0.1160
Epoch 4/10
141/141 - 0s - loss: 3.8143 - sparse_categorical_accuracy: 0.1585
Epoch 5/10
141/141 - 0s - loss: 3.6654 - sparse_categorical_accuracy: 0.2050
Epoch 6/10
141/141 - 0s - loss: 3.5162 - sparse_categorical_accuracy: 0.2718
Epoch 7/10
141/141 - 0s - loss: 3.3615 - sparse_categorical_accuracy: 0.3232
Epoch 8/10
141/141 - 0s - loss: 3.2041 - sparse_categorical_accuracy: 0.3667
Epoch 9/10
141/141 - 0s - loss: 3.0486 - sparse_categorical_accuracy: 0.4047
Epoch 10/10
141/141 - 0s - loss: 2.8952 - sparse_categorical_accuracy: 0.4528


In [100]:
plt.plot(history.history['loss'])

[<matplotlib.lines.Line2D object at 0x7f2b18fe2be0>]

### Predict

In [65]:
results = model.predict(x_test_kg_np)

In [None]:
# results = model.predict(x_test_np)

In [67]:
def get_result_labels(results):
    return [labels[np.where(row==max(row))[0][0]] for row in results]

result_labels = get_result_labels(results)

In [68]:
len(results)

2209

In [69]:
print('Accuracy score: %.2f' % accuracy_score(result_labels, y_test_np))
print(classification_report(y_test_np, result_labels))


Accuracy score: 0.28


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                              precision    recall  f1-score   support

                Architecture       0.69      0.50      0.58        36
      Architecture-Structure       0.22      0.67      0.33        27
                  Art-Cinema       0.06      0.07      0.07        29
            Art-Cinema-Actor       0.11      0.12      0.12        32
                   Art-Dance       0.35      0.56      0.43        25
            Art-Dance-Dancer       0.18      0.22      0.20        32
                 Art-Fashion       0.00      0.00      0.00        33
        Art-Fashion-Designer       0.21      0.21      0.21        34
           Art-Fashion-Model       0.00      0.00      0.00        14
              Art-Literature       0.21      0.34      0.26        32
       Art-Literature-Writer       0.40      0.11      0.18        35
                   Art-Music       0.16      0.23      0.19        39
        Art-Music-Instrument       0.12      0.15      0.14        33
                Art