# Create a Knowledge Graph from Text

## Task 1: Import Libraries

In [None]:
import wikipedia
import re
import requests
import spacy
import spacy_transformers
from spacy import displacy
from spacy.matcher import Matcher
import networkx as nx
from pyvis.network import Network

## Task 2: Load the Data

In [None]:
# Fetch the English Wikipedia article and keep its plain-text content.
wikipedia.set_lang("en")
# The title is passed to the lookup exactly as written, including the padding
# spaces and the inner single quotes.
title = " 'Maharana Pratap' "
page = wikipedia.page(title)
data = page.content
print(data)

## Task 3: Preprocess the Data

In [None]:
# Normalise the article text before it is handed to the NLP pipeline.
# Newlines become spaces (not empty strings) so the last word of one line is
# not fused with the first word of the next.
data = data.lower().replace('\n', ' ')
# Remove, in the order of the alternation: everything from the "see also"
# heading onwards, a few stray symbols, "=== ... ===" and "== ... =="
# section headings, and parenthesised asides.  The pattern is a raw string so
# that "\(" is a regex escape rather than an invalid Python string escape.
data = re.sub(r'== see also ==.*|[@#:&"]|===.*?===|==.*?==|\(.*?\)', '', data)
# Collapse the whitespace runs left behind by the removals above.
data = re.sub(r'\s+', ' ', data).strip()
print(data)

## Task 4: Recognize Named Entities

In [None]:
# Load the large English spaCy pipeline and run it over the cleaned text.
nlp = spacy.load("en_core_web_lg")
doc = nlp(data)

# Highlight the recognised named entities inline in the notebook output.
displacy.render(doc, jupyter=True, style="ent")

## Task 5: Compute Coreference Clusters

In [None]:
# Make sure the coreference component is part of the pipeline before parsing:
# the `doc._.coref_chains` attribute used below is expected to come from it.
# The membership check keeps this cell safe to re-run, since adding a
# component name that is already in the pipeline is an error.
if "coreferee" not in nlp.pipe_names:
    nlp.add_pipe("coreferee")

# Re-parse so the document is produced by the pipeline that includes the
# coreference component.
doc = nlp(data)

# Print the coreference chains found in the document.
doc._.coref_chains.print()

## Task 6: Resolve Coreferences

In [None]:
# Rebuild the text token by token, substituting each token that the
# coreference chains can resolve with the text of the token(s) it refers to.
pieces = []
for tok in doc:
    antecedents = doc._.coref_chains.resolve(tok)
    if antecedents:
        # Several referents are joined with " and ".
        pieces.append(" " + " and ".join(ref.text for ref in antecedents))
    elif tok.dep_ == "punct":
        # Punctuation is attached to the preceding text without a space.
        pieces.append(tok.text)
    else:
        pieces.append(" " + tok.text)
resolved_data = "".join(pieces)
print(resolved_data)

## Task 7: Extract Relationships

In [None]:
def extract_relationship(sentence):
    """Extract a (first, last, relation) triple from one sentence.

    The sentence is parsed with the module-level ``nlp`` pipeline. The first
    and last noun chunks become the two entities, and the text lying between
    the end of the first chunk and the start of the last chunk becomes the
    relation.

    Args:
        sentence: The sentence text to analyse.

    Returns:
        A tuple ``(first_text, last_text, relation_text)`` with surrounding
        whitespace stripped from each element, or ``(None, None, None)`` when
        the sentence has fewer than two noun chunks.
    """
    parsed = nlp(sentence)
    chunks = list(parsed.noun_chunks)

    # Two distinct noun chunks are needed to form a relationship.
    if len(chunks) < 2:
        return (None, None, None)

    head, tail = chunks[0], chunks[-1]
    between = parsed[head.end:tail.start]
    return (head.text.strip(), tail.text.strip(), str(between).strip())

## Task 8: Create a Graph

In [None]:
def print_five_words(sentence):
    """Return *sentence* re-wrapped to at most five words per line.

    Words are the whitespace-separated pieces of ``sentence``; each output
    row joins up to five of them with single spaces, and rows are separated
    by newlines. Despite the name, nothing is printed: the wrapped text is
    returned so it can be used for better readability of a given text.
    """
    words = sentence.split()
    rows = []
    for start in range(0, len(words), 5):
        rows.append(' '.join(words[start:start + 5]))
    return '\n'.join(rows)

In [None]:
# Parse the coreference-resolved text so it can be walked sentence by sentence.
graph_doc = nlp(resolved_data)

nx_graph = nx.DiGraph()

for sentence_span in graph_doc.sents:
    # Skip sentences of three tokens or fewer.
    if len(sentence_span) <= 3:
        continue

    source, target, relation = extract_relationship(str(sentence_span))

    # Both endpoints are required to add an edge.
    if not (source and target):
        continue

    nx_graph.add_node(source, size=5)
    nx_graph.add_node(target, size=5)
    # The relation text, wrapped to five words per row, becomes the edge title.
    nx_graph.add_edge(
        source,
        target,
        weight=1,
        title=print_five_words(relation),
        arrows="to",
    )

# Convert the networkx graph to a pyvis network and write it out as HTML.
g = Network(notebook=True, cdn_resources='in_line')
g.from_nx(nx_graph)
g.show("example.html")

## Task 9: List the Related Entities

In [None]:

# Look up the graph edges reported for the 'pratap' node and print them.
pratap_edges = nx_graph.edges(['pratap'])
print(pratap_edges)