# Einstein Biography Knowledge Graph

This notebook demonstrates the creation of a knowledge graph using spaCy and NetworkX. The graph visualizes key entities and relationships from a short biography of Albert Einstein.

### Importing necessary libraries

In [None]:
import spacy
import networkx as nx
import matplotlib.pyplot as plt

### Loading and processing text with spaCy

In [None]:
# Biography excerpt to analyze. Adjacent string literals are joined by the
# parser, so the resulting value is one continuous string.
text = (
    "Born in the German Empire, Einstein moved to Switzerland in 1895, "
    "and at the age of seventeen he enrolled in the mathematics and physics "
    "teaching diploma program at the Swiss federal polytechnic school. "
    "In 1903, he secured a permanent position at the Swiss Patent Office."
)

# Load the spaCy English pipeline; it must already be installed locally.
nlp = spacy.load("en_core_web_sm")

# Run the pipeline over the excerpt; later cells read entities from `doc.ents`.
doc = nlp(text)

### Creating and populating a NetworkX graph with entities and relationships

In [None]:
# Build a directed knowledge graph centred on one main entity.
MAIN_ENTITY = "Einstein"

G = nx.DiGraph()
G.add_node(MAIN_ENTITY, type="Person")

# Add every named entity found by spaCy as a typed node.
for ent in doc.ents:
    name = ent.text

    # If the NER span carries a leading article (e.g. "the Swiss Patent Office"),
    # drop it so the node coincides with the curated names used further down
    # instead of producing a second, near-duplicate node.
    if name.lower().startswith("the "):
        name = name[4:]

    # A mention of the main entity itself must not be re-added: add_node on an
    # existing node updates its attributes (replacing type="Person" with the NER
    # label), and a GPE/ORG label would add a self-loop on the main entity.
    if name == MAIN_ENTITY:
        continue

    G.add_node(name, type=ent.label_)

    # Places and organisations get a generic link from the main entity.
    if ent.label_ in ("GPE", "ORG"):
        G.add_edge(MAIN_ENTITY, name, relation="associated_with")

# Curated relationships taken from the text: (target, fallback type, relation).
# The fallback type is only used when NER did not already create the node, so
# every node in the graph ends up with a `type` attribute.
curated_relations = [
    ("German Empire", "GPE", "born_in"),
    ("Switzerland", "GPE", "moved_to"),
    ("Swiss federal polytechnic school", "ORG", "enrolled_in"),
    ("Swiss Patent Office", "ORG", "worked_at"),
]

for target, fallback_type, relation in curated_relations:
    if target not in G:
        G.add_node(target, type=fallback_type)
    # add_edge on an existing edge updates its attributes, so the specific
    # relation replaces any generic "associated_with" added above.
    G.add_edge(MAIN_ENTITY, target, relation=relation)

### Visualizing the graph using Matplotlib

In [None]:
# Visualize the graph on an explicit Axes. Without `ax=`, nx.draw adds its own
# axes spanning the whole figure, which leaves no room for the title and is not
# something tight_layout can adjust.
fig, ax = plt.subplots(figsize=(10, 8))

# Fixed seed keeps the spring layout identical between runs.
pos = nx.spring_layout(G, seed=42)

# Highlight the central "Einstein" node with its own colour and a larger size.
node_colors = ['lightblue' if node == "Einstein" else 'lightgreen' for node in G.nodes()]
node_sizes = [2000 if node == "Einstein" else 1000 for node in G.nodes()]

nx.draw(G, pos, ax=ax, with_labels=True, node_color=node_colors,
        node_size=node_sizes, font_size=8,
        font_weight="bold", arrows=True)

# Label each edge with its `relation` attribute, using the same layout and Axes.
edge_labels = nx.get_edge_attributes(G, 'relation')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=6, ax=ax)

ax.set_title("Einstein Biography Knowledge Graph")
fig.tight_layout()
plt.show()

# Print out the graph details (the graph is small, so a full dump is readable).
print("Nodes:", list(G.nodes(data=True)))
print("\nEdges:", list(G.edges(data=True)))