In [19]:
!pip3 install llama-index
!pip3 install llama-index-core
!pip3 install llama-index-embeddings-ollama
!pip3 install llama-index-llms-ollama
!pip3 install llama-index-graph-stores-neo4j
!pip3 install llama-parse


Collecting llama-index-graph-stores-neo4j
  Downloading llama_index_graph_stores_neo4j-0.2.5-py3-none-any.whl.metadata (697 bytes)
Downloading llama_index_graph_stores_neo4j-0.2.5-py3-none-any.whl (11 kB)
Installing collected packages: llama-index-graph-stores-neo4j
Successfully installed llama-index-graph-stores-neo4j-0.2.5


In [3]:
# llama-parse is async-first; running its async code inside a notebook requires
# nest_asyncio, so it is applied once here, ahead of the cells that follow.
import nest_asyncio
nest_asyncio.apply()
import os

# This notebook does not call OpenAI (the LLM and the embeddings are served by
# Ollama); the key is only filled with a placeholder value.
os.environ.update({"OPENAI_API_KEY": "N.A. we use ollama"})

In [13]:
# Just runs .complete to make sure the LLM is listening
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

# llama3 served by the Ollama instance on the LAN host, with temperature 0.0
# and a long request timeout.
llm = Ollama(
    model="llama3",
    base_url="http://192.168.1.102:11434",
    request_timeout=300.0,
    temperature=0.0,
)

# Register the Ollama model as the global default as well. `Settings` was
# imported but never used, which left the library-wide default LLM untouched;
# any component that is not handed `llm=` explicitly would then fall back to
# that default instead of this model.
Settings.llm = llm

# Smoke test: one short completion proves the server is reachable.
response = llm.complete("Who is Laurie Voss? write in 10 words")
print(response)

Laurie Voss: Co-Founder and CTO of npm, JavaScript expert.


In [None]:
from llama_index.embeddings.ollama import OllamaEmbedding

# Embedding model served by the same Ollama host as the LLM.
ollama_embedding = OllamaEmbedding(
    model_name="mxbai-embed-large",
    base_url="http://192.168.1.102:11434",
    ollama_additional_kwargs={"mirostat": 0},
)

# Smoke test for the batch (passage) path. Only the number of vectors, their
# dimensions and a short prefix are shown: printing the full vectors floods the
# cell output without telling the reader anything more.
pass_embedding = ollama_embedding.get_text_embedding_batch(
    ["This is a passage!", "This is another passage"], show_progress=True
)
print(
    f"{len(pass_embedding)} passage embeddings, "
    f"dimensions: {[len(vector) for vector in pass_embedding]}"
)
print([vector[:5] for vector in pass_embedding])

# Smoke test for the single-query path.
query_embedding = ollama_embedding.get_query_embedding("Where is blue?")
print(f"query embedding dimension: {len(query_embedding)}")
print(query_embedding[:5])

In [7]:
from llama_index.graph_stores.neo4j import Neo4jPGStore

import os
from getpass import getpass

# Connection settings for the Neo4j property-graph store.
# The password is no longer stored in the notebook: it is read from the
# NEO4J_PASSWORD environment variable, and the user is prompted for it when the
# variable is unset or empty. Username and URL can be overridden through
# NEO4J_USERNAME / NEO4J_URL and otherwise keep their previous values.
username = os.environ.get("NEO4J_USERNAME", "neo4j")
url = os.environ.get("NEO4J_URL", "bolt://neo4j-1:7687")
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

graph_store = Neo4jPGStore(
    username=username,
    password=password,
    url=url,
)

In [None]:
from llama_index.core.node_parser import SimpleFileNodeParser
from llama_index.readers.file import FlatReader
from pathlib import Path

# Load the rulebook markdown file with FlatReader.
# NOTE(review): the path is absolute and machine-specific; consider moving it
# to a configurable DATA_DIR.
documents = FlatReader().load_data(Path("/data-transfer/iihf/rulebook.md"))

# Split the loaded document(s) into nodes with the file-type-aware parser.
parser = SimpleFileNodeParser()
md_nodes = parser.get_nodes_from_documents(documents)

# Report sizes and a bounded preview instead of printing every document in
# full, which would dump the whole rulebook into the cell output.
print(f"{len(documents)} document(s), {len(md_nodes)} node(s)")
for doc in documents[:1]:
    print(doc.get_content()[:500])

In [9]:
from typing import Literal
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor

# Entity labels handed to the path extractor (upper-case by convention).
entities = Literal[
    "PERSON",
    "LOCATION",
    "ORGANIZATION",
    "EQUIPMENT",
    "TEAM",
]
# Relation labels handed to the path extractor.
relations = Literal["IS_PLAYER", "IS_GOALKEEPER"]

In [10]:
# Define which entity types may take part in which relations.
#
# The keys are spelled exactly like the labels in `entities` and every listed
# relation comes from `relations`. The previous mapping used different labels
# ("Person", "Organization", "Product", "Event", ...) and relations that are
# not declared in this notebook ("WORKS_AT", "CEO", ...), so it could never
# match a label the extractor is allowed to produce.
#
# NOTE(review): assumes the extractor looks entity types up in this dict by
# exact label and checks both ends of a triplet - confirm against the installed
# llama-index version.
# NOTE(review): the PERSON/TEAM assignment below is inferred from the relation
# names (a person is a player / goalkeeper of a team) - adjust to the intended
# domain model. An empty list means no relation is currently allowed for that
# type; fill these in when `relations` grows.
validation_schema = {
    "PERSON": ["IS_PLAYER", "IS_GOALKEEPER"],
    "TEAM": ["IS_PLAYER", "IS_GOALKEEPER"],
    "ORGANIZATION": [],
    "LOCATION": [],
    "EQUIPMENT": [],
}

In [14]:


from llama_index.core import PropertyGraphIndex

# Schema-guided path extractor driven by the Ollama LLM.
kg_extractor = SchemaLLMPathExtractor(
    llm=llm,
    # strict=True keeps extraction inside the schema; with False, values
    # outside of it are allowed and the schema only acts as a suggestion.
    strict=True,
    kg_validation_schema=validation_schema,
    possible_relations=relations,
    possible_entities=entities,
)

NUMBER_OF_ARTICLES = 250

# Build the property graph from the first loaded document only and store it in
# the Neo4j graph store.
first_documents = documents[:1]
index = PropertyGraphIndex.from_documents(
    first_documents,
    property_graph_store=graph_store,
    kg_extractors=[kg_extractor],
    embed_model=ollama_embedding,
    llm=llm,
    show_progress=True,
)

Parsing nodes:   0%|          | 0/1 [00:00<?, ?it/s]


Extracting paths from text with schema:   0%|          | 0/31 [00:00<?, ?it/s][A
Extracting paths from text with schema:   3%|▎         | 1/31 [02:08<1:04:02, 128.07s/it][A
Extracting paths from text with schema:   6%|▋         | 2/31 [02:35<33:09, 68.59s/it]   [A
Extracting paths from text with schema:  10%|▉         | 3/31 [02:36<17:37, 37.75s/it][A
Extracting paths from text with schema:  13%|█▎        | 4/31 [03:05<15:35, 34.63s/it][A
Extracting paths from text with schema:  16%|█▌        | 5/31 [04:26<22:11, 51.21s/it][A
Extracting paths from text with schema:  19%|█▉        | 6/31 [04:35<15:23, 36.96s/it][A
Extracting paths from text with schema:  23%|██▎       | 7/31 [05:13<14:54, 37.27s/it][A
Extracting paths from text with schema:  26%|██▌       | 8/31 [06:07<16:18, 42.54s/it][A
Extracting paths from text with schema:  29%|██▉       | 9/31 [06:47<15:16, 41.65s/it][A
Extracting paths from text with schema:  32%|███▏      | 10/31 [07:17<13:18, 38.01s/it][A
Extracting