In [None]:
# ruff: noqa: E501

# Install this notebook's dependencies into the running kernel's environment:
# llama-index-core, the OpenAI embeddings and Neo4j graph-store integrations
# for LlamaIndex, and fargs itself directly from its GitHub repository.
# NOTE(review): no versions are pinned, so a later re-run may resolve to newer
# releases than the ones this notebook was written against.
%pip install llama-index-embeddings-openai llama-index-core llama-index-graph-stores-neo4j git+https://github.com/btimothy-har/fargs.git

In [None]:
from enum import Enum

# You can define your own claim and entity types. Generally, this is an effective way to constrain the LLM to look at specific domains of knowledge.

class EntityTypes(Enum):
    """Entity categories handed to Fargs through its ``entity_types`` argument.

    Each member's value is the lowercase label string for that category.
    Add, remove, or rename members to fit the domain you are working in.
    """

    PERSON = "person"
    ORGANIZATION = "organization"
    INDUSTRY = "industry"
    LOCATION = "location"
    LANGUAGE = "language"
    CURRENCY = "currency"
    GEOPOLITICAL_ENTITY = "geopolitical_entity"
    # NORP: nationality, religious, or political group.
    NORP = "nationality_or_religious_or_political_group"
    POSITION = "position"
    # LEGAL: legal documents, laws, or treaties.
    LEGAL = "legal_documents_or_laws_or_treaties"
    ART = "work_of_art"
    PRODUCT_OR_SERVICE = "product_or_service"
    EVENT = "event"
    INFRASTRUCTURE = "infrastructure"

class ClaimTypes(Enum):
    """Claim categories handed to Fargs through its ``claim_types`` argument.

    Each member's value is the lowercase label string for that category.
    Add, remove, or rename members to fit the domain you are working in.
    """

    FACT = "fact"
    OPINION = "opinion"
    PREDICTION = "prediction"
    HYPOTHESIS = "hypothesis"
    DENIAL = "denial"
    CONFIRMATION = "confirmation"
    ACCUSATION = "accusation"
    PROMISE = "promise"
    WARNING = "warning"
    ANNOUNCEMENT = "announcement"

In [None]:
import os

from llama_index.core.node_parser import SemanticSplitterNodeParser

# OpenAIEmbedding lives in the separate `llama-index-embeddings-openai`
# integration package, not in `llama_index.core.embeddings`.
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbeddingMode
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# If needed, define your own embeddings and splitter strategy. Fargs defaults to using a TokenTextSplitter with chunk_size 1024 and chunk_overlap 256.
# In this example, we use Neo4j as a graph store, which is needed if you want to use embeddings in your Graph.

# Embedding model shared by the semantic splitter below and by Fargs itself.
# The API key is read from the environment so it never lands in the notebook.
embeddings = OpenAIEmbedding(
    mode=OpenAIEmbeddingMode.TEXT_SEARCH_MODE,
    model="text-embedding-3-small",
    dimensions=1536,
    api_key=os.getenv("OPENAI_API_KEY"),
)

# Splits documents at semantic breakpoints computed with the embeddings above.
splitter = SemanticSplitterNodeParser(
    buffer_size=2,
    embed_model=embeddings,
    breakpoint_percentile_threshold=95,
)

# Neo4j connection settings come from the environment when available; the
# fallbacks are the values this example originally hardcoded for a local
# instance, so behaviour is unchanged when the variables are unset.
graph_store = Neo4jPropertyGraphStore(
    username=os.getenv("NEO4J_USERNAME", "neo4j"),
    password=os.getenv("NEO4J_PASSWORD", "neo4j"),
    url=os.getenv("NEO4J_URI", "bolt://localhost:7687"),
    refresh_schema=False,
)

In [None]:
import asyncio

from llama_index.core.schema import Document

from fargs import Fargs

fargs = Fargs(
    project_name="my_graph_project",
    pre_split_strategy=splitter,
    embedding_strategy=embeddings,
    graph_store=graph_store,
    extraction_llm_model={
        "model": "gpt-4o",
        "temperature": 0,
    },
    summarization_llm_model={
        "model": "gpt-4o",
        "temperature": 0,
    },
    entity_types=EntityTypes,
    claim_types=ClaimTypes,
)

documents = [
    Document(text="Hello, world!"),
]

asyncio.run(fargs.ingest(documents))
