# Import data to Neo4j 

## Import Environment Variables

In [22]:
# Load environment variables first, before the importer is created further down.
# NOTE(review): presumably this reads a local .env file holding the Neo4j / LLM
# settings that Importer relies on — confirm which variables are required.
# The call's return value is displayed as the cell output (True in the recorded run).
from dotenv import load_dotenv
load_dotenv()

True

## Import Packages 

In [23]:
import os
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from graphreader_agentic_rag.documents import Importer

## Instantiate the Loader

In [24]:
# Single Importer instance shared by all three import examples below
# (process_document, process_single_file, process_all_files).
# NOTE(review): presumably it picks up its connection/LLM configuration from
# the environment loaded by load_dotenv() — confirm against Importer.__init__.
loader = Importer()

### Text Import

This example shows how to import text data from Wikipedia and process it using the `Importer` class.

In [None]:
# Fetch the article text through LangChain's Wikipedia tool.
# doc_content_chars_max is set to 10,000; going by its name it bounds how much
# article content is returned.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(doc_content_chars_max=10_000))
text = wikipedia.run("Tetragonisca_angustula")

In [16]:
# Import the in-memory `text` fetched by the previous cell, labelled with the
# article name. The call is awaited at top level, which relies on the notebook
# kernel's support for top-level `await`.
await loader.process_document(text=text, document_name="Tetragonisca_angustula")

Started extraction at: 2024-11-18 22:41:48.643418
Total text chunks: 1
Finished LLM extraction after: 0:00:28.686690
Finished import at: 0:00:30.184770


### Single File Import

This example shows how to import a single text file and process it using the `Importer` class.

In [None]:
# Fetch a second article and persist it to disk so the next cell can import it
# from a file instead of from memory.
wikipedia = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(doc_content_chars_max=10000)
)
text = wikipedia.run("Melipona_quadrifasciata")

path = "input/text/"

# Create the target directory on demand so open() does not fail with
# FileNotFoundError on a fresh checkout.
os.makedirs(path, exist_ok=True)

# Write explicitly as UTF-8: Wikipedia text contains non-ASCII characters and
# the platform default encoding (e.g. cp1252 on Windows) may be unable to
# encode them.
# NOTE(review): confirm that Importer reads files as UTF-8 as well.
with open(os.path.join(path, "Melipona_quadrifasciata.txt"), "w", encoding="utf-8") as f:
    f.write(text)

In [20]:
# Import the file written by the previous cell: `path` is the directory and
# `filename` the file inside it.
# Caveat: `path` is rebound to "input/pages/" by the directory-import cell
# further down; re-run the previous cell first if that one has already executed.
await loader.process_single_file(filepath=path, filename="Melipona_quadrifasciata.txt")

Processing Melipona_quadrifasciata
Started extraction at: 2024-11-18 22:44:38.466674
Total text chunks: 1
Finished LLM extraction after: 0:00:13.582610
Finished import at: 0:00:14.697455


### Directory Import

This example shows how to import all text files in a directory and process them using the `Importer` class.

You can also pass a list of `separator` strings that mark the boundaries between chunks of text within each file.


In [27]:
path = "input/pages/"
await loader.process_all_files(filepath=path, separator = ["----------------------"])

Processing Western_honey_bee
Started extraction at: 2024-11-18 23:24:16.576298
Total text chunks: 13
Processing Stingless_bee
Started extraction at: 2024-11-18 23:24:16.577279
Total text chunks: 4
Finished LLM extraction after: 0:00:19.962224
Finished import at: 0:00:21.713879
Finished LLM extraction after: 0:00:23.484074
Finished import at: 0:00:25.594698
