# Shared Imports, Configuration and Helpers

Common libraries and configurations for the other notebooks.

## imports, constants, config

In [2]:
# Load python-dotenv's IPython extension, then read ../.env into the process
# environment (-o: override variables that are already set, -v: verbose).
%load_ext dotenv
%dotenv -ov ../.env

In [None]:
# List the variables defined in ../.env WITHOUT echoing their values, so that
# secrets (Neo4j password, OpenAI API key) never end up in saved notebook output.
with open('../.env') as env_file:
  for env_line in env_file:
    env_line = env_line.strip()
    # Skip blank lines, comments and anything that is not a KEY=VALUE pair.
    if env_line and not env_line.startswith('#') and '=' in env_line:
      print(env_line.split('=', 1)[0].strip() + '=<hidden>')

In [4]:
# import python libraries
import os

# Common data processing
import json
import textwrap

# import hash functionality
import hashlib

# Neo4j driver
from neo4j import GraphDatabase

# Langchain
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import MarkdownTextSplitter
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains import RetrievalQA

# Langchain for Ollama (for hosting a local LLM)
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama

# Langchain for OpenAI (which requires an API key and internet connection)
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

# Use Langchain Hub (to get prompts)
from langchain import hub

# --- Neo4j connection settings, taken from the process environment ---
NEO4J_URI = os.environ.get('NEO4J_URI')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD')
# Falls back to 'neo4j' when the variable is unset or empty.
NEO4J_DATABASE = os.environ.get('NEO4J_DATABASE') or 'neo4j'

# --- Vector index naming shared across notebooks ---
VECTOR_INDEX_NAME = 'chunks_vector'
VECTOR_NODE_LABEL = 'Chunk'
VECTOR_SOURCE_PROPERTY = 'text'
VECTOR_EMBEDDING_PROPERTY = 'embedding'

# --- Embedding / chat provider selection (defaults target OpenAI) ---
EMBEDDING_API = os.environ.get('EMBEDDING_API') or 'openai'
EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL') or 'text-embedding-ada-002'
CHAT_API = os.environ.get('CHAT_API') or 'openai'
CHAT_MODEL = os.environ.get('CHAT_MODEL') or 'gpt-3.5-turbo'
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
OLLAMA_URL = os.environ.get('OLLAMA_URL')

# Data directory for Form 10-K json files and Form 13 csv file (currently disabled)
#ROOT_DIR = os.path.dirname(os.path.abspath(os.path.join(os.getcwd(), '..')))
#DATA_DIR = f"{ROOT_DIR}/{os.getenv('DATA_DIR') or 'data/single'}"

# Echo the effective (non-secret) settings so the reader can see what is in use.
print(f"Connecting to Neo4j at {NEO4J_URI} as {NEO4J_USERNAME}")
#print(f"Using data from {DATA_DIR}")
print(f"Embedding with {EMBEDDING_API} using {EMBEDDING_MODEL}")
print(f"Chatting with {CHAT_API} using {CHAT_MODEL}")

# Low-level Neo4j driver, used for hand-written Cypher queries.
gdb = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
# LangChain graph wrapper; constructed without arguments, so it presumably
# picks up its NEO4J_* settings from the environment -- TODO confirm.
graph = Neo4jGraph()

Connecting to Neo4j at bolt://neo4j-1:7687 as neo4j
Embedding with ollama using mxbai-embed-large
Chatting with ollama using llama3


In [None]:
embeddings_api = None
chat_api = None

match EMBEDDING_API: 
  case "ollama": 
    embeddings_api = OllamaEmbeddings(base_url=OLLAMA_URL, model=EMBEDDING_MODEL, temperature=0, top_k=10, top_p=0.5)
  case "openai": 
    if OPENAI_API_KEY is None:
      raise ValueError("OpenAI_API_KEY is not set. Please add it to your .env file.")
    embeddings_api = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
match CHAT_API:
  case "ollama": 
    chat_api = ChatOllama(base_url=OLLAMA_URL, model=CHAT_MODEL, temperature=0)
  case "openai": 
    if OPENAI_API_KEY is None:
      raise ValueError("OpenAI_API_KEY is not set. Please add it to your .env file.")
    chat_api = ChatOpenAI()


In [None]:
def neo4j_vector_search(question, index_name, top_k=10):
  """Search for similar nodes using the Neo4j vector index.

  Args:
    question: Natural-language text; it is embedded with the module-level
      `embeddings_api` before querying.
    index_name: Name of the Neo4j vector index to query.
    top_k: Number of nearest neighbours to request (defaults to 10).

  Returns:
    The `.records` of the result returned by `gdb.execute_query`; each record
    exposes the `score` and `node` columns of the Cypher query.
  """
  vector_search_query = """
    CALL db.index.vector.queryNodes($index_name, $top_k, $question_embedding) 
        YIELD node, score
    RETURN score, node AS node
  """

  print(f"Using vector index: {index_name}")

  question_embedding = embeddings_api.embed_query(question)
  # Only parameters that the Cypher text actually references are sent.
  result = gdb.execute_query(vector_search_query,
                             question_embedding=question_embedding,
                             index_name=index_name,
                             top_k=top_k)
  return result.records

In [None]:
# Helper that wraps a chain so its answer is printed as width-limited text.
def prettifyChain(chain):
  """Return a function that asks `chain` a question and prints the reply.

  The returned function calls `chain` with the question under both the
  "question" and "query" keys, takes the 'answer' entry of the response
  (or 'result' when there is no 'answer'), and prints it wrapped to 80
  columns. It returns nothing.
  """
  def prettychain(question:str):
    payload = {"question": question, "query": question}
    outputs = chain(payload, return_only_outputs=True)
    if 'answer' in outputs:
      reply = outputs['answer']
    else:
      reply = outputs['result']
    print(textwrap.fill(reply, width=80))
  return prettychain