# Construct Knowledge Graph and store in Neo4j graph database 

Retrieve JSON file of web-scraped skincare products

In [23]:
import json

# Read the scraped product records (UTF-8 JSON) into memory as `nodes`.
with open('graph.json', mode="r", encoding="utf-8") as json_file:
    raw_json = json_file.read()
    nodes = json.loads(raw_json)

In [32]:
# pip install langchain

from langchain.chat_models import ChatOpenAI

import os
from dotenv import load_dotenv
# Load settings via python-dotenv (presumably from a local .env file).
load_dotenv()

# Fail early with an actionable message when the key is missing. The previous
# `os.environ[...] = os.getenv(...)` assignment was a no-op when the key was set
# and raised an opaque "str expected, not NoneType" TypeError when it was not.
if os.getenv("OPENAI_API_KEY") is None:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# Initialize LLM (temperature=0 requests the least random completions).
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

## Ingest graph into Neo4j

Connect to Neo4j database

In [45]:
from langchain.graphs import Neo4jGraph

# Open the Neo4j connection. Each constructor argument is read from the
# environment variable paired with it below; a variable that is not set is
# passed through as None.
graph = Neo4jGraph(
    **{
        argument: os.getenv(env_var)
        for argument, env_var in (
            ("url", "NEO4J_URI"),
            ("username", "NEO4J_USERNAME"),
            ("password", "NEO4J_PASSWORD"),
            ("database", "NEO4J_DATABASE"),
        )
    }
)

In [47]:
def sanitize(text):
    """Return ``text`` as a lowercase string without quotes or curly braces.

    The input is converted with ``str()`` first, so non-string values
    (numbers, ``None``, ...) are accepted. Single quotes, double quotes,
    ``{`` and ``}`` are deleted, then the result is lowercased.
    """
    unwanted = str.maketrans("", "", "'\"{}")
    stripped = str(text).translate(unwanted)
    return stripped.lower()

def _cypher_identifier(name):
    """Backtick-quote ``name`` for use as a Cypher label or relationship type.

    Labels and relationship types cannot be supplied as query parameters, so
    they still have to be spliced into the query text. Quoting them (and
    doubling any embedded backtick) keeps names that contain spaces or
    punctuation from breaking or altering the statement.
    """
    return "`" + str(name).replace("`", "``") + "`"


# Records that carry a 'title' key describe a product; every other record
# describes one (product)-[relationship]->(entity) edge.
jsonData_relations = [obj for obj in nodes if 'title' not in obj]
jsonData_entities = [obj for obj in nodes if 'title' in obj]

unique_ids = set(obj['product_id'] for obj in nodes)

# Group the records by product id once instead of re-filtering both lists for
# every id. When several product records share an id, the first one is used.
products_by_id = {}
for obj in jsonData_entities:
    products_by_id.setdefault(obj['product_id'], obj)

relations_by_id = {}
for obj in jsonData_relations:
    relations_by_id.setdefault(obj['product_id'], []).append(obj)

# Validate before writing anything so a bad record cannot leave the database
# half-ingested (previously this surfaced as a bare IndexError mid-loop).
orphan_ids = [pid for pid in relations_by_id if pid not in products_by_id]
if orphan_ids:
    raise LookupError(
        f"No product record (with a 'title') found for product_id(s): {orphan_ids}"
    )

# Add each product, its entities and the connecting relationships to the db.
# Products without any relation record are not written.
for product_id, filtered_relations in relations_by_id.items():
    product = products_by_id[product_id]

    # Property values are sent as query parameters rather than interpolated
    # into the Cypher text, so quotes or backslashes in scraped text can
    # neither break nor alter the statement. str() keeps the stored values as
    # strings, as the former quoted interpolation did.
    # NOTE(review): the id is passed with its JSON type; the old code spliced
    # it in unquoted, which only worked for numeric ids - confirm ids are numeric.
    product_params = {
        "product_id": product['product_id'],
        "product": str(product['product']),
        "title": str(product['title']),
        "url": str(product['product_url']),
        "fav_num": str(product['fav_num']),
        "sold_num": str(product['sold_num']),
        "volume": str(product['volume']),
        "price": str(product['price']),
    }

    for obj in filtered_relations:
        query = f'''
            MERGE (product:Product {{id: $product_id}})
            ON CREATE SET product.product = $product,
                        product.title = $title,
                        product.url = $url,
                        product.fav_num = $fav_num,
                        product.sold_num = $sold_num,
                        product.volume = $volume,
                        product.price = $price

            MERGE (entity:{_cypher_identifier(obj['entity_type'])} {{title: $title, value: $value}})

            MERGE (product)-[:{_cypher_identifier(obj['relationship'])}]->(entity)
            '''
        # The entity value is still normalised with sanitize() so stored text
        # (and anything embedded from it later) keeps the same form.
        graph.query(
            query,
            params={**product_params, "value": sanitize(obj['entity_value'])},
        )

## Creating vector indexes

In [37]:
from langchain.vectorstores.neo4j_vector import Neo4jVector
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings_model = "text-embedding-ada-002"

In [49]:
# Collects one Neo4jVector handle per embedded entity type.
vector_indexes = []

def embed_entities(entity_type, index_name=None):
    """Create or attach to a vector index over the nodes labelled ``entity_type``.

    The ``value`` property of each node is used as the text to embed and the
    resulting vector is stored in the node's ``embedding`` property. The
    resulting ``Neo4jVector`` is appended to ``vector_indexes``.

    Args:
        entity_type: Node label whose nodes should be embedded.
        index_name: Name of the vector index. Defaults to
            ``"description_<entity_type>"``. Index names are unique per Neo4j
            database, so sharing one fixed name (previously ``'description'``)
            across all labels meant only one label could own the index.
    """
    if index_name is None:
        index_name = f"description_{entity_type}"

    vector_index = Neo4jVector.from_existing_graph(
        OpenAIEmbeddings(model=embeddings_model),
        url=os.getenv("NEO4J_URI"),
        username=os.getenv("NEO4J_USERNAME"),
        password=os.getenv("NEO4J_PASSWORD"),
        database=os.getenv("NEO4J_DATABASE"),
        index_name=index_name,
        node_label=entity_type,
        text_node_properties=['value'],
        embedding_node_property='embedding'
    )
    vector_indexes.append(vector_index)
    
# Every distinct entity type found on a relation record gets its own
# embedding pass; records without an 'entity_type' key are ignored.
unique_entity_types = {
    relation["entity_type"]
    for relation in jsonData_relations
    if "entity_type" in relation
}

for label in unique_entity_types:
    embed_entities(label)

# If you already have a populated vector index in Neo4j, you can connect to it with the from_existing_index method.
#existing_index = Neo4jVector.from_existing_index()