In [1]:
%pip install neo4j

Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Downloading neo4j-5.28.1-py3-none-any.whl (312 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neo4j
Successfully installed neo4j-5.28.1


In [13]:
import csv
import requests
from neo4j import GraphDatabase
import pandas as pd
import openai

# Neo4j credentials

In [3]:
NEO4J_URI = ""
NEO4J_USER = ""
NEO4J_PASSWORD = ""

In [15]:
# OpenAI setup
openai.api_key = ""

In [4]:
BASE_URL = "https://storage.googleapis.com/neo4j-workshop-data/genai-hm/"
files = {
    "department": "department.csv",
    "product": "product.csv",
    "article": "article.csv",
    "customer": "customer.csv",
    "transaction": "transaction.csv"
}

In [5]:
# Load as DataFrames
datasets = {name: pd.read_csv(BASE_URL + filename) for name, filename in files.items()}

In [6]:
# Example: preview data
print(datasets["department"].head(2))
print(datasets["product"].head(2))
print(datasets["article"].head(2))
print(datasets["customer"].head(2))
print(datasets["transaction"].head(2))

   departmentNo  departmentName  sectionNo             sectionName
0          1676    Jersey Basic         16  Womens Everyday Basics
1          1339  Clean Lingerie         61         Womens Lingerie
   productCode           prodName  productTypeNo productTypeName  \
0       108775          Strap top            253        Vest top   
1       110065  OP T-shirt (Idro)            306             Bra   

     productGroupName  garmentGroupNo   garmentGroupName  \
0  Garment Upper body            1002       Jersey Basic   
1           Underwear            1017  Under-, Nightwear   

                                          detailDesc  
0            Jersey top with narrow shoulder straps.  
1  Microfibre T-shirt bra with underwired, moulde...  
   articleId  productCode  departmentNo   prodName productTypeName  \
0  108775015       108775          1676  Strap top        Vest top   
1  108775044       108775          1676  Strap top        Vest top   

   graphicalAppearanceNo graphicalApp

In [7]:
# Replace with your actual Google Sheets published CSV links
CSV_URLS = {
    "departments": "https://storage.googleapis.com/neo4j-workshop-data/genai-hm/department.csv",
    "products":    "https://storage.googleapis.com/neo4j-workshop-data/genai-hm/product.csv",
    "articles":    "https://storage.googleapis.com/neo4j-workshop-data/genai-hm/article.csv",
    "customers":   "https://storage.googleapis.com/neo4j-workshop-data/genai-hm/customer.csv",
    "transactions":"https://storage.googleapis.com/neo4j-workshop-data/genai-hm/transaction.csv",
}

In [8]:
def fetch_csv(url):
    r = requests.get(url)
    r.raise_for_status()
    decoded = r.content.decode('utf-8')
    return list(csv.DictReader(decoded.splitlines()))

In [9]:
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [10]:
def load_departments(tx, row):
    tx.run("""
        MERGE (d:Department {departmentNo: $departmentNo})
        SET d.departmentName = $departmentName
    """, **row)

def load_sections(tx, row):
    tx.run("""
        MERGE (d:Department {departmentNo: $departmentNo})
        MERGE (s:Section {sectionNo: $sectionNo})
        SET s.sectionName = $sectionName
        MERGE (d)-[:HAS_SECTION]->(s)
    """, **row)

def load_products(tx, row):
    tx.run("""
        MERGE (p:Product {productCode: $productCode})
        SET p.prodName = $prodName, p.productTypeNo = $productTypeNo, p.productTypeName = $productTypeName,
            p.productGroupName = $productGroupName, p.garmentGroupNo = $garmentGroupNo,
            p.garmentGroupName = $garmentGroupName, p.detailDesc = $detailDesc
    """, **row)

def load_articles(tx, row):
    tx.run("""
        MERGE (a:Article {articleId: $articleId})
        SET a.productCode = $productCode, a.prodName = $prodName, a.productTypeName = $productTypeName,
            a.graphicalAppearanceNo = $graphicalAppearanceNo, a.graphicalAppearanceName = $graphicalAppearanceName,
            a.colourGroupCode = $colourGroupCode, a.colourGroupName = $colourGroupName
        WITH a
        MATCH (p:Product {productCode: $productCode})
        MERGE (p)-[:HAS_ARTICLE]->(a)
    """, **row)

def load_customers(tx, row):
    tx.run("""
        MERGE (c:Customer {customerId: $customerId})
        SET c.clubMemberStatus = $clubMemberStatus, c.fashionNewsFrequency = $fashionNewsFrequency,
            c.age = $age, c.postalCode = $postalCode
    """, **row)

def load_transactions(tx, row):
    tx.run("""
        MATCH (c:Customer {customerId: $customerId})
        MATCH (a:Article {articleId: $articleId})
        MERGE (c)-[r:PURCHASED {txId: $txId}]->(a)
        SET r.price = $price, r.salesChannelId = $salesChannelId, r.tDat = $tDat
    """, **row)

In [11]:
with driver.session() as session:
    # Departments and Sections
    for row in fetch_csv(CSV_URLS["departments"]):
        session.write_transaction(load_departments, row)
        session.write_transaction(load_sections, row)
    # Products
    for row in fetch_csv(CSV_URLS["products"]):
        session.write_transaction(load_products, row)
    # Articles
    for row in fetch_csv(CSV_URLS["articles"]):
        session.write_transaction(load_articles, row)
    # Customers
    for row in fetch_csv(CSV_URLS["customers"]):
        session.write_transaction(load_customers, row)
    # Transactions
    for row in fetch_csv(CSV_URLS["transactions"]):
        session.write_transaction(load_transactions, row)

  session.write_transaction(load_departments, row)
  session.write_transaction(load_sections, row)
  session.write_transaction(load_products, row)


KeyboardInterrupt: 

In [12]:
driver.close()

In [16]:
def retrieve_context(question, driver):
    # Simple example: find articles purchased by customers with a certain product name in their history
    # Extend this with more advanced retrieval as needed
    with driver.session() as session:
        if "customer" in question.lower():
            cypher = """
            MATCH (c:Customer)-[:PURCHASED]->(a:Article)
            RETURN c.customerId AS customer, collect(a.prodName) AS products LIMIT 5
            """
            results = session.run(cypher)
            context = "\n".join([f"Customer {r['customer']} bought: {', '.join(r['products'])}" for r in results])
        else:
            # Just return some products
            cypher = "MATCH (p:Product) RETURN p.prodName AS name LIMIT 5"
            results = session.run(cypher)
            context = "Products: " + ", ".join([r['name'] for r in results])
    return context

In [23]:
def rag_query_llm(question):
    # Connect to Neo4j
    driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    # Retrieve context from graph
    context = retrieve_context(question, driver)
    # Compose prompt
    prompt = f"""Answer the following question using this context from a fashion retail graph database:
Context:
{context}

Question:
{question}

Answer:"""
    # Call LLM (OpenAI GPT example)
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0

    )
    print(response.choices[0].message.content.strip())
    driver.close()

if __name__ == "__main__":
    user_question = input("Ask your question: ")
    rag_query_llm(user_question)

Ask your question: what are T-shirt  aviable
The T-shirt available in the fashion retail graph database is the OP T-shirt (Idro).


# Implemetation for unstrcutre data

In [None]:
import csv
import requests
from neo4j import GraphDatabase
import openai

# CONFIGURATION
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "password"

OPENAI_API_KEY = "sk-..."
openai.api_key = OPENAI_API_KEY

# 1. Load unstructured data (example: product descriptions)
CSV_URL = "https://your-link/products.csv"  # must have 'productCode', 'prodName', 'detailDesc' columns

def fetch_csv(url):
    r = requests.get(url)
    r.raise_for_status()
    decoded = r.content.decode('utf-8')
    return list(csv.DictReader(decoded.splitlines()))

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

def embed_text(text):
    """Get OpenAI embedding for the text."""
    resp = openai.Embedding.create(
        input=text,
        model="text-embedding-ada-002"
    )
    return resp["data"][0]["embedding"]

# 2. Store products as nodes with embeddings
def store_product_with_embedding(tx, row, embedding):
    tx.run(
        """
        MERGE (p:Product {productCode: $productCode})
        SET p.prodName = $prodName,
            p.detailDesc = $detailDesc,
            p.embedding = $embedding
        """,
        productCode=row["productCode"],
        prodName=row["prodName"],
        detailDesc=row["detailDesc"],
        embedding=embedding
    )

# 3. Ingest and embed
for row in fetch_csv(CSV_URL):
    desc = row.get("detailDesc", "")
    if desc.strip():
        embedding = embed_text(desc)
        with driver.session() as session:
            session.write_transaction(store_product_with_embedding, row, embedding)

print("Products with embeddings stored in Neo4j.")

# 4. Simple RAG Query: Find similar products by description
def get_similar_products(query, top_k=3):
    # Embed the query
    query_emb = embed_text(query)
    with driver.session() as session:
        # Use Neo4j GDS vector index for ANN search if set up, or fallback to cosine similarity manually
        cypher = """
        MATCH (p:Product)
        WHERE exists(p.embedding)
        WITH p, gds.similarity.cosine(p.embedding, $query_emb) AS score
        RETURN p.prodName AS name, p.detailDesc AS desc, score
        ORDER BY score DESC LIMIT $top_k
        """
        results = session.run(cypher, query_emb=query_emb, top_k=top_k)
        return [(r["name"], r["desc"], r["score"]) for r in results]

# 5. Use with an LLM (simple prompt composition)
def answer_with_rag(user_query):
    context_items = get_similar_products(user_query, top_k=3)
    context = "\n".join([f"{name}: {desc}" for name, desc, _ in context_items])
    prompt = f"""Using the following product descriptions, answer the question:
{context}
Question: {user_query}
Answer:"""
    response = openai.Completion.create(
        engine="gpt-3.5-turbo-instruct",
        prompt=prompt,
        max_tokens=200
    )
    print(response.choices[0].text.strip())

if __name__ == "__main__":
    q = input("Ask your product-related question: ")
    answer_with_rag(q)