# Purchase recommendations with GDS

#### Import dependencies

In [53]:
from dotenv import load_dotenv
import os

import pandas as pd

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser


# Warning control
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Notebook-wide pandas display settings: render at most 10 rows per DataFrame
# and allow up to 500 characters per column so long text values stay readable.
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.width', 0)

In [3]:
# Read connection settings from the local .env file (override=True is passed so
# values from the file take precedence over variables already set).
load_dotenv('.env', override=True)
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'  # falls back to 'neo4j' when unset or empty

# Azure OpenAI settings; the API key is passed as a query parameter to the
# genai.vector.encode calls further down.
OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')

In [4]:
# Instantiate the LangChain Neo4j graph wrapper (used below via kg.query)

kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

In [5]:
from graphdatascience import GraphDataScience
from neo4j import GraphDatabase

# Open a driver and wrap it in the Graph Data Science client. The client is
# pinned to NEO4J_DATABASE so that it works against the same database as the
# Neo4jGraph connection `kg`, rather than whatever the server default is.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
gds = GraphDataScience(driver, database=NEO4J_DATABASE)

# Printing the GDS version doubles as a quick connectivity check
print(gds.version())

2.12.0


In [6]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

def get_llm():
    """Create the Azure OpenAI chat model client.

    The .env file is re-read (with override) before the client is built.

    Returns:
        An AzureChatOpenAI instance for the "gpt-4o-mini" deployment.
    """
    load_dotenv('.env', override=True)
    chat_model = AzureChatOpenAI(
        azure_deployment="gpt-4o-mini",
        api_version="2024-08-01-preview",
    )
    return chat_model

def get_embedding():
    """Create the Azure OpenAI embeddings client.

    The .env file is re-read (with override) before the client is built.

    Returns:
        An AzureOpenAIEmbeddings instance for the "text-embedding-ada-002" deployment.
    """
    load_dotenv('.env', override=True)
    embeddings_client = AzureOpenAIEmbeddings(
        azure_deployment="text-embedding-ada-002",
        api_version="2023-05-15",
    )
    return embeddings_client

# Shared model clients: embedding_model is handed to the Neo4jVector store below
embedding_model = get_embedding()
llm_model = get_llm()

#### Load Data

In [7]:
# Get the source data. It has been pre-formatted; to re-generate it from the
# Kaggle source, see the data-prep.ipynb notebook.
# One CSV per entity: departments, products, articles, customers, transactions.
department_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/department.csv')
product_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/product.csv')
article_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/article.csv')
customer_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/customer.csv')
transaction_df = pd.read_csv('https://storage.googleapis.com/neo4j-workshop-data/genai-hm/transaction.csv')

#### Create unique constraints

In [8]:
# Create constraints - one uniqueness constraint for each node label, on the
# property used as that label's key. IF NOT EXISTS keeps the cell re-runnable.
gds.run_cypher('CREATE CONSTRAINT unique_department_no IF NOT EXISTS FOR (n:Department) REQUIRE n.departmentNo IS UNIQUE')
gds.run_cypher('CREATE CONSTRAINT unique_product_code IF NOT EXISTS FOR (n:Product) REQUIRE n.productCode IS UNIQUE')
gds.run_cypher('CREATE CONSTRAINT unique_article_id IF NOT EXISTS FOR (n:Article) REQUIRE n.articleId IS UNIQUE')
gds.run_cypher('CREATE CONSTRAINT unique_customer_id IF NOT EXISTS FOR (n:Customer) REQUIRE n.customerId IS UNIQUE')

#### Create nodes and relationships

In [8]:
from typing import Tuple, Union
from numpy.typing import ArrayLike


def make_map(x):
    """Normalise a "name or pair of names" argument into a pair.

    Args:
        x: Either a single string, which is used for both positions, or a
            tuple, which is returned unchanged.

    Returns:
        A tuple: ``(x, x)`` for a string input, or ``x`` itself for a tuple.

    Raises:
        TypeError: If ``x`` is neither a string nor a tuple.
    """
    # isinstance (rather than an exact type comparison) also accepts str and
    # tuple subclasses, e.g. named tuples.
    if isinstance(x, str):
        return x, x
    if isinstance(x, tuple):
        return x
    raise TypeError("Entry must be of type string or tuple")


def make_set_clause(prop_names: ArrayLike, element_name='n', item_name='rec'):
    """Build a Cypher SET clause copying properties from a record onto an element.

    Args:
        prop_names: Property names to copy.
        element_name: Cypher variable of the node/relationship being written.
        item_name: Cypher variable of the record the values are read from.

    Returns:
        A string of the form ``SET n.a = rec.a, n.b = rec.b``.
    """
    assignments = ', '.join(
        f'{element_name}.{name} = {item_name}.{name}' for name in prop_names
    )
    return 'SET ' + assignments


def make_node_merge_query(node_key_name: str, node_label: str, cols: ArrayLike):
    """Build the Cypher query that merges a batch of records into nodes.

    The query unwinds the ``$recs`` parameter, merges one node per record on
    the key property, sets every other column as a node property and returns
    the node count as ``nodeLoadedCount``.

    Args:
        node_key_name: Property (and record field) that identifies a node.
        node_label: Label of the nodes to merge.
        cols: All record fields; every one except the key becomes a SET item.

    Returns:
        The Cypher query text.
    """
    query_lines = [
        'UNWIND $recs AS rec',
        f'MERGE(n:{node_label} {{{node_key_name}: rec.{node_key_name}}})',
    ]
    non_key_props = [col for col in cols if col != node_key_name]
    if non_key_props:
        query_lines.append(make_set_clause(non_key_props))
    query_lines.append('RETURN count(n) AS nodeLoadedCount')
    return '\n'.join(query_lines)


def make_rel_merge_query(source_target_labels: Union[Tuple[str, str], str],
                         source_node_key: Union[Tuple[str, str], str],
                         target_node_key: Union[Tuple[str, str], str],
                         rel_type: str,
                         cols: ArrayLike,
                         rel_key: str = None):
    """Build the Cypher query that merges a batch of records into relationships.

    The query unwinds ``$recs``, matches the source and target nodes, merges
    the relationship (keyed on ``rel_key`` when given), sets the remaining
    columns as relationship properties and returns ``relLoadedCount``.

    Args:
        source_target_labels: ``(source_label, target_label)``, or one label
            used for both ends.
        source_node_key: ``(node_property, record_field)`` identifying the
            source node, or one name used for both.
        target_node_key: Same as ``source_node_key`` for the target node.
        rel_type: Relationship type to merge.
        cols: All record fields; those that are not the relationship key or a
            node-key field become relationship properties.
        rel_key: Optional property that makes a relationship unique between
            the same pair of nodes.

    Returns:
        The Cypher query text.
    """
    labels = make_map(source_target_labels)
    source_key = make_map(source_node_key)
    target_key = make_map(target_node_key)

    if rel_key is None:
        merge_line = f'MERGE(s)-[r:{rel_type}]->(t)'
    else:
        merge_line = f'MERGE(s)-[r:{rel_type} {{{rel_key}: rec.{rel_key}}}]->(t)'

    # The tab / four-space mix reproduces the exact text of the emitted query.
    query_lines = [
        '\tUNWIND $recs AS rec',
        f'    MATCH(s:{labels[0]} {{{source_key[0]}: rec.{source_key[1]}}})',
        f'    MATCH(t:{labels[1]} {{{target_key[0]}: rec.{target_key[1]}}})',
        '\t' + merge_line,
    ]

    key_fields = [rel_key, source_key[1], target_key[1]]
    rel_props = [col for col in cols if col not in key_fields]
    if rel_props:
        query_lines.append('\t' + make_set_clause(rel_props, 'r'))

    query_lines.append('\tRETURN count(r) AS relLoadedCount')
    return '\n'.join(query_lines)


def chunks(xs, n=10_000):
    """Split a sequence into consecutive slices of at most ``n`` items.

    Args:
        xs: Any sliceable sequence with a length.
        n: Maximum slice size; values below 1 are treated as 1.

    Returns:
        A list of slices of ``xs`` in order; empty when ``xs`` is empty.
    """
    step = max(1, n)
    batches = []
    for start in range(0, len(xs), step):
        batches.append(xs[start:start + step])
    return batches


def load_nodes(node_df: pd.DataFrame, node_key_col: str, node_label: str, chunk_size=10_000):
    """Merge the rows of a DataFrame into the database as nodes, in batches.

    Prints the generated Cypher query and a running total after each batch.

    Args:
        node_df: One row per node; every column becomes a node property.
        node_key_col: Column that identifies a node (used in the MERGE).
        node_label: Label given to the merged nodes.
        chunk_size: Number of rows sent per query.
    """
    rows = node_df.to_dict('records')
    row_count = len(rows)
    print(f'======  loading {node_label} nodes  ======')
    print(f'staging {row_count:,} records')

    merge_query = make_node_merge_query(node_key_col, node_label, node_df.columns.copy())
    print(f'\nUsing This Cypher Query:\n```\n{merge_query}\n```\n')

    loaded_so_far = 0
    for batch in chunks(rows, chunk_size):
        batch_result = gds.run_cypher(merge_query, params={'recs': batch})
        # The query returns a single row holding the count for this batch
        loaded_so_far += batch_result.iloc[0, 0]
        print(f'Loaded {loaded_so_far:,} of {row_count:,} nodes')


def load_rels(rel_df: pd.DataFrame,
              source_target_labels: Union[Tuple[str, str], str],
              source_node_key: Union[Tuple[str, str], str],
              target_node_key: Union[Tuple[str, str], str],
              rel_type: str,
              rel_key: str = None,
              chunk_size=10_000):
    """Merge the rows of a DataFrame into the database as relationships, in batches.

    Prints the generated Cypher query and a running total after each batch.

    Args:
        rel_df: One row per relationship.
        source_target_labels: ``(source_label, target_label)``, or one label
            used for both ends.
        source_node_key: ``(node_property, column)`` identifying the source
            node, or one name used for both.
        target_node_key: Same as ``source_node_key`` for the target node.
        rel_type: Relationship type to merge.
        rel_key: Optional column that makes a relationship unique between the
            same pair of nodes.
        chunk_size: Number of rows sent per query.
    """
    rows = rel_df.to_dict('records')
    row_count = len(rows)
    print(f'======  loading {rel_type} relationships  ======')
    print(f'staging {row_count:,} records')

    merge_query = make_rel_merge_query(
        source_target_labels,
        source_node_key,
        target_node_key,
        rel_type,
        rel_df.columns.copy(),
        rel_key,
    )
    print(f'\nUsing This Cypher Query:\n```\n{merge_query}\n```\n')

    loaded_so_far = 0
    for batch in chunks(rows, chunk_size):
        batch_result = gds.run_cypher(merge_query, params={'recs': batch})
        # The query returns a single row holding the count for this batch
        loaded_so_far += batch_result.iloc[0, 0]
        print(f'Loaded {loaded_so_far:,} of {row_count:,} relationships')

In [9]:
# Load nodes, one call per label.
# Article nodes are loaded without the productCode / departmentNo columns, so
# those values are not stored as Article properties.
load_nodes(department_df, 'departmentNo', 'Department')
load_nodes(article_df.drop(columns=['productCode', 'departmentNo']), 'articleId', 'Article')
load_nodes(product_df, 'productCode', 'Product')
load_nodes(customer_df, 'customerId', 'Customer')

staging 266 records

Using This Cypher Query:
```
UNWIND $recs AS rec
MERGE(n:Department {departmentNo: rec.departmentNo})
SET n.departmentName = rec.departmentName, n.sectionNo = rec.sectionNo, n.sectionName = rec.sectionName
RETURN count(n) AS nodeLoadedCount
```

Loaded 266 of 266 nodes
staging 13,351 records

Using This Cypher Query:
```
UNWIND $recs AS rec
MERGE(n:Article {articleId: rec.articleId})
SET n.prodName = rec.prodName, n.productTypeName = rec.productTypeName, n.graphicalAppearanceNo = rec.graphicalAppearanceNo, n.graphicalAppearanceName = rec.graphicalAppearanceName, n.colourGroupCode = rec.colourGroupCode, n.colourGroupName = rec.colourGroupName
RETURN count(n) AS nodeLoadedCount
```

Loaded 10,000 of 13,351 nodes
Loaded 13,351 of 13,351 nodes
staging 8,044 records

Using This Cypher Query:
```
UNWIND $recs AS rec
MERGE(n:Product {productCode: rec.productCode})
SET n.prodName = rec.prodName, n.productTypeNo = rec.productTypeNo, n.productTypeName = rec.productTypeName, 

In [19]:
# Load relationships:
#   (Article)-[:FROM_DEPARTMENT]->(Department)
#   (Article)-[:VARIANT_OF]->(Product)
#   (Customer)-[:PURCHASED]->(Article), keyed on txId so that each transaction
#   row gets its own relationship
load_rels(article_df[['articleId', 'departmentNo']], source_target_labels=('Article', 'Department'),
                      source_node_key='articleId', target_node_key='departmentNo',
                      rel_type='FROM_DEPARTMENT')
load_rels(article_df[['articleId', 'productCode']], source_target_labels=('Article', 'Product'),
                      source_node_key='articleId',target_node_key='productCode',
                      rel_type='VARIANT_OF')
load_rels(transaction_df, source_target_labels=('Customer', 'Article'),
                      source_node_key='customerId', target_node_key='articleId', rel_key='txId',
                      rel_type='PURCHASED')

staging 13,351 records

Using This Cypher Query:
```
	UNWIND $recs AS rec
    MATCH(s:Article {articleId: rec.articleId})
    MATCH(t:Department {departmentNo: rec.departmentNo})
	MERGE(s)-[r:FROM_DEPARTMENT]->(t)
	RETURN count(r) AS relLoadedCount
```

Loaded 10,000 of 13,351 relationships
Loaded 13,351 of 13,351 relationships
staging 13,351 records

Using This Cypher Query:
```
	UNWIND $recs AS rec
    MATCH(s:Article {articleId: rec.articleId})
    MATCH(t:Product {productCode: rec.productCode})
	MERGE(s)-[r:VARIANT_OF]->(t)
	RETURN count(r) AS relLoadedCount
```

Loaded 10,000 of 13,351 relationships
Loaded 13,351 of 13,351 relationships
staging 23,199 records

Using This Cypher Query:
```
	UNWIND $recs AS rec
    MATCH(s:Customer {customerId: rec.customerId})
    MATCH(t:Article {articleId: rec.articleId})
	MERGE(s)-[r:PURCHASED {txId: rec.txId}]->(t)
	SET r.tDat = rec.tDat, r.price = rec.price, r.salesChannelId = rec.salesChannelId
	RETURN count(r) AS relLoadedCount
```

Loaded 1

In [10]:
# Convert the tDat property of every PURCHASED relationship to a Cypher date
gds.run_cypher('''
MATCH (:Customer)-[r:PURCHASED]->()
SET r.tDat = date(r.tDat)
''')

# Replace product descriptions that are not strings (e.g. NaN from the CSV)
# with an empty string
gds.run_cypher('''
MATCH (n:Product) WHERE valueType(n.detailDesc) <> "STRING NOT NULL"
SET n.detailDesc = ""
RETURN n
''')

# Create a combined text property (name, type, group, garment type and
# description). This simplifies semantic search and RAG later on.
gds.run_cypher("""
    MATCH(p:Product)
    SET p.text = 'Product-- ' +
        'Name: ' + p.prodName + ' || ' +
        'Type: ' + p.productTypeName + ' || ' +
        'Group: ' + p.productGroupName + ' || ' +
        'Garment Type: ' + p.garmentGroupName + ' || ' +
        'Description: ' + p.detailDesc
    RETURN count(p) AS propertySetCount
    """)

# Write dummy URLs (built from the product code) to illustrate sourcing in
# future retrieval
gds.run_cypher("""
MATCH(p:Product)
SET p.url = 'https://representative-domain/product/' + p.productCode
""")

#### Creating Text Embeddings & Vector Index

In [11]:
from time import sleep

# Total number of products to embed: those with a non-empty description.
# run_cypher returns a DataFrame, so the single count is extracted with .iloc
# to make the log line below print a plain number instead of a frame dump.
total_nodes_to_embed = gds.run_cypher("""
    MATCH (p:Product) WHERE size(p.detailDesc) <> 0
    RETURN count(p) AS TotalProductsToEmbed
""").iloc[0, 0]

# Helper used by the embedding loop to track remaining work
def pending_nodes_to_emded():
    """Return how many Product nodes with a non-empty detailDesc have no textEmbedding yet."""
    count_query = """
      MATCH (p:Product) WHERE p.textEmbedding IS NULL AND size(p.detailDesc) <> 0
      RETURN count(p) AS NotEmbedProducts                       
      """
    pending_df = gds.run_cypher(count_query)
    # Single row, single column: the count itself
    return pending_df.iloc[0, 0]

# Give up after this many consecutive batches that either raised or embedded
# nothing, so that a persistent problem (for example rejected credentials, or
# nodes that can never be embedded) fails loudly instead of looping forever.
MAX_CONSECUTIVE_FAILED_BATCHES = 5

# Initialize pending_nodes variable
pending_nodes = pending_nodes_to_emded()
print(f"Total nodes to embed: {total_nodes_to_embed}")

failed_batches = 0
while pending_nodes != 0:
    try:
        print("Embedding nodes...")
        # Embed the next batch of products that have no embedding yet and
        # store the vector in the textEmbedding property.
        gds.run_cypher("""
        MATCH (p:Product WHERE p.textEmbedding IS NULL AND size(p.detailDesc) <> 0)
        WITH p
        LIMIT $embedding_batch_size
        WITH p, genai.vector.encode(
          p.text, 
          "AzureOpenAI", 
          {
            token: $openAiApiKey,
            resource: "knowledge-graphs",
            deployment: "text-embedding-ada-002"
          }) AS vector
        CALL db.create.setNodeVectorProperty(p, "textEmbedding", vector)
        """, 
            params={"openAiApiKey": OPENAI_API_KEY, "embedding_batch_size": 120}
        )
        remaining_nodes = pending_nodes_to_emded()
    except Exception as e:
        # Errors are assumed to be transient (e.g. rate limiting) and retried
        # after a longer pause, but only a bounded number of times in a row.
        print(e)
        failed_batches += 1
        if failed_batches >= MAX_CONSECUTIVE_FAILED_BATCHES:
            raise RuntimeError(
                f"Embedding failed {failed_batches} times in a row; "
                f"{pending_nodes} nodes are still pending"
            ) from e
        sleep(30)
        continue

    print(f"Number of pending nodes to embed: {remaining_nodes}")

    if remaining_nodes >= pending_nodes:
        # The batch ran without error but embedded nothing: count it as a
        # failure so a stalled loop also terminates.
        failed_batches += 1
        if failed_batches >= MAX_CONSECUTIVE_FAILED_BATCHES:
            raise RuntimeError(
                f"Embedding made no progress for {failed_batches} batches in a row; "
                f"{remaining_nodes} nodes are still pending"
            )
    else:
        failed_batches = 0

    pending_nodes = remaining_nodes
    if pending_nodes != 0:
        # Short pause between batches
        sleep(10)

print("All nodes have been embeded!")

Total nodes to embed:    TotalProductsToEmbed
0                  8018
Embedding nodes...
Number of pending nodes to embed: 7898
Embedding nodes...
Number of pending nodes to embed: 7778
Embedding nodes...
Number of pending nodes to embed: 7658
Embedding nodes...
Number of pending nodes to embed: 7538
Embedding nodes...
Number of pending nodes to embed: 7418
Embedding nodes...
Number of pending nodes to embed: 7298
Embedding nodes...
Number of pending nodes to embed: 7178
Embedding nodes...
Number of pending nodes to embed: 7058
Embedding nodes...
Number of pending nodes to embed: 6938
Embedding nodes...
Number of pending nodes to embed: 6818
Embedding nodes...
Number of pending nodes to embed: 6698
Embedding nodes...
Number of pending nodes to embed: 6578
Embedding nodes...
Number of pending nodes to embed: 6458
Embedding nodes...
Number of pending nodes to embed: 6338
Embedding nodes...
Number of pending nodes to embed: 6218
Embedding nodes...
Number of pending nodes to embed: 6098
Em

In [12]:
# Create (if missing) a cosine-similarity vector index over Product.textEmbedding.
# NOTE(review): vector.dimensions (1536) presumably matches the size of the
# vectors produced by the embedding deployment used above - confirm.
gds.run_cypher("""
CREATE VECTOR INDEX `product_text_embedding` IF NOT EXISTS FOR (p:Product) ON (p.textEmbedding) 
OPTIONS { indexConfig: {
    `vector.dimensions`: 1536,
    `vector.similarity_function`: 'cosine' 
}}
""") 

#### Use similarity search to find relevant chunks

In [13]:
def neo4j_vector_search(question, top_k=10, index_name="product_text_embedding"):
    """Search for similar Product nodes using a Neo4j vector index.

    The question is embedded inside the database with genai.vector.encode and
    the resulting vector is used to query the index.

    Args:
        question: Free-text search phrase.
        top_k: Number of nearest neighbours to return.
        index_name: Name of the vector index to query.

    Returns:
        The result of gds.run_cypher (a DataFrame) with the columns
        productCode, text and score.
    """
    vector_search_query = """
    WITH genai.vector.encode(
      $question, 
      "AzureOpenAI", 
      {
        token: $openAiApiKey,
        resource: "knowledge-graphs",
        deployment: "text-embedding-ada-002"
      }) AS question_embedding
    CALL db.index.vector.queryNodes($index_name, $top_k, question_embedding) 
    YIELD node AS product, score
    RETURN product.productCode AS productCode,
        product.text as text,
        score
  """
    return gds.run_cypher(
        vector_search_query,
        params={
            'question': question,
            'openAiApiKey': OPENAI_API_KEY,
            'index_name': index_name,
            'top_k': top_k,
        },
    )

In [14]:
# neo4j_vector_search returns the run_cypher result, which is already a
# DataFrame; display it directly instead of re-wrapping it with
# DataFrame.from_records (passing a DataFrame there is deprecated in recent pandas).
neo4j_vector_search('denim jeans')

Unnamed: 0,productCode,text,score
0,252298,"Product-- Name: Didi denim || Type: Trousers || Group: Garment Lower body || Garment Type: Dresses Ladies || Description: Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs.",0.938049
1,598423,Product-- Name: Night Denim || Type: Trousers || Group: Garment Lower body || Garment Type: Dresses Ladies || Description: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. Front and back pockets and skinny legs with a zip down the sides.,0.937393
2,727804,"Product-- Name: Didi HW Skinny denim || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: Jeans in washed, stretch denim with hard-worn details, a high waist, visible button fly, front and back pockets and skinny legs.",0.934525
3,810170,"Product-- Name: Skinny denim (D) || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: Jeans in washed, stretch denim with fake front pockets, real back pockets and skinny legs. Wide ribbing at the waist for best fit over the tummy.",0.933243
4,652924,"Product-- Name: &DENIM Jeggings HW || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers Denim || Description: High-waisted jeggings in stretch denim with a zip fly and button, fake front pockets and real back pockets.",0.932678
5,749656,"Product-- Name: &DENIM jen bermuda shorts || Type: Shorts || Group: Garment Lower body || Garment Type: Trousers Denim || Description: 5-pocket shorts in washed, stretch denim with a regular waist, zip fly and button, and sewn-in turn-ups at the hems. The cotton content of the shorts is partly recycled.",0.932419
6,620223,"Product-- Name: Beat denim || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: High-waisted, ankle-length denim jeans with a zip fly and button, side pockets, welt back pockets and wide, straight legs with creases.",0.932159
7,698387,"Product-- Name: &DENIM+ Skinny Shaping HW || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers Denim || Description: 5-pocket jeans in washed, stretch denim with a high waist, zip fly and button and skinny legs. Shaping – denim with a stretch function that holds in and shapes the waist, thighs and bum while keeping the jeans in shape.",0.9319
8,522754,"Product-- Name: Snake fancy denim || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: 5-pocket superskinny-fit jeans in stretch denim with a regular waist, zip fly and skinny legs.",0.931213
9,571650,"Product-- Name: Taylor Fancy Denim || Type: Trousers || Group: Garment Lower body || Garment Type: Dresses Ladies || Description: Low-rise, ankle-length jeans in washed stretch denim with front and back pockets, a zip fly and button, and skinny legs with raw-edge hems.",0.931091


#### Define Customer Id for recommendations

In [17]:
# Customer whose purchase history drives the queries in the following cells
customerId = 'daae10780ecd14990ea190a1e9917da33fe96cd8cfa5e80b67b4600171aa77e0'

#### Understanding shared customer behaviour

Query para ver las compras de dicho usuario, ordenadas de la más reciente a la más antigua.

In [20]:
# List the products behind every article the customer purchased, together with
# the purchase date, most recent first.
gds.run_cypher("""
MATCH(c:Customer {customerId: $customerId})-[t:PURCHASED]->(:Article)-[:VARIANT_OF]->(p:Product)
RETURN p.productCode AS productCode,
    p.prodName AS prodName,
    p.productTypeName AS productTypeName,
    p.garmentGroupName AS garmentGroupName,
    p.detailDesc AS detailDesc,
    t.tDat AS purchaseDate
ORDER BY t.tDat DESC
""", params={"customerId": customerId})

Unnamed: 0,productCode,prodName,productTypeName,garmentGroupName,detailDesc,purchaseDate
0,733027,Tove,Top,Jersey Fancy,"Short top in soft cotton jersey with a round neckline, short sleeves and a seam at the hem with a decorative knot detail at the front.",2019-08-05
1,753724,Rosemary,Dress,Dresses Ladies,"Short dress in woven fabric with 3/4-length sleeves with an opening and ties at the cuffs, and a gently rounded hem. Unlined.",2019-08-05
2,713577,Malte r-neck,Sweater,Knitwear,"Jumper in soft, patterned, fine-knit cotton with ribbing around the neckline, cuffs and hem.",2019-06-27
3,731142,Lead Superskinny,Trousers,Trousers,"Chinos in stretch twill with a zip fly and button, side pockets, welt back pockets and skinny legs.",2019-06-27
4,606711,Rylee flatform,Heeled sandals,Shoes,"Sandals with imitation suede straps, an elastic heel strap and wedge heels. Satin insoles and thermoplastic rubber (TPR) soles. Platform front 2 cm, heel 6 cm.",2019-06-22
...,...,...,...,...,...,...
21,569974,DONT USE ROLAND HOOD,Hoodie,Jersey Basic,"Top in sweatshirt fabric with a lined drawstring hood, kangaroo pocket, long raglan sleeves and ribbing at the cuffs and hem.",2018-10-12
22,620425,Karin headband,Hairband,Accessories,Wide hairband in cotton jersey with a twisted detail.,2018-10-12
23,662328,Survivor,Blouse,Blouses,"Straight-cut blouse in a crêpe weave with a collar, concealed buttons down the front and fake flap front pockets. Yoke with a pleat at the back, long sleeves with pleats and buttoned cuffs, and a straight cut hem with slits in the sides.",2018-10-12
24,682848,Skinny RW Ankle Milo Zip,Trousers,Trousers Denim,"5-pocket, ankle-length jeans in washed stretch denim with hard-worn details, a regular waist, zip fly and button, and skinny legs with a zip at the hems. The jeans are made partly from recycled cotton.",2018-10-12


Query:

1. Obtener la fecha de su última compra.
2. Dicha fecha nos da los artículos que el customer compró entonces.
3. A esos artículos les conectamos todos los customers del grafo que también los compraron, para contabilizar cuáles fueron los productos que más compraron esos customers.

In [21]:
# Step 1: find the customer's latest purchase date.
# Step 2: from the articles bought on that date, walk to the other customers
#         who bought the same articles and on to the products of everything
#         those customers purchased.
# Step 3: count the matched paths per product (commonPurchaseScore) and sort
#         by that count, highest first.
gds.run_cypher("""
MATCH(c:Customer {customerId: $customerId})-[t:PURCHASED]->()
WITH max(t.tDat) AS latestPurchases
MATCH(c:Customer {customerId: $customerId})-[:PURCHASED {tDat: latestPurchases}]->(:Article)<-[:PURCHASED]-(:Customer)-[:PURCHASED]->(:Article)-[:VARIANT_OF]->(p:Product)
RETURN p.productCode AS productCode,
    p.prodName AS prodName,
    p.productTypeName AS productTypeName,
    p.garmentGroupName AS garmentGroupName,
    count(*) AS commonPurchaseScore,
    p.detailDesc AS detailDesc
ORDER BY commonPurchaseScore DESC
""", params={"customerId": customerId})

Unnamed: 0,productCode,prodName,productTypeName,garmentGroupName,commonPurchaseScore,detailDesc
0,685813,PETAR SWEATSHIRT,Sweater,Jersey Basic,8,"Top in soft sweatshirt fabric. Slightly looser fit with dropped shoulders and ribbing around the neckline, cuffs and hem. Soft brushed inside."
1,685816,RONNY REG RN T-SHIRT,T-shirt,Jersey Basic,8,Round-necked T-shirt in soft cotton jersey.
2,599580,Timeless Midrise Brief,Swimwear bottom,Swimwear,6,Fully lined bikini bottoms with a mid waist and medium coverage at the back.
3,730683,HAVANA HW tights,Leggings/Tights,Jersey Fancy,5,Sports tights in fast-drying functional fabric with a high waist and wide waistband to hold in and shape the waist. Sculpting seams at the back that showcase the body’s physique. Concealed key pocket in the waistband.
4,768440,Tove top.,T-shirt,Jersey Fancy,4,"Short top in soft cotton jersey with a round neckline, short sleeves and a seam at the hem with a decorative knot detail at the front."
...,...,...,...,...,...,...
240,810419,Timeless Tanga,Swimwear bottom,Swimwear,1,"Fully lined bikini bottoms with narrow sides, cutaway coverage at the back and a low waist with covered elastication."
241,821398,Liza 3p thong micro,Underwear bottom,"Under-, Nightwear",1,Thong briefs in microfibre with a low waist and lined gusset.
242,823317,Rio OPT 2,Dress,Dresses Ladies,1,"Short dress in an airy, patterned weave with a small frilled collar and V-neck opening with ties at the front. Long, raglan balloon sleeves with a covered button at the cuffs, a gathered seam at the waist and a flared skirt. Jersey lining."
243,868309,Inga Brazilian Micro 7p,Underwear bottom,"Under-, Nightwear",1,"Brazilian briefs in microfibre with a low waist, lined gusset and high cut at the back."


Como podemos ver, hubo 8 compras del producto 685813 realizadas por otros customers que también compraron el artículo en cuestión.

#### Personalizing Results Based on Customer Behavior in the Graph

En la siguiente query queremos encontrar, mediante similarity search, productos similares a la búsqueda "denim jeans" del customer en cuestión que hayan sido comprados por otros customers que, a su vez, también hayan comprado los mismos artículos que el customer en cuestión.

In [22]:
# Vector store over the existing product_text_embedding index with a custom
# retrieval query. For each vector hit, the query optionally matches paths from
# the product to articles that the given customer ($customerId) also purchased,
# going through other customers who bought the product. count(a) becomes
# purchaseScore, the returned score is (1 + purchaseScore) * searchScore, and
# the rows are ordered by purchaseScore then searchScore, limited to 15.
kg_personalized_search = Neo4jVector.from_existing_index(
    embedding=embedding_model,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name='product_text_embedding',
    retrieval_query="""
    WITH node AS product, score AS searchScore
    OPTIONAL MATCH(product)<-[:VARIANT_OF]-(:Article)<-[:PURCHASED]-(:Customer)-[:PURCHASED]->(a:Article)<-[:PURCHASED]-(:Customer {customerId: $customerId})
    WITH count(a) AS purchaseScore, product.text AS text, searchScore, product.productCode AS productCode
    RETURN text,
        (1+purchaseScore)*searchScore AS score,
        {productCode: productCode, purchaseScore:purchaseScore, searchScore:searchScore} AS metadata
    ORDER BY purchaseScore DESC, searchScore DESC LIMIT 15
    """)

In [28]:
# Request 100 candidates from the index; the retrieval query then keeps at
# most 15 of them. customerId is forwarded as the $customerId query parameter.
res = kg_personalized_search.similarity_search("denim jeans", k=100, params={'customerId': customerId})

# Visualize as a dataframe: one row per returned document, with its metadata scores
pd.DataFrame([{'productCode': d.metadata['productCode'],
               'document': d.page_content,
               'searchScore': d.metadata['searchScore'],
               'purchaseScore': d.metadata['purchaseScore']} for d in res])

Unnamed: 0,productCode,document,searchScore,purchaseScore
0,706016,"Product-- Name: Jade HW Skinny Denim TRS || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs.",0.926941,11
1,777038,"Product-- Name: Bono NW slim denim || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: 5-pocket, ankle-length jeans in washed slightly stretch denim with a high waist, zip fly and button and tapered legs.",0.925903,8
2,448509,"Product-- Name: Perrie Slim Mom Denim TRS || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: 5-pocket, ankle-length jeans in washed, sturdy cotton denim with a high waist, button fly and slim, straight legs with raw-edge hems.",0.923431,5
3,539723,"Product-- Name: Jade Denim TRS || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers || Description: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs.",0.929199,3
4,714790,"Product-- Name: Mom Fit || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers Denim || Description: 5-pocket, ankle-length jeans in washed, stretch cotton denim with an extra-high waist. Slightly looser fit with straight legs. The cotton content of the jeans is partly recycled.",0.925354,2
...,...,...,...,...
10,706590,"Product-- Name: Tapered Premiumprice || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers Denim || Description: 5-pocket jeans in washed denim with a regular waist, button fly and tapered legs.",0.923325,1
11,557241,"Product-- Name: Vintage Slim H.W || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers Denim || Description: 5-pocket ankle-length jeans in washed denim with a high waist, button fly and slim legs with sparkly stones at the front.",0.922958,1
12,252298,"Product-- Name: Didi denim || Type: Trousers || Group: Garment Lower body || Garment Type: Dresses Ladies || Description: Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs.",0.938049,0
13,598423,Product-- Name: Night Denim || Type: Trousers || Group: Garment Lower body || Garment Type: Dresses Ladies || Description: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. Front and back pockets and skinny legs with a zip down the sides.,0.937393,0


#### Augmenting Semantic Search with Knowledge Graph Inference & ML

In [29]:
%%time
# Clear past GDS analysis so the notebook can be re-run from here
def clear_all_graphs():
    """Drop every graph projection currently listed in the GDS graph catalog."""
    catalog = gds.graph.list()
    for projected_name in catalog.graphName.tolist():
        gds.graph.get(projected_name).drop()

# Start from an empty graph catalog
clear_all_graphs()

# Remove CUSTOMERS_ALSO_LIKE relationships left by earlier runs, deleting in
# transactions of 1000 rows
gds.run_cypher('''
    MATCH(:Article)-[r:CUSTOMERS_ALSO_LIKE]->()
    CALL {
        WITH r
        DELETE r
    } IN TRANSACTIONS OF 1000 ROWS
    ''')

# graph projection - project co-purchase graph into analytics workspace:
# two articles are linked when the same customer purchased both
gds.run_cypher('''
  MATCH (a1:Article)<-[:PURCHASED]-(:Customer)-[:PURCHASED]->(a2:Article)
  WITH gds.graph.project("proj", a1, a2,
       {relationshipType: "COPURCHASE"}) AS g
  RETURN g.graphName
   ''')
# This results in an in-memory graph with the structure (Article)-[:COPURCHASE]->(Article)

# Handle to the projection created above
g = gds.graph.get("proj")

# create FastRP node embeddings (128 dimensions), stored on the projection as
# 'nodeEmbedding'; randomSeed is fixed, presumably for reproducible results
gds.fastRP.mutate(g, mutateProperty='nodeEmbedding', embeddingDimension=128, randomSeed=7474, concurrency=4, iterationWeights=[0.0, 1.0, 1.0])

# write embeddings back to database to introspect later
gds.graph.writeNodeProperties(g, ['nodeEmbedding'])

CPU times: total: 0 ns
Wall time: 3.82 s


writeMillis                                                                                                                                                                  242
graphName                                                                                                                                                                   proj
nodeProperties                                                                                                                                                   [nodeEmbedding]
propertiesWritten                                                                                                                                                          13296
configuration        {'jobId': 'ab7ee884-43ac-45a7-a1cc-d2c593daf07e', 'logProgress': True, 'concurrency': 4, 'sudo': False, 'writeToResultStore': False, 'writeConcurrency': 4}
Name: 0, dtype: object

#### K-Nearest Neighbors (KNN) Relationships

In [30]:
# Draw KNN relationships: run K-Nearest Neighbors on the 'nodeEmbedding'
# property and write the result to the database as CUSTOMERS_ALSO_LIKE
# relationships with a 'score' property, using a similarity cutoff of 0.75
knn_stats = gds.knn.write(g, nodeProperties=['nodeEmbedding'],
                  writeRelationshipType='CUSTOMERS_ALSO_LIKE', writeProperty='score',
                  sampleRate=1.0, initialSampler='randomWalk', concurrency=1, similarityCutoff=0.75, randomSeed=7474)

# clear graph projection once done
g.drop()

# output knn stats
knn_stats

K-Nearest Neighbours:   0%|          | 0/100 [00:00<?, ?%/s]

ranIterations                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               7
didConverge                                                                                                                                                                                                                                                                                                                                                                                                                                                                               

#### Personalized Recommendations

In [46]:

# Recommend products for the customer: follow CUSTOMERS_ALSO_LIKE from each
# purchased article to a similar article and on to its product, sum the
# relationship scores per product and keep the top $k.
res = kg.query('''
    MATCH(:Customer {customerId:$customerId})-[:PURCHASED]->(:Article)-[r:CUSTOMERS_ALSO_LIKE]->(:Article)-[:VARIANT_OF]->(product:Product)
    RETURN product.productCode AS productCode,
        product.prodName AS prodName,
        product.productTypeName AS productType,
        product.text AS document,
        sum(r.score) AS recommenderScore
    ORDER BY recommenderScore DESC LIMIT $k
    ''', params={'customerId': customerId, 'k':15})

# Visualize as a dataframe (the result is a list of dicts)
pd.DataFrame(res)

Unnamed: 0,productCode,prodName,productType,document,recommenderScore
0,569974,DONT USE ROLAND HOOD,Hoodie,"Product-- Name: DONT USE ROLAND HOOD || Type: Hoodie || Group: Garment Upper body || Garment Type: Jersey Basic || Description: Top in sweatshirt fabric with a lined drawstring hood, kangaroo pocket, long raglan sleeves and ribbing at the cuffs and hem.",28.685492
1,656401,PASTRY SWEATER,Sweater,"Product-- Name: PASTRY SWEATER || Type: Sweater || Group: Garment Upper body || Garment Type: Knitwear || Description: Jumper in soft, textured-knit cotton with long raglan sleeves and ribbing around the neckline, cuffs and hem.",17.892415
2,642498,Bubble Bum Bandeau (1),Bikini top,"Product-- Name: Bubble Bum Bandeau (1) || Type: Bikini top || Group: Swimwear || Garment Type: Swimwear || Description: Fully lined bandeau bikini top with padded cups and removable inserts. Detachable ties at the back of the neck, ties at the back, side support and a silicone trim at the top.",16.789007
3,660519,Haven back detail,Bra,"Product-- Name: Haven back detail || Type: Bra || Group: Underwear || Garment Type: Under-, Nightwear || Description: Push-up bra in lace and mesh with underwired, moulded, padded cups for a larger bust and fuller cleavage. Lace racer back, narrow adjustable shoulder straps, a wide mesh strap at the back and a metal fastener at the front.",15.896369
4,662328,Survivor,Blouse,"Product-- Name: Survivor || Type: Blouse || Group: Garment Upper body || Garment Type: Blouses || Description: Straight-cut blouse in a crêpe weave with a collar, concealed buttons down the front and fake flap front pockets. Yoke with a pleat at the back, long sleeves with pleats and buttoned cuffs, and a straight cut hem with slits in the sides.",14.789096
...,...,...,...,...,...
10,682848,Skinny RW Ankle Milo Zip,Trousers,"Product-- Name: Skinny RW Ankle Milo Zip || Type: Trousers || Group: Garment Lower body || Garment Type: Trousers Denim || Description: 5-pocket, ankle-length jeans in washed stretch denim with hard-worn details, a regular waist, zip fly and button, and skinny legs with a zip at the hems. The jeans are made partly from recycled cotton.",9.999887
11,598806,Dixie tee,T-shirt,Product-- Name: Dixie tee || Type: T-shirt || Group: Garment Upper body || Garment Type: Jersey Fancy || Description: Short top in soft cotton jersey with short sleeves. Contrasting colour trims around the neckline and sleeves.,9.892483
12,752193,Banks,Hoodie,"Product-- Name: Banks || Type: Hoodie || Group: Garment Upper body || Garment Type: Jersey Basic || Description: Long-sleeved top in sweatshirt fabric made from a cotton blend with a double-layered hood, gently dropped shoulders and ribbing at the cuffs and hem. Soft brushed inside.",8.896614
13,606711,Rylee flatform,Heeled sandals,"Product-- Name: Rylee flatform || Type: Heeled sandals || Group: Shoes || Garment Type: Shoes || Description: Sandals with imitation suede straps, an elastic heel strap and wedge heels. Satin insoles and thermoplastic rubber (TPR) soles. Platform front 2 cm, heel 6 cm.",7.999907


### LLM for generating Grounded Content

#### Create Knowledge Graph Stores for Retrieval

In [48]:
# Vector store over the existing `product_text_embedding` index.
# The retrieval query re-ranks each vector hit for the customer passed in as
# $customerId: purchaseScore counts articles that the customer bought which
# were also bought by customers who purchased a variant of the hit product.
# We will use a mock URL for our sources in the metadata.
kg_personalized_search_store = Neo4jVector.from_existing_index(
        embedding=embedding_model,
        url=NEO4J_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD,
        # Target the same database as the `kg` Neo4jGraph connection; without
        # this the store falls back to the default database.
        database=NEO4J_DATABASE,
        index_name='product_text_embedding',
        retrieval_query="""
        WITH node AS product, score AS searchScore
        OPTIONAL MATCH(product)<-[:VARIANT_OF]-(:Article)<-[:PURCHASED]-(:Customer)-[:PURCHASED]->(a:Article)<-[:PURCHASED]-(:Customer {customerId: $customerId})
        WITH count(a) AS purchaseScore, product, searchScore
        RETURN product.text + '\nurl: ' + product.url  AS text,
            (1.0+purchaseScore)*searchScore AS score,
            {source: product.url} AS metadata
        ORDER BY purchaseScore DESC, searchScore DESC LIMIT 10
    """
    )

# Wrapped in a function so the customer id can be supplied per call
def kg_personalized_search(search_prompt, customer_id, k=100):
    """Run a personalized similarity search and return the hits as one string.

    Args:
        search_prompt: Text to search the product vector store with.
        customer_id: Value bound to the `$customerId` query parameter.
        k: Number of results requested from the similarity search.

    Returns:
        The `page_content` of every returned document, separated by blank lines.
    """
    query_params = {'customerId': customer_id}
    matches = kg_personalized_search_store.similarity_search(
        search_prompt, k, params=query_params
    )
    return "\n\n".join(match.page_content for match in matches)

# Same recommendation query as the personalized recommendations cell, but
# returning prompt-ready text (product text plus url) instead of table columns
def kg_recommendations_app(customer_id, k=30):
    """Return the top recommended products for a customer as one text block.

    Follows CUSTOMERS_ALSO_LIKE from the articles the customer purchased, rolls
    the target articles up to their product and sums the relationship scores.

    Args:
        customer_id: Value bound to the `$customerId` query parameter.
        k: Maximum number of rows the query returns.

    Returns:
        The `text` of each returned row, separated by blank lines, ordered by
        descending summed score. Rows whose text is null are skipped.
    """
    res = kg.query("""
    MATCH(:Customer {customerId:$customerId})-[:PURCHASED]->(:Article)-[r:CUSTOMERS_ALSO_LIKE]->(:Article)-[:VARIANT_OF]->(product:Product)
    RETURN product.text + '\nurl: ' + product.url   AS text,
        sum(r.score) AS recommenderScore
    ORDER BY recommenderScore DESC LIMIT $k
    """, params={'customerId': customer_id, 'k':k})

    # Cypher string concatenation yields null when `text` or `url` is missing;
    # skip such rows so str.join does not raise TypeError on None.
    return "\n\n".join(row['text'] for row in res if row['text'] is not None)

#### Prompt Engineering

In [None]:
# Prompt for the email-writing LLM. Template variables: customerName,
# timeOfYear, customerInterests, searchProds, recProds.
# Adjacent string literals are concatenated verbatim, so every piece carries
# its own trailing space or newline; otherwise sentences and the bullet list
# run together in the rendered prompt.
prompt = PromptTemplate.from_template(
    'You are a personal assistant named Sally '
    'for a fashion, home, and beauty company called HRM. '
    'Write an engaging email to {customerName}, one of your customers, '
    'to promote and summarize products relevant for them given:\n'
    '- The current season / time of year: {timeOfYear}\n'
    '- Recent searches/interests: {customerInterests}\n'
    'Please only mention the products listed below. '
    'Do not come up with or add any new products to the list. '
    'Each product comes with an https `url` field. '
    'Make sure to provide that https url with descriptive name text '
    'in markdown for each product.'
    '''

# RelevantProducts:
These are products from the HRM store the customer may be interested in based
on their recent searches/interests: {customerInterests}
{searchProds}

# Customer May Also Be Interested In the following
The below candidates are recommended based on the shared purchase patterns of
other customers in the HRM database.
Select the best 4 to 5 product subset from the context that best match the
time of year: {timeOfYear} and to pair with the RelevantProducts above.
For example, even if scarfs are listed here, they may not be appropriate for a
summer time of year so best not to include those.
{recProds}
''')

#### Create Chain

In [57]:
# Assemble the generation pipeline. The leading dict maps the invoke() payload
# onto the prompt variables: three values are passed through unchanged and two
# are fetched from the knowledge graph. The filled prompt then goes to the LLM
# and the reply is parsed to a plain string.
chain = (
    {
        'customerName': lambda payload: payload['customerName'],
        'timeOfYear': lambda payload: payload['timeOfYear'],
        "customerInterests": lambda payload: payload['customerInterests'],
        # Products matching the customer's stated interests
        'searchProds': lambda payload: kg_personalized_search(
            payload['customerInterests'], payload['customerId']
        ),
        # Products recommended from the customer's purchase history
        'recProds': lambda payload: kg_recommendations_app(payload['customerId']),
    }
    | prompt
    | llm_model
    | StrOutputParser()
)

In [58]:

# Generate and print the email for one customer
print(
    chain.invoke(
        dict(
            customerId=customerId,
            customerName='Alex Smith',
            customerInterests="denim jeans",
            timeOfYear='Jun, 2024',
        )
    )
)

Subject: Elevate Your Summer Wardrobe with Our Denim Picks! 🌞

Dear Alex,

I hope this email finds you well! As we embrace the warmth of June, it’s the perfect time to refresh your wardrobe with some stylish and versatile denim options that are perfect for the season. Given your recent interest in denim jeans, I’ve curated a selection that I think you’ll love.

### Denim Essentials for Summer:

1. **[Jade HW Skinny Denim TRS](https://representative-domain/product/706016)**  
   High-waisted and crafted from washed superstretch denim, these jeans are designed for a snug fit with super-skinny legs. Perfect for pairing with your favorite summer tops!

2. **[Bono NW Slim Denim](https://representative-domain/product/777038)**  
   These ankle-length jeans feature a high waist and a slightly stretchy fabric, making them comfortable yet fashionable. The tapered legs add a chic touch to any outfit!

3. **[Mom Fit](https://representative-domain/product/714790)**  
   With a slightly looser fit 