In [49]:
# Standard library
import os

#Import word2vec
from gensim.models import Word2Vec

# Define Neo4j connections
from neo4j import GraphDatabase
import pandas as pd

# Connection settings. Each value can be overridden through an environment
# variable so real credentials never have to be saved in the notebook; the
# fallbacks are the original local-development values.
host = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
user = os.environ.get('NEO4J_USER', 'neo4j')
password = os.environ.get('NEO4J_PASSWORD', '1234')
# One driver object; the cells below open their sessions from it.
driver = GraphDatabase.driver(host, auth=(user, password))

In [45]:
# Schema setup: no additional indexes (first argument is null) and unique
# constraints on Ingredient.name, Dish.id and DishType.name.
graph_schema_query = """

CALL apoc.schema.assert( 
    // define indexes 
    null, 
    // define unique constraints 
    {Ingredient:['name'], Dish:['id'], DishType:['name']})

"""

# Data import from newfood.csv:
#   * one Dish node is CREATEd per row, keyed by row.id;
#   * the row is copied onto the dish as properties after apoc.map.clean
#     removes the 'id', 'dishTypes' and 'ingredients' keys;
#   * each comma-separated ingredient is lower-cased, has '-' replaced by ' ',
#     and is MERGEd as an Ingredient linked by HAS_INGREDIENT;
#   * each comma-separated dish type is MERGEd as a DishType linked by DISH_TYPE.
# NOTE(review): because dishes are CREATEd while Dish.id is unique, running this
# cell a second time against an already-loaded database is expected to violate
# the constraint -- confirm before re-running.
graph_import_query = """

LOAD CSV WITH HEADERS FROM "file:///newfood.csv" as row 
CREATE (d:Dish{id:row.id}) 
SET d += apoc.map.clean(row, ['id','dishTypes','ingredients'],[]) 
FOREACH (i in split(row.ingredients,',') | MERGE (in:Ingredient{name:toLower(replace(i,'-',' '))}) 
                                           MERGE (in)<-[:HAS_INGREDIENT]-(d)) 
FOREACH (dt in split(row.dishTypes,',')  | MERGE (dts:DishType{name:dt}) 
                                           MERGE (dts)<-[:DISH_TYPE]-(d))

"""

with driver.session() as session:
    # consume() waits for the server to finish each statement, so a failing
    # schema or import step raises right here instead of depending on how the
    # session treats unfinished results when it closes.
    session.run(graph_schema_query).consume()
    session.run(graph_import_query).consume()

In [8]:
# Ten ingredients with the most incoming relationships.
with driver.session() as session:
    results = session.run("""
    MATCH (n:Ingredient)
    RETURN n.name as ingredient, size((n)<--()) as mentions 
    ORDER BY mentions DESC
    LIMIT 10
    """)
    # Read the records while the session is still open: a Result is streamed
    # lazily and is no longer guaranteed to be readable once the session that
    # produced it has been closed.
    top_ingredients = [dict(record) for record in results]
pd.DataFrame(top_ingredients)

Unnamed: 0,ingredient,mentions
0,olive oil,954
1,garlic,425
2,salt,405
3,butter,397
4,lemon juice,373
5,salt and pepper,347
6,anchovy,341
7,unsalted butter,305
8,garlic cloves,293
9,capers,267


In [9]:
# Dishes that contain every ingredient in the list, with their full ingredient
# lists, ordered so the dishes with the fewest ingredients come first.
with driver.session() as session:
    results = session.run("""
    WITH ["feta cheese", "zucchini"] as ingredients 
    MATCH (d:Dish) 
    WHERE all(i in ingredients WHERE exists( 
        (d)-[:HAS_INGREDIENT]->(:Ingredient {name: i}))) 
    RETURN d.title AS dish, 
           [(d)-[:HAS_INGREDIENT]->(i) | i.name] AS ingredients 
    ORDER BY size(ingredients)
    LIMIT 10
    """)
    # Read the records while the session is still open: a Result is streamed
    # lazily and is no longer guaranteed to be readable once the session that
    # produced it has been closed.
    matching_dishes = [dict(record) for record in results]
pd.DataFrame(matching_dishes)

Unnamed: 0,dish,ingredients
0,Striped Bass en Papillote,"[lemon juice, striped bass, salt, black pepper..."
1,Mediterranean Salad Platter,"[feta cheese, red onions, zucchini, salt, pepp..."


In [46]:
# Create the in-memory GDS graph named 'all': every node label ('*') and every
# relationship type ('*'), with relationships projected as UNDIRECTED under the
# name ALL_UNDIRECTED.
create_all_graph_query = """CALL gds.graph.create('all', 
    '*', 
    {ALL_UNDIRECTED: {type:'*', orientation:'UNDIRECTED'}})"""

with driver.session() as session:
    session.run(create_all_graph_query)

In [47]:
# Define random walk query: for every node, stream random walks over the 'all'
# graph (steps: 15, walks: 5) and translate each visited node id into the
# node's `name`, falling back to `title` when there is no name.
random_walks_query = """

MATCH (node)
CALL gds.alpha.randomWalk.stream('all', {
  start: id(node),
  steps: 15,
  walks: 5
})
YIELD nodeIds
// Return the names or the titles
RETURN [id in nodeIds | 
    coalesce(gds.util.asNode(id).name, 
             gds.util.asNode(id).title)] as walks

"""
# Fetch data from Neo4j
with driver.session() as session:
    walks = session.run(random_walks_query)
    # Pull every walk out of the lazy Result before the session closes; the
    # stream is not guaranteed to be readable afterwards.
    clean_walks = [row['walks'] for row in walks]
# Train the word2vec model (sg=1 selects skip-gram).
# NOTE: `size` is the gensim 3.x keyword for the vector dimension.
model = Word2Vec(clean_walks, sg=1, window=5, size=100)
# Inspect results
model.wv.most_similar('olive oil')

[('red chilis', 0.6657441854476929),
 ('lamb loin chops', 0.6576498746871948),
 ('dried chilli flakes', 0.6574801206588745),
 ('dried tomatoes', 0.6480619311332703),
 ('parlsley', 0.6417162418365479),
 ('skinless boneless chicken breasts', 0.6388673782348633),
 ('ricotta salata', 0.6376425623893738),
 ('aleppo pepper', 0.6358935832977295),
 ('chicken livers', 0.6358893513679504),
 ('corn bread mix', 0.635177493095398)]

In [34]:
# Define random walk query: same walk settings as before (steps: 15, walks: 5)
# but in 'node2vec' mode with inOut 0.6 and return 1.0. Each walk is returned
# as a list of stringified internal node ids rather than names.
random_walks_query = """
MATCH (node)
CALL gds.alpha.randomWalk.stream('all', {
  start: id(node),
  steps: 15,
  walks: 5,
  mode:'node2vec',
  inOut:0.6,
  return:1.0
})
YIELD nodeIds
// return the string of internal ID of nodes
RETURN [id in nodeIds | toString(id)] as walks
"""
# fetch data from Neo4j
with driver.session() as session:
    walks = session.run(random_walks_query)
    # Pull every walk out of the lazy Result before the session closes; the
    # stream is not guaranteed to be readable afterwards.
    clean_walks = [row['walks'] for row in walks]
# Train model (sg=1 selects skip-gram).
# NOTE: `size` is the gensim 3.x keyword for the vector dimension.
model = Word2Vec(clean_walks, window=5, size=100, sg=1)

In [35]:
# Write each vector back onto the node whose internal id matches row.id.
store_embedding = """
UNWIND $data as row
MATCH (n)
WHERE id(n) = row.id
SET n.embedding = row.embedding
"""
# Prepare data. The vocabulary keys are parsed back to integers for the id
# match in the query, and every vector component is converted to a plain
# Python float before being sent as a query parameter.
# A comprehension is used so that no variable called `id` is left behind at
# notebook scope, where it would shadow the builtin for all later cells.
embeddings = [
    {'id': int(node_key), 'embedding': [float(x) for x in model.wv[node_key]]}
    for node_key in model.wv.vocab
]
# Store embeddings to Neo4j
with driver.session() as session:
    # consume() waits for the write to finish so any failure is raised here.
    session.run(store_embedding, {'data': embeddings}).consume()

In [37]:
# Collect {item: node id, weights: embedding} for every node and pass the list
# to the GDS cosine similarity procedure in write mode. With the settings
# below it uses similarityCutoff 0.5 and topK 5, and writes relationships of
# type COSINE_SIMILARITY. The procedure's node and pair counts are returned.
cosine_similarity_algorithm = """

MATCH (node) 
WITH id(node) as id, node.embedding as weights 
WITH {item:id, weights: weights} as dishData 
WITH collect(dishData) as data 
CALL gds.alpha.similarity.cosine.write({
    nodeProjection: '*', 
    relationshipProjection: '*', 
    similarityCutoff:0.5, 
    topK:5, 
    data: data,
    writeRelationshipType:'COSINE_SIMILARITY'}) 
YIELD nodes, similarityPairs 
RETURN nodes, similarityPairs

"""

with driver.session() as session:
    results = session.run(cosine_similarity_algorithm)
    # Read the summary row while the session is still open: a Result is
    # streamed lazily and is no longer guaranteed to be readable once the
    # session that produced it has been closed.
    similarity_stats = [dict(record) for record in results]
pd.DataFrame(similarity_stats)

Unnamed: 0,nodes,similarityPairs
0,3682,18406
