In [9]:
# Standard library
import getpass
import os

# Third-party: data frames, word2vec, and the Neo4j driver
import pandas as pd
from gensim.models import Word2Vec
from neo4j import GraphDatabase

# Neo4j connection settings.
# Host and user can be overridden through NEO4J_URI / NEO4J_USER and otherwise
# fall back to the local defaults. The password is deliberately not stored in
# the notebook: it is read from NEO4J_PASSWORD, or prompted for when that
# variable is unset or empty.
host = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
user = os.environ.get('NEO4J_USER', 'neo4j')
password = os.environ.get('NEO4J_PASSWORD') or getpass.getpass('Neo4j password: ')
driver = GraphDatabase.driver(host, auth=(user, password))

In [10]:
# Cypher that sets up the graph schema via apoc.schema.assert.
# The first argument (indexes) is null; the second requests unique
# constraints on Ingredient.name, Dish.id and DishType.name.
graph_schema_query = """

CALL apoc.schema.assert( 
    // define indexes 
    null, 
    // define unique constraints 
    {Ingredient:['name'], Dish:['id'], DishType:['name']})

"""

# Cypher that imports newfood.csv (with headers) into the graph.
# For every row it:
#   * creates a Dish node keyed by row.id,
#   * copies the remaining columns onto the node, using apoc.map.clean to drop
#     the 'id', 'dishTypes' and 'ingredients' keys,
#   * splits row.ingredients on ',' and merges one Ingredient node per entry
#     (name lower-cased, '-' replaced by a space) linked by HAS_INGREDIENT,
#   * splits row.dishTypes on ',' and merges one DishType node per entry
#     linked by DISH_TYPE.
graph_import_query = """

LOAD CSV WITH HEADERS FROM "file:///newfood.csv" as row 
CREATE (d:Dish{id:row.id}) 
SET d += apoc.map.clean(row, ['id','dishTypes','ingredients'],[]) 
FOREACH (i in split(row.ingredients,',') | MERGE (in:Ingredient{name:toLower(replace(i,'-',' '))}) 
                                           MERGE (in)<-[:HAS_INGREDIENT]-(d)) 
FOREACH (dt in split(row.dishTypes,',')  | MERGE (dts:DishType{name:dt}) 
                                           MERGE (dts)<-[:DISH_TYPE]-(d))

"""

# Apply the schema first, then load the CSV.
# Each result is consumed explicitly so the statement has fully executed, and
# any server-side error is raised right here, before the next statement runs
# or the session is closed.
# NOTE(review): the import query uses CREATE for Dish nodes while the schema
# declares Dish.id unique, so re-running this cell against an already-loaded
# database is expected to raise a constraint error -- confirm that is desired.
with driver.session() as session:
    for setup_query in (graph_schema_query, graph_import_query):
        session.run(setup_query).consume()

In [11]:
# Project the whole database into the in-memory GDS graph named 'all', with
# every relationship type exposed as undirected (ALL_UNDIRECTED).
# The projection is only created when no graph called 'all' is in the catalog
# yet, so the cell can be re-run safely; the create result is consumed so that
# a failure is raised here instead of being left to session cleanup.
with driver.session() as session:
    graph_exists = session.run(
        "CALL gds.graph.exists('all') YIELD exists RETURN exists"
    ).single()["exists"]
    if not graph_exists:
        session.run("""CALL gds.graph.create('all', 
    '*', 
    {ALL_UNDIRECTED: {type:'*', orientation:'UNDIRECTED'}})""").consume()

In [43]:
# Define random walk query: for every node, stream random walks over the
# 'all' projection (steps: 15, walks: 5) and map each visited node id to its
# `name` property, falling back to `title` when `name` is absent.
random_walks_query = """

MATCH (node)
CALL gds.alpha.randomWalk.stream('all', {
  start: id(node),
  steps: 15,
  walks: 5
})
YIELD nodeIds
// Return the names or the titles
RETURN [id in nodeIds | 
    coalesce(gds.util.asNode(id).name, 
             gds.util.asNode(id).title)] as walks

"""
# Fetch data from Neo4j.
# The result is a cursor tied to the session, so the rows must be pulled into
# a plain list *inside* the `with` block; reading it after the session has
# closed can yield no rows or raise, depending on the driver version.
with driver.session() as session:
    walks = session.run(random_walks_query)
    clean_walks = [row['walks'] for row in walks]
# Train the word2vec model (skip-gram, context window of 5).
# The embedding dimension is left at the library default of 100: the keyword
# was `size` in gensim 3.x and is `vector_size` in gensim 4.x, so omitting it
# keeps the same dimension while working on both.
model = Word2Vec(clean_walks, sg=1, window=5)
# Inspect results
model.wv.most_similar('우리은행')

[('KT', 0.9990994930267334),
 ('서울도시가스', 0.9990705251693726),
 ('제공', 0.9990437626838684),
 ('온누리약국', 0.9988678097724915),
 ('국토교통부', 0.9988675713539124),
 ('졸업증명서', 0.9988021850585938),
 ('강민석', 0.9987440705299377),
 ('가공', 0.9987295866012573),
 ('인터파크', 0.9986906051635742),
 ('여권', 0.9986632466316223)]

In [42]:
# Similarity score between the embeddings of two tokens
model.wv.similarity(
    '우리은행',
    '이상우',
)

0.99799913

In [56]:
# Five tokens closest to the combination of the three positive query tokens
model.wv.most_similar(
    positive=[
        '우리은행',
        '이상우',
        '차번호',
    ],
    topn=5,
)

[('서울도시가스', 0.9993897080421448),
 ('온누리약국', 0.9993473887443542),
 ('강민석', 0.999250054359436),
 ('졸업증명서', 0.9991940855979919),
 ('네이버', 0.9991650581359863)]

In [55]:
# Tokens closest to the two positive tokens once the negative token is
# subtracted from the query
model.wv.most_similar(
    positive=[
        '이상우',
        '차번호',
    ],
    negative=[
        '우리은행',
    ],
)

[('주민번호', 0.9973477721214294),
 ('김수정', 0.9972724914550781),
 ('주소', 0.9968128204345703),
 ('네이버', 0.9966692924499512),
 ('온누리약국', 0.9966692328453064),
 ('강민석', 0.9966251254081726),
 ('서울도시가스', 0.9963914155960083),
 ('졸업증명서', 0.9963417053222656),
 ('가공', 0.9959931373596191),
 ('제공', 0.9955015182495117)]