- https://www.lyonwj.com/2015/06/16/nlp-with-neo4j/
- 他の内容は以下にある
  
  1. https://github.com/johnymontana/nlp-graph-notebooks

## 入力した2つの単語が、文章の中でどれくらい似ているかを比較する

In [207]:
from py2neo import Graph
import re, string
from neo4j.v1 import GraphDatabase

In [208]:
# Open a Bolt connection to the Neo4j server on localhost; every session below uses this driver.
# NOTE(review): the password is hardcoded here — consider reading it from an environment
# variable or prompting with getpass before sharing or committing this notebook.
driver = GraphDatabase.driver('bolt://127.0.0.1:7687', auth=('neo4j', 'Mizuchan123'))
# Earlier py2neo (HTTP endpoint) connection, left disabled:
#graphdb = Graph('http://neo4j:neo4j@localhost:7474/db/data')

In [209]:
# Cypher: for each two-element entry t of the $wordPairs parameter, MERGE a :Word node for
# t[0] and for t[1] (re-using nodes that already exist) and CREATE a new NEXT_WORD
# relationship from the first to the second.
# NOTE(review): the visible cells never reference this constant; insert_query() embeds the
# same statement inline.
INSERT_QUERY = '''
FOREACH (t IN $wordPairs |
    MERGE (w0:Word {word: t[0]})
    MERGE (w1:Word {word: t[1]})
    CREATE (w0)-[:NEXT_WORD]->(w1)
    )

'''

# Cypher: return the words w that have a NEXT_WORD relationship pointing at the word given
# by the $word parameter, i.e. the words that appear immediately before it.
# Uses the `$word` parameter form, matching INSERT_QUERY and the inline queries in this
# notebook, instead of the legacy `{word}` form.
LEFT1_QUERY = '''
MATCH (s:Word {word: $word})
MATCH (w:Word)-[:NEXT_WORD]->(s)
RETURN w.word as word

'''

# Cypher: return the words w that the word given by the $word parameter points at through a
# NEXT_WORD relationship, i.e. the words that appear immediately after it.
# The relationship type must be spelled NEXT_WORD (the type that the insert statement
# creates); a misspelled type would silently match nothing.
RIGHT1_QUERY = '''
MATCH (s:Word {word: $word})
MATCH (w:Word)<-[:NEXT_WORD]-(s)
RETURN w.word as word

'''

In [254]:
def insert_query(tx, wordPairs):
    """Store one batch of adjacent-word pairs in the graph.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        wordPairs: List of ``[word, next_word]`` pairs, bound to ``$wordPairs``.

    For every pair the statement MERGEs both :Word nodes and CREATEs a
    NEXT_WORD relationship from the first word to the second. Returns None.
    """
    cypher = 'FOREACH (t IN $wordPairs | MERGE (w0:Word {word: t[0]}) MERGE (w1:Word {word: t[1]}) CREATE (w0)-[:NEXT_WORD]->(w1))'
    tx.run(cypher, wordPairs=wordPairs)
    
def left1_query(tx, word):
    """Collect the distinct words that directly precede ``word`` in the graph.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        word: Value bound to the ``$word`` query parameter.

    Returns:
        A set holding every value of every record the query yields (the query
        returns a single ``word`` column per record).
    """
    # The three literals are joined with no whitespace in between, so the
    # statement text sent to the server is exactly this concatenation.
    cypher = ('MATCH (s:Word {word: $word})'
              'MATCH (w:Word)-[:NEXT_WORD]->(s)'
              'RETURN w.word as word')
    return {value for record in tx.run(cypher, word=word) for value in record}

In [211]:
def arrifySentence(sentence):
    """Split a sentence into its adjacent word pairs.

    The text is lower-cased, stripped of surrounding whitespace, cleared of
    every character in ``string.punctuation`` and then split on whitespace.

    Args:
        sentence: Raw text of one sentence or line.

    Returns:
        A list of two-element lists ``[word, next_word]`` in sentence order;
        empty when the sentence contains fewer than two words.
    """
    normalized = sentence.lower().strip()
    punctuation = re.compile('[%s]' % re.escape(string.punctuation))
    words = punctuation.sub('', normalized).split()
    # zip stops at the shorter sequence, so the final word never starts a pair.
    return [[current, following] for current, following in zip(words, words[1:])]

In [212]:
# Sanity check: a four-word sentence should produce three adjacent-word pairs.
print(arrifySentence("This is a pen"))

[['this', 'is'], ['is', 'a'], ['a', 'pen']]


In [213]:
def loadFile(path="ceeaus.dat", encoding='ISO-8859-1'):
    """Load a text file into Neo4j as a word-adjacency graph.

    Each line of the file is converted to adjacent-word pairs with
    ``arrifySentence`` and written in its own write transaction through
    ``insert_query``.

    Args:
        path: File to read; defaults to the corpus file used by this notebook.
        encoding: Text encoding used to open the file.

    Note:
        Not idempotent: ``insert_query`` as defined in this notebook CREATEs
        the relationships, so loading the same file twice stores them twice.
    """
    with driver.session() as session, open(path, encoding=encoding) as f:
        for line in f:
            word_pairs = arrifySentence(line)
            if not word_pairs:
                # Lines with fewer than two words yield no pairs; the statement
                # would iterate over an empty list, so skip the round trip.
                continue
            session.write_transaction(insert_query, word_pairs)

In [214]:
# Populate the graph from the corpus file. Re-running this cell adds the NEXT_WORD
# relationships again, because the insert statement uses CREATE for them.
loadFile()

In [279]:
def left1(word):
    """Return the set of words stored as immediate predecessors of ``word``.

    Opens a session on the module-level ``driver`` and runs ``left1_query``
    in a read transaction, passing the lower-cased word.
    """
    with driver.session() as session:
        lookup = word.lower()
        preceding = session.read_transaction(left1_query, lookup)
        # Author's note (translated): when obtained through the function call
        # the value is no longer a set, so it is converted again here.
        # NOTE(review): confirm whether this re-wrap is still needed.
        return set(preceding)

In [258]:
def right1_query(tx, word):
    """Collect the distinct words that directly follow ``word`` in the graph.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        word: Value bound to the ``$word`` query parameter.

    Returns:
        A set holding every value of every record the query yields (the query
        returns a single ``word`` column per record).
    """
    # The three literals are joined with no whitespace in between, so the
    # statement text sent to the server is exactly this concatenation.
    cypher = ('MATCH (s:Word {word: $word})'
              'MATCH (w:Word)<-[:NEXT_WORD]-(s)'
              'RETURN w.word as word')
    followers = set()
    for record in tx.run(cypher, word=word):
        followers.update(record)
    return followers

In [266]:
def right1(word):
    """Return the set of words stored as immediate successors of ``word``.

    Opens a session on the module-level ``driver`` and runs ``right1_query``
    in a read transaction, passing the lower-cased word.
    """
    with driver.session() as session:
        lookup = word.lower()
        following = session.read_transaction(right1_query, lookup)
        # Re-wrapped so the caller always receives a set.
        return set(following)

Jaccard係数の解説（参考）: https://mieruca-ai.com/ai/jaccard_dice_simpson/

In [260]:
def jaccard(a,b):
    """Jaccard similarity of two sets: ``len(a & b) / len(a | b)``.

    Args:
        a: A set (its ``union`` / ``intersection`` methods are used).
        b: A set or any iterable accepted by those methods.

    Returns:
        A float between 0.0 and 1.0. When both inputs are empty the union is
        empty as well; 0.0 is returned instead of dividing by zero, so a pair
        of words with no recorded neighbours scores as "no overlap".
    """
    unionSize = len(a.union(b))
    if unionSize == 0:
        return 0.0
    intSize = len(a.intersection(b))
    return intSize/unionSize

In [221]:
# Worked example: the two sets share one element ('is') out of three distinct elements.
jaccard({'This', 'is'}, {'That', 'is'})

0.3333333333333333

In [238]:
# Confirm that a brace literal of strings is a set, since jaccard() calls set methods on it.
type({'This', 'is'})

set

In [223]:
# Numerator of the Jaccard example: number of elements common to both sets.
len({'This', 'is'}.intersection({'That', 'is'}))

1

In [224]:
# Denominator of the Jaccard example: number of distinct elements across both sets.
len({'This', 'is'}.union({'That', 'is'}))

3

In [277]:
def paradigSimilarity(w1, w2):
    """Score how interchangeable two words are from their neighbouring words.

    Returns the mean of two Jaccard similarities: one over the sets of words
    preceding each word (``left1``) and one over the sets of words following
    each word (``right1``).
    """
    left_score = jaccard(left1(w1), left1(w2))
    right_score = jaccard(right1(w1), right1(w2))
    return (left_score + right_score)/2.0

In [278]:
# Example: compare the left/right neighbour sets of "school" and "university" in the loaded graph.
paradigSimilarity("school", "university")

0.20997973657548125

## 以下は、Neo4jを簡易的に試行するためのもの

In [1]:
from neo4j.v1 import GraphDatabase

In [4]:
# Bolt connection to the local Neo4j server used by the friend-graph experiment below.
# NOTE(review): hardcoded password — consider an environment variable or getpass instead.
driver = GraphDatabase.driver('bolt://127.0.0.1:7687', auth=('neo4j', 'Mizuchan123'))

In [5]:
def clear_db(tx):
    """Delete every node in the database along with its relationships.

    Args:
        tx: Transaction-like object exposing ``run(query)``.
    """
    wipe_statement = "MATCH (n) DETACH DELETE n"
    tx.run(wipe_statement)

In [6]:
def add_friend(tx, name, friend_name=None):
    """Create a person, or attach a new friend to existing persons.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        name: Name of the person to create or to match.
        friend_name: When truthy, a new :Person with this name is created and
            linked by an outgoing FRIEND relationship from every :Person
            matching ``name``. When omitted or falsy, a :Person named ``name``
            is created instead.

    Returns:
        Whatever ``tx.run`` returns for the executed statement.
    """
    if friend_name:
        # The two literals are joined with no whitespace in between.
        link_statement = ('MATCH (p:Person {name: $name})'
                          'CREATE (p)-[:FRIEND]->(:Person {name: $friend_name})')
        return tx.run(link_statement, name=name, friend_name=friend_name)
    return tx.run("CREATE (p:Person {name: $name}) RETURN p", name=name)

In [12]:
def print_friend(tx, name):
    """Print one line per (node, friend) record for nodes named ``name``.

    The match is on the ``name`` property only (no label) and follows FRIEND
    relationships in either direction.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        name: Value bound to the ``$name`` query parameter.
    """
    # The two literals are joined with no whitespace in between.
    friends_statement = ('MATCH (p {name: $name})-[:FRIEND]-(yourFriends)'
                         'RETURN p, yourFriends')
    rows = tx.run(friends_statement, name=name)
    for row in rows:
        print(row)

In [15]:
with driver.session() as session:
    # Clearing is disabled, so nodes created by earlier runs of this cell stay in the
    # database; the recorded output accordingly lists more than one 'Jun' node.
    #session.write_transaction(clear_db)
    # Create a person named 'Jun' (a new node on every run, since the statement uses CREATE).
    session.write_transaction(add_friend, 'Jun')
    # Attach a newly created friend node to every :Person named 'Jun'.
    for f in ["Mike", "Nancy"]:
        session.write_transaction(add_friend, 'Jun', f)
    # Print each (person, friend) record found for nodes named "Jun".
    session.read_transaction(print_friend, "Jun")

<Record p=<Node id=6 labels={'Person'} properties={'name': 'Jun'}> yourFriends=<Node id=47 labels={'Person'} properties={'name': 'Nancy'}>>
<Record p=<Node id=6 labels={'Person'} properties={'name': 'Jun'}> yourFriends=<Node id=7 labels={'Person'} properties={'name': 'Mike'}>>
<Record p=<Node id=9 labels={'Person'} properties={'name': 'Jun'}> yourFriends=<Node id=48 labels={'Person'} properties={'name': 'Nancy'}>>
<Record p=<Node id=9 labels={'Person'} properties={'name': 'Jun'}> yourFriends=<Node id=46 labels={'Person'} properties={'name': 'Mike'}>>
<Record p=<Node id=9 labels={'Person'} properties={'name': 'Jun'}> yourFriends=<Node id=12 labels={'Person'} properties={'name': 'Nancy'}>>
<Record p=<Node id=9 labels={'Person'} properties={'name': 'Jun'}> yourFriends=<Node id=10 labels={'Person'} properties={'name': 'Mike'}>>


In [16]:
def add_friend(tx, name, yourFriend):
    """Attach a newly created friend to every :Person named ``name``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        name: Name of the existing person(s) to match.
        yourFriend: Name given to the new :Person node that is created.

    The FRIEND relationship is created pointing from the matched person to the
    new friend: Cypher's CREATE requires a direction, so an undirected pattern
    is rejected by the server. Returns None.

    NOTE: this redefines the ``add_friend`` declared earlier in the notebook;
    unlike that version, the friend name is required here.
    """
    tx.run("MATCH (p: Person {name: $name}) "
           "CREATE (p)-[:FRIEND]->(:Person {name: $yourFriend})", name=name, yourFriend=yourFriend)