In [1]:
from neo4j import GraphDatabase

In [6]:
driver=GraphDatabase.driver(uri="bolt://192.168.0.158:7687",auth=("jin","password"))

In [7]:
session=driver.session()

# get results from graphdb

In [8]:
query="""
match (n:Student{name:$name}) return n.name as name
"""

In [10]:
from neo4j import GraphDatabase
import re
from ast import literal_eval
""" make node & relationship"""
def add_corp(tx, name, corp_code, stock_code, date, report_idx, keyword):
    tx.run("MERGE (c:Corp {name: $name , corp_code : $corp_code, stock_code:$stock_code, date: $date, report_idx: $report_idx, keyword: $keyword})",
           name=name, corp_code=corp_code, stock_code=stock_code, date=date,  report_idx=report_idx, keyword=keyword)

def add_product(tx):
    tx.run("MATCH (c:Corp) "
           "UNWIND c.keyword as k "
           "MERGE (b:Keyword {name:k}) "
           "MERGE (c)-[r:Product]->(b)")


""" 한자와 공백 제거 """
# Neo4j -> Gephi 에서 parsing error의 원인이 될 수 있음
def clean_text_for_neo4j(row):
    text = row['주요 사업']
    text_list = eval(text)
    answer = [] 
    for text in text_list : 
        text = re.sub(pattern='[^a-zA-Z0-9ㄱ-ㅣ가-힣]', repl='', string=text)
        answer.append(text) 
    # print("영어, 숫자, 한글만 포함 : ", text )
    return answer


In [15]:
import os

import pandas as pd 
df = pd.read_csv("./filterd_data_v1.csv", encoding='CP949')
df['주요 사업'] = df.apply(clean_text_for_neo4j, axis=1)




TypeError: eval() arg 1 must be a string, bytes or code object

# create new node

In [86]:
query="""
create(n:Student{name:"Stephan",city:"Cape Town"})
RETURN n.name as name ,n.city as city
"""

In [87]:
results=session.run(query)

In [88]:
for result in results:
    print(f'Name:{result["name"]},City:{result["city"]}')

Name:Stephan,City:Cape Town


# update new node by adding new properties

In [89]:
query="""
Match(n:Student{name:"Stephan"}) 
set n.company="Neo4j"
RETURN n.name as name,n.company as company,n.city as city
"""

In [90]:
results=session.run(query)

In [91]:
for result in results:
    print(f'Name:{result["name"]},City:{result["city"]},Company:{result["company"]}')

Name:Stephan,City:Cape Town,Company:Neo4j


# add relationship between nodes


In [92]:
query="""match(a:Student{name:"Roni"}),(b:Student{name:"Stephan"}) 
create (a)-[r1:friend_of]->(b)
"""

In [93]:
results=session.run(query)

In [7]:
from neo4j import GraphDatabase
import re
from ast import literal_eval
import os
import pandas as pd 
import networkx as nx
import matplotlib.pyplot as plt
""" make node & relationship"""
def add_corp(tx, name, corp_code, stock_code, date, report_idx, keyword):
    tx.run("MERGE (c:Corp {name: $name , corp_code : $corp_code, stock_code:$stock_code, date: $date, report_idx: $report_idx, keyword: $keyword})",
           name=name, corp_code=corp_code, stock_code=stock_code, date=date,  report_idx=report_idx, keyword=keyword)

def add_product(tx):
    tx.run("MATCH (c:Corp) "
           "UNWIND c.keyword as k "
           "MERGE (b:Keyword {name:k}) "
           "MERGE (c)-[r:Product]->(b)")


""" 한자와 공백 제거 """
# Neo4j -> Gephi 에서 parsing error의 원인이 될 수 있음
def clean_text_for_neo4j(row):
    text = row['주요 사업']
    try :
        text_list = eval(text)
        answer = [] 
        for text in text_list : 
            text = re.sub(pattern='[^a-zA-Z0-9ㄱ-ㅣ가-힣]', repl='', string=text)
            answer.append(text) 
        # print("영어, 숫자, 한글만 포함 : ", text )
        return answer
    except:
        return [] 




""" 입력 """
# Cyper code를 이용,  크롤링한 Data를 DB에 입력


class neo4jDB:
    def __init__(self, uri, user, password):
        self.driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))  

    def close(self):
        self.driver.close()    

    def create_graph(self) :
        df = pd.read_csv("./flaskr/filterd_data_v1.csv", encoding='CP949')
        df['주요 사업'] = df.apply(clean_text_for_neo4j, axis=1)
        with self.driver.session() as session:
            """ make node """
            for idx in range(len(df)):        
                session.write_transaction(add_corp, name=df.iloc[idx]['기업 이름'], corp_code=str(df.iloc[idx]['기업 코드']), stock_code=str(df.iloc[idx]['주식 코드']), 
                date=str(df.iloc[idx]['수정 일자']),  report_idx=str(df.iloc[idx]['보고서 ID']), keyword=df.iloc[idx]['주요 사업'])    
            session.write_transaction(add_product)

    def print_product(self):
        with self.driver.session() as session:
            return session.write_transaction(self._return_product_relation)
            

    @staticmethod
    def _return_product_relation(tx):
        result = tx.run("MATCH p=(n1:)-[r:Product]->(n2)"
                        " RETURN n1, r, n2"
                        " LIMIT 5")
        DG = nx.DiGraph()
        for i, path in enumerate(result):            
            n1_dict = {
                'name':path['n1']['name'],
                'id': path['n1'].id, 
                'labels':path['n1'].labels, 
                'properties':dict(path['n1'])                
            }
            n2_dict = {
                'name':path['n2']['name'],
                'id': path['n2'].id, 
                'labels':path['n2'].labels, 
                'properties':dict(path['n2'])
            }
            # 마찬가지로, edge의 경우도 아래와 같이 정보를 저장한다.
            e_dict = {
                'name': "주요사업",
                'id':path['r'].id, 
                'type':path['r'].type, 
                'properties':dict(path['r'])
            }
            # print(e_dict)
            # 해당 노드를 넣은 적이 없으면 넣는다.
            if n1_dict['name'] not in DG:
                if 'corp_code' in n1_dict['properties'].keys() : 
                    DG.add_nodes_from([
                        (n1_dict['name'], n1_dict)
                    ], color='blue')
                else : 
                    DG.add_nodes_from([
                    (n1_dict['name'], n1_dict)
                ], color='red')
            # 해당 노드를 넣은 적이 없으면 넣는다.
            if n2_dict['name'] not in DG:
                DG.add_nodes_from([
                    (n2_dict['name'], n2_dict)
                ], color='red')
            # edge를 넣어준다. 노드의 경우 중복으로 들어갈 수 있으므로 중복을 체크해서 넣어주지만, 
            # edge의 경우 중복을 체크하지 않아도 문제없다.
            DG.add_edges_from([
                (n1_dict['name'], n2_dict['name'], e_dict)
            ])
        return DG

In [8]:
import matplotlib.font_manager as fm

from matplotlib import rc

font_name = fm.FontProperties(fname="./font/malgun.ttf").get_name()

rc('font', family=font_name)





In [15]:
greeter = neo4jDB("bolt://localhost:7687", "neo4j", "password")
DG = greeter.print_product()

nx.draw(DG, with_labels=True, node_size=1000, alpha=0.9)
plt.show()  
greeter.close()

AttributeError: 'str' object has no attribute 'id'

In [17]:
import matplotlib
matplotlib.get_cachedir()

'/opt/ml/.cache/matplotlib'