In [None]:
# 필요한 라이브러리 설치
!pip install langchain openai faiss-cpu networkx matplotlib spacy tiktoken


In [None]:
import spacy
import networkx as nx
import matplotlib.pyplot as plt
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chat_models import ChatOpenAI


In [None]:
# Load the spaCy pipeline whose NER component is used for entity extraction.
nlp = spacy.load("en_core_web_sm")

# Sample documents
docs = [
    "Apple was founded by Steve Jobs in California.",
    "Steve Jobs created the iPhone.",
    "iPhone is a popular smartphone product."
]

# 1. Documents -> chunks
text_splitter = CharacterTextSplitter(chunk_size=50, chunk_overlap=0)
# Flatten every document's pieces into one list first, then wrap each piece
# in a Document so it can be indexed.
chunks = [piece for text in docs for piece in text_splitter.split_text(text)]
documents = [Document(page_content=piece) for piece in chunks]

# 2. Build the vector index
embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)
retriever = vectorstore.as_retriever()

# 3. Entity extraction -> graph construction
# Each document contributes a chain of directed edges linking its entities in
# the order spaCy reports them.
G = nx.DiGraph()
for doc in docs:
    ents = [
        ent.text
        for ent in nlp(doc).ents
        if ent.label_ in ("PERSON", "ORG", "PRODUCT", "GPE")
    ]
    # Zipping the list with itself shifted by one yields consecutive pairs;
    # it is empty when fewer than two entities were found, so nothing is added.
    G.add_edges_from(zip(ents, ents[1:]))

In [None]:
# Visualise the entity graph
draw_options = {
    "with_labels": True,
    "node_size": 2000,
    "node_color": "lightblue",
    "arrowsize": 20,
    "font_size": 12,
}
plt.figure(figsize=(8, 6))
# A fixed seed keeps the spring layout identical between runs.
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, **draw_options)
plt.show()


In [None]:
# 4. Graph-aware retrieval function
def graph_rag_query(query, graph=None, doc_retriever=None):
    """Build an LLM context string from vector search plus graph neighbours.

    Args:
        query: The user's question as a string.
        graph: Object exposing ``.nodes`` and ``.neighbors(node)`` whose nodes
            are strings. Defaults to the notebook-level graph ``G``.
        doc_retriever: Object exposing ``get_relevant_documents(query)`` that
            returns items with a ``page_content`` attribute. Defaults to the
            notebook-level ``retriever``.

    Returns:
        The retrieved document texts followed by the names of the graph
        neighbours of every node mentioned in the query, all separated by
        single spaces. Each neighbour appears at most once, in first-seen
        order. If no node matches, only the document texts are returned.
    """
    if graph is None:
        graph = G
    if doc_retriever is None:
        doc_retriever = retriever

    # 1. Vector search
    similar_docs = doc_retriever.get_relevant_documents(query)
    base_context = " ".join(d.page_content for d in similar_docs)

    # 2. Graph expansion: add nodes connected to entities named in the query.
    # Matching is a case-insensitive substring test on the node name.
    query_lower = query.lower()
    related = {}  # dict used as an insertion-ordered set
    for node in graph.nodes:
        if node.lower() in query_lower:
            for neighbor in graph.neighbors(node):
                related.setdefault(neighbor, None)
    # Joining all neighbours in one pass keeps a separator between the
    # neighbour lists of different matched nodes (previously they were
    # concatenated directly, fusing adjacent names together).
    extra_context = " ".join(related)

    # Skip empty parts so the result has no leading or trailing separator.
    return " ".join(part for part in (base_context, extra_context) if part)


# 5. Question answering with the LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

query = "Which product was created by Steve Jobs?"
context = graph_rag_query(query)
# Build the prompt as a named value so it can be inspected before it is sent.
prompt = f"Question: {query}\n\nKnowledge: {context}\n\nAnswer:"
answer = llm.predict(prompt)
print("질문:", query)
print("답변:", answer)
