In [5]:
import os
from dotenv import load_dotenv
import json
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
import weaviate
from weaviate.classes import config
from llama_index.core.query_engine import RetrieverQueryEngine
import openai

# .env 파일 로드
load_dotenv()

# OpenAI API Key 설정
OPENAI_KEY = os.getenv("OPENAI_KEY")

# API Key 설정
if OPENAI_KEY is None:
    raise ValueError("OPENAI_KEY is not set in the .env file.")
openai.api_key = OPENAI_KEY

# movie_list.txt에서 ID 읽기
with open('movie_list.txt', 'r') as file:
    ids = [line.strip() for line in file.readlines() if line.strip()]
    
host = os.getenv("WEAVIATE_HOST", "localhost")  # 단일 host 설정
http_port = int(os.getenv("WEAVIATE_HTTP_PORT", 8081)) # int casting
grpc_port = int(os.getenv("WEAVIATE_GRPC_PORT", 50052))

print(f"Server Address and Ports:")
print(f"Host: {host}")
print(f"HTTP Port: {http_port}")
print(f"GRPC Port: {grpc_port}")

# Weaviate 클라이언트 설정
client = weaviate.WeaviateClient(
    connection_params=weaviate.ConnectionParams.from_params(
        http_host=host,
        http_port=http_port,
        http_secure=False,
        grpc_host=host,  # 동일한 host 재사용
        grpc_port=grpc_port,
        grpc_secure=False,
    )
)
class_name = "MovieSynopsis"

client.connect()

# 클래스가 존재하지 않으면 생성
if not client.collections.exists(class_name):
    client.collections.create(
        name=class_name,
        properties=[
            config.Property(name="content", data_type=config.DataType.TEXT),
            config.Property(name="movie_id", data_type=config.DataType.TEXT)
        ],
        vectorizer_config=config.Configure.Vectorizer.text2vec_openai()
    )

# OpenAI 텍스트 임베딩 모델 설정
text_embed_model = OpenAIEmbedding(api_key=OPENAI_KEY)

# 데이터 추가
collection = client.collections.get(class_name)
for id in ids:
    file_path = f'movie_synopsis/{id}.json'

    if not os.path.exists(file_path):
        print(f"파일 {file_path}이 존재하지 않습니다. 다음 ID로 이동합니다.")
        continue

    # JSON 파일 열기
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # synopsis 부분 추출
    synopsis = data.get("synopsis", "Synopsis not found.")
    if synopsis == "Synopsis not found.":
        print(f"ID {id}: Synopsis not found. 다음 ID로 이동합니다.")
        continue

    # "\n\n" 기준으로 텍스트 분리
    text_segments = synopsis.split("\n\n")

    for segment in text_segments:
        if not segment.strip():
            continue  # 빈 텍스트는 건너뜀

        # Weaviate에 데이터 추가
        try:
            collection.data.insert({
                "content": segment,
                "movie_id": id
            })
        except Exception as e:
            print(f"ID {id}: 데이터 저장 중 오류 발생: {e}")
            print(f"오류 타입: {type(e)}")
            import traceback
            print(traceback.format_exc())

    print(f"ID {id}: 데이터 저장 완료.")

# WeaviateVectorStore 생성
vector_store = WeaviateVectorStore(weaviate_client=client, index_name=class_name)

# StorageContext 및 VectorStoreIndex 생성
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store)

# Retriever 설정
retriever = index.as_retriever(retriever_mode="default")

# Query Engine 생성
query_engine = RetrieverQueryEngine(retriever=retriever)

Server Address and Ports:
Host: localhost
HTTP Port: 8080
GRPC Port: 50051
ID tt0241527: 데이터 저장 완료.
ID tt0295297: 데이터 저장 완료.
ID tt0304141: 데이터 저장 완료.
ID tt0330373: 데이터 저장 완료.
ID tt0373889: 데이터 저장 완료.
ID tt0417741: 데이터 저장 완료.
ID tt0926084: 데이터 저장 완료.
ID tt1201607: 데이터 저장 완료.


In [6]:
response = query_engine.query("Your query here")
response

Response(response="Harry, Ron, and their friends enter the Department of Mysteries where they come across a prophecy involving Harry and Voldemort. They are then ambushed by Death Eaters, including Lucius Malfoy and Bellatrix Lestrange. Lucius tries to manipulate Harry into giving him the prophecy by claiming it holds the secret to why Voldemort couldn't kill Harry as a baby. However, Harry refuses, leading to a confrontation between Dumbledore's Army and the Death Eaters.", source_nodes=[NodeWithScore(node=TextNode(id_='ff9dbb4b-b9fe-4a34-94c4-e78f7ffbc0c5', embedding=[-0.019321933388710022, 0.03957504406571388, -0.035066183656454086, -0.00709410198032856, -0.009017718955874443, -0.006579503882676363, 0.03793323040008545, 0.059938423335552216, -0.024149352684617043, -0.0164671391248703, 0.007504555396735668, 0.007902756333351135, -0.003513357136398554, 0.05915427580475807, -0.021735642105340958, -0.011743863113224506, -0.007529059890657663, 0.04795563966035843, -0.0368795283138752, 0.

In [10]:
collection = client.collections.get(class_name)
for item in collection.iterator():
    print(item.uuid, item.properties)

0044215b-2ff5-407c-bb8d-504a94f09b6e {'content': "Ron becomes Keeper of the Gryffindor Quidditch team (after beating the strapping Cormac, whom Hermoine made sure he lost by using her spells to disorient him) and forms a romantic relationship with Lavender Brown (Jessie Cave), upsetting Hermione. Harry consoles Hermione, revealing that he now has feelings for Ron's younger sister, Ginny Weasley (Bonnie Wright). Harry gives his Liquid Luck potion to Ron for his first Quidditch match. Ron wins the game for his team and turns into an overnight hero.", 'movie_id': 'tt0417741'}
036c6400-28bb-4f77-b612-54f41389fc6d {'content': "Harry soon finds he is the unwanted center of attention of three people: the vain new Defense Against the Dark Arts Professor, Gilderoy Lockhart (Kenneth Branagh), admirer Colin Creevey (Hugh Mitchell), and Ron's sister, Ginny Weasley (Bonnie Wright), who fancies Harry. Events take a turn for the worse when the Chamber of Secrets is opened and a monster stalks the cas