벡터 db 초기화 코드

In [78]:
import os
from dotenv import load_dotenv
import weaviate
import openai

# .env 파일 로드
load_dotenv()

# OpenAI API Key 설정
OPENAI_KEY = os.getenv("OPENAI_KEY")

# API Key 설정
if OPENAI_KEY is None:
    raise ValueError("OPENAI_KEY is not set in the .env file.")
openai.api_key = OPENAI_KEY

# movie_list.txt에서 ID 읽기
with open('movie_list.txt', 'r') as file:
    ids = [line.strip() for line in file.readlines() if line.strip()]
    
host = os.getenv("WEAVIATE_HOST", "localhost")  # 단일 host 설정
http_port = int(os.getenv("WEAVIATE_HTTP_PORT", 8081)) # int casting
grpc_port = int(os.getenv("WEAVIATE_GRPC_PORT", 50052))

print(f"Server Address and Ports:")
print(f"Host: {host}")
print(f"HTTP Port: {http_port}")
print(f"GRPC Port: {grpc_port}")

# Weaviate 클라이언트 설정
client = weaviate.WeaviateClient(
    connection_params=weaviate.ConnectionParams.from_params(
        http_host=host,
        http_port=http_port,
        http_secure=False,
        grpc_host=host,  # 동일한 host 재사용
        grpc_port=grpc_port,
        grpc_secure=False,
    )
)
class_name = "MovieSynopsis"

client.connect()

# 기존 클래스 삭제
if client.collections.exists(class_name):
    print(f"Deleting existing class '{class_name}' and its data...")
    client.collections.delete(class_name)
    print(f"Class '{class_name}' deleted.")


Server Address and Ports:
Host: localhost
HTTP Port: 8080
GRPC Port: 50051




Deleting existing class 'MovieSynopsis' and its data...
Class 'MovieSynopsis' deleted.


In [79]:
client.close()

벡터 db 구축 코드

In [None]:
!pip install --upgrade weaviate-client

In [None]:
import os
from dotenv import load_dotenv
import json
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.weaviate import WeaviateVectorStore
import weaviate
from llama_index.core.query_engine import RetrieverQueryEngine
import openai
from weaviate.classes.query import Filter
import weaviate.classes.config as wc

# .env 파일 로드
load_dotenv()

# OpenAI API Key 설정
OPENAI_KEY = os.getenv("OPENAI_KEY")

# API Key 설정
if OPENAI_KEY is None:
    raise ValueError("OPENAI_KEY is not set in the .env file.")
openai.api_key = OPENAI_KEY

# movie_list.txt에서 ID 읽기
with open('movie_list.txt', 'r') as file:
    ids = [line.strip() for line in file.readlines() if line.strip()]
    
host = os.getenv("WEAVIATE_HOST", "localhost")  # 단일 host 설정
http_port = int(os.getenv("WEAVIATE_HTTP_PORT", 8081)) # int casting
grpc_port = int(os.getenv("WEAVIATE_GRPC_PORT", 50052))

print(f"Server Address and Ports:")
print(f"Host: {host}")
print(f"HTTP Port: {http_port}")
print(f"GRPC Port: {grpc_port}")

# Weaviate 클라이언트 설정
client = weaviate.WeaviateClient(
    connection_params=weaviate.ConnectionParams.from_params(
        http_host=host,
        http_port=http_port,
        http_secure=False,
        grpc_host=host,  # 동일한 host 재사용
        grpc_port=grpc_port,
        grpc_secure=False,
    )
)
class_name = "MovieSynopsis"

client.connect()

# 클래스가 존재하지 않으면 생성
if not client.collections.exists(class_name):
    client.collections.create(
        name=class_name,
        properties=[
            wc.Property(
                name="content",
                data_type=wc.DataType.TEXT,
                vectorize_property_name=True
            ),
            wc.Property(
                name="movie_id",
                data_type=wc.DataType.TEXT,
                vectorize_property_name=False
            )
        ],
        vectorizer_config=wc.Configure.Vectorizer.text2vec_openai()
    )


# 데이터 추가
collection = client.collections.get(class_name)
for id in ids:
    file_path = f'movie_synopsis/{id}.json'

    if not os.path.exists(file_path):
        print(f"파일 {file_path}이 존재하지 않습니다. 다음 ID로 이동합니다.")
        continue

    # JSON 파일 열기
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # synopsis 부분 추출
    synopsis = data.get("synopsis", "Synopsis not found.")
    if synopsis == "Synopsis not found.":
        print(f"ID {id}: Synopsis not found. 다음 ID로 이동합니다.")
        continue

    # "\n\n" 기준으로 텍스트 분리
    text_segments = synopsis.split("\n\n")

    for segment in text_segments:
        if not segment.strip():
            continue  # 빈 텍스트는 건너뜀

        response = collection.query.fetch_objects(
            filters=Filter.by_property("content").equal(segment)
        )

        if response.objects:
            continue

        # Weaviate에 데이터 추가
        try:
            collection.data.insert({
                "content": segment,
                "movie_id": id
            })
        except Exception as e:
            print(f"ID {id}: 데이터 저장 중 오류 발생: {e}")
            print(f"오류 타입: {type(e)}")
            import traceback
            print(traceback.format_exc())

    print(f"ID {id}: 데이터 저장 완료.")

Server Address and Ports:
Host: localhost
HTTP Port: 8080
GRPC Port: 50051


            Please make sure to close the connection using `client.close()`.


ID tt0241527: 데이터 저장 완료.
ID tt0295297: 데이터 저장 완료.
ID tt0304141: 데이터 저장 완료.
ID tt0330373: 데이터 저장 완료.




ID tt0373889: 데이터 저장 완료.
ID tt0417741: 데이터 저장 완료.
ID tt0926084: 데이터 저장 완료.
ID tt1201607: 데이터 저장 완료.


In [81]:
config = collection.config.get()
print(config)

_CollectionConfig(name='MovieSynopsis', description=None, generative_config=None, inverted_index_config=_InvertedIndexConfig(bm25=_BM25Config(b=0.75, k1=1.2), cleanup_interval_seconds=60, index_null_state=False, index_property_length=False, index_timestamps=False, stopwords=_StopwordsConfig(preset=<StopwordsPreset.EN: 'en'>, additions=None, removals=None)), multi_tenancy_config=_MultiTenancyConfig(enabled=False, auto_tenant_creation=False, auto_tenant_activation=False), properties=[_Property(name='content', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, tokenization=<Tokenization.WORD: 'word'>, vectorizer_config=_PropertyVectorizerConfig(skip=False, vectorize_property_name=True), vectorizer='text2vec-openai'), _Property(name='movie_id', description=None, data_type=<DataType.TEXT: 'text'>, index_filterable=True, index_range_filters=False, index_searchable=True, nested_properties=None, 

In [82]:
# WeaviateVectorStore 생성
vector_store = WeaviateVectorStore(weaviate_client=client, index_name=class_name)

# StorageContext 및 VectorStoreIndex 생성
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store)

# Retriever 설정
retriever = index.as_retriever(retriever_mode="default")

# Query Engine 생성
query_engine = RetrieverQueryEngine(retriever=retriever)

## vector (near text) search

In [89]:
response = collection.query.near_text(
    query="harry potter and hermione",  # The model provider integration will automatically vectorize the query
    include_vector=True,
    limit=2
)

for obj in response.objects:
    print(obj.properties)

{'content': "Ron becomes Keeper of the Gryffindor Quidditch team (after beating the strapping Cormac, whom Hermoine made sure he lost by using her spells to disorient him) and forms a romantic relationship with Lavender Brown (Jessie Cave), upsetting Hermione. Harry consoles Hermione, revealing that he now has feelings for Ron's younger sister, Ginny Weasley (Bonnie Wright). Harry gives his Liquid Luck potion to Ron for his first Quidditch match. Ron wins the game for his team and turns into an overnight hero.", 'movie_id': 'tt0417741'}
{'movie_id': 'tt0417741', 'content': "While recovering, Ron murmurs Hermione's name, causing Lavender to end their relationship. Harry confronts Draco and severely injures him with a Sectumsempra curse taken from the textbook of the Half-Blood Prince. Snape enters and quickly heals Draco's wound. Fearing the book may be filled with more Dark Magic, Ginny and Harry hide it in the Room of Requirement and share their first kiss."}


## vector similarity search - 기본

limit을 걸어서 반환되는 객체를 제한을 줄 수 있고 offset을 통해 첫번째 결과를 건너뒤고 두번째 결과부터 반환할 수 있다.

In [66]:
from weaviate.classes.query import MetadataQuery

response = collection.query.near_text(
    query="animals in movies",
    limit=2,  # return 2 objects
    offset=1,  # With an offset of 1
    return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
    print(o.properties)
    print(o.metadata.distance)

{'content': "Inside, Harry finally confronts Sirius Black. As an illegal Animagus, Black can transform into any animal at will, thus he is also the infamous black dog. Professor Lupin, who had spotted the group on the confiscated Marauder's Map, suddenly bursts in and embraces his old friend Black. Confronted by Hermione, Lupin admits to being a werewolf. Lupin and Black then explain that Black is not the one who betrayed the Potters, rather it was Peter Pettigrew (Timothy Spall), who has been hiding for twelve years in his Animagus form as Scabbers, Ron's rat. He is Voldemort's servant, not Black, and he framed Black for his crimes. Meanwhile Snape bursts in also confronting Black but is disposed of by Harry. Harry is skeptical until Black and Lupin force Pettigrew back into his human form, and after some squealing resistance Pettigrew finally confesses that it is true. Black on discovering Pettigrew was still alive, he escaped Azkaban in order to kill him.", 'movie_id': 'tt0304141'}


### distance

기본적으로 cosine distance를 사용함.
distance 값이 낮을 수록 더 유사하다는 의미를 지닌다.

최대 허용할 수 있는 distance를 설정하는 파라미터이다.

In [98]:
response = collection.query.near_text(
    query="animals in movies",
    distance=0.7, # max accepted distance
    return_metadata=MetadataQuery(distance=True)
)

for o in response.objects:
    print(o.properties)
    print(o.metadata.distance)

{'content': "When Harry, Ron and Hermione learn that Buckbeak is to be executed, they visit Hagrid in his hut to console him. Hagrid has also found Ron's lost rat Scabbers and returns him to Ron. Ron had wrongly believed that his rat had been eaten by Hermione's cat. As the execution party approaches the hut, stones fly in through the window and they leave and run back to the edge of the castle, watching the execution from afar. Scabbers then bites Ron, who chases after him, finally collecting him under the Whomping Willow. The black dog makes another appearance and attacks Ron and drags him, along with Scabbers, into a hole at the tree's base. Harry and Hermione follow, finding a tunnel which leads them to the Shrieking Shack.", 'movie_id': 'tt0304141'}
0.6448028087615967
{'content': "Inside, Harry finally confronts Sirius Black. As an illegal Animagus, Black can transform into any animal at will, thus he is also the infamous black dog. Professor Lupin, who had spotted the group on th

### filter 걸기

특정 속성에 해당되는 값을 필터링한 후 유사성 검색 가능

In [99]:
from weaviate.classes.query import MetadataQuery, Filter

response = collection.query.near_text(
    query="animals in movies",
    filters=Filter.by_property("content").equal("harry"),
    limit=2,
    return_metadata=MetadataQuery(distance=True),
)

for o in response.objects:
    print(o.properties)
    print(o.metadata.distance)

{'content': "When Harry, Ron and Hermione learn that Buckbeak is to be executed, they visit Hagrid in his hut to console him. Hagrid has also found Ron's lost rat Scabbers and returns him to Ron. Ron had wrongly believed that his rat had been eaten by Hermione's cat. As the execution party approaches the hut, stones fly in through the window and they leave and run back to the edge of the castle, watching the execution from afar. Scabbers then bites Ron, who chases after him, finally collecting him under the Whomping Willow. The black dog makes another appearance and attacks Ron and drags him, along with Scabbers, into a hole at the tree's base. Harry and Hermione follow, finding a tunnel which leads them to the Shrieking Shack.", 'movie_id': 'tt0304141'}
0.6448028087615967
{'movie_id': 'tt0304141', 'content': "Inside, Harry finally confronts Sirius Black. As an illegal Animagus, Black can transform into any animal at will, thus he is also the infamous black dog. Professor Lupin, who ha

## Hybrid search

하이브리드 검색은 벡터검색과 키워드(BM25) 검색을 수행한 후 결과를 혼합해 데이터베이스에서 가장 일치하는 개체를 반환하는 것이다.

그래서 하이브리드 검색을 수행하면 동일하게 텍스트 쿼리를 임베딩으로 변환하고 데이터베이스에서 최고점수를 받은 개체를 반환한다.

In [None]:
response = collection.query.hybrid(
    query="harry potter and hermione",
    limit=2
)

for obj in response.objects:
    print(obj.properties["content"])

Nineteen years later, Harry and Ginny Potter and Ron and Hermione Weasley, along with Draco and his wife Astoria Malfoy, proudly watch their own children leaving for Hogwarts from King's Cross station.
Ron becomes Keeper of the Gryffindor Quidditch team (after beating the strapping Cormac, whom Hermoine made sure he lost by using her spells to disorient him) and forms a romantic relationship with Lavender Brown (Jessie Cave), upsetting Hermione. Harry consoles Hermione, revealing that he now has feelings for Ron's younger sister, Ginny Weasley (Bonnie Wright). Harry gives his Liquid Luck potion to Ron for his first Quidditch match. Ron wins the game for his team and turns into an overnight hero.


In [None]:
response = collection.query.fetch_objects(
    include_vector=True,  # 벡터 반환 설정
    limit=10
)

for obj in response.objects:
    print(f"Object ID: {obj.uuid}")
    print(f"Vector: {obj.vector}")

Object ID: 001a66cc-b721-44b7-a5cd-56f213501140
Vector: {'default': [-0.02834993042051792, 0.032153476029634476, -0.013541040942072868, 0.009160727262496948, -0.019381459802389145, -0.04797041043639183, 0.045226868242025375, 0.0379938967525959, -0.01620144583284855, -0.007097874768078327, -0.009451708756387234, 0.018955379724502563, -0.014580260962247849, 0.01359300222247839, -0.0028890324756503105, 0.0072849346324801445, 0.007773368153721094, 0.04813668504357338, -0.017188703641295433, 0.035894669592380524, -0.027414632961153984, 0.037515852600336075, 0.014538692310452461, -0.01493359636515379, 0.0016276788664981723, -0.005037620663642883, 0.011701621115207672, -0.017999297007918358, 0.07287012785673141, 0.009119158610701561, -0.031280532479286194, -0.006271694786846638, 0.0709579661488533, 0.02807973325252533, 0.008526802994310856, -0.03685075417160988, 0.015432422049343586, -0.04755472391843796, 0.02182362787425518, 0.0013139642542228103, 0.015806540846824646, -0.03940723463892937, 

### score 확인

score=True를 통해 score를 확인할 수 있음.

In [70]:
from weaviate.classes.query import MetadataQuery

response = collection.query.hybrid(
    query="food",
    alpha=0.5,
    return_metadata=MetadataQuery(score=True, explain_score=True),
    limit=3
)

for o in response.objects:
    print(o.properties)
    print(o.metadata.score)
    print(o.metadata.explain_score)

{'content': 'Finally, at the end-of-year feast, the House Points totals are given: Gryffindor is in last place. However, Dumbledore gives a few "last-minute additions", granting points to Harry, Ron, and Hermione, so that Gryffindor wins the House Cup.', 'movie_id': 'tt0241527'}
0.5

Hybrid (Result Set vector,hybridVector) Document 4e18ba8a-a0a6-4155-b852-5f8552ad8792: original score 0.14515448, normalized score: 0.5
{'content': "When Harry, Ron and Hermione learn that Buckbeak is to be executed, they visit Hagrid in his hut to console him. Hagrid has also found Ron's lost rat Scabbers and returns him to Ron. Ron had wrongly believed that his rat had been eaten by Hermione's cat. As the execution party approaches the hut, stones fly in through the window and they leave and run back to the edge of the castle, watching the execution from afar. Scabbers then bites Ron, who chases after him, finally collecting him under the Whomping Willow. The black dog makes another appearance and attack

### 가중치 설정

`alpha` 값을 조정하면서 어떤 검색에 가중치를 줄지를 결정할 수 있음.

1에 가까워질수록 벡터 유사도 서치에 더 가중치를 주는 것이고
0에 가까워질수록 키워드 매칭 서치에 더 가중치를 주는 것이다.

In [71]:
response = collection.query.hybrid(
    query="food",
    alpha=0.25,
    limit=3,
)

for o in response.objects:
    print(o.properties)

{'content': 'Finally, at the end-of-year feast, the House Points totals are given: Gryffindor is in last place. However, Dumbledore gives a few "last-minute additions", granting points to Harry, Ron, and Hermione, so that Gryffindor wins the House Cup.', 'movie_id': 'tt0241527'}
{'content': "When Harry, Ron and Hermione learn that Buckbeak is to be executed, they visit Hagrid in his hut to console him. Hagrid has also found Ron's lost rat Scabbers and returns him to Ron. Ron had wrongly believed that his rat had been eaten by Hermione's cat. As the execution party approaches the hut, stones fly in through the window and they leave and run back to the edge of the castle, watching the execution from afar. Scabbers then bites Ron, who chases after him, finally collecting him under the Whomping Willow. The black dog makes another appearance and attacks Ron and drags him, along with Scabbers, into a hole at the tree's base. Harry and Hermione follow, finding a tunnel which leads them to the

### 가중치 설정 2

`content`^2를 통해 해당 속성에 두 배의 가중치를 줄 수 있다.  
현재는 movie_id보다 content에 더 중요도를 준 것이다. 

weaviate의 `hybrid` 쿼리는 벡터 검색 + 키워드 기반 검색을 결합해서 결과를 도출한다.

1. vector search
`query="food"`를 벡터로 변환 후에 `content` 필드 벡터와 비교 후 유사도 계산  
`content^2`는 2배의 가중치를 준다는 의미임.
2. keyword search
`movie_id`는 vectorize하지 않았기에 키워드 검색에만 사용됨.  
`content`는 텍스트 데이터기에 keyword search에도 사용됨.
3. `alpha` 가중치 조정
현재 `alpha`값을 0.25로 주었기 때문에 키워드 기반 검색 : 벡터 검색 = 3 : 1


In [72]:
response = collection.query.hybrid(
    query="food",
    query_properties=["content^2", "movie_id"],
    alpha=0.25,
    limit=3,
)

for o in response.objects:
    print(o.properties)

{'content': 'Finally, at the end-of-year feast, the House Points totals are given: Gryffindor is in last place. However, Dumbledore gives a few "last-minute additions", granting points to Harry, Ron, and Hermione, so that Gryffindor wins the House Cup.', 'movie_id': 'tt0241527'}
{'content': "When Harry, Ron and Hermione learn that Buckbeak is to be executed, they visit Hagrid in his hut to console him. Hagrid has also found Ron's lost rat Scabbers and returns him to Ron. Ron had wrongly believed that his rat had been eaten by Hermione's cat. As the execution party approaches the hut, stones fly in through the window and they leave and run back to the edge of the castle, watching the execution from afar. Scabbers then bites Ron, who chases after him, finally collecting him under the Whomping Willow. The black dog makes another appearance and attacks Ron and drags him, along with Scabbers, into a hole at the tree's base. Harry and Hermione follow, finding a tunnel which leads them to the

In [73]:
response = collection.query.hybrid(
    query="food",
    query_properties=["content"],
    alpha=0.25,
    limit=3,
)

for o in response.objects:
    print(o.properties)

{'content': 'Finally, at the end-of-year feast, the House Points totals are given: Gryffindor is in last place. However, Dumbledore gives a few "last-minute additions", granting points to Harry, Ron, and Hermione, so that Gryffindor wins the House Cup.', 'movie_id': 'tt0241527'}
{'content': "When Harry, Ron and Hermione learn that Buckbeak is to be executed, they visit Hagrid in his hut to console him. Hagrid has also found Ron's lost rat Scabbers and returns him to Ron. Ron had wrongly believed that his rat had been eaten by Hermione's cat. As the execution party approaches the hut, stones fly in through the window and they leave and run back to the edge of the castle, watching the execution from afar. Scabbers then bites Ron, who chases after him, finally collecting him under the Whomping Willow. The black dog makes another appearance and attacks Ron and drags him, along with Scabbers, into a hole at the tree's base. Harry and Hermione follow, finding a tunnel which leads them to the

# 전체 벡터 db 확인 코드

In [84]:
# After your existing code, add the following:

# Fetch all objects with their vectors
response = collection.query.fetch_objects(
    include_vector=True,
    limit=10000  # Adjust this value based on your data size
)

# Print the vectors
for obj in response.objects:
    print(f"Object ID: {obj.uuid}")
    print(f"Movie ID: {obj.properties['movie_id']}")
    print(f"Content: {obj.properties['content'][:50]}...")  # Print first 50 characters
    #print(f"Vector: {obj.vector}...")
    print("---")

# Print total number of objects retrieved
print(f"Total objects retrieved: {len(response.objects)}")

Object ID: 00b82064-e6d3-456a-81f5-85efcadb94b7
Movie ID: tt0926084
Content: Scrimgeour arrives at the Burrow with Albus Dumble...
---
Object ID: 0350eca3-9aed-4599-93da-82e861394200
Movie ID: tt0295297
Content: A few days later, Fred, George (James and Oliver P...
---
Object ID: 0561f4e1-580e-4183-8278-5ff2b4223163
Movie ID: tt0304141
Content: They throw stones through the window of Hagrid's h...
---
Object ID: 05c5452b-5bb3-434d-9395-54645e6807c5
Movie ID: tt0304141
Content: During a Divination class, Professor Trelawney (Em...
---
Object ID: 10c080ea-e8f5-4737-a4a0-6a55cdb8fe82
Movie ID: tt0241527
Content: Upon arrival, the Sorting Hat places Harry, Ron an...
---
Object ID: 10d15668-348f-4f21-a703-5351104749c2
Movie ID: tt0241527
Content: Harry wakes up in the hospital wing. Dumbledore re...
---
Object ID: 10fafa0e-7b8e-426c-82d7-61cadfb8d2d3
Movie ID: tt0417741
Content: A necklace is discovered in the tavern outside of ...
---
Object ID: 1a21e36b-ff16-4a73-ac0b-84d1080505f5
Movie I

movie_id로 filtering한 벡터 db 확인

In [38]:
# Filter by movie_id = 'tt0241527'
filter_condition = Filter.by_property("movie_id").equal("tt0241527")

# Fetch objects with the specified filter
response = collection.query.fetch_objects(
    filters=filter_condition,
    include_vector=True,
    limit=100  # Adjust the limit based on your needs
)

# Print the filtered results
if response.objects:
    for obj in response.objects:
        print(f"Object ID: {obj.uuid}")
        print(f"Movie ID: {obj.properties['movie_id']}")
        print(f"Content: {obj.properties['content'][:50]}...")  # Print first 50 characters
        print("---")

    # Print total number of filtered objects
    print(f"Total objects retrieved for movie_id 'tt0241527': {len(response.objects)}")
else:
    print("No objects found for movie_id 'tt0241527'.")

Object ID: 2ba8461e-c8cc-4c8a-81e7-a7d3c37d0400
Movie ID: tt0241527
Content: Lord Voldemort, an evil and powerful dark wizard, ...
---
Object ID: d0a17e88-c10b-4423-8a60-b19a94e7a2f6
Movie ID: tt0241527
Content: Shortly before Harry's 11th birthday, he receives ...
---
Object ID: 430e0f21-1193-458f-9828-672c881e6dde
Movie ID: tt0241527
Content: Upon arrival, the Sorting Hat places Harry, Ron an...
---
Object ID: 7cbad01b-4eee-4b47-8a70-c96c860fe31d
Movie ID: tt0241527
Content: Harry, Ron, Hermione explore Hogwarts late at nigh...
---
Object ID: 454cb712-d565-4c6d-a750-86c6af45f59f
Movie ID: tt0241527
Content: At Christmas, Harry receives an Invisibility Cloak...
---
Object ID: 6b68eef4-c96d-4ca0-848b-e7bd2ea2822f
Movie ID: tt0241527
Content: Harry sees Snape trying to get information from Qu...
---
Object ID: cb769526-779e-4a3c-9b8d-0387fb54772d
Movie ID: tt0241527
Content: Harry, Hermione, Ron and Draco are caught out late...
---
Object ID: 920002ae-be09-4bf0-bfa3-f99a0c14490f
Movie I

In [29]:
searchword="harry"

In [30]:
from weaviate.classes.query import MetadataQuery

response = collection.query.bm25(
            query=searchword,
            return_metadata=MetadataQuery(score=True),
            query_properties=["content", "movie_id"],
            limit=10
        )
res = []
# 오브젝트가 있으면
if response.objects:
   for object in response.objects:
      res.append(object.properties) # 반환 데이터에 추가

In [31]:
res

[{'movie_id': 'tt0241527',
  'content': "Harry wakes up in the hospital wing. Dumbledore reveals to Harry that Harry's mother died to protect Harry as an infant. Her pure, loving sacrifice provides Harry with an ancient magical protection from Voldemort's lethal spells and also prevents Voldemort from touching Harry without suffering terribly. Dumbledore also says that the Sorcerer's Stone has been destroyed to prevent future attempts by Voldemort to steal it."},
 {'content': "Harry learns from Snape's memories that Snape loved Harry's late mother, Lily, but despised his father, James, who had bullied him. Following her death, Snape worked secretly with Dumbledore to protect Harry from Voldemort because of his deep feelings for Lily. Harry also learns that Dumbledore's death at Snape's hands was planned between them, and that the Patronus doe he saw in the woods that led him to the sword had been conjured by Snape. Harry discovers that he himself became a Horcrux when Voldemort origina

In [41]:
response = query_engine.query("Dumbledore then takes Harry to the Burrow, where Harry reunites with his best friends Ron Weasley")

# embedding vector를 제외한 응답 데이터를 출력
def filter_response(response):
    if isinstance(response, dict):
        # 딕셔너리인 경우, "embedding" 키를 제외하고 반환
        return {k: filter_response(v) for k, v in response.items() if k != "embedding"}
    elif isinstance(response, list):
        # 리스트인 경우, 각 항목에 대해 재귀적으로 필터 적용
        return [filter_response(item) for item in response]
    else:
        return response  # 기본적으로 원래 값을 반환

# 필터링된 응답 출력
filtered_response = filter_response(response)
print(filtered_response)

Dumbledore then takes Harry to the Burrow, where Harry reunites with his best friends Ron Weasley.


In [42]:
response = collection.query.fetch_objects(
    filters=Filter.by_property("content").equal("Inside, Harry finally confronts Sirius Black.")
)
response

QueryReturn(objects=[Object(uuid=_WeaviateUUIDInt('02fa84f6-5078-4445-851b-897fd66b3360'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'content': "Inside, Harry finally confronts Sirius Black. As an illegal Animagus, Black can transform into any animal at will, thus he is also the infamous black dog. Professor Lupin, who had spotted the group on the confiscated Marauder's Map, suddenly bursts in and embraces his old friend Black. Confronted by Hermione, Lupin admits to being a werewolf. Lupin and Black then explain that Black is not the one who betrayed the Potters, rather it was Peter Pettigrew (Timothy Spall), who has been hiding for twelve years in his Animagus form as Scabbers, Ron's rat. He is Voldemort's servant, not Black, and he framed Black for his crimes. Meanwhile Snape bursts in also confronting Black but is disposed of by Harry. Harry is

전체 정보 확인 (embedding 제외)

In [43]:
collection = client.collections.get(class_name)
for item in collection.iterator():
    print(item.uuid, item.properties)

01619600-9fb2-48b6-957c-e3716a6a692c {'content': "Cedric Diggory, the other Hogwarts champion, informs Harry to submerge the egg in water and open it. There he will hear the clue. Harry does as told in the prefects' enormous collective bath. The clue mentions that Merpeople have taken something of Harry's and that they must retrieve it from the Black Lake. Just as the three are trying to find a way to breathe underwater, Ron and Hermione are called to McGonagall's office. Neville tells Harry about Gillyweed, which, if eaten, can let you grow webbed hands and feet and gills. During the second task, Harry follows a mermaid to where they have chained Ron, Hermione, Cho Chang, and Fleur's sister. Cho Chang is retrieved by Cedric, Victor, the Durmstrang champion, takes Hermione. Harry takes both Ron and Fleur's sister, since she did not show up.", 'movie_id': 'tt0330373'}
02fa84f6-5078-4445-851b-897fd66b3360 {'content': "Inside, Harry finally confronts Sirius Black. As an illegal Animagus, 