In [None]:
!pip install weaviate-client pandas requests tqdm

In [None]:
import weaviate

# Report which release of the Weaviate Python client is installed.
print(
    f"Your Weaviate client library version is: {weaviate.__version__}."
)

In [None]:
import weaviate
import os

# Read the Cohere API key from the environment instead of hardcoding it.
# A key written into a notebook is visible to everyone who can read the file,
# so any key that was previously committed here should be revoked and rotated.
cohere_api_key = os.environ.get("COHERE_APIKEY")
if not cohere_api_key:
    raise RuntimeError(
        "Set the COHERE_APIKEY environment variable to your Cohere API key "
        "before running this notebook."
    )

# The key is sent as a request header with every call made by this client.
headers = {"X-Cohere-Api-Key": cohere_api_key}

client = weaviate.connect_to_local(headers=headers)

In [None]:
# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Stop here with an explanatory AssertionError if the client is not ready,
# rather than failing later with a less obvious error.
assert client.is_ready(), (
    "Weaviate is not ready; check that the instance is running and reachable."
)

In [None]:
import json

# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Ask the connected instance for its meta information
metainfo = client.get_meta()

# Render it as indented JSON so it is readable, then print it
meta_json = json.dumps(metainfo, indent=2)
print(meta_json)

In [None]:
import weaviate
from weaviate.classes.config import Configure, Property, DataType
import os


# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Creating a collection whose name is already taken fails, so only create
# "Movies" when it does not exist yet. This keeps the cell safe to re-run.
if not client.collections.exists("Movies"):
    client.collections.create(
        name="Movies",
        properties=[
            Property(name="title", data_type=DataType.TEXT),
            Property(name="overview", data_type=DataType.TEXT),
            Property(name="vote_average", data_type=DataType.NUMBER),
            Property(name="genre_ids", data_type=DataType.INT_ARRAY),
            Property(name="release_date", data_type=DataType.DATE),
            Property(name="tmdb_id", data_type=DataType.INT),
        ],
        # Define the vectorizer module
        vector_config=Configure.Vectors.text2vec_cohere(model="embed-v4.0"),
        # Define the generative module
        generative_config=Configure.Generative.cohere(model="command-a-03-2025"),
    )
else:
    print('Collection "Movies" already exists; skipping creation.')

In [None]:
# The client opened earlier in the notebook is still connected at this point.
# Close it before rebinding `client`, otherwise that connection is leaked.
client.close()
client = weaviate.connect_to_local(headers=headers)

In [None]:
import weaviate
import pandas as pd
import requests
from datetime import datetime, timezone
import json
from weaviate.util import generate_uuid5
from tqdm import tqdm
import os

# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

data_url = "https://raw.githubusercontent.com/weaviate-tutorials/edu-datasets/main/movies_data_1990_2024.json"

try:
    # Download the dataset. Bound the wait with a timeout and surface HTTP
    # errors immediately instead of trying to parse an error page as JSON.
    resp = requests.get(data_url, timeout=60)
    resp.raise_for_status()
    df = pd.DataFrame(resp.json())

    # Configure collection object
    movies = client.collections.use("Movies")

    # Enter context manager
    with movies.batch.fixed_size(batch_size=200) as batch:
        # Loop through the data; `total` lets tqdm show progress against the row count
        for _, movie in tqdm(df.iterrows(), total=len(df)):
            # Convert data types
            # Convert a JSON date to `datetime` and add time zone information
            release_date = datetime.fromisoformat(movie["release_date"]).replace(tzinfo=timezone.utc)
            # Convert a JSON array to a list of integers
            genre_ids = json.loads(movie["genre_ids"])

            # Build the object payload
            movie_obj = {
                "title": movie["title"],
                "overview": movie["overview"],
                "vote_average": movie["vote_average"],
                "genre_ids": genre_ids,
                "release_date": release_date,
                "tmdb_id": movie["id"],
            }

            # Add object to batch queue; the UUID is derived from the movie id,
            # so the same movie always gets the same UUID
            batch.add_object(
                properties=movie_obj,
                uuid=generate_uuid5(movie["id"])
            )
            # Batcher automatically sends batches

    # Check for failed objects
    failed_objects = movies.batch.failed_objects
    if len(failed_objects) > 0:
        print(f"Failed to import {len(failed_objects)} objects")
finally:
    # Always release the connection, even if the download or import failed
    client.close()

In [None]:
# Reconnect to the local Weaviate instance: the previous cell ended with client.close().
client = weaviate.connect_to_local(headers=headers)

In [None]:
import weaviate
from weaviate.classes.query import Filter, MetadataQuery
import os


# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Get a handle to the "Movies" collection
movies = client.collections.use("Movies")

# Run a BM25 search for "history", asking for at most five objects and their scores
response = movies.query.bm25(
    query="history",
    limit=5,
    return_metadata=MetadataQuery(score=True),
)

# Show each hit
for o in response.objects:
    props = o.properties
    # The release date is a datetime object, so `.year` gives the release year
    print(props["title"], props["release_date"].year)
    # BM25 score of this object for the query
    print(f"BM25 score: {o.metadata.score:.3f}\n")

client.close()

In [None]:
# Reconnect to the local Weaviate instance: the previous cell ended with client.close().
client = weaviate.connect_to_local(headers=headers)

In [None]:
import weaviate
from weaviate.classes.query import Filter, MetadataQuery
import os


# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Get a handle to the "Movies" collection
movies = client.collections.use("Movies")

# Run a hybrid search for "history", asking for at most five objects and their scores
response = movies.query.hybrid(
    query="history",
    limit=5,
    return_metadata=MetadataQuery(score=True),
)

# Show each hit
for o in response.objects:
    props = o.properties
    # The release date is a datetime object, so `.year` gives the release year
    print(props["title"], props["release_date"].year)
    # Hybrid search score of this object for the query
    print(f"Hybrid score: {o.metadata.score:.3f}\n")

client.close()

In [None]:
# Reconnect to the local Weaviate instance: the previous cell ended with client.close().
client = weaviate.connect_to_local(headers=headers)


In [None]:
import weaviate
from weaviate.classes.query import Filter, MetadataQuery
import os

from datetime import datetime


# Local import so this cell does not depend on `timezone` from an earlier cell
from datetime import datetime, timezone

# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Configure collection object
movies = client.collections.use("Movies")

# Lower bound for the release date. The import cell stored release dates as
# timezone-aware UTC datetimes, so make the bound explicitly UTC as well
# instead of passing a naive datetime.
released_after = datetime(2020, 1, 1, tzinfo=timezone.utc)

# Perform query
response = movies.query.near_text(
    query="dystopian future",
    limit=5,
    return_metadata=MetadataQuery(distance=True),
    filters=Filter.by_property("release_date").greater_than(released_after)
)

# Inspect the response
for o in response.objects:
    print(o.properties["title"], o.properties["release_date"].year)  # Print the title and release year (note the release date is a datetime object)
    print(f"Distance to query: {o.metadata.distance:.3f}\n")  # Print the distance of the object from the query

client.close()

In [None]:
# Reconnect to the local Weaviate instance: the previous cell ended with client.close().
client = weaviate.connect_to_local(headers=headers)


In [None]:
import os
import weaviate

# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Get a handle to the "Movies" collection
movies = client.collections.use("Movies")

# Search for "dystopian future" (at most five objects) and apply the prompt to
# each returned object; {title} is a placeholder for the object's title property
response = movies.generate.near_text(
    query="dystopian future", limit=5, single_prompt="Translate this into French: {title}"
)

# Show each object's title followed by the text generated for it
for o in response.objects:
    title = o.properties["title"]
    print(title)
    # Generated text (the title, in French)
    print(o.generated)

client.close()

In [None]:
# Reconnect to the local Weaviate instance: the previous cell ended with client.close().
client = weaviate.connect_to_local(headers=headers)

In [None]:
import os
import weaviate

# Instantiate your client (not shown). e.g.:
# client = weaviate.connect_to_weaviate_cloud(...) or
# client = weaviate.connect_to_local(...)

# Get a handle to the "Movies" collection
movies = client.collections.use("Movies")

# Search for "dystopian future" (at most five objects) and run one task over
# the whole result set
response = movies.generate.near_text(
    query="dystopian future",
    limit=5,
    grouped_task="What do these movies have in common?",
    # grouped_properties=["title", "overview"]  # Optional parameter; for reducing prompt length
)

# List the titles of the returned objects
for o in response.objects:
    title = o.properties["title"]
    print(title)

# The grouped output belongs to the response as a whole, not to any single object
print(response.generated)  # The commonalities between the movies

client.close()