# Importação
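Defina as variáveis de ambiente `WEAVIATE_URL` e `WEAVIATE_APIKEY` para utilizar o Weaviate Cloud, ou execute uma [instância Docker local](https://weaviate.io/developers/weaviate/installation/docker-compose). Em ambos os casos, defina também `OPENAI_API_KEY`, que é enviada ao Weaviate para uso do vetorizador e do modelo generativo da OpenAI.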

In [None]:
import weaviate
import os

# Connect to Weaviate.
#   - WEAVIATE_URL set   -> Weaviate Cloud, authenticated with WEAVIATE_APIKEY.
#   - WEAVIATE_URL unset -> local instance (e.g. docker-compose) on the default ports.
# Either way the OpenAI key is forwarded as the "X-Openai-Api-Key" request header.

openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    # Fail here with a readable message instead of sending a None header value.
    raise RuntimeError("Environment variable OPENAI_API_KEY is not set")
openai_headers = {"X-Openai-Api-Key": openai_api_key}

weaviate_url = os.getenv("WEAVIATE_URL")
if weaviate_url:
    weaviate_api_key = os.getenv("WEAVIATE_APIKEY")
    if not weaviate_api_key:
        raise RuntimeError(
            "WEAVIATE_URL is set but WEAVIATE_APIKEY is not; "
            "both are required to use Weaviate Cloud"
        )
    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=weaviate_url,
        auth_credentials=weaviate.auth.AuthApiKey(weaviate_api_key),
        headers=openai_headers,
    )
else:
    client = weaviate.connect_to_local(headers=openai_headers)

# Report both versions so client/server mismatches are visible in the output.
print(f"Client: {weaviate.__version__}, Server: {client.get_meta().get('version')}")

Client: 4.11.1, Server: 1.28.6


In [7]:
from weaviate import classes as wvc

# Drop any existing "Index" collection first so this cell can be re-run
# and always ends with a freshly created collection.
client.collections.delete("Index")

# OpenAI module settings: one model for embeddings, one for generative queries.
openai_vectorizer = wvc.config.Configure.Vectorizer.text2vec_openai(
    model="text-embedding-3-small",
)
openai_generative = wvc.config.Configure.Generative.openai(model="gpt-4o")

# Kept as the last expression so the created collection is shown as the cell output.
client.collections.create(
    "Index",
    vectorizer_config=openai_vectorizer,
    generative_config=openai_generative,
)

<weaviate.collections.collection.sync.Collection at 0x107033b00>

In [8]:
# Load the CSV export and push every row into the "Index" collection.
import pandas as pd
from weaviate.util import generate_uuid5

collection = client.collections.get("Index")

df = pd.read_csv("trabalhos_60700009.csv")
# Lower-case the column names before they are used as object property names.
df.columns = [str.lower(column) for column in df.columns]
# For a quick trial run, import only a slice of the data:
# df = df.head(500)

# Iterate over the rows only; the positional index from iterrows() is not needed.
rows = (row for _, row in df.iterrows())

with collection.batch.dynamic() as batch:
    for row in rows:
        # The UUID is derived from the "indice" column, so the same CSV row
        # always maps to the same object id.
        batch.add_object(
            properties=dict(row),
            uuid=generate_uuid5(row["indice"]),
        )

# Objects that failed to import are collected on the batch after it closes.
failed = collection.batch.failed_objects
print(failed if failed else "Imported with success")

Imported with success


In [9]:
# Sanity check: ask the server how many objects the collection now holds.
collection.aggregate.over_all(total_count=True)

AggregateReturn(properties={}, total_count=4785)

Agora abra o arquivo [queries.ipynb](queries.ipynb)