Nothing special here - just a file to experiment with.

If you have some data in weaviate, no matter the schema, 
you can run this script to view and explore the data in nomic ATLAS.

In [2]:
import weaviate
import json

print("Let's a go!")
client = weaviate.Client("http://localhost:8080")
print("Client created")

# list the schemas
schema = client.schema.get()
print(json.dumps(schema, indent=4))

result = (
    client.query
    .aggregate("DriftBenchmark")
    .with_fields("meta { count }")
    .do()
)

print(json.dumps(result, indent=4))



Let's a go!
Client created
{
    "classes": [
        {
            "class": "DriftBenchmark",
            "invertedIndexConfig": {
                "bm25": {
                    "b": 0.75,
                    "k1": 1.2
                },
                "cleanupIntervalSeconds": 60,
                "stopwords": {
                    "additions": null,
                    "preset": "en",
                    "removals": null
                }
            },
            "moduleConfig": {
                "text2vec-transformers": {
                    "model": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
                    "options": {
                        "waitForModel": false
                    },
                    "poolingStrategy": "masked_mean",
                    "vectorizeClassName": false
                }
            },
            "multiTenancyConfig": {
                "enabled": false
            },
            "properties": [
                {
                    

In [3]:
import weaviate
from nomic import AtlasProject
import numpy as np
import nomic
import os
from dotenv import load_dotenv
load_dotenv()

api_key = os.getenv('NOMIC_API_KEY')
nomic.login(api_key)  
nomic.__init__(name="DriftDbDev0")

print("Let's a go!")

print("Connecting to weaviate instance on localhost:8080...")
client = weaviate.Client("http://localhost:8080")
print("Client created")

schema = client.schema.get()

classes = []
props = []
for c in schema["classes"]:
    classes.append(c["class"])
    temp = []
    for p in c["properties"]:
        if p["dataType"] == ["text"]:
            temp.append(p["name"])
    props.append(temp)


def get_batch_with_cursor(
    client, class_name, class_properties, batch_size, cursor=None
):
    query = (
        client.query.get(class_name, class_properties)
        .with_additional(["vector", "id"])
        .with_limit(batch_size)
    )

    if cursor is not None:
        return query.with_after(cursor).do()
    else:
        return query.do()


for c, p in zip(classes, props):
    project = AtlasProject(
        name="BenchmarkDev0",
        unique_id_field="id",
        modality="embedding",
    )
    count = 0
    cursor = None
    while True:
        response = get_batch_with_cursor(client, c, p, 10000, cursor)
        count += 1
        if len(response["data"]["Get"][c]) == 0:
            break
        cursor = response["data"]["Get"][c][-1]["_additional"]["id"]
        vectors = []
        for i in response["data"]["Get"][c]:
            vectors.append(i["_additional"]["vector"])

        embeddings = np.array(vectors)
        data = []
        not_data = ["_additional"]
        un_data = ["vector"]
        for i in response["data"]["Get"][c]:
            j = {key: value for key, value in i.items() if key not in not_data}
            k = {
                key: value
                for key, value in i["_additional"].items()
                if key not in un_data
            }
            j = j | k
            data.append(j)
        with project.wait_for_project_lock():
            project.add_embeddings(
                embeddings=embeddings,
                data=data,
            )
    project.rebuild_maps()
    project.create_index(
        name=c,
        colorable_fields=p,
        build_topic_model=True,
    )

Let's a go!
Connecting to weaviate instance on localhost:8080...
Client created


[32m2023-08-02 23:03:48.677[0m | [1mINFO    [0m | [36mnomic.project[0m:[36m_create_project[0m:[36m779[0m - [1mCreating project `BenchmarkDev1` in organization `zaidanseiko`[0m


TypeError: Unknown type: completion_sent_len null